diff options
Diffstat (limited to 'llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp')
| -rw-r--r-- | llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 385 |
1 files changed, 224 insertions, 161 deletions
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 4e9be35001ad..26c309eed800 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -34,7 +34,9 @@ using namespace llvm; using VectorParts = SmallVector<Value *, 2>; +namespace llvm { extern cl::opt<bool> EnableVPlanNativePath; +} #define LV_NAME "loop-vectorize" #define DEBUG_TYPE LV_NAME @@ -50,14 +52,16 @@ bool VPRecipeBase::mayWriteToMemory() const { ->mayWriteToMemory(); case VPBranchOnMaskSC: case VPScalarIVStepsSC: + case VPPredInstPHISC: return false; - case VPWidenIntOrFpInductionSC: + case VPBlendSC: + case VPReductionSC: case VPWidenCanonicalIVSC: + case VPWidenCastSC: + case VPWidenGEPSC: + case VPWidenIntOrFpInductionSC: case VPWidenPHISC: - case VPBlendSC: case VPWidenSC: - case VPWidenGEPSC: - case VPReductionSC: case VPWidenSelectSC: { const Instruction *I = dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue()); @@ -82,14 +86,16 @@ bool VPRecipeBase::mayReadFromMemory() const { ->mayReadFromMemory(); case VPBranchOnMaskSC: case VPScalarIVStepsSC: + case VPPredInstPHISC: return false; - case VPWidenIntOrFpInductionSC: + case VPBlendSC: + case VPReductionSC: case VPWidenCanonicalIVSC: + case VPWidenCastSC: + case VPWidenGEPSC: + case VPWidenIntOrFpInductionSC: case VPWidenPHISC: - case VPBlendSC: case VPWidenSC: - case VPWidenGEPSC: - case VPReductionSC: case VPWidenSelectSC: { const Instruction *I = dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue()); @@ -108,16 +114,20 @@ bool VPRecipeBase::mayHaveSideEffects() const { case VPDerivedIVSC: case VPPredInstPHISC: return false; - case VPWidenIntOrFpInductionSC: - case VPWidenPointerInductionSC: + case VPWidenCallSC: + return cast<Instruction>(getVPSingleValue()->getUnderlyingValue()) + ->mayHaveSideEffects(); + case VPBlendSC: + case VPReductionSC: + case VPScalarIVStepsSC: case VPWidenCanonicalIVSC: + case VPWidenCastSC: + case VPWidenGEPSC: + case VPWidenIntOrFpInductionSC: case VPWidenPHISC: - case VPBlendSC: + case VPWidenPointerInductionSC: case VPWidenSC: - case VPWidenGEPSC: - case VPReductionSC: - case VPWidenSelectSC: - case VPScalarIVStepsSC: { + case VPWidenSelectSC: { const Instruction *I = dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue()); (void)I; @@ -125,6 +135,13 @@ bool VPRecipeBase::mayHaveSideEffects() const { "underlying instruction has side-effects"); return false; } + case VPWidenMemoryInstructionSC: + assert(cast<VPWidenMemoryInstructionRecipe>(this) + ->getIngredient() + .mayHaveSideEffects() == mayWriteToMemory() && + "mayHaveSideffects result for ingredient differs from this " + "implementation"); + return mayWriteToMemory(); case VPReplicateSC: { auto *R = cast<VPReplicateRecipe>(this); return R->getUnderlyingInstr()->mayHaveSideEffects(); @@ -143,6 +160,16 @@ void VPLiveOut::fixPhi(VPlan &Plan, VPTransformState &State) { State.Builder.GetInsertBlock()); } +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void VPLiveOut::print(raw_ostream &O, VPSlotTracker &SlotTracker) const { + O << "Live-out "; + getPhi()->printAsOperand(O); + O << " = "; + getOperand(0)->printAsOperand(O, SlotTracker); + O << "\n"; +} +#endif + void VPRecipeBase::insertBefore(VPRecipeBase *InsertPos) { assert(!Parent && "Recipe already in some VPBasicBlock"); assert(InsertPos->getParent() && @@ -189,55 +216,44 @@ void VPRecipeBase::moveBefore(VPBasicBlock &BB, insertBefore(BB, I); } -void VPInstruction::generateInstruction(VPTransformState &State, - unsigned Part) { +Value *VPInstruction::generateInstruction(VPTransformState &State, + unsigned Part) { IRBuilderBase &Builder = State.Builder; Builder.SetCurrentDebugLocation(DL); if (Instruction::isBinaryOp(getOpcode())) { Value *A = State.get(getOperand(0), Part); Value *B = State.get(getOperand(1), Part); - Value *V = - Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B, Name); - State.set(this, V, Part); - return; + return Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B, Name); } switch (getOpcode()) { case VPInstruction::Not: { Value *A = State.get(getOperand(0), Part); - Value *V = Builder.CreateNot(A, Name); - State.set(this, V, Part); - break; + return Builder.CreateNot(A, Name); } case VPInstruction::ICmpULE: { Value *IV = State.get(getOperand(0), Part); Value *TC = State.get(getOperand(1), Part); - Value *V = Builder.CreateICmpULE(IV, TC, Name); - State.set(this, V, Part); - break; + return Builder.CreateICmpULE(IV, TC, Name); } case Instruction::Select: { Value *Cond = State.get(getOperand(0), Part); Value *Op1 = State.get(getOperand(1), Part); Value *Op2 = State.get(getOperand(2), Part); - Value *V = Builder.CreateSelect(Cond, Op1, Op2, Name); - State.set(this, V, Part); - break; + return Builder.CreateSelect(Cond, Op1, Op2, Name); } case VPInstruction::ActiveLaneMask: { // Get first lane of vector induction variable. Value *VIVElem0 = State.get(getOperand(0), VPIteration(Part, 0)); // Get the original loop tripcount. - Value *ScalarTC = State.get(getOperand(1), Part); + Value *ScalarTC = State.get(getOperand(1), VPIteration(Part, 0)); auto *Int1Ty = Type::getInt1Ty(Builder.getContext()); auto *PredTy = VectorType::get(Int1Ty, State.VF); - Instruction *Call = Builder.CreateIntrinsic( - Intrinsic::get_active_lane_mask, {PredTy, ScalarTC->getType()}, - {VIVElem0, ScalarTC}, nullptr, Name); - State.set(this, Call, Part); - break; + return Builder.CreateIntrinsic(Intrinsic::get_active_lane_mask, + {PredTy, ScalarTC->getType()}, + {VIVElem0, ScalarTC}, nullptr, Name); } case VPInstruction::FirstOrderRecurrenceSplice: { // Generate code to combine the previous and current values in vector v3. @@ -255,18 +271,22 @@ void VPInstruction::generateInstruction(VPTransformState &State, // For the first part, use the recurrence phi (v1), otherwise v2. auto *V1 = State.get(getOperand(0), 0); Value *PartMinus1 = Part == 0 ? V1 : State.get(getOperand(1), Part - 1); - if (!PartMinus1->getType()->isVectorTy()) { - State.set(this, PartMinus1, Part); - } else { - Value *V2 = State.get(getOperand(1), Part); - State.set(this, Builder.CreateVectorSplice(PartMinus1, V2, -1, Name), - Part); - } - break; + if (!PartMinus1->getType()->isVectorTy()) + return PartMinus1; + Value *V2 = State.get(getOperand(1), Part); + return Builder.CreateVectorSplice(PartMinus1, V2, -1, Name); + } + case VPInstruction::CalculateTripCountMinusVF: { + Value *ScalarTC = State.get(getOperand(0), {0, 0}); + Value *Step = + createStepForVF(Builder, ScalarTC->getType(), State.VF, State.UF); + Value *Sub = Builder.CreateSub(ScalarTC, Step); + Value *Cmp = Builder.CreateICmp(CmpInst::Predicate::ICMP_UGT, ScalarTC, Step); + Value *Zero = ConstantInt::get(ScalarTC->getType(), 0); + return Builder.CreateSelect(Cmp, Sub, Zero); } case VPInstruction::CanonicalIVIncrement: case VPInstruction::CanonicalIVIncrementNUW: { - Value *Next = nullptr; if (Part == 0) { bool IsNUW = getOpcode() == VPInstruction::CanonicalIVIncrementNUW; auto *Phi = State.get(getOperand(0), 0); @@ -274,34 +294,26 @@ void VPInstruction::generateInstruction(VPTransformState &State, // elements) times the unroll factor (num of SIMD instructions). Value *Step = createStepForVF(Builder, Phi->getType(), State.VF, State.UF); - Next = Builder.CreateAdd(Phi, Step, Name, IsNUW, false); - } else { - Next = State.get(this, 0); + return Builder.CreateAdd(Phi, Step, Name, IsNUW, false); } - - State.set(this, Next, Part); - break; + return State.get(this, 0); } case VPInstruction::CanonicalIVIncrementForPart: case VPInstruction::CanonicalIVIncrementForPartNUW: { bool IsNUW = getOpcode() == VPInstruction::CanonicalIVIncrementForPartNUW; auto *IV = State.get(getOperand(0), VPIteration(0, 0)); - if (Part == 0) { - State.set(this, IV, Part); - break; - } + if (Part == 0) + return IV; // The canonical IV is incremented by the vectorization factor (num of SIMD // elements) times the unroll part. Value *Step = createStepForVF(Builder, IV->getType(), State.VF, Part); - Value *Next = Builder.CreateAdd(IV, Step, Name, IsNUW, false); - State.set(this, Next, Part); - break; + return Builder.CreateAdd(IV, Step, Name, IsNUW, false); } case VPInstruction::BranchOnCond: { if (Part != 0) - break; + return nullptr; Value *Cond = State.get(getOperand(0), VPIteration(Part, 0)); VPRegionBlock *ParentRegion = getParent()->getParent(); @@ -318,11 +330,11 @@ void VPInstruction::generateInstruction(VPTransformState &State, CondBr->setSuccessor(0, nullptr); Builder.GetInsertBlock()->getTerminator()->eraseFromParent(); - break; + return CondBr; } case VPInstruction::BranchOnCount: { if (Part != 0) - break; + return nullptr; // First create the compare. Value *IV = State.get(getOperand(0), Part); Value *TC = State.get(getOperand(1), Part); @@ -342,7 +354,7 @@ void VPInstruction::generateInstruction(VPTransformState &State, State.CFG.VPBB2IRBB[Header]); CondBr->setSuccessor(0, nullptr); Builder.GetInsertBlock()->getTerminator()->eraseFromParent(); - break; + return CondBr; } default: llvm_unreachable("Unsupported opcode for instruction"); @@ -353,8 +365,13 @@ void VPInstruction::execute(VPTransformState &State) { assert(!State.Instance && "VPInstruction executing an Instance"); IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder); State.Builder.setFastMathFlags(FMF); - for (unsigned Part = 0; Part < State.UF; ++Part) - generateInstruction(State, Part); + for (unsigned Part = 0; Part < State.UF; ++Part) { + Value *GeneratedValue = generateInstruction(State, Part); + if (!hasResult()) + continue; + assert(GeneratedValue && "generateInstruction must produce a value"); + State.set(this, GeneratedValue, Part); + } } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) @@ -400,6 +417,9 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent, case VPInstruction::BranchOnCond: O << "branch-on-cond"; break; + case VPInstruction::CalculateTripCountMinusVF: + O << "TC > VF ? TC - VF : 0"; + break; case VPInstruction::CanonicalIVIncrementForPart: O << "VF * Part + "; break; @@ -438,18 +458,19 @@ void VPInstruction::setFastMathFlags(FastMathFlags FMFNew) { } void VPWidenCallRecipe::execute(VPTransformState &State) { + assert(State.VF.isVector() && "not widening"); auto &CI = *cast<CallInst>(getUnderlyingInstr()); assert(!isa<DbgInfoIntrinsic>(CI) && "DbgInfoIntrinsic should have been dropped during VPlan construction"); State.setDebugLocFromInst(&CI); - SmallVector<Type *, 4> Tys; - for (Value *ArgOperand : CI.args()) - Tys.push_back( - ToVectorTy(ArgOperand->getType(), State.VF.getKnownMinValue())); - for (unsigned Part = 0; Part < State.UF; ++Part) { - SmallVector<Type *, 2> TysForDecl = {CI.getType()}; + SmallVector<Type *, 2> TysForDecl; + // Add return type if intrinsic is overloaded on it. + if (isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, -1)) { + TysForDecl.push_back( + VectorType::get(CI.getType()->getScalarType(), State.VF)); + } SmallVector<Value *, 4> Args; for (const auto &I : enumerate(operands())) { // Some intrinsics have a scalar argument - don't replace it with a @@ -468,21 +489,16 @@ void VPWidenCallRecipe::execute(VPTransformState &State) { Function *VectorF; if (VectorIntrinsicID != Intrinsic::not_intrinsic) { // Use vector version of the intrinsic. - if (State.VF.isVector()) - TysForDecl[0] = - VectorType::get(CI.getType()->getScalarType(), State.VF); Module *M = State.Builder.GetInsertBlock()->getModule(); VectorF = Intrinsic::getDeclaration(M, VectorIntrinsicID, TysForDecl); assert(VectorF && "Can't retrieve vector intrinsic."); } else { - // Use vector version of the function call. - const VFShape Shape = VFShape::get(CI, State.VF, false /*HasGlobalPred*/); #ifndef NDEBUG - assert(VFDatabase(CI).getVectorizedFunction(Shape) != nullptr && - "Can't create vector function."); + assert(Variant != nullptr && "Can't create vector function."); #endif - VectorF = VFDatabase(CI).getVectorizedFunction(Shape); + VectorF = Variant; } + SmallVector<OperandBundleDef, 1> OpBundles; CI.getOperandBundlesAsDefs(OpBundles); CallInst *V = State.Builder.CreateCall(VectorF, Args, OpBundles); @@ -514,8 +530,12 @@ void VPWidenCallRecipe::print(raw_ostream &O, const Twine &Indent, if (VectorIntrinsicID) O << " (using vector intrinsic)"; - else - O << " (using library function)"; + else { + O << " (using library function"; + if (Variant->hasName()) + O << ": " << Variant->getName(); + O << ")"; + } } void VPWidenSelectRecipe::print(raw_ostream &O, const Twine &Indent, @@ -528,7 +548,7 @@ void VPWidenSelectRecipe::print(raw_ostream &O, const Twine &Indent, getOperand(1)->printAsOperand(O, SlotTracker); O << ", "; getOperand(2)->printAsOperand(O, SlotTracker); - O << (InvariantCond ? " (condition is loop invariant)" : ""); + O << (isInvariantCond() ? " (condition is loop invariant)" : ""); } #endif @@ -541,10 +561,10 @@ void VPWidenSelectRecipe::execute(VPTransformState &State) { // We have to take the 'vectorized' value and pick the first lane. // Instcombine will make this a no-op. auto *InvarCond = - InvariantCond ? State.get(getOperand(0), VPIteration(0, 0)) : nullptr; + isInvariantCond() ? State.get(getCond(), VPIteration(0, 0)) : nullptr; for (unsigned Part = 0; Part < State.UF; ++Part) { - Value *Cond = InvarCond ? InvarCond : State.get(getOperand(0), Part); + Value *Cond = InvarCond ? InvarCond : State.get(getCond(), Part); Value *Op0 = State.get(getOperand(1), Part); Value *Op1 = State.get(getOperand(2), Part); Value *Sel = State.Builder.CreateSelect(Cond, Op0, Op1); @@ -553,6 +573,33 @@ void VPWidenSelectRecipe::execute(VPTransformState &State) { } } +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void VPRecipeWithIRFlags::printFlags(raw_ostream &O) const { + switch (OpType) { + case OperationType::PossiblyExactOp: + if (ExactFlags.IsExact) + O << " exact"; + break; + case OperationType::OverflowingBinOp: + if (WrapFlags.HasNUW) + O << " nuw"; + if (WrapFlags.HasNSW) + O << " nsw"; + break; + case OperationType::FPMathOp: + getFastMathFlags().print(O); + break; + case OperationType::GEPOp: + if (GEPFlags.IsInBounds) + O << " inbounds"; + break; + case OperationType::Other: + break; + } + O << " "; +} +#endif + void VPWidenRecipe::execute(VPTransformState &State) { auto &I = *cast<Instruction>(getUnderlyingValue()); auto &Builder = State.Builder; @@ -592,17 +639,8 @@ void VPWidenRecipe::execute(VPTransformState &State) { Value *V = Builder.CreateNAryOp(I.getOpcode(), Ops); - if (auto *VecOp = dyn_cast<Instruction>(V)) { - VecOp->copyIRFlags(&I); - - // If the instruction is vectorized and was in a basic block that needed - // predication, we can't propagate poison-generating flags (nuw/nsw, - // exact, etc.). The control flow has been linearized and the - // instruction is no longer guarded by the predicate, which could make - // the flag properties to no longer hold. - if (State.MayGeneratePoisonRecipes.contains(this)) - VecOp->dropPoisonGeneratingFlags(); - } + if (auto *VecOp = dyn_cast<Instruction>(V)) + setFlags(VecOp); // Use this vector value for all users of the original instruction. State.set(this, V, Part); @@ -646,35 +684,6 @@ void VPWidenRecipe::execute(VPTransformState &State) { break; } - - case Instruction::ZExt: - case Instruction::SExt: - case Instruction::FPToUI: - case Instruction::FPToSI: - case Instruction::FPExt: - case Instruction::PtrToInt: - case Instruction::IntToPtr: - case Instruction::SIToFP: - case Instruction::UIToFP: - case Instruction::Trunc: - case Instruction::FPTrunc: - case Instruction::BitCast: { - auto *CI = cast<CastInst>(&I); - State.setDebugLocFromInst(CI); - - /// Vectorize casts. - Type *DestTy = (State.VF.isScalar()) - ? CI->getType() - : VectorType::get(CI->getType(), State.VF); - - for (unsigned Part = 0; Part < State.UF; ++Part) { - Value *A = State.get(getOperand(0), Part); - Value *Cast = Builder.CreateCast(CI->getOpcode(), A, DestTy); - State.set(this, Cast, Part); - State.addMetadata(Cast, &I); - } - break; - } default: // This instruction is not vectorized by simple widening. LLVM_DEBUG(dbgs() << "LV: Found an unhandled instruction: " << I); @@ -687,11 +696,40 @@ void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent, O << Indent << "WIDEN "; printAsOperand(O, SlotTracker); const Instruction *UI = getUnderlyingInstr(); - O << " = " << UI->getOpcodeName() << " "; + O << " = " << UI->getOpcodeName(); + printFlags(O); if (auto *Cmp = dyn_cast<CmpInst>(UI)) - O << CmpInst::getPredicateName(Cmp->getPredicate()) << " "; + O << Cmp->getPredicate() << " "; printOperands(O, SlotTracker); } +#endif + +void VPWidenCastRecipe::execute(VPTransformState &State) { + auto *I = cast_or_null<Instruction>(getUnderlyingValue()); + if (I) + State.setDebugLocFromInst(I); + auto &Builder = State.Builder; + /// Vectorize casts. + assert(State.VF.isVector() && "Not vectorizing?"); + Type *DestTy = VectorType::get(getResultType(), State.VF); + + for (unsigned Part = 0; Part < State.UF; ++Part) { + Value *A = State.get(getOperand(0), Part); + Value *Cast = Builder.CreateCast(Instruction::CastOps(Opcode), A, DestTy); + State.set(this, Cast, Part); + State.addMetadata(Cast, I); + } +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void VPWidenCastRecipe::print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const { + O << Indent << "WIDEN-CAST "; + printAsOperand(O, SlotTracker); + O << " = " << Instruction::getOpcodeName(Opcode) << " "; + printOperands(O, SlotTracker); + O << " to " << *getResultType(); +} void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const { @@ -710,8 +748,13 @@ void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, const Twine &Indent, #endif bool VPWidenIntOrFpInductionRecipe::isCanonical() const { + // The step may be defined by a recipe in the preheader (e.g. if it requires + // SCEV expansion), but for the canonical induction the step is required to be + // 1, which is represented as live-in. + if (getStepValue()->getDefiningRecipe()) + return false; + auto *StepC = dyn_cast<ConstantInt>(getStepValue()->getLiveInIRValue()); auto *StartC = dyn_cast<ConstantInt>(getStartValue()->getLiveInIRValue()); - auto *StepC = dyn_cast<SCEVConstant>(getInductionDescriptor().getStep()); return StartC && StartC->isZero() && StepC && StepC->isOne(); } @@ -743,6 +786,7 @@ void VPScalarIVStepsRecipe::print(raw_ostream &O, const Twine &Indent, #endif void VPWidenGEPRecipe::execute(VPTransformState &State) { + assert(State.VF.isVector() && "not widening"); auto *GEP = cast<GetElementPtrInst>(getUnderlyingInstr()); // Construct a vector GEP by widening the operands of the scalar GEP as // necessary. We mark the vector GEP 'inbounds' if appropriate. A GEP @@ -750,7 +794,7 @@ void VPWidenGEPRecipe::execute(VPTransformState &State) { // is vector-typed. Thus, to keep the representation compact, we only use // vector-typed operands for loop-varying values. - if (State.VF.isVector() && IsPtrLoopInvariant && IsIndexLoopInvariant.all()) { + if (areAllOperandsInvariant()) { // If we are vectorizing, but the GEP has only loop-invariant operands, // the GEP we build (by only using vector-typed operands for // loop-varying values) would be a scalar pointer. Thus, to ensure we @@ -763,9 +807,15 @@ void VPWidenGEPRecipe::execute(VPTransformState &State) { // required. We would add the scalarization decision to // collectLoopScalars() and teach getVectorValue() to broadcast // the lane-zero scalar value. - auto *Clone = State.Builder.Insert(GEP->clone()); + SmallVector<Value *> Ops; + for (unsigned I = 0, E = getNumOperands(); I != E; I++) + Ops.push_back(State.get(getOperand(I), VPIteration(0, 0))); + + auto *NewGEP = + State.Builder.CreateGEP(GEP->getSourceElementType(), Ops[0], + ArrayRef(Ops).drop_front(), "", isInBounds()); for (unsigned Part = 0; Part < State.UF; ++Part) { - Value *EntryPart = State.Builder.CreateVectorSplat(State.VF, Clone); + Value *EntryPart = State.Builder.CreateVectorSplat(State.VF, NewGEP); State.set(this, EntryPart, Part); State.addMetadata(EntryPart, GEP); } @@ -780,7 +830,7 @@ void VPWidenGEPRecipe::execute(VPTransformState &State) { for (unsigned Part = 0; Part < State.UF; ++Part) { // The pointer operand of the new GEP. If it's loop-invariant, we // won't broadcast it. - auto *Ptr = IsPtrLoopInvariant + auto *Ptr = isPointerLoopInvariant() ? State.get(getOperand(0), VPIteration(0, 0)) : State.get(getOperand(0), Part); @@ -789,24 +839,16 @@ void VPWidenGEPRecipe::execute(VPTransformState &State) { SmallVector<Value *, 4> Indices; for (unsigned I = 1, E = getNumOperands(); I < E; I++) { VPValue *Operand = getOperand(I); - if (IsIndexLoopInvariant[I - 1]) + if (isIndexLoopInvariant(I - 1)) Indices.push_back(State.get(Operand, VPIteration(0, 0))); else Indices.push_back(State.get(Operand, Part)); } - // If the GEP instruction is vectorized and was in a basic block that - // needed predication, we can't propagate the poison-generating 'inbounds' - // flag. The control flow has been linearized and the GEP is no longer - // guarded by the predicate, which could make the 'inbounds' properties to - // no longer hold. - bool IsInBounds = - GEP->isInBounds() && State.MayGeneratePoisonRecipes.count(this) == 0; - // Create the new GEP. Note that this GEP may be a scalar if VF == 1, // but it should be a vector, otherwise. auto *NewGEP = State.Builder.CreateGEP(GEP->getSourceElementType(), Ptr, - Indices, "", IsInBounds); + Indices, "", isInBounds()); assert((State.VF.isScalar() || NewGEP->getType()->isVectorTy()) && "NewGEP is not a pointer vector"); State.set(this, NewGEP, Part); @@ -819,14 +861,14 @@ void VPWidenGEPRecipe::execute(VPTransformState &State) { void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const { O << Indent << "WIDEN-GEP "; - O << (IsPtrLoopInvariant ? "Inv" : "Var"); - size_t IndicesNumber = IsIndexLoopInvariant.size(); - for (size_t I = 0; I < IndicesNumber; ++I) - O << "[" << (IsIndexLoopInvariant[I] ? "Inv" : "Var") << "]"; + O << (isPointerLoopInvariant() ? "Inv" : "Var"); + for (size_t I = 0; I < getNumOperands() - 1; ++I) + O << "[" << (isIndexLoopInvariant(I) ? "Inv" : "Var") << "]"; O << " "; printAsOperand(O, SlotTracker); - O << " = getelementptr "; + O << " = getelementptr"; + printFlags(O); printOperands(O, SlotTracker); } #endif @@ -911,7 +953,21 @@ void VPReductionRecipe::print(raw_ostream &O, const Twine &Indent, O << " (with final reduction value stored in invariant address sank " "outside of loop)"; } +#endif + +bool VPReplicateRecipe::shouldPack() const { + // Find if the recipe is used by a widened recipe via an intervening + // VPPredInstPHIRecipe. In this case, also pack the scalar values in a vector. + return any_of(users(), [](const VPUser *U) { + if (auto *PredR = dyn_cast<VPPredInstPHIRecipe>(U)) + return any_of(PredR->users(), [PredR](const VPUser *U) { + return !U->usesScalars(PredR); + }); + return false; + }); +} +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const { O << Indent << (IsUniform ? "CLONE " : "REPLICATE "); @@ -921,18 +977,21 @@ void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent, O << " = "; } if (auto *CB = dyn_cast<CallBase>(getUnderlyingInstr())) { - O << "call @" << CB->getCalledFunction()->getName() << "("; + O << "call"; + printFlags(O); + O << "@" << CB->getCalledFunction()->getName() << "("; interleaveComma(make_range(op_begin(), op_begin() + (getNumOperands() - 1)), O, [&O, &SlotTracker](VPValue *Op) { Op->printAsOperand(O, SlotTracker); }); O << ")"; } else { - O << Instruction::getOpcodeName(getUnderlyingInstr()->getOpcode()) << " "; + O << Instruction::getOpcodeName(getUnderlyingInstr()->getOpcode()); + printFlags(O); printOperands(O, SlotTracker); } - if (AlsoPack) + if (shouldPack()) O << " (S->V)"; } #endif @@ -1053,20 +1112,22 @@ void VPCanonicalIVPHIRecipe::print(raw_ostream &O, const Twine &Indent, } #endif -bool VPCanonicalIVPHIRecipe::isCanonical(const InductionDescriptor &ID, - Type *Ty) const { - if (Ty != getScalarType()) +bool VPCanonicalIVPHIRecipe::isCanonical( + InductionDescriptor::InductionKind Kind, VPValue *Start, VPValue *Step, + Type *Ty) const { + // The types must match and it must be an integer induction. + if (Ty != getScalarType() || Kind != InductionDescriptor::IK_IntInduction) return false; - // The start value of ID must match the start value of this canonical - // induction. - if (getStartValue()->getLiveInIRValue() != ID.getStartValue()) + // Start must match the start value of this canonical induction. + if (Start != getStartValue()) return false; - ConstantInt *Step = ID.getConstIntStepValue(); - // ID must also be incremented by one. IK_IntInduction always increment the - // induction by Step, but the binary op may not be set. - return ID.getKind() == InductionDescriptor::IK_IntInduction && Step && - Step->isOne(); + // If the step is defined by a recipe, it is not a ConstantInt. + if (Step->getDefiningRecipe()) + return false; + + ConstantInt *StepC = dyn_cast<ConstantInt>(Step->getLiveInIRValue()); + return StepC && StepC->isOne(); } bool VPWidenPointerInductionRecipe::onlyScalarsGenerated(ElementCount VF) { @@ -1092,9 +1153,11 @@ void VPExpandSCEVRecipe::execute(VPTransformState &State) { Value *Res = Exp.expandCodeFor(Expr, Expr->getType(), &*State.Builder.GetInsertPoint()); - + assert(!State.ExpandedSCEVs.contains(Expr) && + "Same SCEV expanded multiple times"); + State.ExpandedSCEVs[Expr] = Res; for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) - State.set(this, Res, Part); + State.set(this, Res, {Part, 0}); } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
