Diffstat (limited to 'lib')
374 files changed, 11756 insertions, 4184 deletions
diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp index b52a1d7b24d62..e682a644ef2c1 100644 --- a/lib/Analysis/BasicAliasAnalysis.cpp +++ b/lib/Analysis/BasicAliasAnalysis.cpp @@ -1006,7 +1006,7 @@ static AliasResult aliasSameBasePointerGEPs(const GEPOperator *GEP1, // Because they cannot partially overlap and because fields in an array // cannot overlap, if we can prove the final indices are different between // GEP1 and GEP2, we can conclude GEP1 and GEP2 don't alias. - + // If the last indices are constants, we've already checked they don't // equal each other so we can exit early. if (C1 && C2) diff --git a/lib/Analysis/BranchProbabilityInfo.cpp b/lib/Analysis/BranchProbabilityInfo.cpp index 23d5a887c34af..a329e5ad48c94 100644 --- a/lib/Analysis/BranchProbabilityInfo.cpp +++ b/lib/Analysis/BranchProbabilityInfo.cpp @@ -538,7 +538,7 @@ bool BranchProbabilityInfo::calcZeroHeuristics(const BasicBlock *BB, // InstCombine canonicalizes X <= 0 into X < 1. // X <= 0 -> Unlikely isProb = false; - } else if (CV->isAllOnesValue()) { + } else if (CV->isMinusOne()) { switch (CI->getPredicate()) { case CmpInst::ICMP_EQ: // X == -1 -> Unlikely diff --git a/lib/Analysis/CGSCCPassManager.cpp b/lib/Analysis/CGSCCPassManager.cpp index 9d4521221f477..3ddefc6520a78 100644 --- a/lib/Analysis/CGSCCPassManager.cpp +++ b/lib/Analysis/CGSCCPassManager.cpp @@ -196,18 +196,117 @@ FunctionAnalysisManagerCGSCCProxy::run(LazyCallGraph::SCC &C, bool FunctionAnalysisManagerCGSCCProxy::Result::invalidate( LazyCallGraph::SCC &C, const PreservedAnalyses &PA, CGSCCAnalysisManager::Invalidator &Inv) { - for (LazyCallGraph::Node &N : C) - FAM->invalidate(N.getFunction(), PA); + // If literally everything is preserved, we're done. + if (PA.areAllPreserved()) + return false; // This is still a valid proxy. + + // If this proxy isn't marked as preserved, then even if the result remains + // valid, the key itself may no longer be valid, so we clear everything. + // + // Note that in order to preserve this proxy, a module pass must ensure that + // the FAM has been completely updated to handle the deletion of functions. + // Specifically, any FAM-cached results for those functions need to have been + // forcibly cleared. When preserved, this proxy will only invalidate results + // cached on functions *still in the module* at the end of the module pass. + auto PAC = PA.getChecker<FunctionAnalysisManagerCGSCCProxy>(); + if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<LazyCallGraph::SCC>>()) { + for (LazyCallGraph::Node &N : C) + FAM->clear(N.getFunction()); + + return true; + } + + // Directly check if the relevant set is preserved. + bool AreFunctionAnalysesPreserved = + PA.allAnalysesInSetPreserved<AllAnalysesOn<Function>>(); + + // Now walk all the functions to see if any inner analysis invalidation is + // necessary. + for (LazyCallGraph::Node &N : C) { + Function &F = N.getFunction(); + Optional<PreservedAnalyses> FunctionPA; + + // Check to see whether the preserved set needs to be pruned based on + // SCC-level analysis invalidation that triggers deferred invalidation + // registered with the outer analysis manager proxy for this function. 
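  // (Illustrative gloss, not patch content: getOuterInvalidations() yields
  // pairs of an outer AnalysisKey and the inner analysis keys that depend on
  // it, so when the outer SCC-level analysis goes away, each dependent
  // function analysis is abandoned instead of holding a stale handle.)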
+ if (auto *OuterProxy = + FAM->getCachedResult<CGSCCAnalysisManagerFunctionProxy>(F)) + for (const auto &OuterInvalidationPair : + OuterProxy->getOuterInvalidations()) { + AnalysisKey *OuterAnalysisID = OuterInvalidationPair.first; + const auto &InnerAnalysisIDs = OuterInvalidationPair.second; + if (Inv.invalidate(OuterAnalysisID, C, PA)) { + if (!FunctionPA) + FunctionPA = PA; + for (AnalysisKey *InnerAnalysisID : InnerAnalysisIDs) + FunctionPA->abandon(InnerAnalysisID); + } + } + + // Check if we needed a custom PA set, and if so we'll need to run the + // inner invalidation. + if (FunctionPA) { + FAM->invalidate(F, *FunctionPA); + continue; + } - // This proxy doesn't need to handle invalidation itself. Instead, the - // module-level CGSCC proxy handles it above by ensuring that if the - // module-level FAM proxy becomes invalid the entire SCC layer, which - // includes this proxy, is cleared. + // Otherwise we only need to do invalidation if the original PA set didn't + // preserve all function analyses. + if (!AreFunctionAnalysesPreserved) + FAM->invalidate(F, PA); + } + + // Return false to indicate that this result is still a valid proxy. return false; } } // End llvm namespace +/// When a new SCC is created for the graph and there might be function +/// analysis results cached for the functions now in that SCC two forms of +/// updates are required. +/// +/// First, a proxy from the SCC to the FunctionAnalysisManager needs to be +/// created so that any subsequent invalidation events to the SCC are +/// propagated to the function analysis results cached for functions within it. +/// +/// Second, if any of the functions within the SCC have analysis results with +/// outer analysis dependencies, then those dependencies would point to the +/// *wrong* SCC's analysis result. We forcibly invalidate the necessary +/// function analyses so that they don't retain stale handles. +static void updateNewSCCFunctionAnalyses(LazyCallGraph::SCC &C, + LazyCallGraph &G, + CGSCCAnalysisManager &AM) { + // Get the relevant function analysis manager. + auto &FAM = + AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, G).getManager(); + + // Now walk the functions in this SCC and invalidate any function analysis + // results that might have outer dependencies on an SCC analysis. + for (LazyCallGraph::Node &N : C) { + Function &F = N.getFunction(); + + auto *OuterProxy = + FAM.getCachedResult<CGSCCAnalysisManagerFunctionProxy>(F); + if (!OuterProxy) + // No outer analyses were queried, nothing to do. + continue; + + // Forcibly abandon all the inner analyses with dependencies, but + // invalidate nothing else. + auto PA = PreservedAnalyses::all(); + for (const auto &OuterInvalidationPair : + OuterProxy->getOuterInvalidations()) { + const auto &InnerAnalysisIDs = OuterInvalidationPair.second; + for (AnalysisKey *InnerAnalysisID : InnerAnalysisIDs) + PA.abandon(InnerAnalysisID); + } + + // Now invalidate anything we found. + FAM.invalidate(F, PA); + } +} + namespace { /// Helper function to update both the \c CGSCCAnalysisManager \p AM and the \c /// CGSCCPassManager's \c CGSCCUpdateResult \p UR based on a range of newly @@ -236,7 +335,6 @@ incorporateNewSCCRange(const SCCRangeT &NewSCCRange, LazyCallGraph &G, dbgs() << "Enqueuing the existing SCC in the worklist:" << *C << "\n"; SCC *OldC = C; - (void)OldC; // Update the current SCC. Note that if we have new SCCs, this must actually // change the SCC. 
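The invalidation idiom used throughout this patch (invalidate SCC-level results while keeping function analyses and the FAM proxy alive) can be written from a CGSCC pass as in the following minimal sketch. The pass name is hypothetical; the PreservedAnalyses calls are the ones the patch itself uses:

  PreservedAnalyses HypotheticalCGSCCPass::run(LazyCallGraph::SCC &C,
                                               CGSCCAnalysisManager &AM,
                                               LazyCallGraph &CG,
                                               CGSCCUpdateResult &UR) {
    // Preserve every function-level analysis and keep the FAM proxy valid,
    // so the pass manager only invalidates SCC-level results.
    auto PA = PreservedAnalyses::allInSet<AllAnalysesOn<Function>>();
    PA.preserve<FunctionAnalysisManagerCGSCCProxy>();
    return PA;
  }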
@@ -245,6 +343,26 @@ incorporateNewSCCRange(const SCCRangeT &NewSCCRange, LazyCallGraph &G, C = &*NewSCCRange.begin(); assert(G.lookupSCC(N) == C && "Failed to update current SCC!"); + // If we had a cached FAM proxy originally, we will want to create more of + // them for each SCC that was split off. + bool NeedFAMProxy = + AM.getCachedResult<FunctionAnalysisManagerCGSCCProxy>(*OldC) != nullptr; + + // We need to propagate an invalidation call to all but the newly current SCC + // because the outer pass manager won't do that for us after splitting them. + // FIXME: We should accept a PreservedAnalysis from the CG updater so that if + // there are preserved analyses we can avoid invalidating them here for + // split-off SCCs. + // We know however that this will preserve any FAM proxy so go ahead and mark + // that. + PreservedAnalyses PA; + PA.preserve<FunctionAnalysisManagerCGSCCProxy>(); + AM.invalidate(*OldC, PA); + + // Ensure the now-current SCC's function analyses are updated. + if (NeedFAMProxy) + updateNewSCCFunctionAnalyses(*C, G, AM); + for (SCC &NewC : reverse(make_range(std::next(NewSCCRange.begin()), NewSCCRange.end()))) { assert(C != &NewC && "No need to re-visit the current SCC!"); @@ -252,6 +370,14 @@ incorporateNewSCCRange(const SCCRangeT &NewSCCRange, LazyCallGraph &G, UR.CWorklist.insert(&NewC); if (DebugLogging) dbgs() << "Enqueuing a newly formed SCC:" << NewC << "\n"; + + // Ensure new SCCs' function analyses are updated. + if (NeedFAMProxy) + updateNewSCCFunctionAnalyses(NewC, G, AM); + + // Also propagate a normal invalidation to the new SCC as only the current + // will get one from the pass manager infrastructure. + AM.invalidate(NewC, PA); } return C; } @@ -349,14 +475,6 @@ LazyCallGraph::SCC &llvm::updateCGAndAnalysisManagerForFunctionPass( // For separate SCCs this is trivial. RC->switchTrivialInternalEdgeToRef(N, TargetN); } else { - // Otherwise we may end up re-structuring the call graph. First, - // invalidate any SCC analyses. We have to do this before we split - // functions into new SCCs and lose track of where their analyses are - // cached. - // FIXME: We should accept a more precise preserved set here. For - // example, it might be possible to preserve some function analyses - // even as the SCC structure is changed. - AM.invalidate(*C, PreservedAnalyses::none()); // Now update the call graph. C = incorporateNewSCCRange(RC->switchInternalEdgeToRef(N, TargetN), G, N, C, AM, UR, DebugLogging); @@ -424,13 +542,6 @@ LazyCallGraph::SCC &llvm::updateCGAndAnalysisManagerForFunctionPass( continue; } - // Otherwise we may end up re-structuring the call graph. First, invalidate - // any SCC analyses. We have to do this before we split functions into new - // SCCs and lose track of where their analyses are cached. - // FIXME: We should accept a more precise preserved set here. For example, - // it might be possible to preserve some function analyses even as the SCC - // structure is changed. - AM.invalidate(*C, PreservedAnalyses::none()); // Now update the call graph. C = incorporateNewSCCRange(RC->switchInternalEdgeToRef(N, *RefTarget), G, N, C, AM, UR, DebugLogging); @@ -459,25 +570,48 @@ LazyCallGraph::SCC &llvm::updateCGAndAnalysisManagerForFunctionPass( // Otherwise we are switching an internal ref edge to a call edge. This // may merge away some SCCs, and we add those to the UpdateResult. We also // need to make sure to update the worklist in the event SCCs have moved - // before the current one in the post-order sequence.
+ // before the current one in the post-order sequence. + bool HasFunctionAnalysisProxy = false; auto InitialSCCIndex = RC->find(*C) - RC->begin(); - auto InvalidatedSCCs = RC->switchInternalEdgeToCall(N, *CallTarget); - if (!InvalidatedSCCs.empty()) { + bool FormedCycle = RC->switchInternalEdgeToCall( + N, *CallTarget, [&](ArrayRef<SCC *> MergedSCCs) { + for (SCC *MergedC : MergedSCCs) { + assert(MergedC != &TargetC && "Cannot merge away the target SCC!"); + + HasFunctionAnalysisProxy |= + AM.getCachedResult<FunctionAnalysisManagerCGSCCProxy>( + *MergedC) != nullptr; + + // Mark that this SCC will no longer be valid. + UR.InvalidatedSCCs.insert(MergedC); + + // FIXME: We should really do a 'clear' here to forcibly release + // memory, but we don't have a good way of doing that and + // preserving the function analyses. + auto PA = PreservedAnalyses::allInSet<AllAnalysesOn<Function>>(); + PA.preserve<FunctionAnalysisManagerCGSCCProxy>(); + AM.invalidate(*MergedC, PA); + } + }); + + // If we formed a cycle by creating this call, we need to update more data + // structures. + if (FormedCycle) { C = &TargetC; assert(G.lookupSCC(N) == C && "Failed to update current SCC!"); - // Any analyses cached for this SCC are no longer precise as the shape - // has changed by introducing this cycle. - AM.invalidate(*C, PreservedAnalyses::none()); - - for (SCC *InvalidatedC : InvalidatedSCCs) { - assert(InvalidatedC != C && "Cannot invalidate the current SCC!"); - UR.InvalidatedSCCs.insert(InvalidatedC); + // If one of the invalidated SCCs had a cached proxy to a function + // analysis manager, we need to create a proxy in the new current SCC as + // the invalidated SCCs had their functions moved. + if (HasFunctionAnalysisProxy) + AM.getResult<FunctionAnalysisManagerCGSCCProxy>(*C, G); - // Also clear any cached analyses for the SCCs that are dead. This - // isn't really necessary for correctness but can release memory. - AM.clear(*InvalidatedC); - } + // Any analyses cached for this SCC are no longer precise as the shape + // has changed by introducing this cycle. However, we have taken care to + // update the proxies so it remains valid. + auto PA = PreservedAnalyses::allInSet<AllAnalysesOn<Function>>(); + PA.preserve<FunctionAnalysisManagerCGSCCProxy>(); + AM.invalidate(*C, PA); } auto NewSCCIndex = RC->find(*C) - RC->begin(); if (InitialSCCIndex < NewSCCIndex) { diff --git a/lib/Analysis/CaptureTracking.cpp b/lib/Analysis/CaptureTracking.cpp index 2093f0fdec123..3b0026ba10e90 100644 --- a/lib/Analysis/CaptureTracking.cpp +++ b/lib/Analysis/CaptureTracking.cpp @@ -94,8 +94,8 @@ namespace { // guarantee that 'I' never reaches 'BeforeHere' through a back-edge or // by its successors, i.e, prune if: // - // (1) BB is an entry block or have no sucessors. - // (2) There's no path coming back through BB sucessors. + // (1) BB is an entry block or has no successors. + // (2) There's no path coming back through BB successors.
if (BB == &BB->getParent()->getEntryBlock() || !BB->getTerminator()->getNumSuccessors()) return true; diff --git a/lib/Analysis/DemandedBits.cpp b/lib/Analysis/DemandedBits.cpp index 926b28d6094a5..9c53f9140ca33 100644 --- a/lib/Analysis/DemandedBits.cpp +++ b/lib/Analysis/DemandedBits.cpp @@ -143,9 +143,8 @@ void DemandedBits::determineLiveOperandBits( break; case Instruction::Shl: if (OperandNo == 0) - if (ConstantInt *CI = - dyn_cast<ConstantInt>(UserI->getOperand(1))) { - uint64_t ShiftAmt = CI->getLimitedValue(BitWidth-1); + if (auto *ShiftAmtC = dyn_cast<ConstantInt>(UserI->getOperand(1))) { + uint64_t ShiftAmt = ShiftAmtC->getLimitedValue(BitWidth - 1); AB = AOut.lshr(ShiftAmt); // If the shift is nuw/nsw, then the high bits are not dead @@ -159,9 +158,8 @@ void DemandedBits::determineLiveOperandBits( break; case Instruction::LShr: if (OperandNo == 0) - if (ConstantInt *CI = - dyn_cast<ConstantInt>(UserI->getOperand(1))) { - uint64_t ShiftAmt = CI->getLimitedValue(BitWidth-1); + if (auto *ShiftAmtC = dyn_cast<ConstantInt>(UserI->getOperand(1))) { + uint64_t ShiftAmt = ShiftAmtC->getLimitedValue(BitWidth - 1); AB = AOut.shl(ShiftAmt); // If the shift is exact, then the low bits are not dead @@ -172,9 +170,8 @@ void DemandedBits::determineLiveOperandBits( break; case Instruction::AShr: if (OperandNo == 0) - if (ConstantInt *CI = - dyn_cast<ConstantInt>(UserI->getOperand(1))) { - uint64_t ShiftAmt = CI->getLimitedValue(BitWidth-1); + if (auto *ShiftAmtC = dyn_cast<ConstantInt>(UserI->getOperand(1))) { + uint64_t ShiftAmt = ShiftAmtC->getLimitedValue(BitWidth - 1); AB = AOut.shl(ShiftAmt); // Because the high input bit is replicated into the // high-order bits of the result, if we need any of those diff --git a/lib/Analysis/DependenceAnalysis.cpp b/lib/Analysis/DependenceAnalysis.cpp index e4d58bf1b4eb1..34eccc07f2655 100644 --- a/lib/Analysis/DependenceAnalysis.cpp +++ b/lib/Analysis/DependenceAnalysis.cpp @@ -3342,7 +3342,8 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst, UsefulGEP = isLoopInvariant(SrcPtrSCEV, LI->getLoopFor(Src->getParent())) && isLoopInvariant(DstPtrSCEV, LI->getLoopFor(Dst->getParent())) && - (SrcGEP->getNumOperands() == DstGEP->getNumOperands()); + (SrcGEP->getNumOperands() == DstGEP->getNumOperands()) && + isKnownPredicate(CmpInst::ICMP_EQ, SrcPtrSCEV, DstPtrSCEV); } unsigned Pairs = UsefulGEP ? SrcGEP->idx_end() - SrcGEP->idx_begin() : 1; SmallVector<Subscript, 4> Pair(Pairs); @@ -3371,7 +3372,7 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst, if (Delinearize && CommonLevels > 1) { if (tryDelinearize(Src, Dst, Pair)) { - DEBUG(dbgs() << " delinerized GEP\n"); + DEBUG(dbgs() << " delinearized GEP\n"); Pairs = Pair.size(); } } @@ -3796,7 +3797,7 @@ const SCEV *DependenceInfo::getSplitIteration(const Dependence &Dep, if (Delinearize && CommonLevels > 1) { if (tryDelinearize(Src, Dst, Pair)) { - DEBUG(dbgs() << " delinerized GEP\n"); + DEBUG(dbgs() << " delinearized GEP\n"); Pairs = Pair.size(); } } diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp index d9e32a3c417e0..f6632020b8fc7 100644 --- a/lib/Analysis/InstructionSimplify.cpp +++ b/lib/Analysis/InstructionSimplify.cpp @@ -560,7 +560,7 @@ static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, return Y; /// i1 add -> xor. 
- if (MaxRecurse && Op0->getType()->getScalarType()->isIntegerTy(1)) + if (MaxRecurse && Op0->getType()->isIntOrIntVectorTy(1)) if (Value *V = SimplifyXorInst(Op0, Op1, Q, MaxRecurse-1)) return V; @@ -598,7 +598,7 @@ Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, /// folding. static Constant *stripAndComputeConstantOffsets(const DataLayout &DL, Value *&V, bool AllowNonInbounds = false) { - assert(V->getType()->getScalarType()->isPointerTy()); + assert(V->getType()->isPtrOrPtrVectorTy()); Type *IntPtrTy = DL.getIntPtrType(V->getType())->getScalarType(); APInt Offset = APInt::getNullValue(IntPtrTy->getIntegerBitWidth()); @@ -627,8 +627,7 @@ static Constant *stripAndComputeConstantOffsets(const DataLayout &DL, Value *&V, } break; } - assert(V->getType()->getScalarType()->isPointerTy() && - "Unexpected operand type!"); + assert(V->getType()->isPtrOrPtrVectorTy() && "Unexpected operand type!"); } while (Visited.insert(V).second); Constant *OffsetIntPtr = ConstantInt::get(IntPtrTy, Offset); @@ -771,7 +770,7 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, return ConstantExpr::getIntegerCast(Result, Op0->getType(), true); // i1 sub -> xor. - if (MaxRecurse && Op0->getType()->getScalarType()->isIntegerTy(1)) + if (MaxRecurse && Op0->getType()->isIntOrIntVectorTy(1)) if (Value *V = SimplifyXorInst(Op0, Op1, Q, MaxRecurse-1)) return V; @@ -902,7 +901,7 @@ static Value *SimplifyMulInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, return X; // i1 mul -> and. - if (MaxRecurse && Op0->getType()->getScalarType()->isIntegerTy(1)) + if (MaxRecurse && Op0->getType()->isIntOrIntVectorTy(1)) if (Value *V = SimplifyAndInst(Op0, Op1, Q, MaxRecurse-1)) return V; @@ -998,7 +997,7 @@ static Value *simplifyDivRem(Value *Op0, Value *Op1, bool IsDiv) { // X % 1 -> 0 // If this is a boolean op (single-bit element type), we can't have // division-by-zero or remainder-by-zero, so assume the divisor is 1. - if (match(Op1, m_One()) || Ty->getScalarType()->isIntegerTy(1)) + if (match(Op1, m_One()) || Ty->isIntOrIntVectorTy(1)) return IsDiv ? Op0 : Constant::getNullValue(Ty); return nullptr; @@ -2251,7 +2250,7 @@ static Value *simplifyICmpOfBools(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const SimplifyQuery &Q) { Type *ITy = GetCompareTy(LHS); // The return type. Type *OpTy = LHS->getType(); // The operand type. - if (!OpTy->getScalarType()->isIntegerTy(1)) + if (!OpTy->isIntOrIntVectorTy(1)) return nullptr; // A boolean compared to true/false can be simplified in 14 out of the 20 diff --git a/lib/Analysis/LazyCallGraph.cpp b/lib/Analysis/LazyCallGraph.cpp index b6a9436cc1ec3..a4c3e43b4b0c2 100644 --- a/lib/Analysis/LazyCallGraph.cpp +++ b/lib/Analysis/LazyCallGraph.cpp @@ -456,8 +456,10 @@ updatePostorderSequenceForEdgeInsertion( return make_range(SCCs.begin() + SourceIdx, SCCs.begin() + TargetIdx); } -SmallVector<LazyCallGraph::SCC *, 1> -LazyCallGraph::RefSCC::switchInternalEdgeToCall(Node &SourceN, Node &TargetN) { +bool +LazyCallGraph::RefSCC::switchInternalEdgeToCall( + Node &SourceN, Node &TargetN, + function_ref<void(ArrayRef<SCC *> MergeSCCs)> MergeCB) { assert(!(*SourceN)[TargetN].isCall() && "Must start with a ref edge!"); SmallVector<SCC *, 1> DeletedSCCs; @@ -475,7 +477,7 @@ LazyCallGraph::RefSCC::switchInternalEdgeToCall(Node &SourceN, Node &TargetN) { // we've just added more connectivity. if (&SourceSCC == &TargetSCC) { SourceN->setEdgeKind(TargetN, Edge::Call); - return DeletedSCCs; + return false; // No new cycle. 
} // At this point we leverage the postorder list of SCCs to detect when the @@ -488,7 +490,7 @@ LazyCallGraph::RefSCC::switchInternalEdgeToCall(Node &SourceN, Node &TargetN) { int TargetIdx = SCCIndices[&TargetSCC]; if (TargetIdx < SourceIdx) { SourceN->setEdgeKind(TargetN, Edge::Call); - return DeletedSCCs; + return false; // No new cycle. } // Compute the SCCs which (transitively) reach the source. @@ -555,12 +557,16 @@ LazyCallGraph::RefSCC::switchInternalEdgeToCall(Node &SourceN, Node &TargetN) { SourceSCC, TargetSCC, SCCs, SCCIndices, ComputeSourceConnectedSet, ComputeTargetConnectedSet); + // Run the user's callback on the merged SCCs before we actually merge them. + if (MergeCB) + MergeCB(makeArrayRef(MergeRange.begin(), MergeRange.end())); + // If the merge range is empty, then adding the edge didn't actually form any // new cycles. We're done. if (MergeRange.begin() == MergeRange.end()) { // Now that the SCC structure is finalized, flip the kind to call. SourceN->setEdgeKind(TargetN, Edge::Call); - return DeletedSCCs; + return false; // No new cycle. } #ifndef NDEBUG @@ -596,8 +602,8 @@ LazyCallGraph::RefSCC::switchInternalEdgeToCall(Node &SourceN, Node &TargetN) { // Now that the SCC structure is finalized, flip the kind to call. SourceN->setEdgeKind(TargetN, Edge::Call); - // And we're done! - return DeletedSCCs; + // And we're done, but we did form a new cycle. + return true; } void LazyCallGraph::RefSCC::switchTrivialInternalEdgeToRef(Node &SourceN, diff --git a/lib/Analysis/Lint.cpp b/lib/Analysis/Lint.cpp index 9713588537b39..ada600a69b872 100644 --- a/lib/Analysis/Lint.cpp +++ b/lib/Analysis/Lint.cpp @@ -405,7 +405,7 @@ void Lint::visitMemoryReference(Instruction &I, Assert(!isa<UndefValue>(UnderlyingObject), "Undefined behavior: Undef pointer dereference", &I); Assert(!isa<ConstantInt>(UnderlyingObject) || - !cast<ConstantInt>(UnderlyingObject)->isAllOnesValue(), + !cast<ConstantInt>(UnderlyingObject)->isMinusOne(), "Unusual: All-ones pointer dereference", &I); Assert(!isa<ConstantInt>(UnderlyingObject) || !cast<ConstantInt>(UnderlyingObject)->isOne(), diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp index ff68810abb827..baf932432a0a4 100644 --- a/lib/Analysis/LoopInfo.cpp +++ b/lib/Analysis/LoopInfo.cpp @@ -131,13 +131,13 @@ PHINode *Loop::getCanonicalInductionVariable() const { PHINode *PN = cast<PHINode>(I); if (ConstantInt *CI = dyn_cast<ConstantInt>(PN->getIncomingValueForBlock(Incoming))) - if (CI->isNullValue()) + if (CI->isZero()) if (Instruction *Inc = dyn_cast<Instruction>(PN->getIncomingValueForBlock(Backedge))) if (Inc->getOpcode() == Instruction::Add && Inc->getOperand(0) == PN) if (ConstantInt *CI = dyn_cast<ConstantInt>(Inc->getOperand(1))) - if (CI->equalsInt(1)) + if (CI->isOne()) return PN; } return nullptr; @@ -460,7 +460,7 @@ protected: void UnloopUpdater::updateBlockParents() { if (Unloop.getNumBlocks()) { // Perform a post order CFG traversal of all blocks within this loop, - // propagating the nearest loop from sucessors to predecessors. + // propagating the nearest loop from successors to predecessors. 
LoopBlocksTraversal Traversal(DFS, LI); for (BasicBlock *POI : Traversal) { diff --git a/lib/Analysis/MemoryBuiltins.cpp b/lib/Analysis/MemoryBuiltins.cpp index f88d54b21e1e3..7327c07499bed 100644 --- a/lib/Analysis/MemoryBuiltins.cpp +++ b/lib/Analysis/MemoryBuiltins.cpp @@ -505,6 +505,22 @@ SizeOffsetType ObjectSizeOffsetVisitor::compute(Value *V) { return unknown(); } +/// When we're compiling N-bit code, and the user uses parameters that are +/// greater than N bits (e.g. uint64_t on a 32-bit build), we can run into +/// trouble with APInt size issues. This function handles resizing + overflow +/// checks for us. Check and zext or trunc \p I depending on IntTyBits and +/// I's value. +bool ObjectSizeOffsetVisitor::CheckedZextOrTrunc(APInt &I) { + // More bits than we can handle. Checking the bit width isn't necessary, but + // it's faster than checking active bits, and should give `false` in the + // vast majority of cases. + if (I.getBitWidth() > IntTyBits && I.getActiveBits() > IntTyBits) + return false; + if (I.getBitWidth() != IntTyBits) + I = I.zextOrTrunc(IntTyBits); + return true; +} + SizeOffsetType ObjectSizeOffsetVisitor::visitAllocaInst(AllocaInst &I) { if (!I.getAllocatedType()->isSized()) return unknown(); @@ -515,8 +531,14 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitAllocaInst(AllocaInst &I) { Value *ArraySize = I.getArraySize(); if (const ConstantInt *C = dyn_cast<ConstantInt>(ArraySize)) { - Size *= C->getValue().zextOrSelf(IntTyBits); - return std::make_pair(align(Size, I.getAlignment()), Zero); + APInt NumElems = C->getValue(); + if (!CheckedZextOrTrunc(NumElems)) + return unknown(); + + bool Overflow; + Size = Size.umul_ov(NumElems, Overflow); + return Overflow ? unknown() : std::make_pair(align(Size, I.getAlignment()), + Zero); } return unknown(); } @@ -561,21 +583,6 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitCallSite(CallSite CS) { if (!Arg) return unknown(); - // When we're compiling N-bit code, and the user uses parameters that are - // greater than N bits (e.g. uint64_t on a 32-bit build), we can run into - // trouble with APInt size issues. This function handles resizing + overflow - // checks for us. - auto CheckedZextOrTrunc = [&](APInt &I) { - // More bits than we can handle. Checking the bit width isn't necessary, but - // it's faster than checking active bits, and should give `false` in the - // vast majority of cases. - if (I.getBitWidth() > IntTyBits && I.getActiveBits() > IntTyBits) - return false; - if (I.getBitWidth() != IntTyBits) - I = I.zextOrTrunc(IntTyBits); - return true; - }; - APInt Size = Arg->getValue(); if (!CheckedZextOrTrunc(Size)) return unknown(); diff --git a/lib/Analysis/ModuleSummaryAnalysis.cpp b/lib/Analysis/ModuleSummaryAnalysis.cpp index 095647e1bd20b..e9e354ebb88f9 100644 --- a/lib/Analysis/ModuleSummaryAnalysis.cpp +++ b/lib/Analysis/ModuleSummaryAnalysis.cpp @@ -266,7 +266,7 @@ computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M, // sample PGO, to enable the same inlines as the profiled optimized binary. 
for (auto &I : F.getImportGUIDs()) CallGraphEdges[Index.getOrInsertValueInfo(I)].updateHotness( - CalleeInfo::HotnessType::Hot); + CalleeInfo::HotnessType::Critical); bool NonRenamableLocal = isNonRenamableLocal(F); bool NotEligibleForImport = diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index 678ad3af5e852..3fb1ab980add8 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -326,7 +326,7 @@ bool SCEV::isOne() const { bool SCEV::isAllOnesValue() const { if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(this)) - return SC->getValue()->isAllOnesValue(); + return SC->getValue()->isMinusOne(); return false; } @@ -2743,7 +2743,7 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops, } // If we are left with a constant one being multiplied, strip it off. - if (cast<SCEVConstant>(Ops[0])->getValue()->equalsInt(1)) { + if (cast<SCEVConstant>(Ops[0])->getValue()->isOne()) { Ops.erase(Ops.begin()); --Idx; } else if (cast<SCEVConstant>(Ops[0])->getValue()->isZero()) { @@ -2939,7 +2939,7 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS, "SCEVUDivExpr operand types don't match!"); if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS)) { - if (RHSC->getValue()->equalsInt(1)) + if (RHSC->getValue()->isOne()) return LHS; // X udiv 1 --> x // If the denominator is zero, the result of the udiv is undefined. Don't // try to analyze it, because the resolution chosen here may differ from @@ -5421,9 +5421,9 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { // For an expression like x&255 that merely masks off the high bits, // use zext(trunc(x)) as the SCEV expression. if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->RHS)) { - if (CI->isNullValue()) + if (CI->isZero()) return getSCEV(BO->RHS); - if (CI->isAllOnesValue()) + if (CI->isMinusOne()) return getSCEV(BO->LHS); const APInt &A = CI->getValue(); @@ -5498,7 +5498,7 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { case Instruction::Xor: if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->RHS)) { // If the RHS of xor is -1, then this is a not operation. - if (CI->isAllOnesValue()) + if (CI->isMinusOne()) return getNotSCEV(getSCEV(BO->LHS)); // Model xor(and(x, C), C) as and(~x, C), if C is a low-bits mask. @@ -5577,7 +5577,7 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { if (CI->getValue().uge(BitWidth)) break; - if (CI->isNullValue()) + if (CI->isZero()) return getSCEV(BO->LHS); // shift by zero --> noop uint64_t AShrAmt = CI->getZExtValue(); @@ -7626,7 +7626,7 @@ ScalarEvolution::howFarToZero(const SCEV *V, const Loop *L, bool ControlsExit, // to 0, it must be counting down to equal 0. Consequently, N = Start / -Step. // We have not yet seen any such cases. const SCEVConstant *StepC = dyn_cast<SCEVConstant>(Step); - if (!StepC || StepC->getValue()->equalsInt(0)) + if (!StepC || StepC->getValue()->isZero()) return getCouldNotCompute(); // For positive steps (counting up until unsigned overflow): @@ -7640,7 +7640,7 @@ ScalarEvolution::howFarToZero(const SCEV *V, const Loop *L, bool ControlsExit, // Handle unitary steps, which cannot wraparound. 
// 1*N = -Start; -1*N = Start (mod 2^BW), so: // N = Distance (as unsigned) - if (StepC->getValue()->equalsInt(1) || StepC->getValue()->isAllOnesValue()) { + if (StepC->getValue()->isOne() || StepC->getValue()->isMinusOne()) { APInt MaxBECount = getUnsignedRangeMax(Distance); // When a loop like "for (int i = 0; i != n; ++i) { /* body */ }" is rotated, @@ -7696,7 +7696,7 @@ ScalarEvolution::howFarToNonZero(const SCEV *V, const Loop *L) { // If the value is a constant, check to see if it is known to be non-zero // already. If so, the backedge will execute zero times. if (const SCEVConstant *C = dyn_cast<SCEVConstant>(V)) { - if (!C->getValue()->isNullValue()) + if (!C->getValue()->isZero()) return getZero(C->getType()); return getCouldNotCompute(); // Otherwise it will loop infinitely. } diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp index f938a9a520650..94bbc58541a77 100644 --- a/lib/Analysis/TargetTransformInfo.cpp +++ b/lib/Analysis/TargetTransformInfo.cpp @@ -16,6 +16,7 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include <utility> @@ -23,6 +24,11 @@ using namespace llvm; #define DEBUG_TYPE "tti" +static cl::opt<bool> UseWideMemcpyLoopLowering( + "use-wide-memcpy-loop-lowering", cl::init(false), + cl::desc("Enables the new wide memcpy loop lowering in Transforms/Utils."), + cl::Hidden); + namespace { /// \brief No-op implementation of the TTI interface using the utility base /// classes. @@ -482,6 +488,25 @@ Value *TargetTransformInfo::getOrCreateResultFromMemIntrinsic( return TTIImpl->getOrCreateResultFromMemIntrinsic(Inst, ExpectedType); } +Type *TargetTransformInfo::getMemcpyLoopLoweringType(LLVMContext &Context, + Value *Length, + unsigned SrcAlign, + unsigned DestAlign) const { + return TTIImpl->getMemcpyLoopLoweringType(Context, Length, SrcAlign, + DestAlign); +} + +void TargetTransformInfo::getMemcpyLoopResidualLoweringType( + SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context, + unsigned RemainingBytes, unsigned SrcAlign, unsigned DestAlign) const { + TTIImpl->getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes, + SrcAlign, DestAlign); +} + +bool TargetTransformInfo::useWideIRMemcpyLoopLowering() const { + return UseWideMemcpyLoopLowering; +} + bool TargetTransformInfo::areInlineCompatible(const Function *Caller, const Function *Callee) const { return TTIImpl->areInlineCompatible(Caller, Callee); diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index fd6e3a643bf03..9e042da8801db 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -1500,12 +1500,10 @@ void computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth, assert(Depth <= MaxDepth && "Limit Search Depth"); unsigned BitWidth = Known.getBitWidth(); - assert((V->getType()->isIntOrIntVectorTy() || - V->getType()->getScalarType()->isPointerTy()) && + assert((V->getType()->isIntOrIntVectorTy(BitWidth) || + V->getType()->isPtrOrPtrVectorTy()) && "Not integer or pointer type!"); - assert((Q.DL.getTypeSizeInBits(V->getType()->getScalarType()) == BitWidth) && - (!V->getType()->isIntOrIntVectorTy() || - V->getType()->getScalarSizeInBits() == BitWidth) && + assert(Q.DL.getTypeSizeInBits(V->getType()->getScalarType()) == BitWidth && "V and Known should have same BitWidth"); (void)BitWidth; @@ -1952,7 +1950,7 @@ bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) { } 
// Check if all incoming values are non-zero constant. bool AllNonZeroConstants = all_of(PN->operands(), [](Value *V) { - return isa<ConstantInt>(V) && !cast<ConstantInt>(V)->isZeroValue(); + return isa<ConstantInt>(V) && !cast<ConstantInt>(V)->isZero(); }); if (AllNonZeroConstants) return true; @@ -4393,7 +4391,7 @@ isImpliedCondMatchingImmOperands(CmpInst::Predicate APred, const Value *ALHS, } Optional<bool> llvm::isImpliedCondition(const Value *LHS, const Value *RHS, - const DataLayout &DL, bool InvertAPred, + const DataLayout &DL, bool LHSIsFalse, unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT) { @@ -4402,26 +4400,51 @@ Optional<bool> llvm::isImpliedCondition(const Value *LHS, const Value *RHS, return None; Type *OpTy = LHS->getType(); - assert(OpTy->getScalarType()->isIntegerTy(1)); + assert(OpTy->isIntOrIntVectorTy(1)); // LHS ==> RHS by definition - if (!InvertAPred && LHS == RHS) - return true; + if (LHS == RHS) + return !LHSIsFalse; if (OpTy->isVectorTy()) // TODO: extending the code below to handle vectors return None; assert(OpTy->isIntegerTy(1) && "implied by above"); - ICmpInst::Predicate APred, BPred; - Value *ALHS, *ARHS; Value *BLHS, *BRHS; + ICmpInst::Predicate BPred; + // We expect the RHS to be an icmp. + if (!match(RHS, m_ICmp(BPred, m_Value(BLHS), m_Value(BRHS)))) + return None; - if (!match(LHS, m_ICmp(APred, m_Value(ALHS), m_Value(ARHS))) || - !match(RHS, m_ICmp(BPred, m_Value(BLHS), m_Value(BRHS)))) + Value *ALHS, *ARHS; + ICmpInst::Predicate APred; + // The LHS can be an 'or', 'and', or 'icmp'. + if (!match(LHS, m_ICmp(APred, m_Value(ALHS), m_Value(ARHS)))) { + // The remaining tests are all recursive, so bail out if we hit the limit. + if (Depth == MaxDepth) + return None; + // If the result of an 'or' is false, then we know both legs of the 'or' are + // false. Similarly, if the result of an 'and' is true, then we know both + // legs of the 'and' are true. + if ((LHSIsFalse && match(LHS, m_Or(m_Value(ALHS), m_Value(ARHS)))) || + (!LHSIsFalse && match(LHS, m_And(m_Value(ALHS), m_Value(ARHS))))) { + if (Optional<bool> Implication = isImpliedCondition( + ALHS, RHS, DL, LHSIsFalse, Depth + 1, AC, CxtI, DT)) + return Implication; + if (Optional<bool> Implication = isImpliedCondition( + ARHS, RHS, DL, LHSIsFalse, Depth + 1, AC, CxtI, DT)) + return Implication; + return None; + } return None; + } + // All of the below logic assumes both LHS and RHS are icmps. + assert(isa<ICmpInst>(LHS) && isa<ICmpInst>(RHS) && "Expected icmps."); - if (InvertAPred) + // The rest of the logic assumes the LHS condition is true. If that's not the + // case, invert the predicate to make it so. + if (LHSIsFalse) APred = CmpInst::getInversePredicate(APred); // Can we infer anything when the two compares have matching operands? 
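The new 'or'/'and' handling in isImpliedCondition can be exercised as in this sketch. The call site is hypothetical, LHSCond and RHSCond stand for i1 Values already in hand, and the trailing parameters (Depth, AC, CxtI, DT) are assumed to take their defaults:

  // If LHSCond is, say, "or i1 %a, %b" and is known false, both legs are
  // false, so the implication can be established through either leg.
  Optional<bool> Implied =
      isImpliedCondition(LHSCond, RHSCond, DL, /*LHSIsFalse=*/true);
  if (Implied && !*Implied)
    ; // RHSCond is known false wherever LHSCond is false.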
diff --git a/lib/Analysis/VectorUtils.cpp b/lib/Analysis/VectorUtils.cpp index 0ace8fa382bc7..554d132c2ab77 100644 --- a/lib/Analysis/VectorUtils.cpp +++ b/lib/Analysis/VectorUtils.cpp @@ -301,7 +301,7 @@ const llvm::Value *llvm::getSplatValue(const Value *V) { auto *InsertEltInst = dyn_cast<InsertElementInst>(ShuffleInst->getOperand(0)); if (!InsertEltInst || !isa<ConstantInt>(InsertEltInst->getOperand(2)) || - !cast<ConstantInt>(InsertEltInst->getOperand(2))->isNullValue()) + !cast<ConstantInt>(InsertEltInst->getOperand(2))->isZero()) return nullptr; return InsertEltInst->getOperand(1); diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp index a49276099f194..428bb21fbf517 100644 --- a/lib/AsmParser/LLLexer.cpp +++ b/lib/AsmParser/LLLexer.cpp @@ -542,7 +542,7 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(release); KEYWORD(acq_rel); KEYWORD(seq_cst); - KEYWORD(singlethread); + KEYWORD(syncscope); KEYWORD(nnan); KEYWORD(ninf); diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp index 9ad31125f4b8c..717eb0e00f4f4 100644 --- a/lib/AsmParser/LLParser.cpp +++ b/lib/AsmParser/LLParser.cpp @@ -1919,20 +1919,42 @@ bool LLParser::parseAllocSizeArguments(unsigned &BaseSizeArg, } /// ParseScopeAndOrdering -/// if isAtomic: ::= 'singlethread'? AtomicOrdering +/// if isAtomic: ::= SyncScope? AtomicOrdering /// else: ::= /// /// This sets Scope and Ordering to the parsed values. -bool LLParser::ParseScopeAndOrdering(bool isAtomic, SynchronizationScope &Scope, +bool LLParser::ParseScopeAndOrdering(bool isAtomic, SyncScope::ID &SSID, AtomicOrdering &Ordering) { if (!isAtomic) return false; - Scope = CrossThread; - if (EatIfPresent(lltok::kw_singlethread)) - Scope = SingleThread; + return ParseScope(SSID) || ParseOrdering(Ordering); +} + +/// ParseScope +/// ::= syncscope("singlethread" | "<target scope>")? +/// +/// This sets synchronization scope ID to the ID of the parsed value. 
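For illustration, the IR this production accepts looks like the following sketch, where "agent" stands in for an arbitrary target scope name; when no syncscope is written, SSID defaults to SyncScope::System below:

  load atomic i32, i32* %p syncscope("agent") seq_cst, align 4
  fence syncscope("singlethread") acquire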
+bool LLParser::ParseScope(SyncScope::ID &SSID) { + SSID = SyncScope::System; + if (EatIfPresent(lltok::kw_syncscope)) { + auto StartParenAt = Lex.getLoc(); + if (!EatIfPresent(lltok::lparen)) + return Error(StartParenAt, "Expected '(' in syncscope"); + + std::string SSN; + auto SSNAt = Lex.getLoc(); + if (ParseStringConstant(SSN)) + return Error(SSNAt, "Expected synchronization scope name"); - return ParseOrdering(Ordering); + auto EndParenAt = Lex.getLoc(); + if (!EatIfPresent(lltok::rparen)) + return Error(EndParenAt, "Expected ')' in syncscope"); + + SSID = Context.getOrInsertSyncScopeID(SSN); + } + + return false; } /// ParseOrdering @@ -3061,7 +3083,7 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { } else { assert(Opc == Instruction::ICmp && "Unexpected opcode for CmpInst!"); if (!Val0->getType()->isIntOrIntVectorTy() && - !Val0->getType()->getScalarType()->isPointerTy()) + !Val0->getType()->isPtrOrPtrVectorTy()) return Error(ID.Loc, "icmp requires pointer or integer operands"); ID.ConstantVal = ConstantExpr::getICmp(Pred, Val0, Val1); } @@ -3210,7 +3232,7 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { if (Opc == Instruction::GetElementPtr) { if (Elts.size() == 0 || - !Elts[0]->getType()->getScalarType()->isPointerTy()) + !Elts[0]->getType()->isPtrOrPtrVectorTy()) return Error(ID.Loc, "base of getelementptr must be a pointer"); Type *BaseType = Elts[0]->getType(); @@ -3226,7 +3248,7 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { ArrayRef<Constant *> Indices(Elts.begin() + 1, Elts.end()); for (Constant *Val : Indices) { Type *ValTy = Val->getType(); - if (!ValTy->getScalarType()->isIntegerTy()) + if (!ValTy->isIntOrIntVectorTy()) return Error(ID.Loc, "getelementptr index must be an integer"); if (ValTy->isVectorTy()) { unsigned ValNumEl = ValTy->getVectorNumElements(); @@ -5697,7 +5719,7 @@ bool LLParser::ParseCompare(Instruction *&Inst, PerFunctionState &PFS, } else { assert(Opc == Instruction::ICmp && "Unknown opcode for CmpInst!"); if (!LHS->getType()->isIntOrIntVectorTy() && - !LHS->getType()->getScalarType()->isPointerTy()) + !LHS->getType()->isPtrOrPtrVectorTy()) return Error(Loc, "icmp requires integer operands"); Inst = new ICmpInst(CmpInst::Predicate(Pred), LHS, RHS); } @@ -6100,7 +6122,7 @@ int LLParser::ParseLoad(Instruction *&Inst, PerFunctionState &PFS) { bool AteExtraComma = false; bool isAtomic = false; AtomicOrdering Ordering = AtomicOrdering::NotAtomic; - SynchronizationScope Scope = CrossThread; + SyncScope::ID SSID = SyncScope::System; if (Lex.getKind() == lltok::kw_atomic) { isAtomic = true; @@ -6118,7 +6140,7 @@ int LLParser::ParseLoad(Instruction *&Inst, PerFunctionState &PFS) { if (ParseType(Ty) || ParseToken(lltok::comma, "expected comma after load's type") || ParseTypeAndValue(Val, Loc, PFS) || - ParseScopeAndOrdering(isAtomic, Scope, Ordering) || + ParseScopeAndOrdering(isAtomic, SSID, Ordering) || ParseOptionalCommaAlign(Alignment, AteExtraComma)) return true; @@ -6134,7 +6156,7 @@ int LLParser::ParseLoad(Instruction *&Inst, PerFunctionState &PFS) { return Error(ExplicitTypeLoc, "explicit pointee type doesn't match operand's pointee type"); - Inst = new LoadInst(Ty, Val, "", isVolatile, Alignment, Ordering, Scope); + Inst = new LoadInst(Ty, Val, "", isVolatile, Alignment, Ordering, SSID); return AteExtraComma ? 
InstExtraComma : InstNormal; } @@ -6149,7 +6171,7 @@ int LLParser::ParseStore(Instruction *&Inst, PerFunctionState &PFS) { bool AteExtraComma = false; bool isAtomic = false; AtomicOrdering Ordering = AtomicOrdering::NotAtomic; - SynchronizationScope Scope = CrossThread; + SyncScope::ID SSID = SyncScope::System; if (Lex.getKind() == lltok::kw_atomic) { isAtomic = true; @@ -6165,7 +6187,7 @@ int LLParser::ParseStore(Instruction *&Inst, PerFunctionState &PFS) { if (ParseTypeAndValue(Val, Loc, PFS) || ParseToken(lltok::comma, "expected ',' after store operand") || ParseTypeAndValue(Ptr, PtrLoc, PFS) || - ParseScopeAndOrdering(isAtomic, Scope, Ordering) || + ParseScopeAndOrdering(isAtomic, SSID, Ordering) || ParseOptionalCommaAlign(Alignment, AteExtraComma)) return true; @@ -6181,7 +6203,7 @@ int LLParser::ParseStore(Instruction *&Inst, PerFunctionState &PFS) { Ordering == AtomicOrdering::AcquireRelease) return Error(Loc, "atomic store cannot use Acquire ordering"); - Inst = new StoreInst(Val, Ptr, isVolatile, Alignment, Ordering, Scope); + Inst = new StoreInst(Val, Ptr, isVolatile, Alignment, Ordering, SSID); return AteExtraComma ? InstExtraComma : InstNormal; } @@ -6193,7 +6215,7 @@ int LLParser::ParseCmpXchg(Instruction *&Inst, PerFunctionState &PFS) { bool AteExtraComma = false; AtomicOrdering SuccessOrdering = AtomicOrdering::NotAtomic; AtomicOrdering FailureOrdering = AtomicOrdering::NotAtomic; - SynchronizationScope Scope = CrossThread; + SyncScope::ID SSID = SyncScope::System; bool isVolatile = false; bool isWeak = false; @@ -6208,7 +6230,7 @@ int LLParser::ParseCmpXchg(Instruction *&Inst, PerFunctionState &PFS) { ParseTypeAndValue(Cmp, CmpLoc, PFS) || ParseToken(lltok::comma, "expected ',' after cmpxchg cmp operand") || ParseTypeAndValue(New, NewLoc, PFS) || - ParseScopeAndOrdering(true /*Always atomic*/, Scope, SuccessOrdering) || + ParseScopeAndOrdering(true /*Always atomic*/, SSID, SuccessOrdering) || ParseOrdering(FailureOrdering)) return true; @@ -6231,7 +6253,7 @@ int LLParser::ParseCmpXchg(Instruction *&Inst, PerFunctionState &PFS) { if (!New->getType()->isFirstClassType()) return Error(NewLoc, "cmpxchg operand must be a first class value"); AtomicCmpXchgInst *CXI = new AtomicCmpXchgInst( - Ptr, Cmp, New, SuccessOrdering, FailureOrdering, Scope); + Ptr, Cmp, New, SuccessOrdering, FailureOrdering, SSID); CXI->setVolatile(isVolatile); CXI->setWeak(isWeak); Inst = CXI; @@ -6245,7 +6267,7 @@ int LLParser::ParseAtomicRMW(Instruction *&Inst, PerFunctionState &PFS) { Value *Ptr, *Val; LocTy PtrLoc, ValLoc; bool AteExtraComma = false; AtomicOrdering Ordering = AtomicOrdering::NotAtomic; - SynchronizationScope Scope = CrossThread; + SyncScope::ID SSID = SyncScope::System; bool isVolatile = false; AtomicRMWInst::BinOp Operation; @@ -6271,7 +6293,7 @@ int LLParser::ParseAtomicRMW(Instruction *&Inst, PerFunctionState &PFS) { if (ParseTypeAndValue(Ptr, PtrLoc, PFS) || ParseToken(lltok::comma, "expected ',' after atomicrmw address") || ParseTypeAndValue(Val, ValLoc, PFS) || - ParseScopeAndOrdering(true /*Always atomic*/, Scope, Ordering)) + ParseScopeAndOrdering(true /*Always atomic*/, SSID, Ordering)) return true; if (Ordering == AtomicOrdering::Unordered) @@ -6288,7 +6310,7 @@ int LLParser::ParseAtomicRMW(Instruction *&Inst, PerFunctionState &PFS) { " integer"); AtomicRMWInst *RMWI = - new AtomicRMWInst(Operation, Ptr, Val, Ordering, Scope); + new AtomicRMWInst(Operation, Ptr, Val, Ordering, SSID); RMWI->setVolatile(isVolatile); Inst = RMWI; return AteExtraComma ? 
InstExtraComma : InstNormal; @@ -6298,8 +6320,8 @@ int LLParser::ParseAtomicRMW(Instruction *&Inst, PerFunctionState &PFS) { /// ::= 'fence' 'singlethread'? AtomicOrdering int LLParser::ParseFence(Instruction *&Inst, PerFunctionState &PFS) { AtomicOrdering Ordering = AtomicOrdering::NotAtomic; - SynchronizationScope Scope = CrossThread; - if (ParseScopeAndOrdering(true /*Always atomic*/, Scope, Ordering)) + SyncScope::ID SSID = SyncScope::System; + if (ParseScopeAndOrdering(true /*Always atomic*/, SSID, Ordering)) return true; if (Ordering == AtomicOrdering::Unordered) @@ -6307,7 +6329,7 @@ int LLParser::ParseFence(Instruction *&Inst, PerFunctionState &PFS) { if (Ordering == AtomicOrdering::Monotonic) return TokError("fence cannot be monotonic"); - Inst = new FenceInst(Context, Ordering, Scope); + Inst = new FenceInst(Context, Ordering, SSID); return InstNormal; } @@ -6349,7 +6371,7 @@ int LLParser::ParseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) { break; } if (ParseTypeAndValue(Val, EltLoc, PFS)) return true; - if (!Val->getType()->getScalarType()->isIntegerTy()) + if (!Val->getType()->isIntOrIntVectorTy()) return Error(EltLoc, "getelementptr index must be an integer"); if (Val->getType()->isVectorTy()) { diff --git a/lib/AsmParser/LLParser.h b/lib/AsmParser/LLParser.h index 4616c2e86947c..d5b059355c423 100644 --- a/lib/AsmParser/LLParser.h +++ b/lib/AsmParser/LLParser.h @@ -241,8 +241,9 @@ namespace llvm { bool ParseOptionalCallingConv(unsigned &CC); bool ParseOptionalAlignment(unsigned &Alignment); bool ParseOptionalDerefAttrBytes(lltok::Kind AttrKind, uint64_t &Bytes); - bool ParseScopeAndOrdering(bool isAtomic, SynchronizationScope &Scope, + bool ParseScopeAndOrdering(bool isAtomic, SyncScope::ID &SSID, AtomicOrdering &Ordering); + bool ParseScope(SyncScope::ID &SSID); bool ParseOrdering(AtomicOrdering &Ordering); bool ParseOptionalStackAlignment(unsigned &Alignment); bool ParseOptionalCommaAlign(unsigned &Alignment, bool &AteExtraComma); diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h index 6c8ed7da495d1..9c7a06de81b45 100644 --- a/lib/AsmParser/LLToken.h +++ b/lib/AsmParser/LLToken.h @@ -93,7 +93,7 @@ enum Kind { kw_release, kw_acq_rel, kw_seq_cst, - kw_singlethread, + kw_syncscope, kw_nnan, kw_ninf, kw_nsz, diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index 1ebef31731353..2b4970a80cddb 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -513,6 +513,7 @@ class BitcodeReader : public BitcodeReaderBase, public GVMaterializer { TBAAVerifier TBAAVerifyHelper; std::vector<std::string> BundleTags; + SmallVector<SyncScope::ID, 8> SSIDs; public: BitcodeReader(BitstreamCursor Stream, StringRef Strtab, @@ -648,6 +649,7 @@ private: Error parseTypeTable(); Error parseTypeTableBody(); Error parseOperandBundleTags(); + Error parseSyncScopeNames(); Expected<Value *> recordValue(SmallVectorImpl<uint64_t> &Record, unsigned NameIndex, Triple &TT); @@ -668,6 +670,8 @@ private: Error findFunctionInStream( Function *F, DenseMap<Function *, uint64_t>::iterator DeferredFunctionInfoIterator); + + SyncScope::ID getDecodedSyncScopeID(unsigned Val); }; /// Class to manage reading and parsing function summary index bitcode @@ -998,14 +1002,6 @@ static AtomicOrdering getDecodedOrdering(unsigned Val) { } } -static SynchronizationScope getDecodedSynchScope(unsigned Val) { - switch (Val) { - case bitc::SYNCHSCOPE_SINGLETHREAD: return SingleThread; - default: // Map unknown scopes to cross-thread. 
- case bitc::SYNCHSCOPE_CROSSTHREAD: return CrossThread; - } -} - static Comdat::SelectionKind getDecodedComdatSelectionKind(unsigned Val) { switch (Val) { default: // Map unknown selection kinds to any. @@ -1745,6 +1741,44 @@ Error BitcodeReader::parseOperandBundleTags() { } } +Error BitcodeReader::parseSyncScopeNames() { + if (Stream.EnterSubBlock(bitc::SYNC_SCOPE_NAMES_BLOCK_ID)) + return error("Invalid record"); + + if (!SSIDs.empty()) + return error("Invalid multiple synchronization scope names blocks"); + + SmallVector<uint64_t, 64> Record; + while (true) { + BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); + switch (Entry.Kind) { + case BitstreamEntry::SubBlock: // Handled for us already. + case BitstreamEntry::Error: + return error("Malformed block"); + case BitstreamEntry::EndBlock: + if (SSIDs.empty()) + return error("Invalid empty synchronization scope names block"); + return Error::success(); + case BitstreamEntry::Record: + // The interesting case. + break; + } + + // Synchronization scope names are implicitly mapped to synchronization + // scope IDs by their order. + + if (Stream.readRecord(Entry.ID, Record) != bitc::SYNC_SCOPE_NAME) + return error("Invalid record"); + + SmallString<16> SSN; + if (convertToString(Record, 0, SSN)) + return error("Invalid record"); + + SSIDs.push_back(Context.getOrInsertSyncScopeID(SSN)); + Record.clear(); + } +} + /// Associate a value with its name from the given index in the provided record. Expected<Value *> BitcodeReader::recordValue(SmallVectorImpl<uint64_t> &Record, unsigned NameIndex, Triple &TT) { @@ -3132,6 +3166,10 @@ Error BitcodeReader::parseModule(uint64_t ResumeBit, if (Error Err = parseOperandBundleTags()) return Err; break; + case bitc::SYNC_SCOPE_NAMES_BLOCK_ID: + if (Error Err = parseSyncScopeNames()) + return Err; + break; } continue; @@ -4204,7 +4242,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) { break; } case bitc::FUNC_CODE_INST_LOADATOMIC: { - // LOADATOMIC: [opty, op, align, vol, ordering, synchscope] + // LOADATOMIC: [opty, op, align, vol, ordering, ssid] unsigned OpNum = 0; Value *Op; if (getValueTypePair(Record, OpNum, NextValueNo, Op) || @@ -4226,12 +4264,12 @@ Error BitcodeReader::parseFunctionBody(Function *F) { return error("Invalid record"); if (Ordering != AtomicOrdering::NotAtomic && Record[OpNum] == 0) return error("Invalid record"); - SynchronizationScope SynchScope = getDecodedSynchScope(Record[OpNum + 3]); + SyncScope::ID SSID = getDecodedSyncScopeID(Record[OpNum + 3]); unsigned Align; if (Error Err = parseAlignmentValue(Record[OpNum], Align)) return Err; - I = new LoadInst(Op, "", Record[OpNum+1], Align, Ordering, SynchScope); + I = new LoadInst(Op, "", Record[OpNum+1], Align, Ordering, SSID); InstructionList.push_back(I); break; @@ -4260,7 +4298,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) { } case bitc::FUNC_CODE_INST_STOREATOMIC: case bitc::FUNC_CODE_INST_STOREATOMIC_OLD: { - // STOREATOMIC: [ptrty, ptr, val, align, vol, ordering, synchscope] + // STOREATOMIC: [ptrty, ptr, val, align, vol, ordering, ssid] unsigned OpNum = 0; Value *Val, *Ptr; if (getValueTypePair(Record, OpNum, NextValueNo, Ptr) || @@ -4280,20 +4318,20 @@ Error BitcodeReader::parseFunctionBody(Function *F) { Ordering == AtomicOrdering::Acquire || Ordering == AtomicOrdering::AcquireRelease) return error("Invalid record"); - SynchronizationScope SynchScope = getDecodedSynchScope(Record[OpNum + 3]); + SyncScope::ID SSID = getDecodedSyncScopeID(Record[OpNum + 3]); if (Ordering != AtomicOrdering::NotAtomic 
&& Record[OpNum] == 0) return error("Invalid record"); unsigned Align; if (Error Err = parseAlignmentValue(Record[OpNum], Align)) return Err; - I = new StoreInst(Val, Ptr, Record[OpNum+1], Align, Ordering, SynchScope); + I = new StoreInst(Val, Ptr, Record[OpNum+1], Align, Ordering, SSID); InstructionList.push_back(I); break; } case bitc::FUNC_CODE_INST_CMPXCHG_OLD: case bitc::FUNC_CODE_INST_CMPXCHG: { - // CMPXCHG:[ptrty, ptr, cmp, new, vol, successordering, synchscope, + // CMPXCHG:[ptrty, ptr, cmp, new, vol, successordering, ssid, // failureordering?, isweak?] unsigned OpNum = 0; Value *Ptr, *Cmp, *New; @@ -4310,7 +4348,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) { if (SuccessOrdering == AtomicOrdering::NotAtomic || SuccessOrdering == AtomicOrdering::Unordered) return error("Invalid record"); - SynchronizationScope SynchScope = getDecodedSynchScope(Record[OpNum + 2]); + SyncScope::ID SSID = getDecodedSyncScopeID(Record[OpNum + 2]); if (Error Err = typeCheckLoadStoreInst(Cmp->getType(), Ptr->getType())) return Err; @@ -4322,7 +4360,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) { FailureOrdering = getDecodedOrdering(Record[OpNum + 3]); I = new AtomicCmpXchgInst(Ptr, Cmp, New, SuccessOrdering, FailureOrdering, - SynchScope); + SSID); cast<AtomicCmpXchgInst>(I)->setVolatile(Record[OpNum]); if (Record.size() < 8) { @@ -4339,7 +4377,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) { break; } case bitc::FUNC_CODE_INST_ATOMICRMW: { - // ATOMICRMW:[ptrty, ptr, val, op, vol, ordering, synchscope] + // ATOMICRMW:[ptrty, ptr, val, op, vol, ordering, ssid] unsigned OpNum = 0; Value *Ptr, *Val; if (getValueTypePair(Record, OpNum, NextValueNo, Ptr) || @@ -4356,13 +4394,13 @@ Error BitcodeReader::parseFunctionBody(Function *F) { if (Ordering == AtomicOrdering::NotAtomic || Ordering == AtomicOrdering::Unordered) return error("Invalid record"); - SynchronizationScope SynchScope = getDecodedSynchScope(Record[OpNum + 3]); - I = new AtomicRMWInst(Operation, Ptr, Val, Ordering, SynchScope); + SyncScope::ID SSID = getDecodedSyncScopeID(Record[OpNum + 3]); + I = new AtomicRMWInst(Operation, Ptr, Val, Ordering, SSID); cast<AtomicRMWInst>(I)->setVolatile(Record[OpNum+1]); InstructionList.push_back(I); break; } - case bitc::FUNC_CODE_INST_FENCE: { // FENCE:[ordering, synchscope] + case bitc::FUNC_CODE_INST_FENCE: { // FENCE:[ordering, ssid] if (2 != Record.size()) return error("Invalid record"); AtomicOrdering Ordering = getDecodedOrdering(Record[0]); @@ -4370,8 +4408,8 @@ Error BitcodeReader::parseFunctionBody(Function *F) { Ordering == AtomicOrdering::Unordered || Ordering == AtomicOrdering::Monotonic) return error("Invalid record"); - SynchronizationScope SynchScope = getDecodedSynchScope(Record[1]); - I = new FenceInst(Context, Ordering, SynchScope); + SyncScope::ID SSID = getDecodedSyncScopeID(Record[1]); + I = new FenceInst(Context, Ordering, SSID); InstructionList.push_back(I); break; } @@ -4567,6 +4605,14 @@ Error BitcodeReader::findFunctionInStream( return Error::success(); } +SyncScope::ID BitcodeReader::getDecodedSyncScopeID(unsigned Val) { + if (Val == SyncScope::SingleThread || Val == SyncScope::System) + return SyncScope::ID(Val); + if (Val >= SSIDs.size()) + return SyncScope::System; // Map unknown synchronization scopes to system. 
+ return SSIDs[Val]; +} + //===----------------------------------------------------------------------===// // GVMaterializer implementation //===----------------------------------------------------------------------===// diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp index b2b1ea6de374c..0e518d2bbc8ff 100644 --- a/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -114,6 +114,8 @@ class ModuleBitcodeWriter : public BitcodeWriterBase { /// True if a module hash record should be written. bool GenerateHash; + SHA1 Hasher; + /// If non-null, when GenerateHash is true, the resulting hash is written /// into ModHash. When GenerateHash is false, that specified value /// is used as the hash instead of computing from the generated bitcode. @@ -176,6 +178,8 @@ public: private: uint64_t bitcodeStartBit() { return BitcodeStartBit; } + size_t addToStrtab(StringRef Str); + void writeAttributeGroupTable(); void writeAttributeTable(); void writeTypeTable(); @@ -262,6 +266,7 @@ private: const GlobalObject &GO); void writeModuleMetadataKinds(); void writeOperandBundleTags(); + void writeSyncScopeNames(); void writeConstants(unsigned FirstVal, unsigned LastVal, bool isGlobal); void writeModuleConstants(); bool pushValueAndType(const Value *V, unsigned InstID, @@ -312,6 +317,10 @@ private: return VE.getValueID(VI.getValue()); } std::map<GlobalValue::GUID, unsigned> &valueIds() { return GUIDToValueIdMap; } + + unsigned getEncodedSyncScopeID(SyncScope::ID SSID) { + return unsigned(SSID); + } }; /// Class to manage the bitcode writing for a combined index. @@ -481,14 +490,6 @@ static unsigned getEncodedOrdering(AtomicOrdering Ordering) { llvm_unreachable("Invalid ordering"); } -static unsigned getEncodedSynchScope(SynchronizationScope SynchScope) { - switch (SynchScope) { - case SingleThread: return bitc::SYNCHSCOPE_SINGLETHREAD; - case CrossThread: return bitc::SYNCHSCOPE_CROSSTHREAD; - } - llvm_unreachable("Invalid synch scope"); -} - static void writeStringRecord(BitstreamWriter &Stream, unsigned Code, StringRef Str, unsigned AbbrevToUse) { SmallVector<unsigned, 64> Vals; @@ -947,11 +948,17 @@ static unsigned getEncodedUnnamedAddr(const GlobalValue &GV) { llvm_unreachable("Invalid unnamed_addr"); } +size_t ModuleBitcodeWriter::addToStrtab(StringRef Str) { + if (GenerateHash) + Hasher.update(Str); + return StrtabBuilder.add(Str); +} + void ModuleBitcodeWriter::writeComdats() { SmallVector<unsigned, 64> Vals; for (const Comdat *C : VE.getComdats()) { // COMDAT: [strtab offset, strtab size, selection_kind] - Vals.push_back(StrtabBuilder.add(C->getName())); + Vals.push_back(addToStrtab(C->getName())); Vals.push_back(C->getName().size()); Vals.push_back(getEncodedComdatSelectionKind(*C)); Stream.EmitRecord(bitc::MODULE_CODE_COMDAT, Vals, /*AbbrevToUse=*/0); @@ -1122,7 +1129,7 @@ void ModuleBitcodeWriter::writeModuleInfo() { // linkage, alignment, section, visibility, threadlocal, // unnamed_addr, externally_initialized, dllstorageclass, // comdat, attributes] - Vals.push_back(StrtabBuilder.add(GV.getName())); + Vals.push_back(addToStrtab(GV.getName())); Vals.push_back(GV.getName().size()); Vals.push_back(VE.getTypeID(GV.getValueType())); Vals.push_back(GV.getType()->getAddressSpace() << 2 | 2 | GV.isConstant()); @@ -1161,7 +1168,7 @@ void ModuleBitcodeWriter::writeModuleInfo() { // linkage, paramattrs, alignment, section, visibility, gc, // unnamed_addr, prologuedata, dllstorageclass, comdat, // prefixdata, personalityfn] - 
Vals.push_back(StrtabBuilder.add(F.getName())); + Vals.push_back(addToStrtab(F.getName())); Vals.push_back(F.getName().size()); Vals.push_back(VE.getTypeID(F.getFunctionType())); Vals.push_back(F.getCallingConv()); @@ -1191,7 +1198,7 @@ void ModuleBitcodeWriter::writeModuleInfo() { for (const GlobalAlias &A : M.aliases()) { // ALIAS: [strtab offset, strtab size, alias type, aliasee val#, linkage, // visibility, dllstorageclass, threadlocal, unnamed_addr] - Vals.push_back(StrtabBuilder.add(A.getName())); + Vals.push_back(addToStrtab(A.getName())); Vals.push_back(A.getName().size()); Vals.push_back(VE.getTypeID(A.getValueType())); Vals.push_back(A.getType()->getAddressSpace()); @@ -1210,7 +1217,7 @@ void ModuleBitcodeWriter::writeModuleInfo() { for (const GlobalIFunc &I : M.ifuncs()) { // IFUNC: [strtab offset, strtab size, ifunc type, address space, resolver // val#, linkage, visibility] - Vals.push_back(StrtabBuilder.add(I.getName())); + Vals.push_back(addToStrtab(I.getName())); Vals.push_back(I.getName().size()); Vals.push_back(VE.getTypeID(I.getValueType())); Vals.push_back(I.getType()->getAddressSpace()); @@ -2032,6 +2039,24 @@ void ModuleBitcodeWriter::writeOperandBundleTags() { Stream.ExitBlock(); } +void ModuleBitcodeWriter::writeSyncScopeNames() { + SmallVector<StringRef, 8> SSNs; + M.getContext().getSyncScopeNames(SSNs); + if (SSNs.empty()) + return; + + Stream.EnterSubblock(bitc::SYNC_SCOPE_NAMES_BLOCK_ID, 2); + + SmallVector<uint64_t, 64> Record; + for (auto SSN : SSNs) { + Record.append(SSN.begin(), SSN.end()); + Stream.EmitRecord(bitc::SYNC_SCOPE_NAME, Record, 0); + Record.clear(); + } + + Stream.ExitBlock(); +} + static void emitSignedInt64(SmallVectorImpl<uint64_t> &Vals, uint64_t V) { if ((int64_t)V >= 0) Vals.push_back(V << 1); @@ -2648,7 +2673,7 @@ void ModuleBitcodeWriter::writeInstruction(const Instruction &I, Vals.push_back(cast<LoadInst>(I).isVolatile()); if (cast<LoadInst>(I).isAtomic()) { Vals.push_back(getEncodedOrdering(cast<LoadInst>(I).getOrdering())); - Vals.push_back(getEncodedSynchScope(cast<LoadInst>(I).getSynchScope())); + Vals.push_back(getEncodedSyncScopeID(cast<LoadInst>(I).getSyncScopeID())); } break; case Instruction::Store: @@ -2662,7 +2687,8 @@ void ModuleBitcodeWriter::writeInstruction(const Instruction &I, Vals.push_back(cast<StoreInst>(I).isVolatile()); if (cast<StoreInst>(I).isAtomic()) { Vals.push_back(getEncodedOrdering(cast<StoreInst>(I).getOrdering())); - Vals.push_back(getEncodedSynchScope(cast<StoreInst>(I).getSynchScope())); + Vals.push_back( + getEncodedSyncScopeID(cast<StoreInst>(I).getSyncScopeID())); } break; case Instruction::AtomicCmpXchg: @@ -2674,7 +2700,7 @@ void ModuleBitcodeWriter::writeInstruction(const Instruction &I, Vals.push_back( getEncodedOrdering(cast<AtomicCmpXchgInst>(I).getSuccessOrdering())); Vals.push_back( - getEncodedSynchScope(cast<AtomicCmpXchgInst>(I).getSynchScope())); + getEncodedSyncScopeID(cast<AtomicCmpXchgInst>(I).getSyncScopeID())); Vals.push_back( getEncodedOrdering(cast<AtomicCmpXchgInst>(I).getFailureOrdering())); Vals.push_back(cast<AtomicCmpXchgInst>(I).isWeak()); @@ -2688,12 +2714,12 @@ void ModuleBitcodeWriter::writeInstruction(const Instruction &I, Vals.push_back(cast<AtomicRMWInst>(I).isVolatile()); Vals.push_back(getEncodedOrdering(cast<AtomicRMWInst>(I).getOrdering())); Vals.push_back( - getEncodedSynchScope(cast<AtomicRMWInst>(I).getSynchScope())); + getEncodedSyncScopeID(cast<AtomicRMWInst>(I).getSyncScopeID())); break; case Instruction::Fence: Code = bitc::FUNC_CODE_INST_FENCE; 
Vals.push_back(getEncodedOrdering(cast<FenceInst>(I).getOrdering())); - Vals.push_back(getEncodedSynchScope(cast<FenceInst>(I).getSynchScope())); + Vals.push_back(getEncodedSyncScopeID(cast<FenceInst>(I).getSyncScopeID())); break; case Instruction::Call: { const CallInst &CI = cast<CallInst>(I); @@ -3648,7 +3674,6 @@ void ModuleBitcodeWriter::writeModuleHash(size_t BlockStartPos) { // Emit the module's hash. // MODULE_CODE_HASH: [5*i32] if (GenerateHash) { - SHA1 Hasher; uint32_t Vals[5]; Hasher.update(ArrayRef<uint8_t>((const uint8_t *)&(Buffer)[BlockStartPos], Buffer.size() - BlockStartPos)); @@ -3707,6 +3732,7 @@ void ModuleBitcodeWriter::write() { writeUseListBlock(nullptr); writeOperandBundleTags(); + writeSyncScopeNames(); // Emit function bodies. DenseMap<const Function *, uint64_t> FunctionToBitcodeIndex; diff --git a/lib/CodeGen/AtomicExpandPass.cpp b/lib/CodeGen/AtomicExpandPass.cpp index 344136b1f1956..aa9c8e94d08a3 100644 --- a/lib/CodeGen/AtomicExpandPass.cpp +++ b/lib/CodeGen/AtomicExpandPass.cpp @@ -361,7 +361,7 @@ LoadInst *AtomicExpand::convertAtomicLoadToIntegerType(LoadInst *LI) { auto *NewLI = Builder.CreateLoad(NewAddr); NewLI->setAlignment(LI->getAlignment()); NewLI->setVolatile(LI->isVolatile()); - NewLI->setAtomic(LI->getOrdering(), LI->getSynchScope()); + NewLI->setAtomic(LI->getOrdering(), LI->getSyncScopeID()); DEBUG(dbgs() << "Replaced " << *LI << " with " << *NewLI << "\n"); Value *NewVal = Builder.CreateBitCast(NewLI, LI->getType()); @@ -444,7 +444,7 @@ StoreInst *AtomicExpand::convertAtomicStoreToIntegerType(StoreInst *SI) { StoreInst *NewSI = Builder.CreateStore(NewVal, NewAddr); NewSI->setAlignment(SI->getAlignment()); NewSI->setVolatile(SI->isVolatile()); - NewSI->setAtomic(SI->getOrdering(), SI->getSynchScope()); + NewSI->setAtomic(SI->getOrdering(), SI->getSyncScopeID()); DEBUG(dbgs() << "Replaced " << *SI << " with " << *NewSI << "\n"); SI->eraseFromParent(); return NewSI; @@ -801,7 +801,7 @@ void AtomicExpand::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) { Value *FullWord_Cmp = Builder.CreateOr(Loaded_MaskOut, Cmp_Shifted); AtomicCmpXchgInst *NewCI = Builder.CreateAtomicCmpXchg( PMV.AlignedAddr, FullWord_Cmp, FullWord_NewVal, CI->getSuccessOrdering(), - CI->getFailureOrdering(), CI->getSynchScope()); + CI->getFailureOrdering(), CI->getSyncScopeID()); NewCI->setVolatile(CI->isVolatile()); // When we're building a strong cmpxchg, we need a loop, so you // might think we could use a weak cmpxchg inside. 
But, using strong @@ -924,7 +924,7 @@ AtomicCmpXchgInst *AtomicExpand::convertCmpXchgToIntegerType(AtomicCmpXchgInst * auto *NewCI = Builder.CreateAtomicCmpXchg(NewAddr, NewCmp, NewNewVal, CI->getSuccessOrdering(), CI->getFailureOrdering(), - CI->getSynchScope()); + CI->getSyncScopeID()); NewCI->setVolatile(CI->isVolatile()); NewCI->setWeak(CI->isWeak()); DEBUG(dbgs() << "Replaced " << *CI << " with " << *NewCI << "\n"); diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp index faa5f139cf7b3..b7fd45a3f6a66 100644 --- a/lib/CodeGen/CodeGen.cpp +++ b/lib/CodeGen/CodeGen.cpp @@ -78,6 +78,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializePreISelIntrinsicLoweringLegacyPassPass(Registry); initializeProcessImplicitDefsPass(Registry); initializeRABasicPass(Registry); + initializeRAFastPass(Registry); initializeRAGreedyPass(Registry); initializeRegisterCoalescerPass(Registry); initializeRenameIndependentSubregsPass(Registry); diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp index b50e76f2e3ba2..b7155ac2480a7 100644 --- a/lib/CodeGen/CodeGenPrepare.cpp +++ b/lib/CodeGen/CodeGenPrepare.cpp @@ -4270,6 +4270,7 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, Value *Consensus = nullptr; unsigned NumUsesConsensus = 0; bool IsNumUsesConsensusValid = false; + bool PhiSeen = false; SmallVector<Instruction*, 16> AddrModeInsts; ExtAddrMode AddrMode; TypePromotionTransaction TPT(RemovedInsts); @@ -4289,6 +4290,7 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, if (PHINode *P = dyn_cast<PHINode>(V)) { for (Value *IncValue : P->incoming_values()) worklist.push_back(IncValue); + PhiSeen = true; continue; } @@ -4342,9 +4344,10 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, TPT.commit(); // If all the instructions matched are already in this BB, don't do anything. - if (none_of(AddrModeInsts, [&](Value *V) { + // If we saw a Phi node then the address is definitely not local. + if (!PhiSeen && none_of(AddrModeInsts, [&](Value *V) { return IsNonLocalValue(V, MemoryInst->getParent()); - })) { + })) { DEBUG(dbgs() << "CGP: Found local addrmode: " << AddrMode << "\n"); return false; } @@ -4390,6 +4393,20 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, AddrMode.Scale = 0; } + // It is only safe to sign extend the BaseReg if we know that the math + // required to create it did not overflow before we extend it. Since + // the original IR value was tossed in favor of a constant back when + // the AddrMode was created we need to bail out gracefully if widths + // do not match instead of extending it. + // + // (See below for code to add the scale.) + if (AddrMode.Scale) { + Type *ScaledRegTy = AddrMode.ScaledReg->getType(); + if (cast<IntegerType>(IntPtrTy)->getBitWidth() > + cast<IntegerType>(ScaledRegTy)->getBitWidth()) + return false; + } + if (AddrMode.BaseGV) { if (ResultPtr) return false; @@ -4440,19 +4457,11 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, Value *V = AddrMode.ScaledReg; if (V->getType() == IntPtrTy) { // done. - } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() < - cast<IntegerType>(V->getType())->getBitWidth() && { - V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr"); } else { - // It is only safe to sign extend the BaseReg if we know that the math - // required to create it did not overflow before we extend it.
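// A worked instance of the hazard described above, assuming a 64-bit pointer
// and a 32-bit ScaledReg (standalone sketch; values are illustrative):
#include <cstdint>
#include <cstdio>
int main() {
  // 32-bit index math that has already wrapped around.
  int32_t Narrow = (int32_t)((uint32_t)INT32_MAX + 1u); // INT32_MIN
  int64_t Widened = (int64_t)Narrow;      // sign extension: 0xFFFFFFFF80000000
  int64_t Exact = (int64_t)INT32_MAX + 1; // 64-bit math:    0x0000000080000000
  // The two disagree, so widening after the fact would compute a different
  // address; hence the bit-width check bails out instead of extending.
  printf("%lld != %lld\n", (long long)Widened, (long long)Exact);
  return 0;
}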
Since - // the original IR value was tossed in favor of a constant back when - // the AddrMode was created we need to bail out gracefully if widths - // do not match instead of extending it. - Instruction *I = dyn_cast_or_null<Instruction>(ResultIndex); - if (I && (ResultIndex != AddrMode.BaseReg)) - I->eraseFromParent(); - return false; + assert(cast<IntegerType>(IntPtrTy)->getBitWidth() < + cast<IntegerType>(V->getType())->getBitWidth() && + "We can't transform if ScaledReg is too narrow"); + V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr"); } if (AddrMode.Scale != 1) diff --git a/lib/CodeGen/GlobalISel/IRTranslator.cpp b/lib/CodeGen/GlobalISel/IRTranslator.cpp index 521037f9d206b..ed1bd995e60be 100644 --- a/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -345,7 +345,7 @@ bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) { *MF->getMachineMemOperand(MachinePointerInfo(LI.getPointerOperand()), Flags, DL->getTypeStoreSize(LI.getType()), getMemOpAlignment(LI), AAMDNodes(), nullptr, - LI.getSynchScope(), LI.getOrdering())); + LI.getSyncScopeID(), LI.getOrdering())); return true; } @@ -363,7 +363,7 @@ bool IRTranslator::translateStore(const User &U, MachineIRBuilder &MIRBuilder) { *MF->getMachineMemOperand( MachinePointerInfo(SI.getPointerOperand()), Flags, DL->getTypeStoreSize(SI.getValueOperand()->getType()), - getMemOpAlignment(SI), AAMDNodes(), nullptr, SI.getSynchScope(), + getMemOpAlignment(SI), AAMDNodes(), nullptr, SI.getSyncScopeID(), SI.getOrdering())); return true; } diff --git a/lib/CodeGen/GlobalISel/InstructionSelector.cpp b/lib/CodeGen/GlobalISel/InstructionSelector.cpp index 860fc9a4f8b61..bf427225d6a96 100644 --- a/lib/CodeGen/GlobalISel/InstructionSelector.cpp +++ b/lib/CodeGen/GlobalISel/InstructionSelector.cpp @@ -16,7 +16,11 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/MC/MCInstrDesc.h" +#include "llvm/IR/Constants.h" +#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetRegisterInfo.h" @@ -26,6 +30,9 @@ using namespace llvm; +InstructionSelector::MatcherState::MatcherState(unsigned MaxRenderers) + : Renderers(MaxRenderers, nullptr), MIs() {} + InstructionSelector::InstructionSelector() = default; bool InstructionSelector::constrainOperandRegToRegClass( diff --git a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 84b0a0ac41579..49fb5e8f075b8 100644 --- a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -99,23 +99,19 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) { llvm_unreachable("Unknown libcall function"); } -LegalizerHelper::LegalizeResult llvm::replaceWithLibcall( - MachineInstr &MI, MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall, - const CallLowering::ArgInfo &Result, ArrayRef<CallLowering::ArgInfo> Args) { +LegalizerHelper::LegalizeResult +llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall, + const CallLowering::ArgInfo &Result, + ArrayRef<CallLowering::ArgInfo> Args) { auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering(); auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering(); const char *Name = TLI.getLibcallName(Libcall); + 
MIRBuilder.getMF().getFrameInfo().setHasCalls(true); - MIRBuilder.setInstr(MI); if (!CLI.lowerCall(MIRBuilder, TLI.getLibcallCallingConv(Libcall), MachineOperand::CreateES(Name), Result, Args)) return LegalizerHelper::UnableToLegalize; - // We're about to remove MI, so move the insert point after it. - MIRBuilder.setInsertPt(MIRBuilder.getMBB(), - std::next(MIRBuilder.getInsertPt())); - - MI.eraseFromParent(); return LegalizerHelper::Legalized; } @@ -123,10 +119,9 @@ static LegalizerHelper::LegalizeResult simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, Type *OpType) { auto Libcall = getRTLibDesc(MI.getOpcode(), Size); - return replaceWithLibcall(MI, MIRBuilder, Libcall, - {MI.getOperand(0).getReg(), OpType}, - {{MI.getOperand(1).getReg(), OpType}, - {MI.getOperand(2).getReg(), OpType}}); + return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), OpType}, + {{MI.getOperand(1).getReg(), OpType}, + {MI.getOperand(2).getReg(), OpType}}); } LegalizerHelper::LegalizeResult @@ -135,6 +130,8 @@ LegalizerHelper::libcall(MachineInstr &MI) { unsigned Size = LLTy.getSizeInBits(); auto &Ctx = MIRBuilder.getMF().getFunction()->getContext(); + MIRBuilder.setInstr(MI); + switch (MI.getOpcode()) { default: return UnableToLegalize; @@ -143,15 +140,24 @@ LegalizerHelper::libcall(MachineInstr &MI) { case TargetOpcode::G_SREM: case TargetOpcode::G_UREM: { Type *HLTy = Type::getInt32Ty(Ctx); - return simpleLibcall(MI, MIRBuilder, Size, HLTy); + auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy); + if (Status != Legalized) + return Status; + break; } case TargetOpcode::G_FADD: case TargetOpcode::G_FPOW: case TargetOpcode::G_FREM: { Type *HLTy = Size == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx); - return simpleLibcall(MI, MIRBuilder, Size, HLTy); + auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy); + if (Status != Legalized) + return Status; + break; } } + + MI.eraseFromParent(); + return Legalized; } LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, diff --git a/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp index 47c6214c05528..4636806c3f081 100644 --- a/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -166,19 +166,24 @@ MachineInstrBuilder MachineIRBuilder::buildGlobalValue(unsigned Res, .addGlobalAddress(GV); } -MachineInstrBuilder MachineIRBuilder::buildAdd(unsigned Res, unsigned Op0, +MachineInstrBuilder MachineIRBuilder::buildBinaryOp(unsigned Opcode, unsigned Res, unsigned Op0, unsigned Op1) { assert((MRI->getType(Res).isScalar() || MRI->getType(Res).isVector()) && "invalid operand type"); assert(MRI->getType(Res) == MRI->getType(Op0) && MRI->getType(Res) == MRI->getType(Op1) && "type mismatch"); - return buildInstr(TargetOpcode::G_ADD) + return buildInstr(Opcode) .addDef(Res) .addUse(Op0) .addUse(Op1); } +MachineInstrBuilder MachineIRBuilder::buildAdd(unsigned Res, unsigned Op0, + unsigned Op1) { + return buildBinaryOp(TargetOpcode::G_ADD, Res, Op0, Op1); +} + MachineInstrBuilder MachineIRBuilder::buildGEP(unsigned Res, unsigned Op0, unsigned Op1) { assert(MRI->getType(Res).isPointer() && @@ -222,41 +227,22 @@ MachineInstrBuilder MachineIRBuilder::buildPtrMask(unsigned Res, unsigned Op0, MachineInstrBuilder MachineIRBuilder::buildSub(unsigned Res, unsigned Op0, unsigned Op1) { - assert((MRI->getType(Res).isScalar() || MRI->getType(Res).isVector()) && - "invalid operand type"); - assert(MRI->getType(Res) == MRI->getType(Op0) 
&& - MRI->getType(Res) == MRI->getType(Op1) && "type mismatch"); - - return buildInstr(TargetOpcode::G_SUB) - .addDef(Res) - .addUse(Op0) - .addUse(Op1); + return buildBinaryOp(TargetOpcode::G_SUB, Res, Op0, Op1); } MachineInstrBuilder MachineIRBuilder::buildMul(unsigned Res, unsigned Op0, unsigned Op1) { - assert((MRI->getType(Res).isScalar() || MRI->getType(Res).isVector()) && - "invalid operand type"); - assert(MRI->getType(Res) == MRI->getType(Op0) && - MRI->getType(Res) == MRI->getType(Op1) && "type mismatch"); - - return buildInstr(TargetOpcode::G_MUL) - .addDef(Res) - .addUse(Op0) - .addUse(Op1); + return buildBinaryOp(TargetOpcode::G_MUL, Res, Op0, Op1); } MachineInstrBuilder MachineIRBuilder::buildAnd(unsigned Res, unsigned Op0, unsigned Op1) { - assert((MRI->getType(Res).isScalar() || MRI->getType(Res).isVector()) && - "invalid operand type"); - assert(MRI->getType(Res) == MRI->getType(Op0) && - MRI->getType(Res) == MRI->getType(Op1) && "type mismatch"); + return buildBinaryOp(TargetOpcode::G_AND, Res, Op0, Op1); +} - return buildInstr(TargetOpcode::G_AND) - .addDef(Res) - .addUse(Op0) - .addUse(Op1); +MachineInstrBuilder MachineIRBuilder::buildOr(unsigned Res, unsigned Op0, + unsigned Op1) { + return buildBinaryOp(TargetOpcode::G_OR, Res, Op0, Op1); } MachineInstrBuilder MachineIRBuilder::buildBr(MachineBasicBlock &Dest) { diff --git a/lib/CodeGen/LiveRegUnits.cpp b/lib/CodeGen/LiveRegUnits.cpp index 3746b74e0528a..f9ba4ffa6527c 100644 --- a/lib/CodeGen/LiveRegUnits.cpp +++ b/lib/CodeGen/LiveRegUnits.cpp @@ -67,7 +67,7 @@ void LiveRegUnits::stepBackward(const MachineInstr &MI) { } } -void LiveRegUnits::accumulateBackward(const MachineInstr &MI) { +void LiveRegUnits::accumulate(const MachineInstr &MI) { // Add defs, uses and regmask clobbers to the set. 
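// With buildBinaryOp factored out above, each binary builder reduces to a
// one-liner. A sketch of one more wrapper under the same pattern (buildXor
// is hypothetical here, not part of this change):
MachineInstrBuilder MachineIRBuilder::buildXor(unsigned Res, unsigned Op0,
                                               unsigned Op1) {
  return buildBinaryOp(TargetOpcode::G_XOR, Res, Op0, Op1);
}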
for (ConstMIBundleOperands O(MI); O.isValid(); ++O) { if (O->isReg()) { diff --git a/lib/CodeGen/MIRParser/MILexer.cpp b/lib/CodeGen/MIRParser/MILexer.cpp index 1f1ce6e8d7250..58a655a4dee4f 100644 --- a/lib/CodeGen/MIRParser/MILexer.cpp +++ b/lib/CodeGen/MIRParser/MILexer.cpp @@ -365,6 +365,14 @@ static Cursor maybeLexIRValue(Cursor C, MIToken &Token, return lexName(C, Token, MIToken::NamedIRValue, Rule.size(), ErrorCallback); } +static Cursor maybeLexStringConstant(Cursor C, MIToken &Token, + ErrorCallbackType ErrorCallback) { + if (C.peek() != '"') + return None; + return lexName(C, Token, MIToken::StringConstant, /*PrefixLength=*/0, + ErrorCallback); +} + static Cursor lexVirtualRegister(Cursor C, MIToken &Token) { auto Range = C; C.advance(); // Skip '%' @@ -630,6 +638,8 @@ StringRef llvm::lexMIToken(StringRef Source, MIToken &Token, return R.remaining(); if (Cursor R = maybeLexEscapedIRValue(C, Token, ErrorCallback)) return R.remaining(); + if (Cursor R = maybeLexStringConstant(C, Token, ErrorCallback)) + return R.remaining(); Token.reset(MIToken::Error, C.remaining()); ErrorCallback(C.location(), diff --git a/lib/CodeGen/MIRParser/MILexer.h b/lib/CodeGen/MIRParser/MILexer.h index 3e9513111bf4f..08b82e59c4fc1 100644 --- a/lib/CodeGen/MIRParser/MILexer.h +++ b/lib/CodeGen/MIRParser/MILexer.h @@ -127,7 +127,8 @@ struct MIToken { NamedIRValue, IRValue, QuotedIRValue, // `<constant value>` - SubRegisterIndex + SubRegisterIndex, + StringConstant }; private: @@ -168,7 +169,8 @@ public: bool isMemoryOperandFlag() const { return Kind == kw_volatile || Kind == kw_non_temporal || - Kind == kw_dereferenceable || Kind == kw_invariant; + Kind == kw_dereferenceable || Kind == kw_invariant || + Kind == StringConstant; } bool is(TokenKind K) const { return Kind == K; } diff --git a/lib/CodeGen/MIRParser/MIParser.cpp b/lib/CodeGen/MIRParser/MIParser.cpp index c58d192284dd0..c68d87b15a317 100644 --- a/lib/CodeGen/MIRParser/MIParser.cpp +++ b/lib/CodeGen/MIRParser/MIParser.cpp @@ -141,6 +141,8 @@ class MIParser { StringMap<unsigned> Names2DirectTargetFlags; /// Maps from direct target flag names to the bitmask target flag values. StringMap<unsigned> Names2BitmaskTargetFlags; + /// Maps from MMO target flag names to MMO target flag values. + StringMap<MachineMemOperand::Flags> Names2MMOTargetFlags; public: MIParser(PerFunctionMIParsingState &PFS, SMDiagnostic &Error, @@ -229,6 +231,7 @@ public: bool parseMemoryOperandFlag(MachineMemOperand::Flags &Flags); bool parseMemoryPseudoSourceValue(const PseudoSourceValue *&PSV); bool parseMachinePointerInfo(MachinePointerInfo &Dest); + bool parseOptionalScope(LLVMContext &Context, SyncScope::ID &SSID); bool parseOptionalAtomicOrdering(AtomicOrdering &Order); bool parseMachineMemoryOperand(MachineMemOperand *&Dest); @@ -318,6 +321,18 @@ private: /// /// Return true if the name isn't a name of a bitmask target flag. bool getBitmaskTargetFlag(StringRef Name, unsigned &Flag); + + void initNames2MMOTargetFlags(); + + /// Try to convert a name of a MachineMemOperand target flag to the + /// corresponding target flag. + /// + /// Return true if the name isn't a name of a target MMO flag. 
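// With the StringConstant token added above, a target-specific MMO flag is
// spelled as a quoted string among the memory operand flags, e.g. (flag and
// IR value names are hypothetical):
//
//   :: (volatile "my-target-flag" load 8 from %ir.p)
//
// The legal names come from a TargetInstrInfo hook; a hedged sketch of a
// target override (MyTargetInstrInfo is illustrative):
//
//   ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
//   MyTargetInstrInfo::getSerializableMachineMemOperandTargetFlags() const {
//     static const std::pair<MachineMemOperand::Flags, const char *> Flags[] =
//         {{MachineMemOperand::MOTargetFlag1, "my-target-flag"}};
//     return makeArrayRef(Flags);
//   }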
+ bool getMMOTargetFlag(StringRef Name, MachineMemOperand::Flags &Flag); + + /// parseStringConstant + /// ::= StringConstant + bool parseStringConstant(std::string &Result); }; } // end anonymous namespace @@ -2034,7 +2049,14 @@ bool MIParser::parseMemoryOperandFlag(MachineMemOperand::Flags &Flags) { case MIToken::kw_invariant: Flags |= MachineMemOperand::MOInvariant; break; - // TODO: parse the target specific memory operand flags. + case MIToken::StringConstant: { + MachineMemOperand::Flags TF; + if (getMMOTargetFlag(Token.stringValue(), TF)) + return error("use of undefined target MMO flag '" + Token.stringValue() + + "'"); + Flags |= TF; + break; + } default: llvm_unreachable("The current token should be a memory operand flag"); } @@ -2135,6 +2157,26 @@ bool MIParser::parseMachinePointerInfo(MachinePointerInfo &Dest) { return false; } +bool MIParser::parseOptionalScope(LLVMContext &Context, + SyncScope::ID &SSID) { + SSID = SyncScope::System; + if (Token.is(MIToken::Identifier) && Token.stringValue() == "syncscope") { + lex(); + if (expectAndConsume(MIToken::lparen)) + return error("expected '(' in syncscope"); + + std::string SSN; + if (parseStringConstant(SSN)) + return true; + + SSID = Context.getOrInsertSyncScopeID(SSN); + if (expectAndConsume(MIToken::rparen)) + return error("expected ')' in syncscope"); + } + + return false; +} + bool MIParser::parseOptionalAtomicOrdering(AtomicOrdering &Order) { Order = AtomicOrdering::NotAtomic; if (Token.isNot(MIToken::Identifier)) @@ -2174,12 +2216,10 @@ bool MIParser::parseMachineMemoryOperand(MachineMemOperand *&Dest) { Flags |= MachineMemOperand::MOStore; lex(); - // Optional "singlethread" scope. - SynchronizationScope Scope = SynchronizationScope::CrossThread; - if (Token.is(MIToken::Identifier) && Token.stringValue() == "singlethread") { - Scope = SynchronizationScope::SingleThread; - lex(); - } + // Optional synchronization scope. + SyncScope::ID SSID; + if (parseOptionalScope(MF.getFunction()->getContext(), SSID)) + return true; // Up to two atomic orderings (cmpxchg provides guarantees on failure). 
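// parseOptionalScope above accepts the same spelling printSyncScope emits on
// the MIRPrinter side; an illustrative atomic memory operand (register and
// IR names are hypothetical):
//
//   %2(s32) = G_LOAD %0(p0) :: (load syncscope("agent") seq_cst 4 from %ir.p)
//
// When syncscope(...) is absent, the SyncScope::System default set before the
// token check is kept, matching the printer, which stays silent for the
// system scope.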
AtomicOrdering Order, FailureOrder; @@ -2244,7 +2284,7 @@ bool MIParser::parseMachineMemoryOperand(MachineMemOperand *&Dest) { if (expectAndConsume(MIToken::rparen)) return true; Dest = MF.getMachineMemOperand(Ptr, Flags, Size, BaseAlignment, AAInfo, Range, - Scope, Order, FailureOrder); + SSID, Order, FailureOrder); return false; } @@ -2457,6 +2497,35 @@ bool MIParser::getBitmaskTargetFlag(StringRef Name, unsigned &Flag) { return false; } +void MIParser::initNames2MMOTargetFlags() { + if (!Names2MMOTargetFlags.empty()) + return; + const auto *TII = MF.getSubtarget().getInstrInfo(); + assert(TII && "Expected target instruction info"); + auto Flags = TII->getSerializableMachineMemOperandTargetFlags(); + for (const auto &I : Flags) + Names2MMOTargetFlags.insert( + std::make_pair(StringRef(I.second), I.first)); +} + +bool MIParser::getMMOTargetFlag(StringRef Name, + MachineMemOperand::Flags &Flag) { + initNames2MMOTargetFlags(); + auto FlagInfo = Names2MMOTargetFlags.find(Name); + if (FlagInfo == Names2MMOTargetFlags.end()) + return true; + Flag = FlagInfo->second; + return false; +} + +bool MIParser::parseStringConstant(std::string &Result) { + if (Token.isNot(MIToken::StringConstant)) + return error("expected string constant"); + Result = Token.stringValue(); + lex(); + return false; +} + bool llvm::parseMachineBasicBlockDefinitions(PerFunctionMIParsingState &PFS, StringRef Src, SMDiagnostic &Error) { diff --git a/lib/CodeGen/MIRPrinter.cpp b/lib/CodeGen/MIRPrinter.cpp index c524a9835f338..ddeacf1d1bfb1 100644 --- a/lib/CodeGen/MIRPrinter.cpp +++ b/lib/CodeGen/MIRPrinter.cpp @@ -18,6 +18,7 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" #include "llvm/CodeGen/GlobalISel/RegisterBank.h" @@ -139,6 +140,8 @@ class MIPrinter { ModuleSlotTracker &MST; const DenseMap<const uint32_t *, unsigned> &RegisterMaskIds; const DenseMap<int, FrameIndexOperand> &StackObjectOperandMapping; + /// Synchronization scope names registered with LLVMContext. 
+ SmallVector<StringRef, 8> SSNs; bool canPredictBranchProbabilities(const MachineBasicBlock &MBB) const; bool canPredictSuccessors(const MachineBasicBlock &MBB) const; @@ -162,7 +165,9 @@ public: void print(const MachineOperand &Op, const TargetRegisterInfo *TRI, unsigned I, bool ShouldPrintRegisterTies, LLT TypeToPrint, bool IsDef = false); - void print(const MachineMemOperand &Op); + void print(const LLVMContext &Context, const TargetInstrInfo &TII, + const MachineMemOperand &Op); + void printSyncScope(const LLVMContext &Context, SyncScope::ID SSID); void print(const MCCFIInstruction &CFI, const TargetRegisterInfo *TRI); }; @@ -731,11 +736,12 @@ void MIPrinter::print(const MachineInstr &MI) { if (!MI.memoperands_empty()) { OS << " :: "; + const LLVMContext &Context = MF->getFunction()->getContext(); bool NeedComma = false; for (const auto *Op : MI.memoperands()) { if (NeedComma) OS << ", "; - print(*Op); + print(Context, *TII, *Op); NeedComma = true; } } @@ -1031,9 +1037,20 @@ void MIPrinter::print(const MachineOperand &Op, const TargetRegisterInfo *TRI, } } -void MIPrinter::print(const MachineMemOperand &Op) { +static const char *getTargetMMOFlagName(const TargetInstrInfo &TII, + unsigned TMMOFlag) { + auto Flags = TII.getSerializableMachineMemOperandTargetFlags(); + for (const auto &I : Flags) { + if (I.first == TMMOFlag) { + return I.second; + } + } + return nullptr; +} + +void MIPrinter::print(const LLVMContext &Context, const TargetInstrInfo &TII, + const MachineMemOperand &Op) { OS << '('; - // TODO: Print operand's target specific flags. if (Op.isVolatile()) OS << "volatile "; if (Op.isNonTemporal()) @@ -1042,6 +1059,15 @@ void MIPrinter::print(const MachineMemOperand &Op) { OS << "dereferenceable "; if (Op.isInvariant()) OS << "invariant "; + if (Op.getFlags() & MachineMemOperand::MOTargetFlag1) + OS << '"' << getTargetMMOFlagName(TII, MachineMemOperand::MOTargetFlag1) + << "\" "; + if (Op.getFlags() & MachineMemOperand::MOTargetFlag2) + OS << '"' << getTargetMMOFlagName(TII, MachineMemOperand::MOTargetFlag2) + << "\" "; + if (Op.getFlags() & MachineMemOperand::MOTargetFlag3) + OS << '"' << getTargetMMOFlagName(TII, MachineMemOperand::MOTargetFlag3) + << "\" "; if (Op.isLoad()) OS << "load "; else { @@ -1049,8 +1075,7 @@ void MIPrinter::print(const MachineMemOperand &Op) { OS << "store "; } - if (Op.getSynchScope() == SynchronizationScope::SingleThread) - OS << "singlethread "; + printSyncScope(Context, Op.getSyncScopeID()); if (Op.getOrdering() != AtomicOrdering::NotAtomic) OS << toIRString(Op.getOrdering()) << ' '; @@ -1119,6 +1144,23 @@ void MIPrinter::print(const MachineMemOperand &Op) { OS << ')'; } +void MIPrinter::printSyncScope(const LLVMContext &Context, SyncScope::ID SSID) { + switch (SSID) { + case SyncScope::System: { + break; + } + default: { + if (SSNs.empty()) + Context.getSyncScopeNames(SSNs); + + OS << "syncscope(\""; + PrintEscapedString(SSNs[SSID], OS); + OS << "\") "; + break; + } + } +} + static void printCFIRegister(unsigned DwarfReg, raw_ostream &OS, const TargetRegisterInfo *TRI) { int Reg = TRI->getLLVMRegNum(DwarfReg, true); diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp index 2d4b95974cc64..447ad629885bf 100644 --- a/lib/CodeGen/MachineBlockPlacement.cpp +++ b/lib/CodeGen/MachineBlockPlacement.cpp @@ -1917,6 +1917,12 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain, return; MachineBasicBlock *Top = *LoopChain.begin(); + MachineBasicBlock *Bottom = *std::prev(LoopChain.end()); + + // If 
ExitingBB is already the last block in the chain, there is nothing to do. + if (Bottom == ExitingBB) + return; + bool ViableTopFallthrough = false; for (MachineBasicBlock *Pred : Top->predecessors()) { BlockChain *PredChain = BlockToChain[Pred]; if (!LoopBlockSet.count(Pred) && (!PredChain || Pred == *std::prev(PredChain->end()))) { ViableTopFallthrough = true; break; } } @@ -1931,7 +1937,6 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain, // bottom is a viable exiting block. If so, bail out as rotating will // introduce an unnecessary branch. if (ViableTopFallthrough) { - MachineBasicBlock *Bottom = *std::prev(LoopChain.end()); for (MachineBasicBlock *Succ : Bottom->successors()) { BlockChain *SuccChain = BlockToChain[Succ]; if (!LoopBlockSet.count(Succ) && @@ -1944,6 +1949,36 @@ if (ExitIt == LoopChain.end()) return; + // Rotating a loop exit to the bottom when there is a fallthrough to top + // trades the entry fallthrough for an exit fallthrough. + // If there is no bottom->top edge, but the chosen exit block does have + // a fallthrough, we break that fallthrough for nothing in return. + + // Let's consider an example. We have a built chain of basic blocks + // B1, B2, ..., Bn, where Bk is ExitingBB, the chosen exit block. + // By doing a rotation we get + // Bk+1, ..., Bn, B1, ..., Bk + // The broken fallthrough into B1 is compensated by the new fallthrough out of Bk. + // If there was a fallthrough Bk -> Bk+1, it is now broken. + // That loss may be compensated by a fallthrough Bn -> B1. + // This gives a condition under which loop rotation would create an extra branch. + // We avoid rotation when all of the following hold: + // there is a fallthrough into the top (B1); + // there was a fallthrough from the chosen exit block (Bk) to the next one (Bk+1); + // there is no fallthrough from the bottom (Bn) to the top (B1). + // Note that there is no exit fallthrough from Bn, because that case was checked + // above.
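// As a concrete illustration (block labels hypothetical): take the chain
// B1 B2 B3 B4 with Bk = B2 the chosen exit, a viable fallthrough into B1,
// an existing fallthrough B2 -> B3, and no edge B4 -> B1. Rotation yields
// B3 B4 B1 B2: the entry fallthrough into B1 is traded for the exit
// fallthrough out of B2, but the broken B2 -> B3 edge is compensated by
// nothing, so the check below refuses to rotate.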
+ if (ViableTopFallthrough) { + assert(std::next(ExitIt) != LoopChain.end() && + "Exit should not be last BB"); + MachineBasicBlock *NextBlockInChain = *std::next(ExitIt); + if (ExitingBB->isSuccessor(NextBlockInChain)) + if (!Bottom->isSuccessor(Top)) + return; + } + + DEBUG(dbgs() << "Rotating loop to put exit " << getBlockName(ExitingBB) + << " at bottom\n"); std::rotate(LoopChain.begin(), std::next(ExitIt), LoopChain.end()); } diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index bbdae6e1a49e5..f88e175a97762 100644 --- a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -305,11 +305,11 @@ MachineFunction::DeleteMachineBasicBlock(MachineBasicBlock *MBB) { MachineMemOperand *MachineFunction::getMachineMemOperand( MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, unsigned base_alignment, const AAMDNodes &AAInfo, const MDNode *Ranges, - SynchronizationScope SynchScope, AtomicOrdering Ordering, + SyncScope::ID SSID, AtomicOrdering Ordering, AtomicOrdering FailureOrdering) { return new (Allocator) MachineMemOperand(PtrInfo, f, s, base_alignment, AAInfo, Ranges, - SynchScope, Ordering, FailureOrdering); + SSID, Ordering, FailureOrdering); } MachineMemOperand * @@ -320,13 +320,13 @@ MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO, MachineMemOperand(MachinePointerInfo(MMO->getValue(), MMO->getOffset()+Offset), MMO->getFlags(), Size, MMO->getBaseAlignment(), - AAMDNodes(), nullptr, MMO->getSynchScope(), + AAMDNodes(), nullptr, MMO->getSyncScopeID(), MMO->getOrdering(), MMO->getFailureOrdering()); return new (Allocator) MachineMemOperand(MachinePointerInfo(MMO->getPseudoValue(), MMO->getOffset()+Offset), MMO->getFlags(), Size, MMO->getBaseAlignment(), - AAMDNodes(), nullptr, MMO->getSynchScope(), + AAMDNodes(), nullptr, MMO->getSyncScopeID(), MMO->getOrdering(), MMO->getFailureOrdering()); } @@ -359,7 +359,7 @@ MachineFunction::extractLoadMemRefs(MachineInstr::mmo_iterator Begin, (*I)->getFlags() & ~MachineMemOperand::MOStore, (*I)->getSize(), (*I)->getBaseAlignment(), (*I)->getAAInfo(), nullptr, - (*I)->getSynchScope(), (*I)->getOrdering(), + (*I)->getSyncScopeID(), (*I)->getOrdering(), (*I)->getFailureOrdering()); Result[Index] = JustLoad; } @@ -393,7 +393,7 @@ MachineFunction::extractStoreMemRefs(MachineInstr::mmo_iterator Begin, (*I)->getFlags() & ~MachineMemOperand::MOLoad, (*I)->getSize(), (*I)->getBaseAlignment(), (*I)->getAAInfo(), nullptr, - (*I)->getSynchScope(), (*I)->getOrdering(), + (*I)->getSyncScopeID(), (*I)->getOrdering(), (*I)->getFailureOrdering()); Result[Index] = JustStore; } diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index 81c6dace92e04..afea5575a3ae5 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -447,6 +447,14 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST, SmallString<16> Str; getFPImm()->getValueAPF().toString(Str); OS << "quad " << Str; + } else if (getFPImm()->getType()->isX86_FP80Ty()) { + APFloat APF = getFPImm()->getValueAPF(); + OS << "x86_fp80 0xK"; + APInt API = APF.bitcastToAPInt(); + OS << format_hex_no_prefix(API.getHiBits(16).getZExtValue(), 4, + /*Upper=*/true); + OS << format_hex_no_prefix(API.getLoBits(64).getZExtValue(), 16, + /*Upper=*/true); } else { OS << getFPImm()->getValueAPF().convertToDouble(); } @@ -606,7 +614,7 @@ MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, Flags f, uint64_t s, unsigned int a, const AAMDNodes &AAInfo, const MDNode *Ranges, - 
SynchronizationScope SynchScope, + SyncScope::ID SSID, AtomicOrdering Ordering, AtomicOrdering FailureOrdering) : PtrInfo(ptrinfo), Size(s), FlagVals(f), BaseAlignLog2(Log2_32(a) + 1), @@ -617,8 +625,8 @@ MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, Flags f, assert(getBaseAlignment() == a && "Alignment is not a power of 2!"); assert((isLoad() || isStore()) && "Not a load/store!"); - AtomicInfo.SynchScope = static_cast<unsigned>(SynchScope); - assert(getSynchScope() == SynchScope && "Value truncated"); + AtomicInfo.SSID = static_cast<unsigned>(SSID); + assert(getSyncScopeID() == SSID && "Value truncated"); AtomicInfo.Ordering = static_cast<unsigned>(Ordering); assert(getOrdering() == Ordering && "Value truncated"); AtomicInfo.FailureOrdering = static_cast<unsigned>(FailureOrdering); @@ -744,6 +752,12 @@ void MachineMemOperand::print(raw_ostream &OS, ModuleSlotTracker &MST) const { OS << "(dereferenceable)"; if (isInvariant()) OS << "(invariant)"; + if (getFlags() & MOTargetFlag1) + OS << "(flag1)"; + if (getFlags() & MOTargetFlag2) + OS << "(flag2)"; + if (getFlags() & MOTargetFlag3) + OS << "(flag3)"; } //===----------------------------------------------------------------------===// diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp index e65c256c1bb5a..fcb544806dda0 100644 --- a/lib/CodeGen/MachineVerifier.cpp +++ b/lib/CodeGen/MachineVerifier.cpp @@ -985,6 +985,14 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { report("Operand should be tied", MO, MONum); else if (unsigned(TiedTo) != MI->findTiedOperandIdx(MONum)) report("Tied def doesn't match MCInstrDesc", MO, MONum); + else if (TargetRegisterInfo::isPhysicalRegister(MO->getReg())) { + const MachineOperand &MOTied = MI->getOperand(TiedTo); + if (!MOTied.isReg()) + report("Tied counterpart must be a register", &MOTied, TiedTo); + else if (TargetRegisterInfo::isPhysicalRegister(MOTied.getReg()) && + MO->getReg() != MOTied.getReg()) + report("Tied physical registers must match.", &MOTied, TiedTo); + } } else if (MO->isReg() && MO->isTied()) report("Explicit operand should not be tied", MO, MONum); } else { diff --git a/lib/CodeGen/MacroFusion.cpp b/lib/CodeGen/MacroFusion.cpp index 5e279b065bbda..633a853b2c748 100644 --- a/lib/CodeGen/MacroFusion.cpp +++ b/lib/CodeGen/MacroFusion.cpp @@ -24,7 +24,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" -#define DEBUG_TYPE "misched" +#define DEBUG_TYPE "machine-scheduler" STATISTIC(NumFused, "Number of instr pairs fused"); diff --git a/lib/CodeGen/PostRAHazardRecognizer.cpp b/lib/CodeGen/PostRAHazardRecognizer.cpp index 425a59dc03752..4a50d895340a5 100644 --- a/lib/CodeGen/PostRAHazardRecognizer.cpp +++ b/lib/CodeGen/PostRAHazardRecognizer.cpp @@ -23,7 +23,7 @@ /// This pass traverses all the instructions in a program in top-down order. /// In contrast to the instruction scheduling passes, this pass never resets /// the hazard recognizer to ensure it can correctly handles noop hazards at -/// the begining of blocks. +/// the beginning of blocks. 
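// The x86_fp80 case added to MachineOperand::print above mirrors the IR
// AsmWriter's 0xK form: four upper-case hex digits for sign and exponent,
// then sixteen for the significand. For instance, the long double value 1.0
// (exponent 0x3FFF, explicit integer bit set, zero fraction) prints as
//
//   x86_fp80 0xK3FFF8000000000000000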
// //===----------------------------------------------------------------------===// diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp index c606b7b833104..d5538be4bba25 100644 --- a/lib/CodeGen/RegAllocFast.cpp +++ b/lib/CodeGen/RegAllocFast.cpp @@ -203,6 +203,8 @@ namespace { char RAFast::ID = 0; } +INITIALIZE_PASS(RAFast, "regallocfast", "Fast Register Allocator", false, false) + /// getStackSpaceFor - This allocates space for the specified virtual register /// to be held on the stack. int RAFast::getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC) { @@ -244,8 +246,15 @@ void RAFast::addKillFlag(const LiveReg &LR) { if (MO.isUse() && !LR.LastUse->isRegTiedToDefOperand(LR.LastOpNum)) { if (MO.getReg() == LR.PhysReg) MO.setIsKill(); - else - LR.LastUse->addRegisterKilled(LR.PhysReg, TRI, true); + // else, don't do anything: we are probably redefining a + // subreg of this register and, given we don't track which + // lanes are actually dead, we cannot insert a kill flag here. + // Otherwise we may end up in a situation like this: + // ... = (MO) physreg:sub1, physreg <implicit-use, kill> + // ... <== Here we would allow a later pass to reuse physreg:sub1 + // which is potentially wrong. + // LR:sub0 = ... + // ... = LR.sub1 <== This is going to use physreg:sub1 } } diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp index 9562652556acb..020e81eca2dd2 100644 --- a/lib/CodeGen/RegAllocGreedy.cpp +++ b/lib/CodeGen/RegAllocGreedy.cpp @@ -2458,7 +2458,7 @@ void RAGreedy::tryHintRecoloring(LiveInterval &VirtReg) { do { Reg = RecoloringCandidates.pop_back_val(); - // We cannot recolor physcal register. + // We cannot recolor physical registers. if (TargetRegisterInfo::isPhysicalRegister(Reg)) continue; diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp index e3baff4be4bcf..9778103575fab 100644 --- a/lib/CodeGen/RegAllocPBQP.cpp +++ b/lib/CodeGen/RegAllocPBQP.cpp @@ -924,5 +924,3 @@ FunctionPass *llvm::createPBQPRegisterAllocator(char *customPassID) { FunctionPass* llvm::createDefaultPBQPRegisterAllocator() { return createPBQPRegisterAllocator(); } - -#undef DEBUG_TYPE diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp index ff9bca092dbe5..a67d07b36474a 100644 --- a/lib/CodeGen/RegisterCoalescer.cpp +++ b/lib/CodeGen/RegisterCoalescer.cpp @@ -1227,6 +1227,34 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, SR->createDeadDef(DefIndex, Alloc); } } + + // Make sure that the subrange for the resultant undef is removed. + // For example: + // vreg1:sub1<def,read-undef> = LOAD CONSTANT 1 + // vreg2<def> = COPY vreg1 + // ==> + // vreg2:sub1<def, read-undef> = LOAD CONSTANT 1 + // ; Correct, but we need to remove the subrange for vreg2:sub0 + // ; as it is now undef + if (NewIdx != 0 && DstInt.hasSubRanges()) { + // The affected subregister segments can be removed.
+ SlotIndex CurrIdx = LIS->getInstructionIndex(NewMI); + LaneBitmask DstMask = TRI->getSubRegIndexLaneMask(NewIdx); + bool UpdatedSubRanges = false; + for (LiveInterval::SubRange &SR : DstInt.subranges()) { + if ((SR.LaneMask & DstMask).none()) { + DEBUG(dbgs() << "Removing undefined SubRange " + << PrintLaneMask(SR.LaneMask) << " : " << SR << "\n"); + // Remove any segments in this SubRange defined by the value at this + // index; that value is now undef. + if (VNInfo *RmValNo = SR.getVNInfoAt(CurrIdx.getRegSlot())) { + SR.removeValNo(RmValNo); + UpdatedSubRanges = true; + } + } + } + if (UpdatedSubRanges) + DstInt.removeEmptySubRanges(); + } } else if (NewMI.getOperand(0).getReg() != CopyDstReg) { // The New instruction may be defining a sub-register of what's actually // been asked for. If so it must implicitly define the whole thing. diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp index 05e641d9489d9..fc5105aadbffd 100644 --- a/lib/CodeGen/RegisterScavenging.cpp +++ b/lib/CodeGen/RegisterScavenging.cpp @@ -375,7 +375,8 @@ unsigned RegScavenger::findSurvivorReg(MachineBasicBlock::iterator StartMI, static std::pair<MCPhysReg, MachineBasicBlock::iterator> findSurvivorBackwards(const MachineRegisterInfo &MRI, MachineBasicBlock::iterator From, MachineBasicBlock::iterator To, - const LiveRegUnits &LiveOut, ArrayRef<MCPhysReg> AllocationOrder) { + const LiveRegUnits &LiveOut, ArrayRef<MCPhysReg> AllocationOrder, + bool RestoreAfter) { bool FoundTo = false; MCPhysReg Survivor = 0; MachineBasicBlock::iterator Pos; @@ -388,7 +389,7 @@ findSurvivorBackwards(const MachineRegisterInfo &MRI, for (MachineBasicBlock::iterator I = From;; --I) { const MachineInstr &MI = *I; - Used.accumulateBackward(MI); + Used.accumulate(MI); if (I == To) { // See if one of the registers in RC wasn't used so far. @@ -401,6 +402,11 @@ findSurvivorBackwards(const MachineRegisterInfo &MRI, // the register which is not defined/used for the longest time. FoundTo = true; Pos = To; + // Note: It was fine so far to start our search at From; however, now that + // we have to spill and can only place the restore after From, we must also + // add the regs used/defed by std::next(From) to the set. + if (RestoreAfter) + Used.accumulate(*std::next(From)); } if (FoundTo) { if (Survivor == 0 || !Used.available(Survivor)) { @@ -575,7 +581,8 @@ unsigned RegScavenger::scavengeRegisterBackwards(const TargetRegisterClass &RC, MachineBasicBlock::iterator UseMI; ArrayRef<MCPhysReg> AllocationOrder = RC.getRawAllocationOrder(MF); std::pair<MCPhysReg, MachineBasicBlock::iterator> P = - findSurvivorBackwards(*MRI, MBBI, To, LiveUnits, AllocationOrder); + findSurvivorBackwards(*MRI, MBBI, To, LiveUnits, AllocationOrder, + RestoreAfter); MCPhysReg Reg = P.first; MachineBasicBlock::iterator SpillBefore = P.second; assert(Reg != 0 && "No register left to scavenge!"); @@ -626,7 +633,7 @@ static unsigned scavengeVReg(MachineRegisterInfo &MRI, RegScavenger &RS, assert(RealDef != nullptr && "Must have at least 1 Def"); #endif - // We should only have one definition of the register. However to accomodate + // We should only have one definition of the register. However to accommodate // the requirements of two address code we also allow definitions in // subsequent instructions provided they also read the register. That way // we get a single contiguous lifetime.
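// The RestoreAfter path in findSurvivorBackwards above, concretely
// (instruction shapes are illustrative): if the scavenged register must stay
// live past From because the restore can only be placed after it, e.g.
//
//   From:            $scav = ...  (the use being scavenged for)
//   std::next(From): $r1   = ...  (defines $r1)
//
// then $r1 cannot be chosen even if it was unused over [To, From], which is
// why accumulate() also folds std::next(From) into the used set.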
diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp index 3cd270cec3a6d..5e95f760aaa24 100644 --- a/lib/CodeGen/ScheduleDAG.cpp +++ b/lib/CodeGen/ScheduleDAG.cpp @@ -67,6 +67,41 @@ const MCInstrDesc *ScheduleDAG::getNodeDesc(const SDNode *Node) const { return &TII->get(Node->getMachineOpcode()); } +LLVM_DUMP_METHOD +raw_ostream &SDep::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const { + switch (getKind()) { + case Data: OS << "Data"; break; + case Anti: OS << "Anti"; break; + case Output: OS << "Out "; break; + case Order: OS << "Ord "; break; + } + + switch (getKind()) { + case Data: + OS << " Latency=" << getLatency(); + if (TRI && isAssignedRegDep()) + OS << " Reg=" << PrintReg(getReg(), TRI); + break; + case Anti: + case Output: + OS << " Latency=" << getLatency(); + break; + case Order: + OS << " Latency=" << getLatency(); + switch(Contents.OrdKind) { + case Barrier: OS << " Barrier"; break; + case MayAliasMem: + case MustAliasMem: OS << " Memory"; break; + case Artificial: OS << " Artificial"; break; + case Weak: OS << " Weak"; break; + case Cluster: OS << " Cluster"; break; + } + break; + } + + return OS; +} + bool SUnit::addPred(const SDep &D, bool Required) { // If this node already has this dependence, don't add a redundant one. for (SDep &PredDep : Preds) { @@ -302,16 +337,24 @@ void SUnit::biasCriticalPath() { #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD -void SUnit::print(raw_ostream &OS, const ScheduleDAG *DAG) const { - if (this == &DAG->ExitSU) - OS << "ExitSU"; - else if (this == &DAG->EntrySU) +raw_ostream &SUnit::print(raw_ostream &OS, + const SUnit *Entry, const SUnit *Exit) const { + if (this == Entry) OS << "EntrySU"; + else if (this == Exit) + OS << "ExitSU"; else OS << "SU(" << NodeNum << ")"; + return OS; +} + +LLVM_DUMP_METHOD +raw_ostream &SUnit::print(raw_ostream &OS, const ScheduleDAG *G) const { + return print(OS, &G->EntrySU, &G->ExitSU); } -LLVM_DUMP_METHOD void SUnit::dump(const ScheduleDAG *G) const { +LLVM_DUMP_METHOD +void SUnit::dump(const ScheduleDAG *G) const { print(dbgs(), G); dbgs() << ": "; G->dumpNode(this); @@ -333,40 +376,18 @@ LLVM_DUMP_METHOD void SUnit::dumpAll(const ScheduleDAG *G) const { if (Preds.size() != 0) { dbgs() << " Predecessors:\n"; - for (const SDep &SuccDep : Preds) { - dbgs() << " "; - switch (SuccDep.getKind()) { - case SDep::Data: dbgs() << "data "; break; - case SDep::Anti: dbgs() << "anti "; break; - case SDep::Output: dbgs() << "out "; break; - case SDep::Order: dbgs() << "ord "; break; - } - SuccDep.getSUnit()->print(dbgs(), G); - if (SuccDep.isArtificial()) - dbgs() << " *"; - dbgs() << ": Latency=" << SuccDep.getLatency(); - if (SuccDep.isAssignedRegDep()) - dbgs() << " Reg=" << PrintReg(SuccDep.getReg(), G->TRI); - dbgs() << "\n"; + for (const SDep &Dep : Preds) { + dbgs() << " "; + Dep.getSUnit()->print(dbgs(), G); dbgs() << ": "; + Dep.print(dbgs(), G->TRI); dbgs() << '\n'; } } if (Succs.size() != 0) { dbgs() << " Successors:\n"; - for (const SDep &SuccDep : Succs) { - dbgs() << " "; - switch (SuccDep.getKind()) { - case SDep::Data: dbgs() << "data "; break; - case SDep::Anti: dbgs() << "anti "; break; - case SDep::Output: dbgs() << "out "; break; - case SDep::Order: dbgs() << "ord "; break; - } - SuccDep.getSUnit()->print(dbgs(), G); - if (SuccDep.isArtificial()) - dbgs() << " *"; - dbgs() << ": Latency=" << SuccDep.getLatency(); - if (SuccDep.isAssignedRegDep()) - dbgs() << " Reg=" << PrintReg(SuccDep.getReg(), G->TRI); - dbgs() << "\n"; + for (const SDep &Dep : 
Succs) { + dbgs() << " "; + Dep.getSUnit()->print(dbgs(), G); dbgs() << ": "; + Dep.print(dbgs(), G->TRI); dbgs() << '\n'; } } } diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index 0f70b0e9ca077..ccd937950a743 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -63,7 +63,7 @@ using namespace llvm; -#define DEBUG_TYPE "misched" +#define DEBUG_TYPE "machine-scheduler" static cl::opt<bool> EnableAASchedMI("enable-aa-sched-mi", cl::Hidden, cl::ZeroOrMore, cl::init(false), diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index d901af7276860..71382c18fdf9d 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -400,6 +400,7 @@ namespace { SDValue reduceBuildVecExtToExtBuildVec(SDNode *N); SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N); SDValue reduceBuildVecToShuffle(SDNode *N); + SDValue reduceBuildVecToTrunc(SDNode *N); SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N, ArrayRef<int> VectorMask, SDValue VecIn1, SDValue VecIn2, unsigned LeftIdx); @@ -5267,14 +5268,40 @@ SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) { } SDValue DAGCombiner::visitRotate(SDNode *N) { + SDLoc dl(N); + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + EVT VT = N->getValueType(0); + + // fold (rot x, 0) -> x + if (isNullConstantOrNullSplatConstant(N1)) + return N0; + // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))). - if (N->getOperand(1).getOpcode() == ISD::TRUNCATE && - N->getOperand(1).getOperand(0).getOpcode() == ISD::AND) { - if (SDValue NewOp1 = - distributeTruncateThroughAnd(N->getOperand(1).getNode())) - return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), - N->getOperand(0), NewOp1); - } + if (N1.getOpcode() == ISD::TRUNCATE && + N1.getOperand(0).getOpcode() == ISD::AND) { + if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode())) + return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1); + } + + unsigned NextOp = N0.getOpcode(); + // fold (rot* (rot* x, c2), c1) -> (rot* x, c1 +- c2 % bitsize) + if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) + if (SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1)) + if (SDNode *C2 = + DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) { + bool SameSide = (N->getOpcode() == NextOp); + unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB; + if (SDValue CombinedShift = + DAG.FoldConstantArithmetic(CombineOp, dl, VT, C1, C2)) { + unsigned Bitsize = VT.getScalarSizeInBits(); + SDValue BitsizeC = DAG.getConstant(Bitsize, dl, VT); + SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic( + ISD::SREM, dl, VT, CombinedShift.getNode(), BitsizeC.getNode()); + return DAG.getNode( + N->getOpcode(), dl, VT, N0->getOperand(0), CombinedShiftNorm); + } + } return SDValue(); } @@ -6091,19 +6118,22 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { SDValue N2 = N->getOperand(2); EVT VT = N->getValueType(0); EVT VT0 = N0.getValueType(); + SDLoc DL(N); // fold (select C, X, X) -> X if (N1 == N2) return N1; + if (const ConstantSDNode *N0C = dyn_cast<const ConstantSDNode>(N0)) { // fold (select true, X, Y) -> X // fold (select false, X, Y) -> Y return !N0C->isNullValue() ? 
N1 : N2; } + // fold (select X, X, Y) -> (or X, Y) // fold (select X, 1, Y) -> (or C, Y) if (VT == VT0 && VT == MVT::i1 && (N0 == N1 || isOneConstant(N1))) - return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2); + return DAG.getNode(ISD::OR, DL, VT, N0, N2); if (SDValue V = foldSelectOfConstants(N)) return V; @@ -6112,22 +6142,22 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { if (VT == VT0 && VT == MVT::i1 && isNullConstant(N1)) { SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT); AddToWorklist(NOTNode.getNode()); - return DAG.getNode(ISD::AND, SDLoc(N), VT, NOTNode, N2); + return DAG.getNode(ISD::AND, DL, VT, NOTNode, N2); } // fold (select C, X, 1) -> (or (not C), X) if (VT == VT0 && VT == MVT::i1 && isOneConstant(N2)) { SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT); AddToWorklist(NOTNode.getNode()); - return DAG.getNode(ISD::OR, SDLoc(N), VT, NOTNode, N1); + return DAG.getNode(ISD::OR, DL, VT, NOTNode, N1); } // fold (select X, Y, X) -> (and X, Y) // fold (select X, Y, 0) -> (and X, Y) if (VT == VT0 && VT == MVT::i1 && (N0 == N2 || isNullConstant(N2))) - return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, N1); + return DAG.getNode(ISD::AND, DL, VT, N0, N1); // If we can fold this based on the true/false value, do so. if (SimplifySelectOps(N, N1, N2)) - return SDValue(N, 0); // Don't revisit N. + return SDValue(N, 0); // Don't revisit N. if (VT0 == MVT::i1) { // The code in this block deals with the following 2 equivalences: @@ -6138,27 +6168,27 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { // to the right anyway if we find the inner select exists in the DAG anyway // and we always transform to the left side if we know that we can further // optimize the combination of the conditions. - bool normalizeToSequence - = TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT); + bool normalizeToSequence = + TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT); // select (and Cond0, Cond1), X, Y // -> select Cond0, (select Cond1, X, Y), Y if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) { SDValue Cond0 = N0->getOperand(0); SDValue Cond1 = N0->getOperand(1); - SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N), - N1.getValueType(), Cond1, N1, N2); + SDValue InnerSelect = + DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2); if (normalizeToSequence || !InnerSelect.use_empty()) - return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0, + return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, InnerSelect, N2); } // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y) if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) { SDValue Cond0 = N0->getOperand(0); SDValue Cond1 = N0->getOperand(1); - SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N), - N1.getValueType(), Cond1, N1, N2); + SDValue InnerSelect = + DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2); if (normalizeToSequence || !InnerSelect.use_empty()) - return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0, N1, + return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1, InnerSelect); } @@ -6170,15 +6200,13 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) { // Create the actual and node if we can generate good code for it. 
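// Worked instances of two folds above (i32, constants illustrative). The
// rotate-of-rotate combine in visitRotate composes amounts modulo the bit
// width, adding when the directions match and subtracting when they differ:
//
//   rotl (rotl x, 25), 10  ->  rotl x, (10 + 25) % 32  =  rotl x, 3
//   rotl (rotr x, 5),  7   ->  rotl x, (7  - 5)  % 32  =  rotl x, 2
//
// And the select-of-select folds in visitSELECT merge conditions when not
// normalizing to a select sequence:
//
//   select C0, (select C1, X, Y), Y  ->  select (and C0, C1), X, Y
//   select C0, X, (select C1, X, Y)  ->  select (or  C0, C1), X, Y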
if (!normalizeToSequence) { - SDValue And = DAG.getNode(ISD::AND, SDLoc(N), N0.getValueType(), - N0, N1_0); - return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), And, - N1_1, N2); + SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0); + return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1, N2); } // Otherwise see if we can optimize the "and" to a better pattern. if (SDValue Combined = visitANDLike(N0, N1_0, N)) - return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Combined, - N1_1, N2); + return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1, + N2); } } // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y @@ -6189,15 +6217,13 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) { // Create the actual or node if we can generate good code for it. if (!normalizeToSequence) { - SDValue Or = DAG.getNode(ISD::OR, SDLoc(N), N0.getValueType(), - N0, N2_0); - return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Or, - N1, N2_2); + SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0); + return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1, N2_2); } // Otherwise see if we can optimize to a better pattern. if (SDValue Combined = visitORLike(N0, N2_0, N)) - return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Combined, - N1, N2_2); + return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1, + N2_2); } } } @@ -6208,8 +6234,7 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1))) { SDValue Cond0 = N0->getOperand(0); if (C->isOne()) - return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), - Cond0, N2, N1); + return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N2, N1); } } } @@ -6226,24 +6251,21 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { // FIXME: Instead of testing for UnsafeFPMath, this should be checking for // no signed zeros as well as no nans. const TargetOptions &Options = DAG.getTarget().Options; - if (Options.UnsafeFPMath && - VT.isFloatingPoint() && N0.hasOneUse() && + if (Options.UnsafeFPMath && VT.isFloatingPoint() && N0.hasOneUse() && DAG.isKnownNeverNaN(N1) && DAG.isKnownNeverNaN(N2)) { ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); - if (SDValue FMinMax = combineMinNumMaxNum(SDLoc(N), VT, N0.getOperand(0), - N0.getOperand(1), N1, N2, CC, - TLI, DAG)) + if (SDValue FMinMax = combineMinNumMaxNum( + DL, VT, N0.getOperand(0), N0.getOperand(1), N1, N2, CC, TLI, DAG)) return FMinMax; } if ((!LegalOperations && TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) || TLI.isOperationLegal(ISD::SELECT_CC, VT)) - return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, - N0.getOperand(0), N0.getOperand(1), - N1, N2, N0.getOperand(2)); - return SimplifySelect(SDLoc(N), N0, N1, N2); + return DAG.getNode(ISD::SELECT_CC, DL, VT, N0.getOperand(0), + N0.getOperand(1), N1, N2, N0.getOperand(2)); + return SimplifySelect(DL, N0, N1, N2); } return SDValue(); @@ -11045,7 +11067,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { // x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store) // // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the - // indexed load/store and the expresion that needs to be re-written. + // indexed load/store and the expression that needs to be re-written. 
// // Therefore, we have: // t0 = (x0 * offset0 - x1 * y0 * y1 *offset1) + (y0 * y1) * t1 @@ -11379,7 +11401,7 @@ namespace { /// Shift = srl Ty1 Origin, CstTy Amount /// Inst = trunc Shift to Ty2 /// -/// Then, it will be rewriten into: +/// Then, it will be rewritten into: /// Slice = load SliceTy, Base + SliceOffset /// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2 /// @@ -12694,7 +12716,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits); bool IsFast = false; if (TLI.isTypeLegal(StoreTy) && - TLI.canMergeStoresTo(FirstStoreAS, StoreTy) && + TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) && TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, FirstStoreAlign, &IsFast) && IsFast) { @@ -12706,7 +12728,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { EVT LegalizedStoredValueTy = TLI.getTypeToTransformTo(Context, StoredVal.getValueType()); if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) && - TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValueTy) && + TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValueTy, DAG) && TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy, FirstStoreAS, FirstStoreAlign, &IsFast) && IsFast) { @@ -12723,7 +12745,8 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { !NoVectors) { // Find a legal type for the vector store. EVT Ty = EVT::getVectorVT(Context, MemVT, i + 1); - if (TLI.isTypeLegal(Ty) && TLI.canMergeStoresTo(FirstStoreAS, Ty) && + if (TLI.isTypeLegal(Ty) && + TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) && TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS, FirstStoreAlign, &IsFast) && IsFast) @@ -12781,7 +12804,8 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts); bool IsFast; - if (TLI.isTypeLegal(Ty) && TLI.canMergeStoresTo(FirstStoreAS, Ty) && + if (TLI.isTypeLegal(Ty) && + TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) && TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS, FirstStoreAlign, &IsFast) && IsFast) @@ -12898,7 +12922,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { EVT StoreTy = EVT::getVectorVT(Context, MemVT, i + 1); bool IsFastSt, IsFastLd; if (TLI.isTypeLegal(StoreTy) && - TLI.canMergeStoresTo(FirstStoreAS, StoreTy) && + TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) && TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, FirstStoreAlign, &IsFastSt) && IsFastSt && @@ -12912,7 +12936,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8; StoreTy = EVT::getIntegerVT(Context, SizeInBits); if (TLI.isTypeLegal(StoreTy) && - TLI.canMergeStoresTo(FirstStoreAS, StoreTy) && + TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) && TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, FirstStoreAlign, &IsFastSt) && IsFastSt && @@ -12926,7 +12950,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { TargetLowering::TypePromoteInteger) { EVT LegalizedStoredValueTy = TLI.getTypeToTransformTo(Context, StoreTy); if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) && - TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValueTy) && + TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValueTy, DAG) && TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValueTy, StoreTy) && TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValueTy, @@ -14228,6 +14252,73 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) { return Shuffles[0]; } +// 
Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT +// operations which can be matched to a truncate. +SDValue DAGCombiner::reduceBuildVecToTrunc(SDNode *N) { + // TODO: Add support for big-endian. + if (DAG.getDataLayout().isBigEndian()) + return SDValue(); + if (N->getNumOperands() < 2) + return SDValue(); + SDLoc DL(N); + EVT VT = N->getValueType(0); + unsigned NumElems = N->getNumOperands(); + + if (!isTypeLegal(VT)) + return SDValue(); + + // If the input is something other than an EXTRACT_VECTOR_ELT with a constant + // index, bail out. + // TODO: Allow undef elements in some cases? + if (any_of(N->ops(), [VT](SDValue Op) { + return Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT || + !isa<ConstantSDNode>(Op.getOperand(1)) || + Op.getValueType() != VT.getVectorElementType(); + })) + return SDValue(); + + // Helper for obtaining an EXTRACT_VECTOR_ELT's constant index + auto GetExtractIdx = [](SDValue Extract) { + return cast<ConstantSDNode>(Extract.getOperand(1))->getSExtValue(); + }; + + // The first BUILD_VECTOR operand must be an extract from index zero + // (assuming no undef and little-endian). + if (GetExtractIdx(N->getOperand(0)) != 0) + return SDValue(); + + // Compute the stride from the first index. + int Stride = GetExtractIdx(N->getOperand(1)); + SDValue ExtractedFromVec = N->getOperand(0).getOperand(0); + + // Proceed only if the stride and the types can be matched to a truncate. + if ((Stride == 1 || !isPowerOf2_32(Stride)) || + (ExtractedFromVec.getValueType().getVectorNumElements() != + Stride * NumElems) || + (VT.getScalarSizeInBits() * Stride > 64)) + return SDValue(); + + // Check remaining operands are consistent with the computed stride. + for (unsigned i = 1; i != NumElems; ++i) { + SDValue Op = N->getOperand(i); + + if ((Op.getOperand(0) != ExtractedFromVec) || + (GetExtractIdx(Op) != Stride * i)) + return SDValue(); + } + + // All checks were ok, construct the truncate. + LLVMContext &Ctx = *DAG.getContext(); + EVT NewVT = VT.getVectorVT( + Ctx, EVT::getIntegerVT(Ctx, VT.getScalarSizeInBits() * Stride), NumElems); + EVT TruncVT = + VT.isFloatingPoint() ? 
VT.changeVectorElementTypeToInteger() : VT; + + SDValue Res = DAG.getBitcast(NewVT, ExtractedFromVec); + Res = DAG.getNode(ISD::TRUNCATE, SDLoc(N), TruncVT, Res); + return DAG.getBitcast(VT, Res); +} + SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { EVT VT = N->getValueType(0); @@ -14270,6 +14361,10 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { if (SDValue V = reduceBuildVecConvertToConvertBuildVec(N)) return V; + if (TLI.isDesirableToCombineBuildVectorToTruncate()) + if (SDValue V = reduceBuildVecToTrunc(N)) + return V; + if (SDValue V = reduceBuildVecToShuffle(N)) return V; diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index b235e19aaab29..b96c96f0b4df4 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -589,7 +589,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, } else AddOperand(MIB, N0, 0, nullptr, VRBaseMap, /*IsDebug=*/false, IsClone, IsCloned); - // Add the subregster being inserted + // Add the subregister being inserted AddOperand(MIB, N1, 0, nullptr, VRBaseMap, /*IsDebug=*/false, IsClone, IsCloned); MIB.addImm(SubIdx); diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 873b2bd48f1e0..7e4bc3ccb5d39 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -1991,7 +1991,8 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, std::move(Args)) .setTailCall(isTailCall) .setSExtResult(isSigned) - .setZExtResult(!isSigned); + .setZExtResult(!isSigned) + .setIsPostTypeLegalization(true); std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI); @@ -2029,7 +2030,8 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, EVT RetVT, .setLibCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args)) .setSExtResult(isSigned) - .setZExtResult(!isSigned); + .setZExtResult(!isSigned) + .setIsPostTypeLegalization(true); std::pair<SDValue,SDValue> CallInfo = TLI.LowerCallTo(CLI); @@ -3565,16 +3567,10 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { SDValue Args[] = { HiLHS, LHS, HiRHS, RHS }; Ret = ExpandLibCall(LC, WideVT, Args, 4, isSigned, dl); } - BottomHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Ret, - DAG.getIntPtrConstant(0, dl)); - TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Ret, - DAG.getIntPtrConstant(1, dl)); - // Ret is a node with an illegal type. Because such things are not - // generally permitted during this phase of legalization, make sure the - // node has no more uses. The above EXTRACT_ELEMENT nodes should have been - // folded. - assert(Ret->use_empty() && - "Unexpected uses of illegally type from expanded lib call."); + assert(Ret.getOpcode() == ISD::MERGE_VALUES && + "Ret value is a collection of constituent nodes holding result."); + BottomHalf = Ret.getOperand(0); + TopHalf = Ret.getOperand(1); } if (isSigned) { diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index c1cb5d9b5235e..eaf177d0661b3 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -112,15 +112,15 @@ bool DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { case ISD::VAARG: R = SoftenFloatRes_VAARG(N); break; } - // If R is null, the sub-method took care of registering the result. 
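(Back to the reduceBuildVecToTrunc combine above: on a little-endian target, gathering every Stride-th lane of a wide vector is the same as reinterpreting the vector at a Stride-times-wider element type and truncating each lane. A host-side sketch of the NumElems = 4, Stride = 2 case; it assumes a little-endian machine, matching the TODO's big-endian bail-out:)

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  // 8 x i8 source vector. The combine rewrites
  //   build_vector (extract v,0), (extract v,2), (extract v,4), (extract v,6)
  // as a bitcast to 4 x i16 followed by a lane-wise truncate back to i8.
  uint8_t v[8] = {10, 11, 12, 13, 14, 15, 16, 17};
  uint16_t wide[4];
  std::memcpy(wide, v, sizeof(v)); // the bitcast to 4 x i16
  for (int i = 0; i < 4; ++i)
    assert(uint8_t(wide[i]) == v[2 * i]); // truncate == strided extract
  return 0;
}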
- if (R.getNode()) { + if (R.getNode() && R.getNode() != N) { SetSoftenedFloat(SDValue(N, ResNo), R); - ReplaceSoftenFloatResult(N, ResNo, R); + // Return true only if the node is changed, assuming that the operands + // are also converted when necessary. + return true; } - // Return true only if the node is changed, - // assuming that the operands are also converted when necessary. + // Otherwise, return false to tell caller to scan operands. - return R.getNode() && R.getNode() != N; + return false; } SDValue DAGTypeLegalizer::SoftenFloatRes_BITCAST(SDNode *N, unsigned ResNo) { @@ -753,12 +753,17 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { llvm_unreachable("Do not know how to soften this operator's operand!"); case ISD::BITCAST: Res = SoftenFloatOp_BITCAST(N); break; + case ISD::CopyToReg: Res = SoftenFloatOp_COPY_TO_REG(N); break; case ISD::BR_CC: Res = SoftenFloatOp_BR_CC(N); break; + case ISD::FABS: Res = SoftenFloatOp_FABS(N); break; + case ISD::FCOPYSIGN: Res = SoftenFloatOp_FCOPYSIGN(N); break; + case ISD::FNEG: Res = SoftenFloatOp_FNEG(N); break; case ISD::FP_EXTEND: Res = SoftenFloatOp_FP_EXTEND(N); break; case ISD::FP_TO_FP16: // Same as FP_ROUND for softening purposes case ISD::FP_ROUND: Res = SoftenFloatOp_FP_ROUND(N); break; case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: Res = SoftenFloatOp_FP_TO_XINT(N); break; + case ISD::SELECT: Res = SoftenFloatOp_SELECT(N); break; case ISD::SELECT_CC: Res = SoftenFloatOp_SELECT_CC(N); break; case ISD::SETCC: Res = SoftenFloatOp_SETCC(N); break; case ISD::STORE: @@ -791,9 +796,9 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { bool DAGTypeLegalizer::CanSkipSoftenFloatOperand(SDNode *N, unsigned OpNo) { if (!isLegalInHWReg(N->getOperand(OpNo).getValueType())) return false; - // When the operand type can be kept in registers, SoftenFloatResult - // will call ReplaceValueWith to replace all references and we can - // skip softening this operand. + + // When the operand type can be kept in registers there is nothing to do for + // the following opcodes. switch (N->getOperand(OpNo).getOpcode()) { case ISD::BITCAST: case ISD::ConstantFP: @@ -807,18 +812,12 @@ bool DAGTypeLegalizer::CanSkipSoftenFloatOperand(SDNode *N, unsigned OpNo) { case ISD::SELECT_CC: return true; } - // For some opcodes, SoftenFloatResult handles all conversion of softening - // and replacing operands, so that there is no need to soften operands - // again, although such opcode could be scanned for other illegal operands. + switch (N->getOpcode()) { - case ISD::ConstantFP: - case ISD::CopyFromReg: - case ISD::CopyToReg: - case ISD::FABS: - case ISD::FCOPYSIGN: - case ISD::FNEG: - case ISD::Register: - case ISD::SELECT: + case ISD::ConstantFP: // Leaf node. + case ISD::CopyFromReg: // Operand is a register that we know to be left + // unchanged by SoftenFloatResult(). + case ISD::Register: // Leaf node. 
return true; } return false; } @@ -829,6 +828,21 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_BITCAST(SDNode *N) { GetSoftenedFloat(N->getOperand(0))); } +SDValue DAGTypeLegalizer::SoftenFloatOp_COPY_TO_REG(SDNode *N) { + SDValue Op1 = GetSoftenedFloat(N->getOperand(1)); + SDValue Op2 = GetSoftenedFloat(N->getOperand(2)); + + if (Op1 == N->getOperand(1) && Op2 == N->getOperand(2)) + return SDValue(); + + if (N->getNumOperands() == 3) + return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Op1, Op2), 0); + + return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Op1, Op2, + N->getOperand(3)), + 0); +} + SDValue DAGTypeLegalizer::SoftenFloatOp_FP_EXTEND(SDNode *N) { // If we get here, the result must be legal but the source illegal. EVT SVT = N->getOperand(0).getValueType(); @@ -884,6 +898,34 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) { 0); } +SDValue DAGTypeLegalizer::SoftenFloatOp_FABS(SDNode *N) { + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + + if (Op == N->getOperand(0)) + return SDValue(); + + return SDValue(DAG.UpdateNodeOperands(N, Op), 0); +} + +SDValue DAGTypeLegalizer::SoftenFloatOp_FCOPYSIGN(SDNode *N) { + SDValue Op0 = GetSoftenedFloat(N->getOperand(0)); + SDValue Op1 = GetSoftenedFloat(N->getOperand(1)); + + if (Op0 == N->getOperand(0) && Op1 == N->getOperand(1)) + return SDValue(); + + return SDValue(DAG.UpdateNodeOperands(N, Op0, Op1), 0); +} + +SDValue DAGTypeLegalizer::SoftenFloatOp_FNEG(SDNode *N) { + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + + if (Op == N->getOperand(0)) + return SDValue(); + + return SDValue(DAG.UpdateNodeOperands(N, Op), 0); +} + SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_XINT(SDNode *N) { bool Signed = N->getOpcode() == ISD::FP_TO_SINT; EVT SVT = N->getOperand(0).getValueType(); @@ -913,6 +955,17 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_XINT(SDNode *N) { return DAG.getNode(ISD::TRUNCATE, dl, RVT, Res); } +SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT(SDNode *N) { + SDValue Op1 = GetSoftenedFloat(N->getOperand(1)); + SDValue Op2 = GetSoftenedFloat(N->getOperand(2)); + + if (Op1 == N->getOperand(1) && Op2 == N->getOperand(2)) + return SDValue(); + + return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Op1, Op2), + 0); +} + SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) { SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1); ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(4))->get(); diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index 154af46c94464..001eed9fb8f62 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -80,6 +80,7 @@ void DAGTypeLegalizer::PerformExpensiveChecks() { for (unsigned i = 0, e = Node.getNumValues(); i != e; ++i) { SDValue Res(&Node, i); + EVT VT = Res.getValueType(); bool Failed = false; unsigned Mapped = 0; @@ -129,13 +130,17 @@ void DAGTypeLegalizer::PerformExpensiveChecks() { dbgs() << "Unprocessed value in a map!"; Failed = true; } - } else if (isTypeLegal(Res.getValueType()) || IgnoreNodeResults(&Node)) { + } else if (isTypeLegal(VT) || IgnoreNodeResults(&Node)) { if (Mapped > 1) { dbgs() << "Value with legal type was transformed!"; Failed = true; } } else { - if (Mapped == 0) { + // If the value can be kept in HW registers, the softening machinery can + // leave it unchanged and not put it in any map. 
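(The expensive checks here build up Mapped as a bitmask with one bit per legalization map that claims the value; the Mapped & (Mapped - 1) test just below flags values claimed by more than one map. The idiom clears the lowest set bit, so it is nonzero exactly when at least two bits are set. A standalone illustration, not part of the patch:)

#include <cassert>

// x & (x - 1) clears the lowest set bit of x.
static bool moreThanOneBitSet(unsigned x) { return (x & (x - 1)) != 0; }

int main() {
  assert(!moreThanOneBitSet(0u)); // value in no map
  assert(!moreThanOneBitSet(4u)); // value in exactly one map
  assert(moreThanOneBitSet(6u));  // value claimed by two maps
  return 0;
}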
+ if (Mapped == 0 && + !(getTypeAction(VT) == TargetLowering::TypeSoftenFloat && + isLegalInHWReg(VT))) { dbgs() << "Processed value not in any map!"; Failed = true; } else if (Mapped & (Mapped - 1)) { @@ -331,11 +336,6 @@ ScanOperands: if (NeedsReanalyzing) { assert(N->getNodeId() == ReadyToProcess && "Node ID recalculated?"); - // Remove any result values from SoftenedFloats as N will be revisited - // again. - for (unsigned i = 0, NumResults = N->getNumValues(); i < NumResults; ++i) - SoftenedFloats.erase(SDValue(N, i)); - N->setNodeId(NewNode); // Recompute the NodeId and correct processed operands, adding the node to // the worklist if ready. @@ -754,8 +754,6 @@ void DAGTypeLegalizer::ReplaceValueWith(SDValue From, SDValue To) { // new uses of From due to CSE. If this happens, replace the new uses of // From with To. } while (!From.use_empty()); - - SoftenedFloats.erase(From); } void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) { diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 8e999188d8e10..e102df5e913d9 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -416,16 +416,6 @@ private: } void SetSoftenedFloat(SDValue Op, SDValue Result); - // Call ReplaceValueWith(SDValue(N, ResNo), Res) if necessary. - void ReplaceSoftenFloatResult(SDNode *N, unsigned ResNo, SDValue &NewRes) { - // When the result type can be kept in HW registers, the converted - // NewRes node could have the same type. We can save the effort in - // cloning every user of N in SoftenFloatOperand or other legalization functions, - // by calling ReplaceValueWith here to update all users. - if (NewRes.getNode() != N && isLegalInHWReg(N->getValueType(ResNo))) - ReplaceValueWith(SDValue(N, ResNo), NewRes); - } - // Convert Float Results to Integer for Non-HW-supported Operations. bool SoftenFloatResult(SDNode *N, unsigned ResNo); SDValue SoftenFloatRes_MERGE_VALUES(SDNode *N, unsigned ResNo); @@ -471,17 +461,23 @@ private: SDValue SoftenFloatRes_XINT_TO_FP(SDNode *N); // Return true if we can skip softening the given operand or SDNode because - // it was soften before by SoftenFloatResult and references to the operand - // were replaced by ReplaceValueWith. + // either it was softened before by SoftenFloatResult and references to the + // operand were replaced by ReplaceValueWith, or its value type is legal in HW + // registers and the operand can be left unchanged. bool CanSkipSoftenFloatOperand(SDNode *N, unsigned OpNo); // Convert Float Operand to Integer for Non-HW-supported Operations. 
bool SoftenFloatOperand(SDNode *N, unsigned OpNo); SDValue SoftenFloatOp_BITCAST(SDNode *N); + SDValue SoftenFloatOp_COPY_TO_REG(SDNode *N); SDValue SoftenFloatOp_BR_CC(SDNode *N); + SDValue SoftenFloatOp_FABS(SDNode *N); + SDValue SoftenFloatOp_FCOPYSIGN(SDNode *N); + SDValue SoftenFloatOp_FNEG(SDNode *N); SDValue SoftenFloatOp_FP_EXTEND(SDNode *N); SDValue SoftenFloatOp_FP_ROUND(SDNode *N); SDValue SoftenFloatOp_FP_TO_XINT(SDNode *N); + SDValue SoftenFloatOp_SELECT(SDNode *N); SDValue SoftenFloatOp_SELECT_CC(SDNode *N); SDValue SoftenFloatOp_SETCC(SDNode *N); SDValue SoftenFloatOp_STORE(SDNode *N, unsigned OpNo); diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index aa69e0e2adfce..f3306151d864b 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -57,7 +57,7 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { // Expand the floating point operand only if it was converted to integers. // Otherwise, it is a legal type like f128 that can be saved in a register. auto SoftenedOp = GetSoftenedFloat(InOp); - if (SoftenedOp == InOp) + if (isLegalInHWReg(SoftenedOp.getValueType())) break; SplitInteger(SoftenedOp, Lo, Hi); Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index ff0e609803d8a..d41054b15bbcf 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -2977,7 +2977,11 @@ SDValue DAGTypeLegalizer::convertMask(SDValue InMask, EVT MaskVT, // Currently a SETCC or an AND/OR/XOR with two SETCCs is handled. unsigned InMaskOpc = InMask->getOpcode(); + + // FIXME: This code seems to be too restrictive; we might consider + // generalizing it or dropping it. 
assert((InMaskOpc == ISD::SETCC || + ISD::isBuildVectorOfConstantSDNodes(InMask.getNode()) || (isLogicalMaskOp(InMaskOpc) && isSETCCorConvertedSETCC(InMask->getOperand(0)) && isSETCCorConvertedSETCC(InMask->getOperand(1)))) && diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 98553152117d1..823e77850c4ba 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -34,6 +34,7 @@ #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/RuntimeLibcalls.h" +#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/SelectionDAGTargetInfo.h" #include "llvm/CodeGen/ValueTypes.h" @@ -5442,7 +5443,7 @@ SDValue SelectionDAG::getAtomicCmpSwap( unsigned Opcode, const SDLoc &dl, EVT MemVT, SDVTList VTs, SDValue Chain, SDValue Ptr, SDValue Cmp, SDValue Swp, MachinePointerInfo PtrInfo, unsigned Alignment, AtomicOrdering SuccessOrdering, - AtomicOrdering FailureOrdering, SynchronizationScope SynchScope) { + AtomicOrdering FailureOrdering, SyncScope::ID SSID) { assert(Opcode == ISD::ATOMIC_CMP_SWAP || Opcode == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS); assert(Cmp.getValueType() == Swp.getValueType() && "Invalid Atomic Op Types"); @@ -5458,7 +5459,7 @@ SDValue SelectionDAG::getAtomicCmpSwap( MachineMemOperand::MOStore; MachineMemOperand *MMO = MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Alignment, - AAMDNodes(), nullptr, SynchScope, SuccessOrdering, + AAMDNodes(), nullptr, SSID, SuccessOrdering, FailureOrdering); return getAtomicCmpSwap(Opcode, dl, MemVT, VTs, Chain, Ptr, Cmp, Swp, MMO); @@ -5480,7 +5481,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Val, const Value *PtrVal, unsigned Alignment, AtomicOrdering Ordering, - SynchronizationScope SynchScope) { + SyncScope::ID SSID) { if (Alignment == 0) // Ensure that codegen never sees alignment 0 Alignment = getEVTAlignment(MemVT); @@ -5500,7 +5501,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, MachineMemOperand *MMO = MF.getMachineMemOperand(MachinePointerInfo(PtrVal), Flags, MemVT.getStoreSize(), Alignment, AAMDNodes(), - nullptr, SynchScope, Ordering); + nullptr, SSID, Ordering); return getAtomic(Opcode, dl, MemVT, Chain, Ptr, Val, MMO); } @@ -7630,45 +7631,13 @@ bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD, SDValue Loc = LD->getOperand(1); SDValue BaseLoc = Base->getOperand(1); - if (Loc.getOpcode() == ISD::FrameIndex) { - if (BaseLoc.getOpcode() != ISD::FrameIndex) - return false; - const MachineFrameInfo &MFI = getMachineFunction().getFrameInfo(); - int FI = cast<FrameIndexSDNode>(Loc)->getIndex(); - int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex(); - int FS = MFI.getObjectSize(FI); - int BFS = MFI.getObjectSize(BFI); - if (FS != BFS || FS != (int)Bytes) return false; - return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes); - } - - // Handle X + C. - if (isBaseWithConstantOffset(Loc)) { - int64_t LocOffset = cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue(); - if (Loc.getOperand(0) == BaseLoc) { - // If the base location is a simple address with no offset itself, then - // the second load's first add operand should be the base address. 
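(Each special case removed here, frame indexes, X + C addressing, and global-plus-offset pairs, answers the same question: do the two addresses decompose to a common base with offsets exactly Dist * Bytes apart? The BaseIndexOffset rewrite below unifies them. A toy version of the unified check, using hypothetical types rather than the LLVM API:)

#include <cstdint>

// Hypothetical decomposed address: an opaque base plus a byte offset.
struct DecomposedAddr {
  const void *Base;
  int64_t Offset;
};

// Loads of Bytes bytes each are consecutive at distance Dist when they share
// a base and their offsets differ by exactly Dist * Bytes.
static bool areConsecutive(const DecomposedAddr &A, const DecomposedAddr &B,
                           int64_t Dist, int64_t Bytes) {
  return A.Base == B.Base && B.Offset - A.Offset == Dist * Bytes;
}

int main() {
  int buf[4] = {0, 1, 2, 3};
  DecomposedAddr A{buf, 0}, B{buf, 8};
  return areConsecutive(A, B, /*Dist=*/2, /*Bytes=*/4) ? 0 : 1;
}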
- if (LocOffset == Dist * (int)Bytes) - return true; - } else if (isBaseWithConstantOffset(BaseLoc)) { - // The base location itself has an offset, so subtract that value from the - // second load's offset before comparing to distance * size. - int64_t BOffset = - cast<ConstantSDNode>(BaseLoc.getOperand(1))->getSExtValue(); - if (Loc.getOperand(0) == BaseLoc.getOperand(0)) { - if ((LocOffset - BOffset) == Dist * (int)Bytes) - return true; - } - } - } - const GlobalValue *GV1 = nullptr; - const GlobalValue *GV2 = nullptr; - int64_t Offset1 = 0; - int64_t Offset2 = 0; - bool isGA1 = TLI->isGAPlusOffset(Loc.getNode(), GV1, Offset1); - bool isGA2 = TLI->isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2); - if (isGA1 && isGA2 && GV1 == GV2) - return Offset1 == (Offset2 + Dist*Bytes); + + auto BaseLocDecomp = BaseIndexOffset::match(BaseLoc, *this); + auto LocDecomp = BaseIndexOffset::match(Loc, *this); + + int64_t Offset = 0; + if (BaseLocDecomp.equalBaseIndex(LocDecomp, *this, Offset)) + return (Dist * Bytes == Offset); return false; } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp index 4e899ae6668e7..0d69441ebb7f7 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp @@ -37,13 +37,13 @@ bool BaseIndexOffset::equalBaseIndex(BaseIndexOffset &Other, const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); - // Match non-equal FrameIndexes - a FrameIndex stemming from an - // alloca will not have it's ObjectOffset set until post-DAG and - // as such we must assume the two framesIndices are incomparable. + // Match non-equal FrameIndexes - If both frame indices are fixed + // we know their relative offsets and can compare them. Otherwise + // we must be conservative. if (auto *A = dyn_cast<FrameIndexSDNode>(Base)) if (auto *B = dyn_cast<FrameIndexSDNode>(Other.Base)) - if (!MFI.getObjectAllocation(A->getIndex()) && - !MFI.getObjectAllocation(B->getIndex())) { + if (MFI.isFixedObjectIndex(A->getIndex()) && + MFI.isFixedObjectIndex(B->getIndex())) { Off += MFI.getObjectOffset(B->getIndex()) - MFI.getObjectOffset(A->getIndex()); return true; @@ -60,12 +60,18 @@ BaseIndexOffset BaseIndexOffset::match(SDValue Ptr, const SelectionDAG &DAG) { int64_t Offset = 0; bool IsIndexSignExt = false; - // Consume constant adds - while (Base->getOpcode() == ISD::ADD && - isa<ConstantSDNode>(Base->getOperand(1))) { - int64_t POffset = cast<ConstantSDNode>(Base->getOperand(1))->getSExtValue(); - Offset += POffset; - Base = Base->getOperand(0); + // Consume constant adds & ors with appropriate masking. + while (Base->getOpcode() == ISD::ADD || Base->getOpcode() == ISD::OR) { + if (auto *C = dyn_cast<ConstantSDNode>(Base->getOperand(1))) { + // Only consider ORs which act as adds. 
+ if (Base->getOpcode() == ISD::OR && + !DAG.MaskedValueIsZero(Base->getOperand(0), C->getAPIntValue())) + break; + Offset += C->getSExtValue(); + Base = Base->getOperand(0); + continue; + } + break; } if (Base->getOpcode() == ISD::ADD) { diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index acf68fbbdedfc..41c3f5f235eab 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -3220,7 +3220,13 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { setValue(&I, DAG.getBuildVector(VT, DL, Ops)); } -void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) { +void SelectionDAGBuilder::visitInsertValue(const User &I) { + ArrayRef<unsigned> Indices; + if (const InsertValueInst *IV = dyn_cast<InsertValueInst>(&I)) + Indices = IV->getIndices(); + else + Indices = cast<ConstantExpr>(&I)->getIndices(); + const Value *Op0 = I.getOperand(0); const Value *Op1 = I.getOperand(1); Type *AggTy = I.getType(); @@ -3228,7 +3234,7 @@ void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) { bool IntoUndef = isa<UndefValue>(Op0); bool FromUndef = isa<UndefValue>(Op1); - unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices()); + unsigned LinearIndex = ComputeLinearIndex(AggTy, Indices); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SmallVector<EVT, 4> AggValueVTs; @@ -3268,13 +3274,19 @@ void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) { DAG.getVTList(AggValueVTs), Values)); } -void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) { +void SelectionDAGBuilder::visitExtractValue(const User &I) { + ArrayRef<unsigned> Indices; + if (const ExtractValueInst *EV = dyn_cast<ExtractValueInst>(&I)) + Indices = EV->getIndices(); + else + Indices = cast<ConstantExpr>(&I)->getIndices(); + const Value *Op0 = I.getOperand(0); Type *AggTy = Op0->getType(); Type *ValTy = I.getType(); bool OutOfUndef = isa<UndefValue>(Op0); - unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices()); + unsigned LinearIndex = ComputeLinearIndex(AggTy, Indices); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SmallVector<EVT, 4> ValValueVTs; @@ -3559,6 +3571,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { MMOFlags |= MachineMemOperand::MOInvariant; if (isDereferenceable) MMOFlags |= MachineMemOperand::MODereferenceable; + MMOFlags |= TLI.getMMOFlags(I); SDValue L = DAG.getLoad(ValueVTs[i], dl, Root, A, MachinePointerInfo(SV, Offsets[i]), Alignment, @@ -3688,6 +3701,7 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { MMOFlags |= MachineMemOperand::MOVolatile; if (I.getMetadata(LLVMContext::MD_nontemporal) != nullptr) MMOFlags |= MachineMemOperand::MONonTemporal; + MMOFlags |= TLI.getMMOFlags(I); // An aggregate load cannot wrap around the address space, so offsets to its // parts don't wrap either. 
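(The BaseIndexOffset::match change above folds an `or` into the running offset only when MaskedValueIsZero proves the constant shares no set bits with the other operand, which is precisely the condition under which `or` behaves as `add`. That identity is easy to sanity-check in isolation:)

#include <cassert>
#include <cstdint>

int main() {
  // If x and c have no set bits in common, then (x | c) == x + c, so the OR
  // node can be consumed as a constant add while decomposing an address.
  for (uint32_t x = 0; x < 256; ++x)
    for (uint32_t c = 0; c < 256; ++c)
      if ((x & c) == 0)
        assert((x | c) == x + c);
  return 0;
}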
@@ -3978,7 +3992,7 @@ void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) { SDLoc dl = getCurSDLoc(); AtomicOrdering SuccessOrder = I.getSuccessOrdering(); AtomicOrdering FailureOrder = I.getFailureOrdering(); - SynchronizationScope Scope = I.getSynchScope(); + SyncScope::ID SSID = I.getSyncScopeID(); SDValue InChain = getRoot(); @@ -3988,7 +4002,7 @@ void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) { ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, dl, MemVT, VTs, InChain, getValue(I.getPointerOperand()), getValue(I.getCompareOperand()), getValue(I.getNewValOperand()), MachinePointerInfo(I.getPointerOperand()), - /*Alignment=*/ 0, SuccessOrder, FailureOrder, Scope); + /*Alignment=*/ 0, SuccessOrder, FailureOrder, SSID); SDValue OutChain = L.getValue(2); @@ -4014,7 +4028,7 @@ void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) { case AtomicRMWInst::UMin: NT = ISD::ATOMIC_LOAD_UMIN; break; } AtomicOrdering Order = I.getOrdering(); - SynchronizationScope Scope = I.getSynchScope(); + SyncScope::ID SSID = I.getSyncScopeID(); SDValue InChain = getRoot(); @@ -4025,7 +4039,7 @@ void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) { getValue(I.getPointerOperand()), getValue(I.getValOperand()), I.getPointerOperand(), - /* Alignment=*/ 0, Order, Scope); + /* Alignment=*/ 0, Order, SSID); SDValue OutChain = L.getValue(1); @@ -4040,7 +4054,7 @@ void SelectionDAGBuilder::visitFence(const FenceInst &I) { Ops[0] = getRoot(); Ops[1] = DAG.getConstant((unsigned)I.getOrdering(), dl, TLI.getFenceOperandTy(DAG.getDataLayout())); - Ops[2] = DAG.getConstant(I.getSynchScope(), dl, + Ops[2] = DAG.getConstant(I.getSyncScopeID(), dl, TLI.getFenceOperandTy(DAG.getDataLayout())); DAG.setRoot(DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops)); } @@ -4048,7 +4062,7 @@ void SelectionDAGBuilder::visitFence(const FenceInst &I) { void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) { SDLoc dl = getCurSDLoc(); AtomicOrdering Order = I.getOrdering(); - SynchronizationScope Scope = I.getSynchScope(); + SyncScope::ID SSID = I.getSyncScopeID(); SDValue InChain = getRoot(); @@ -4066,7 +4080,7 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) { VT.getStoreSize(), I.getAlignment() ? I.getAlignment() : DAG.getEVTAlignment(VT), - AAMDNodes(), nullptr, Scope, Order); + AAMDNodes(), nullptr, SSID, Order); InChain = TLI.prepareVolatileOrAtomicLoad(InChain, dl, DAG); SDValue L = @@ -4083,7 +4097,7 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) { SDLoc dl = getCurSDLoc(); AtomicOrdering Order = I.getOrdering(); - SynchronizationScope Scope = I.getSynchScope(); + SyncScope::ID SSID = I.getSyncScopeID(); SDValue InChain = getRoot(); @@ -4100,7 +4114,7 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) { getValue(I.getPointerOperand()), getValue(I.getValueOperand()), I.getPointerOperand(), I.getAlignment(), - Order, Scope); + Order, SSID); DAG.setRoot(OutChain); } @@ -4982,6 +4996,83 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { DAG.setRoot(CallResult.second); return nullptr; } + case Intrinsic::memmove_element_unordered_atomic: { + auto &MI = cast<ElementUnorderedAtomicMemMoveInst>(I); + SDValue Dst = getValue(MI.getRawDest()); + SDValue Src = getValue(MI.getRawSource()); + SDValue Length = getValue(MI.getLength()); + + // Emit a library call. 
+ TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); + Entry.Node = Dst; + Args.push_back(Entry); + + Entry.Node = Src; + Args.push_back(Entry); + + Entry.Ty = MI.getLength()->getType(); + Entry.Node = Length; + Args.push_back(Entry); + + uint64_t ElementSizeConstant = MI.getElementSizeInBytes(); + RTLIB::Libcall LibraryCall = + RTLIB::getMEMMOVE_ELEMENT_UNORDERED_ATOMIC(ElementSizeConstant); + if (LibraryCall == RTLIB::UNKNOWN_LIBCALL) + report_fatal_error("Unsupported element size"); + + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(sdl).setChain(getRoot()).setLibCallee( + TLI.getLibcallCallingConv(LibraryCall), + Type::getVoidTy(*DAG.getContext()), + DAG.getExternalSymbol(TLI.getLibcallName(LibraryCall), + TLI.getPointerTy(DAG.getDataLayout())), + std::move(Args)); + + std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI); + DAG.setRoot(CallResult.second); + return nullptr; + } + case Intrinsic::memset_element_unordered_atomic: { + auto &MI = cast<ElementUnorderedAtomicMemSetInst>(I); + SDValue Dst = getValue(MI.getRawDest()); + SDValue Val = getValue(MI.getValue()); + SDValue Length = getValue(MI.getLength()); + + // Emit a library call. + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); + Entry.Node = Dst; + Args.push_back(Entry); + + Entry.Ty = Type::getInt8Ty(*DAG.getContext()); + Entry.Node = Val; + Args.push_back(Entry); + + Entry.Ty = MI.getLength()->getType(); + Entry.Node = Length; + Args.push_back(Entry); + + uint64_t ElementSizeConstant = MI.getElementSizeInBytes(); + RTLIB::Libcall LibraryCall = + RTLIB::getMEMSET_ELEMENT_UNORDERED_ATOMIC(ElementSizeConstant); + if (LibraryCall == RTLIB::UNKNOWN_LIBCALL) + report_fatal_error("Unsupported element size"); + + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(sdl).setChain(getRoot()).setLibCallee( + TLI.getLibcallCallingConv(LibraryCall), + Type::getVoidTy(*DAG.getContext()), + DAG.getExternalSymbol(TLI.getLibcallName(LibraryCall), + TLI.getPointerTy(DAG.getDataLayout())), + std::move(Args)); + + std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI); + DAG.setRoot(CallResult.second); + return nullptr; + } case Intrinsic::dbg_declare: { const DbgDeclareInst &DI = cast<DbgDeclareInst>(I); DILocalVariable *Variable = DI.getVariable(); @@ -7842,6 +7933,22 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { auto &DL = CLI.DAG.getDataLayout(); ComputeValueVTs(*this, DL, CLI.RetTy, RetTys, &Offsets); + if (CLI.IsPostTypeLegalization) { + // If we are lowering a libcall after legalization, split the return type. 
+ SmallVector<EVT, 4> OldRetTys = std::move(RetTys); + SmallVector<uint64_t, 4> OldOffsets = std::move(Offsets); + for (size_t i = 0, e = OldRetTys.size(); i != e; ++i) { + EVT RetVT = OldRetTys[i]; + uint64_t Offset = OldOffsets[i]; + MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), RetVT); + unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), RetVT); + unsigned RegisterVTSize = RegisterVT.getSizeInBits(); + RetTys.append(NumRegs, RegisterVT); + for (unsigned j = 0; j != NumRegs; ++j) + Offsets.push_back(Offset + j * RegisterVTSize); + } + } + SmallVector<ISD::OutputArg, 4> Outs; GetReturnInfo(CLI.RetTy, getReturnAttrs(CLI), Outs, *this, DL); @@ -7924,6 +8031,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { for (unsigned i = 0, e = Args.size(); i != e; ++i) { SmallVector<EVT, 4> ValueVTs; ComputeValueVTs(*this, DL, Args[i].Ty, ValueVTs); + // FIXME: Split arguments if CLI.IsPostTypeLegalization Type *FinalType = Args[i].Ty; if (Args[i].IsByVal) FinalType = cast<PointerType>(Args[i].Ty)->getElementType(); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 431d52b4b9b9f..ac1d6aae65a52 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -38,7 +38,6 @@ class BranchInst; class CallInst; class DbgValueInst; class ExtractElementInst; -class ExtractValueInst; class FCmpInst; class FPExtInst; class FPToSIInst; @@ -53,7 +52,6 @@ class IntToPtrInst; class IndirectBrInst; class InvokeInst; class InsertElementInst; -class InsertValueInst; class Instruction; class LoadInst; class MachineBasicBlock; @@ -859,8 +857,8 @@ private: void visitInsertElement(const User &I); void visitShuffleVector(const User &I); - void visitExtractValue(const ExtractValueInst &I); - void visitInsertValue(const InsertValueInst &I); + void visitExtractValue(const User &I); + void visitInsertValue(const User &I); void visitLandingPad(const LandingPadInst &I); void visitGetElementPtr(const User &I); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index f711ca71f79fe..bdf57e8058426 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -1483,7 +1483,6 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // Try to select the instruction with FastISel. if (FastIS->selectInstruction(Inst)) { - FastISelFailed = true; --NumFastIselRemaining; ++NumFastIselSuccess; // If fast isel succeeded, skip over all the folded instructions, and @@ -1506,8 +1505,14 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { continue; } + FastISelFailed = true; + // Then handle certain instructions as single-LLVM-Instruction blocks. - if (isa<CallInst>(Inst)) { + // We cannot separate out GC relocates to their own blocks since we need + // to keep track of gc-relocates for a particular gc-statepoint. This is + // done by SelectionDAGBuilder::LowerAsSTATEPOINT, called before + // visitGCRelocate. 
+ if (isa<CallInst>(Inst) && !isStatepoint(Inst) && !isGCRelocate(Inst)) { OptimizationRemarkMissed R("sdagisel", "FastISelFailure", Inst->getDebugLoc(), LLVMBB); diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp index 7886737b879c2..17a3a84ecda57 100644 --- a/lib/CodeGen/SjLjEHPrepare.cpp +++ b/lib/CodeGen/SjLjEHPrepare.cpp @@ -125,8 +125,11 @@ static void MarkBlocksLiveIn(BasicBlock *BB, if (!LiveBBs.insert(BB).second) return; // already been here. - for (BasicBlock *PredBB : predecessors(BB)) - MarkBlocksLiveIn(PredBB, LiveBBs); + df_iterator_default_set<BasicBlock*> Visited; + + for (BasicBlock *B : inverse_depth_first_ext(BB, Visited)) + LiveBBs.insert(B); + } /// substituteLPadValues - Substitute the values returned by the landingpad diff --git a/lib/CodeGen/SplitKit.cpp b/lib/CodeGen/SplitKit.cpp index 008b984dd9616..323045fd2aaae 100644 --- a/lib/CodeGen/SplitKit.cpp +++ b/lib/CodeGen/SplitKit.cpp @@ -53,10 +53,10 @@ InsertPointAnalysis::computeLastInsertPoint(const LiveInterval &CurLI, std::pair<SlotIndex, SlotIndex> &LIP = LastInsertPoint[Num]; SlotIndex MBBEnd = LIS.getMBBEndIdx(&MBB); - SmallVector<const MachineBasicBlock *, 1> EHPadSucessors; + SmallVector<const MachineBasicBlock *, 1> EHPadSuccessors; for (const MachineBasicBlock *SMBB : MBB.successors()) if (SMBB->isEHPad()) - EHPadSucessors.push_back(SMBB); + EHPadSuccessors.push_back(SMBB); // Compute insert points on the first call. The pair is independent of the // current live interval. @@ -68,7 +68,7 @@ InsertPointAnalysis::computeLastInsertPoint(const LiveInterval &CurLI, LIP.first = LIS.getInstructionIndex(*FirstTerm); // If there is a landing pad successor, also find the call instruction. - if (EHPadSucessors.empty()) + if (EHPadSuccessors.empty()) return LIP.first; // There may not be a call instruction (?) in which case we ignore LPad. 
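(A note on the SjLjEHPrepare hunk above: the unbounded recursion in MarkBlocksLiveIn is replaced by inverse_depth_first_ext, which walks predecessor edges iteratively and records visited blocks in an external set. A rough equivalent with an explicit worklist; a sketch only, since the real code reuses LLVM's graph iterators:)

// Collect every block from which BB is reachable, without recursion. Assumes
// the usual LLVM headers (BasicBlock, predecessors, SmallVector,
// SmallPtrSetImpl) are available.
static void markBlocksLiveIn(BasicBlock *BB,
                             SmallPtrSetImpl<BasicBlock *> &LiveBBs) {
  SmallVector<BasicBlock *, 16> Worklist;
  Worklist.push_back(BB);
  while (!Worklist.empty()) {
    BasicBlock *Cur = Worklist.pop_back_val();
    if (!LiveBBs.insert(Cur).second)
      continue; // Already visited.
    for (BasicBlock *Pred : predecessors(Cur))
      Worklist.push_back(Pred);
  }
}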
LIP.second = LIP.first; @@ -87,7 +87,7 @@ InsertPointAnalysis::computeLastInsertPoint(const LiveInterval &CurLI, if (!LIP.second) return LIP.first; - if (none_of(EHPadSucessors, [&](const MachineBasicBlock *EHPad) { + if (none_of(EHPadSuccessors, [&](const MachineBasicBlock *EHPad) { return LIS.isLiveInToMBB(CurLI, EHPad); })) return LIP.first; diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp index e9d38c10c8601..3914ee5147122 100644 --- a/lib/CodeGen/TargetLoweringBase.cpp +++ b/lib/CodeGen/TargetLoweringBase.cpp @@ -384,6 +384,26 @@ static void InitLibcallNames(const char **Names, const Triple &TT) { "__llvm_memcpy_element_unordered_atomic_8"; Names[RTLIB::MEMCPY_ELEMENT_UNORDERED_ATOMIC_16] = "__llvm_memcpy_element_unordered_atomic_16"; + Names[RTLIB::MEMMOVE_ELEMENT_UNORDERED_ATOMIC_1] = + "__llvm_memmove_element_unordered_atomic_1"; + Names[RTLIB::MEMMOVE_ELEMENT_UNORDERED_ATOMIC_2] = + "__llvm_memmove_element_unordered_atomic_2"; + Names[RTLIB::MEMMOVE_ELEMENT_UNORDERED_ATOMIC_4] = + "__llvm_memmove_element_unordered_atomic_4"; + Names[RTLIB::MEMMOVE_ELEMENT_UNORDERED_ATOMIC_8] = + "__llvm_memmove_element_unordered_atomic_8"; + Names[RTLIB::MEMMOVE_ELEMENT_UNORDERED_ATOMIC_16] = + "__llvm_memmove_element_unordered_atomic_16"; + Names[RTLIB::MEMSET_ELEMENT_UNORDERED_ATOMIC_1] = + "__llvm_memset_element_unordered_atomic_1"; + Names[RTLIB::MEMSET_ELEMENT_UNORDERED_ATOMIC_2] = + "__llvm_memset_element_unordered_atomic_2"; + Names[RTLIB::MEMSET_ELEMENT_UNORDERED_ATOMIC_4] = + "__llvm_memset_element_unordered_atomic_4"; + Names[RTLIB::MEMSET_ELEMENT_UNORDERED_ATOMIC_8] = + "__llvm_memset_element_unordered_atomic_8"; + Names[RTLIB::MEMSET_ELEMENT_UNORDERED_ATOMIC_16] = + "__llvm_memset_element_unordered_atomic_16"; Names[RTLIB::UNWIND_RESUME] = "_Unwind_Resume"; Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1] = "__sync_val_compare_and_swap_1"; Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2] = "__sync_val_compare_and_swap_2"; @@ -803,6 +823,40 @@ RTLIB::Libcall RTLIB::getMEMCPY_ELEMENT_UNORDERED_ATOMIC(uint64_t ElementSize) { } } +RTLIB::Libcall RTLIB::getMEMMOVE_ELEMENT_UNORDERED_ATOMIC(uint64_t ElementSize) { + switch (ElementSize) { + case 1: + return MEMMOVE_ELEMENT_UNORDERED_ATOMIC_1; + case 2: + return MEMMOVE_ELEMENT_UNORDERED_ATOMIC_2; + case 4: + return MEMMOVE_ELEMENT_UNORDERED_ATOMIC_4; + case 8: + return MEMMOVE_ELEMENT_UNORDERED_ATOMIC_8; + case 16: + return MEMMOVE_ELEMENT_UNORDERED_ATOMIC_16; + default: + return UNKNOWN_LIBCALL; + } +} + +RTLIB::Libcall RTLIB::getMEMSET_ELEMENT_UNORDERED_ATOMIC(uint64_t ElementSize) { + switch (ElementSize) { + case 1: + return MEMSET_ELEMENT_UNORDERED_ATOMIC_1; + case 2: + return MEMSET_ELEMENT_UNORDERED_ATOMIC_2; + case 4: + return MEMSET_ELEMENT_UNORDERED_ATOMIC_4; + case 8: + return MEMSET_ELEMENT_UNORDERED_ATOMIC_8; + case 16: + return MEMSET_ELEMENT_UNORDERED_ATOMIC_16; + default: + return UNKNOWN_LIBCALL; + } +} + /// InitCmpLibcallCCs - Set default comparison libcall CC. 
/// static void InitCmpLibcallCCs(ISD::CondCode *CCs) { diff --git a/lib/DebugInfo/CodeView/SymbolDumper.cpp b/lib/DebugInfo/CodeView/SymbolDumper.cpp index b9fa9b6a6ad7e..c2c02f8de03fb 100644 --- a/lib/DebugInfo/CodeView/SymbolDumper.cpp +++ b/lib/DebugInfo/CodeView/SymbolDumper.cpp @@ -62,6 +62,18 @@ private: }; } +static StringRef getSymbolKindName(SymbolKind Kind) { + switch (Kind) { +#define SYMBOL_RECORD(EnumName, EnumVal, Name) \ + case EnumName: \ + return #Name; +#include "llvm/DebugInfo/CodeView/CodeViewSymbols.def" + default: + break; + } + return "UnknownSym"; +} + void CVSymbolDumperImpl::printLocalVariableAddrRange( const LocalVariableAddrRange &Range, uint32_t RelocationOffset) { DictScope S(W, "LocalVariableAddrRange"); @@ -86,18 +98,23 @@ void CVSymbolDumperImpl::printTypeIndex(StringRef FieldName, TypeIndex TI) { } Error CVSymbolDumperImpl::visitSymbolBegin(CVSymbol &CVR) { + W.startLine() << getSymbolKindName(CVR.Type); + W.getOStream() << " {\n"; + W.indent(); + W.printEnum("Kind", unsigned(CVR.Type), getSymbolTypeNames()); return Error::success(); } Error CVSymbolDumperImpl::visitSymbolEnd(CVSymbol &CVR) { if (PrintRecordBytes && ObjDelegate) ObjDelegate->printBinaryBlockWithRelocs("SymData", CVR.content()); + + W.unindent(); + W.startLine() << "}\n"; return Error::success(); } Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, BlockSym &Block) { - DictScope S(W, "BlockStart"); - StringRef LinkageName; W.printHex("PtrParent", Block.Parent); W.printHex("PtrEnd", Block.End); @@ -113,7 +130,6 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, BlockSym &Block) { } Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, Thunk32Sym &Thunk) { - DictScope S(W, "Thunk32"); W.printNumber("Parent", Thunk.Parent); W.printNumber("End", Thunk.End); W.printNumber("Next", Thunk.Next); @@ -126,7 +142,6 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, Thunk32Sym &Thunk) { Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, TrampolineSym &Tramp) { - DictScope S(W, "Trampoline"); W.printEnum("Type", uint16_t(Tramp.Type), getTrampolineNames()); W.printNumber("Size", Tramp.Size); W.printNumber("ThunkOff", Tramp.ThunkOffset); @@ -137,7 +152,6 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, } Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, SectionSym &Section) { - DictScope S(W, "Section"); W.printNumber("SectionNumber", Section.SectionNumber); W.printNumber("Alignment", Section.Alignment); W.printNumber("Rva", Section.Rva); @@ -152,7 +166,6 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, SectionSym &Section) { Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, CoffGroupSym &CoffGroup) { - DictScope S(W, "COFF Group"); W.printNumber("Size", CoffGroup.Size); W.printFlags("Characteristics", CoffGroup.Characteristics, getImageSectionCharacteristicNames(), @@ -165,8 +178,6 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, BPRelativeSym &BPRel) { - DictScope S(W, "BPRelativeSym"); - W.printNumber("Offset", BPRel.Offset); printTypeIndex("Type", BPRel.Type); W.printString("VarName", BPRel.Name); @@ -175,16 +186,12 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, BuildInfoSym &BuildInfo) { - DictScope S(W, "BuildInfo"); - W.printNumber("BuildId", BuildInfo.BuildId); return Error::success(); } Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, CallSiteInfoSym &CallSiteInfo) { - 
DictScope S(W, "CallSiteInfo"); - StringRef LinkageName; if (ObjDelegate) { ObjDelegate->printRelocatedField("CodeOffset", @@ -200,8 +207,6 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, EnvBlockSym &EnvBlock) { - DictScope S(W, "EnvBlock"); - ListScope L(W, "Entries"); for (auto Entry : EnvBlock.Fields) { W.printString(Entry); @@ -211,7 +216,6 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, FileStaticSym &FileStatic) { - DictScope S(W, "FileStatic"); printTypeIndex("Index", FileStatic.Index); W.printNumber("ModFilenameOffset", FileStatic.ModFilenameOffset); W.printFlags("Flags", uint16_t(FileStatic.Flags), getLocalFlagNames()); @@ -220,7 +224,6 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, } Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, ExportSym &Export) { - DictScope S(W, "Export"); W.printNumber("Ordinal", Export.Ordinal); W.printFlags("Flags", uint16_t(Export.Flags), getExportSymFlagNames()); W.printString("Name", Export.Name); @@ -229,8 +232,6 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, ExportSym &Export) { Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, Compile2Sym &Compile2) { - DictScope S(W, "CompilerFlags2"); - W.printEnum("Language", Compile2.getLanguage(), getSourceLanguageNames()); W.printFlags("Flags", Compile2.getFlags(), getCompileSym2FlagNames()); W.printEnum("Machine", unsigned(Compile2.Machine), getCPUTypeNames()); @@ -254,8 +255,6 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, Compile3Sym &Compile3) { - DictScope S(W, "CompilerFlags3"); - W.printEnum("Language", Compile3.getLanguage(), getSourceLanguageNames()); W.printFlags("Flags", Compile3.getFlags(), getCompileSym3FlagNames()); W.printEnum("Machine", unsigned(Compile3.Machine), getCPUTypeNames()); @@ -281,8 +280,6 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, ConstantSym &Constant) { - DictScope S(W, "Constant"); - printTypeIndex("Type", Constant.Type); W.printNumber("Value", Constant.Value); W.printString("Name", Constant.Name); @@ -290,9 +287,6 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, } Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, DataSym &Data) { - DictScope S(W, "DataSym"); - - W.printEnum("Kind", uint16_t(CVR.kind()), getSymbolTypeNames()); StringRef LinkageName; if (ObjDelegate) { ObjDelegate->printRelocatedField("DataOffset", Data.getRelocationOffset(), @@ -308,15 +302,12 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, DataSym &Data) { Error CVSymbolDumperImpl::visitKnownRecord( CVSymbol &CVR, DefRangeFramePointerRelFullScopeSym &DefRangeFramePointerRelFullScope) { - DictScope S(W, "DefRangeFramePointerRelFullScope"); W.printNumber("Offset", DefRangeFramePointerRelFullScope.Offset); return Error::success(); } Error CVSymbolDumperImpl::visitKnownRecord( CVSymbol &CVR, DefRangeFramePointerRelSym &DefRangeFramePointerRel) { - DictScope S(W, "DefRangeFramePointerRel"); - W.printNumber("Offset", DefRangeFramePointerRel.Offset); printLocalVariableAddrRange(DefRangeFramePointerRel.Range, DefRangeFramePointerRel.getRelocationOffset()); @@ -326,8 +317,6 @@ Error CVSymbolDumperImpl::visitKnownRecord( Error CVSymbolDumperImpl::visitKnownRecord( CVSymbol &CVR, DefRangeRegisterRelSym &DefRangeRegisterRel) { - DictScope S(W, 
"DefRangeRegisterRel"); - W.printNumber("BaseRegister", DefRangeRegisterRel.Hdr.Register); W.printBoolean("HasSpilledUDTMember", DefRangeRegisterRel.hasSpilledUDTMember()); @@ -341,8 +330,6 @@ Error CVSymbolDumperImpl::visitKnownRecord( Error CVSymbolDumperImpl::visitKnownRecord( CVSymbol &CVR, DefRangeRegisterSym &DefRangeRegister) { - DictScope S(W, "DefRangeRegister"); - W.printNumber("Register", DefRangeRegister.Hdr.Register); W.printNumber("MayHaveNoName", DefRangeRegister.Hdr.MayHaveNoName); printLocalVariableAddrRange(DefRangeRegister.Range, @@ -353,8 +340,6 @@ Error CVSymbolDumperImpl::visitKnownRecord( Error CVSymbolDumperImpl::visitKnownRecord( CVSymbol &CVR, DefRangeSubfieldRegisterSym &DefRangeSubfieldRegister) { - DictScope S(W, "DefRangeSubfieldRegister"); - W.printNumber("Register", DefRangeSubfieldRegister.Hdr.Register); W.printNumber("MayHaveNoName", DefRangeSubfieldRegister.Hdr.MayHaveNoName); W.printNumber("OffsetInParent", DefRangeSubfieldRegister.Hdr.OffsetInParent); @@ -366,8 +351,6 @@ Error CVSymbolDumperImpl::visitKnownRecord( Error CVSymbolDumperImpl::visitKnownRecord( CVSymbol &CVR, DefRangeSubfieldSym &DefRangeSubfield) { - DictScope S(W, "DefRangeSubfield"); - if (ObjDelegate) { DebugStringTableSubsectionRef Strings = ObjDelegate->getStringTable(); auto ExpectedProgram = Strings.getString(DefRangeSubfield.Program); @@ -387,8 +370,6 @@ Error CVSymbolDumperImpl::visitKnownRecord( Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, DefRangeSym &DefRange) { - DictScope S(W, "DefRange"); - if (ObjDelegate) { DebugStringTableSubsectionRef Strings = ObjDelegate->getStringTable(); auto ExpectedProgram = Strings.getString(DefRange.Program); @@ -406,8 +387,6 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, FrameCookieSym &FrameCookie) { - DictScope S(W, "FrameCookie"); - StringRef LinkageName; if (ObjDelegate) { ObjDelegate->printRelocatedField("CodeOffset", @@ -423,8 +402,6 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, FrameProcSym &FrameProc) { - DictScope S(W, "FrameProc"); - W.printHex("TotalFrameBytes", FrameProc.TotalFrameBytes); W.printHex("PaddingFrameBytes", FrameProc.PaddingFrameBytes); W.printHex("OffsetToPadding", FrameProc.OffsetToPadding); @@ -440,8 +417,6 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, Error CVSymbolDumperImpl::visitKnownRecord( CVSymbol &CVR, HeapAllocationSiteSym &HeapAllocSite) { - DictScope S(W, "HeapAllocationSite"); - StringRef LinkageName; if (ObjDelegate) { ObjDelegate->printRelocatedField("CodeOffset", @@ -458,8 +433,6 @@ Error CVSymbolDumperImpl::visitKnownRecord( Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, InlineSiteSym &InlineSite) { - DictScope S(W, "InlineSite"); - W.printHex("PtrParent", InlineSite.Parent); W.printHex("PtrEnd", InlineSite.End); printTypeIndex("Inlinee", InlineSite.Inlinee); @@ -515,7 +488,6 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, RegisterSym &Register) { - DictScope S(W, "RegisterSym"); printTypeIndex("Type", Register.Index); W.printEnum("Seg", uint16_t(Register.Register), getRegisterNames()); W.printString("Name", Register.Name); @@ -523,7 +495,6 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, } Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, PublicSym32 &Public) { - DictScope S(W, "PublicSym"); W.printFlags("Flags", 
uint32_t(Public.Flags), getPublicSymFlagNames()); W.printNumber("Seg", Public.Segment); W.printNumber("Off", Public.Offset); @@ -532,7 +503,6 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, PublicSym32 &Public) { } Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, ProcRefSym &ProcRef) { - DictScope S(W, "ProcRef"); W.printNumber("SumName", ProcRef.SumName); W.printNumber("SymOffset", ProcRef.SymOffset); W.printNumber("Mod", ProcRef.Module); @@ -541,8 +511,6 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, ProcRefSym &ProcRef) { } Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, LabelSym &Label) { - DictScope S(W, "Label"); - StringRef LinkageName; if (ObjDelegate) { ObjDelegate->printRelocatedField("CodeOffset", Label.getRelocationOffset(), @@ -558,8 +526,6 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, LabelSym &Label) { } Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, LocalSym &Local) { - DictScope S(W, "Local"); - printTypeIndex("Type", Local.Type); W.printFlags("Flags", uint16_t(Local.Flags), getLocalFlagNames()); W.printString("VarName", Local.Name); @@ -567,16 +533,12 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, LocalSym &Local) { } Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, ObjNameSym &ObjName) { - DictScope S(W, "ObjectName"); - W.printHex("Signature", ObjName.Signature); W.printString("ObjectName", ObjName.Name); return Error::success(); } Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, ProcSym &Proc) { - DictScope S(W, "ProcStart"); - if (InFunctionScope) return llvm::make_error<CodeViewError>( "Visiting a ProcSym while inside function scope!"); @@ -584,7 +546,6 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, ProcSym &Proc) { InFunctionScope = true; StringRef LinkageName; - W.printEnum("Kind", uint16_t(CVR.kind()), getSymbolTypeNames()); W.printHex("PtrParent", Proc.Parent); W.printHex("PtrEnd", Proc.End); W.printHex("PtrNext", Proc.Next); @@ -607,13 +568,6 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, ProcSym &Proc) { Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, ScopeEndSym &ScopeEnd) { - if (CVR.kind() == SymbolKind::S_END) - DictScope S(W, "BlockEnd"); - else if (CVR.kind() == SymbolKind::S_PROC_ID_END) - DictScope S(W, "ProcEnd"); - else if (CVR.kind() == SymbolKind::S_INLINESITE_END) - DictScope S(W, "InlineSiteEnd"); - InFunctionScope = false; return Error::success(); } @@ -627,8 +581,6 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, CallerSym &Caller) { Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, RegRelativeSym &RegRel) { - DictScope S(W, "RegRelativeSym"); - W.printHex("Offset", RegRel.Offset); printTypeIndex("Type", RegRel.Type); W.printEnum("Register", uint16_t(RegRel.Register), getRegisterNames()); @@ -638,8 +590,6 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, ThreadLocalDataSym &Data) { - DictScope S(W, "ThreadLocalDataSym"); - StringRef LinkageName; if (ObjDelegate) { ObjDelegate->printRelocatedField("DataOffset", Data.getRelocationOffset(), @@ -653,15 +603,12 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, } Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, UDTSym &UDT) { - DictScope S(W, "UDT"); printTypeIndex("Type", UDT.Type); W.printString("UDTName", UDT.Name); return Error::success(); } Error CVSymbolDumperImpl::visitUnknownSymbol(CVSymbol &CVR) { - DictScope S(W, "UnknownSym"); - 
W.printEnum("Kind", uint16_t(CVR.kind()), getSymbolTypeNames()); W.printNumber("Length", CVR.length()); return Error::success(); } diff --git a/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp b/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp index 72cb9e2e35442..0d935c4472aef 100644 --- a/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp +++ b/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp @@ -382,6 +382,13 @@ static bool discoverTypeIndices(ArrayRef<uint8_t> Content, SymbolKind Kind, case SymbolKind::S_BUILDINFO: Refs.push_back({TiRefKind::IndexRef, 0, 1}); // Compile flags break; + case SymbolKind::S_LTHREAD32: + case SymbolKind::S_GTHREAD32: + Refs.push_back({TiRefKind::TypeRef, 0, 1}); // Type + break; + case SymbolKind::S_FILESTATIC: + Refs.push_back({TiRefKind::TypeRef, 0, 1}); // Type + break; case SymbolKind::S_LOCAL: Refs.push_back({TiRefKind::TypeRef, 0, 1}); // Type break; @@ -403,6 +410,10 @@ static bool discoverTypeIndices(ArrayRef<uint8_t> Content, SymbolKind Kind, case SymbolKind::S_INLINESITE: Refs.push_back({TiRefKind::IndexRef, 8, 1}); // ID of inlinee break; + case SymbolKind::S_HEAPALLOCSITE: + // FIXME: It's not clear if this is a type or item reference. + Refs.push_back({TiRefKind::IndexRef, 8, 1}); // signature + break; // Defranges don't have types, just registers and code offsets. case SymbolKind::S_DEFRANGE_REGISTER: @@ -419,6 +430,7 @@ static bool discoverTypeIndices(ArrayRef<uint8_t> Content, SymbolKind Kind, case SymbolKind::S_COMPILE: case SymbolKind::S_COMPILE2: case SymbolKind::S_COMPILE3: + case SymbolKind::S_ENVBLOCK: case SymbolKind::S_BLOCK32: case SymbolKind::S_FRAMEPROC: break; diff --git a/lib/DebugInfo/DWARF/DWARFContext.cpp b/lib/DebugInfo/DWARF/DWARFContext.cpp index a18d4efec07a7..495e09fbae355 100644 --- a/lib/DebugInfo/DWARF/DWARFContext.cpp +++ b/lib/DebugInfo/DWARF/DWARFContext.cpp @@ -591,10 +591,10 @@ void DWARFContext::parseCompileUnits() { void DWARFContext::parseTypeUnits() { if (!TUs.empty()) return; - for (const auto &I : getTypesSections()) { + forEachTypesSections([&](const DWARFSection &S) { TUs.emplace_back(); - TUs.back().parse(*this, I.second); - } + TUs.back().parse(*this, S); + }); } void DWARFContext::parseDWOCompileUnits() { @@ -604,10 +604,10 @@ void DWARFContext::parseDWOCompileUnits() { void DWARFContext::parseDWOTypeUnits() { if (!DWOTUs.empty()) return; - for (const auto &I : getTypesDWOSections()) { + forEachTypesDWOSections([&](const DWARFSection &S) { DWOTUs.emplace_back(); - DWOTUs.back().parseDWO(*this, I.second); - } + DWOTUs.back().parseDWO(*this, S); + }); } DWARFCompileUnit *DWARFContext::getCompileUnitForOffset(uint32_t Offset) { @@ -937,27 +937,23 @@ DWARFContextInMemory::DWARFContextInMemory( : FileName(Obj.getFileName()), IsLittleEndian(Obj.isLittleEndian()), AddressSize(Obj.getBytesInAddress()) { for (const SectionRef &Section : Obj.sections()) { - StringRef name; - Section.getName(name); + StringRef Name; + Section.getName(Name); // Skip BSS and Virtual sections, they aren't interesting. - bool IsBSS = Section.isBSS(); - if (IsBSS) - continue; - bool IsVirtual = Section.isVirtual(); - if (IsVirtual) + if (Section.isBSS() || Section.isVirtual()) continue; - StringRef data; + StringRef Data; section_iterator RelocatedSection = Section.getRelocatedSection(); // Try to obtain an already relocated version of this section. // Else use the unrelocated section from the object file. We'll have to // apply relocations ourselves later. 
- if (!L || !L->getLoadedSectionContents(*RelocatedSection, data)) + if (!L || !L->getLoadedSectionContents(*RelocatedSection, Data)) - Section.getContents(data); + Section.getContents(Data); - if (auto Err = maybeDecompress(Section, name, data)) { + if (auto Err = maybeDecompress(Section, Name, Data)) { ErrorPolicy EP = HandleError( - createError("failed to decompress '" + name + "', ", std::move(Err))); + createError("failed to decompress '" + Name + "', ", std::move(Err))); if (EP == ErrorPolicy::Halt) return; continue; @@ -965,27 +961,27 @@ DWARFContextInMemory::DWARFContextInMemory( // Compressed section names in GNU style start with ".z", // at this point the section is decompressed and we drop the compression prefix. - name = name.substr( - name.find_first_not_of("._z")); // Skip ".", "z" and "_" prefixes. + Name = Name.substr( + Name.find_first_not_of("._z")); // Skip ".", "z" and "_" prefixes. + + // Map platform specific debug section names to DWARF standard section + // names. + Name = Obj.mapDebugSectionName(Name); - if (StringRef *SectionData = MapSectionToMember(name)) { - *SectionData = data; - if (name == "debug_ranges") { + if (StringRef *SectionData = mapSectionToMember(Name)) { + *SectionData = Data; + if (Name == "debug_ranges") { // FIXME: Use the other dwo range section when we emit it. - RangeDWOSection.Data = data; + RangeDWOSection.Data = Data; } - } else if (name == "debug_types") { + } else if (Name == "debug_types") { // Find debug_types data by section rather than name as there are // multiple, comdat grouped, debug_types sections. - TypesSections[Section].Data = data; - } else if (name == "debug_types.dwo") { - TypesDWOSections[Section].Data = data; + TypesSections[Section].Data = Data; + } else if (Name == "debug_types.dwo") { + TypesDWOSections[Section].Data = Data; } - // Map platform specific debug section names to DWARF standard section - // names. - name = Obj.mapDebugSectionName(name); - if (RelocatedSection == Obj.section_end()) continue; @@ -1012,21 +1008,8 @@ DWARFContextInMemory::DWARFContextInMemory( // TODO: Add support for relocations in other sections as needed. // Record relocations for the debug_info and debug_line sections. - RelocAddrMap *Map = - StringSwitch<RelocAddrMap *>(RelSecName) - .Case("debug_info", &InfoSection.Relocs) - .Case("debug_loc", &LocSection.Relocs) - .Case("debug_info.dwo", &InfoDWOSection.Relocs) - .Case("debug_line", &LineSection.Relocs) - .Case("debug_str_offsets", &StringOffsetSection.Relocs) - .Case("debug_ranges", &RangeSection.Relocs) - .Case("debug_addr", &AddrSection.Relocs) - .Case("apple_names", &AppleNamesSection.Relocs) - .Case("apple_types", &AppleTypesSection.Relocs) - .Case("apple_namespaces", &AppleNamespacesSection.Relocs) - .Case("apple_namespac", &AppleNamespacesSection.Relocs) - .Case("apple_objc", &AppleObjCSection.Relocs) - .Default(nullptr); + DWARFSection *Sec = mapNameToDWARFSection(RelSecName); + RelocAddrMap *Map = Sec ? &Sec->Relocs : nullptr; if (!Map) { // Find debug_types relocs by section rather than name as there are // multiple, comdat grouped, debug_types sections. 
@@ -1059,10 +1042,10 @@ DWARFContextInMemory::DWARFContextInMemory( object::RelocVisitor V(Obj); uint64_t Val = V.visit(Reloc.getType(), Reloc, SymInfoOrErr->Address); if (V.error()) { - SmallString<32> Name; - Reloc.getTypeName(Name); + SmallString<32> Type; + Reloc.getTypeName(Type); ErrorPolicy EP = HandleError( - createError("failed to compute relocation: " + Name + ", ", + createError("failed to compute relocation: " + Type + ", ", errorCodeToError(object_error::parse_failed))); if (EP == ErrorPolicy::Halt) return; @@ -1079,40 +1062,47 @@ DWARFContextInMemory::DWARFContextInMemory( bool isLittleEndian) : IsLittleEndian(isLittleEndian), AddressSize(AddrSize) { for (const auto &SecIt : Sections) { - if (StringRef *SectionData = MapSectionToMember(SecIt.first())) + if (StringRef *SectionData = mapSectionToMember(SecIt.first())) *SectionData = SecIt.second->getBuffer(); } } -StringRef *DWARFContextInMemory::MapSectionToMember(StringRef Name) { +DWARFSection *DWARFContextInMemory::mapNameToDWARFSection(StringRef Name) { + return StringSwitch<DWARFSection *>(Name) + .Case("debug_info", &InfoSection) + .Case("debug_loc", &LocSection) + .Case("debug_line", &LineSection) + .Case("debug_str_offsets", &StringOffsetSection) + .Case("debug_ranges", &RangeSection) + .Case("debug_info.dwo", &InfoDWOSection) + .Case("debug_loc.dwo", &LocDWOSection) + .Case("debug_line.dwo", &LineDWOSection) + .Case("debug_str_offsets.dwo", &StringOffsetDWOSection) + .Case("debug_addr", &AddrSection) + .Case("apple_names", &AppleNamesSection) + .Case("apple_types", &AppleTypesSection) + .Case("apple_namespaces", &AppleNamespacesSection) + .Case("apple_namespac", &AppleNamespacesSection) + .Case("apple_objc", &AppleObjCSection) + .Default(nullptr); +} + +StringRef *DWARFContextInMemory::mapSectionToMember(StringRef Name) { + if (DWARFSection *Sec = mapNameToDWARFSection(Name)) + return &Sec->Data; return StringSwitch<StringRef *>(Name) - .Case("debug_info", &InfoSection.Data) .Case("debug_abbrev", &AbbrevSection) - .Case("debug_loc", &LocSection.Data) - .Case("debug_line", &LineSection.Data) .Case("debug_aranges", &ARangeSection) .Case("debug_frame", &DebugFrameSection) .Case("eh_frame", &EHFrameSection) .Case("debug_str", &StringSection) - .Case("debug_str_offsets", &StringOffsetSection.Data) - .Case("debug_ranges", &RangeSection.Data) .Case("debug_macinfo", &MacinfoSection) .Case("debug_pubnames", &PubNamesSection) .Case("debug_pubtypes", &PubTypesSection) .Case("debug_gnu_pubnames", &GnuPubNamesSection) .Case("debug_gnu_pubtypes", &GnuPubTypesSection) - .Case("debug_info.dwo", &InfoDWOSection.Data) .Case("debug_abbrev.dwo", &AbbrevDWOSection) - .Case("debug_loc.dwo", &LocDWOSection.Data) - .Case("debug_line.dwo", &LineDWOSection.Data) .Case("debug_str.dwo", &StringDWOSection) - .Case("debug_str_offsets.dwo", &StringOffsetDWOSection.Data) - .Case("debug_addr", &AddrSection.Data) - .Case("apple_names", &AppleNamesSection.Data) - .Case("apple_types", &AppleTypesSection.Data) - .Case("apple_namespaces", &AppleNamespacesSection.Data) - .Case("apple_namespac", &AppleNamespacesSection.Data) - .Case("apple_objc", &AppleObjCSection.Data) .Case("debug_cu_index", &CUIndexSection) .Case("debug_tu_index", &TUIndexSection) .Case("gdb_index", &GdbIndexSection) diff --git a/lib/DebugInfo/DWARF/DWARFDie.cpp b/lib/DebugInfo/DWARF/DWARFDie.cpp index ef416f72ad175..111f0bbd44448 100644 --- a/lib/DebugInfo/DWARF/DWARFDie.cpp +++ b/lib/DebugInfo/DWARF/DWARFDie.cpp @@ -395,7 +395,7 @@ 
DWARFDie::attribute_iterator::attribute_iterator(DWARFDie D, bool End) : void DWARFDie::attribute_iterator::updateForIndex( const DWARFAbbreviationDeclaration &AbbrDecl, uint32_t I) { Index = I; - // AbbrDecl must be valid befor calling this function. + // AbbrDecl must be valid before calling this function. auto NumAttrs = AbbrDecl.getNumAttributes(); if (Index < NumAttrs) { AttrValue.Attr = AbbrDecl.getAttrByIndex(Index); diff --git a/lib/DebugInfo/PDB/CMakeLists.txt b/lib/DebugInfo/PDB/CMakeLists.txt index e9fd29ccc4caf..ff01c948e0997 100644 --- a/lib/DebugInfo/PDB/CMakeLists.txt +++ b/lib/DebugInfo/PDB/CMakeLists.txt @@ -41,6 +41,7 @@ add_pdb_impl_folder(Native Native/InfoStream.cpp Native/InfoStreamBuilder.cpp Native/ModuleDebugStream.cpp + Native/NativeBuiltinSymbol.cpp Native/NativeCompilandSymbol.cpp Native/NativeEnumModules.cpp Native/NativeExeSymbol.cpp @@ -53,6 +54,7 @@ add_pdb_impl_folder(Native Native/PDBStringTableBuilder.cpp Native/PDBTypeServerHandler.cpp Native/PublicsStream.cpp + Native/PublicsStreamBuilder.cpp Native/RawError.cpp Native/SymbolStream.cpp Native/TpiHashing.cpp diff --git a/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp b/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp index 745dd742aadc3..897f78c510322 100644 --- a/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp +++ b/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp @@ -65,6 +65,10 @@ void DbiModuleDescriptorBuilder::setObjFileName(StringRef Name) { ObjFileName = Name; } +void DbiModuleDescriptorBuilder::setPdbFilePathNI(uint32_t NI) { + PdbFilePathNI = NI; +} + void DbiModuleDescriptorBuilder::addSymbol(CVSymbol Symbol) { Symbols.push_back(Symbol); // Symbols written to a PDB file are required to be 4 byte aligned. The same @@ -111,7 +115,7 @@ void DbiModuleDescriptorBuilder::finalize() { (void)Layout.Mod; // Set in constructor (void)Layout.ModDiStream; // Set in finalizeMsfLayout Layout.NumFiles = SourceFiles.size(); - Layout.PdbFilePathNI = 0; + Layout.PdbFilePathNI = PdbFilePathNI; Layout.SrcFileNameNI = 0; // This value includes both the signature field as well as the record bytes diff --git a/lib/DebugInfo/PDB/Native/DbiStream.cpp b/lib/DebugInfo/PDB/Native/DbiStream.cpp index a1f0671dec3e6..0eeac7e4c0847 100644 --- a/lib/DebugInfo/PDB/Native/DbiStream.cpp +++ b/lib/DebugInfo/PDB/Native/DbiStream.cpp @@ -225,6 +225,10 @@ void DbiStream::visitSectionContributions( } } +Expected<StringRef> DbiStream::getECName(uint32_t NI) const { + return ECNames.getStringForID(NI); +} + Error DbiStream::initializeSectionContributionData() { if (SecContrSubstream.empty()) return Error::success(); @@ -248,6 +252,9 @@ Error DbiStream::initializeSectionHeadersData() { return Error::success(); uint32_t StreamNum = getDebugStreamIndex(DbgHeaderType::SectionHdr); + if (StreamNum == kInvalidStreamIndex) + return Error::success(); + if (StreamNum >= Pdb.getNumStreams()) return make_error<RawError>(raw_error_code::no_stream); diff --git a/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp b/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp index aad247ea185f2..25076e40fc98c 100644 --- a/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp +++ b/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp @@ -49,9 +49,17 @@ void DbiStreamBuilder::setSectionMap(ArrayRef<SecMapEntry> SecMap) { SectionMap = SecMap; } +void DbiStreamBuilder::setSymbolRecordStreamIndex(uint32_t Index) { + SymRecordStreamIndex = Index; +} + +void DbiStreamBuilder::setPublicsStreamIndex(uint32_t Index) { + PublicsStreamIndex = Index; +} + Error 
DbiStreamBuilder::addDbgStream(pdb::DbgHeaderType Type, ArrayRef<uint8_t> Data) { - if (DbgStreams[(int)Type].StreamNumber) + if (DbgStreams[(int)Type].StreamNumber != kInvalidStreamIndex) return make_error<RawError>(raw_error_code::duplicate_entry, "The specified stream type already exists"); auto ExpectedIndex = Msf.addStream(Data.size()); @@ -63,11 +71,16 @@ Error DbiStreamBuilder::addDbgStream(pdb::DbgHeaderType Type, return Error::success(); } +uint32_t DbiStreamBuilder::addECName(StringRef Name) { + return ECNamesBuilder.insert(Name); +} + uint32_t DbiStreamBuilder::calculateSerializedLength() const { // For now we only support serializing the header. return sizeof(DbiStreamHeader) + calculateFileInfoSubstreamSize() + calculateModiSubstreamSize() + calculateSectionContribsStreamSize() + - calculateSectionMapStreamSize() + calculateDbgStreamsSize(); + calculateSectionMapStreamSize() + calculateDbgStreamsSize() + + ECNamesBuilder.calculateSerializedSize(); } Expected<DbiModuleDescriptorBuilder &> @@ -247,15 +260,15 @@ Error DbiStreamBuilder::finalize() { H->PdbDllVersion = PdbDllVersion; H->MachineType = static_cast<uint16_t>(MachineType); - H->ECSubstreamSize = 0; + H->ECSubstreamSize = ECNamesBuilder.calculateSerializedSize(); H->FileInfoSize = FileInfoBuffer.getLength(); H->ModiSubstreamSize = calculateModiSubstreamSize(); H->OptionalDbgHdrSize = DbgStreams.size() * sizeof(uint16_t); H->SecContrSubstreamSize = calculateSectionContribsStreamSize(); H->SectionMapSize = calculateSectionMapStreamSize(); H->TypeServerSize = 0; - H->SymRecordStreamIndex = kInvalidStreamIndex; - H->PublicSymbolStreamIndex = kInvalidStreamIndex; + H->SymRecordStreamIndex = SymRecordStreamIndex; + H->PublicSymbolStreamIndex = PublicsStreamIndex; H->MFCTypeServerIndex = kInvalidStreamIndex; H->GlobalSymbolStreamIndex = kInvalidStreamIndex; @@ -383,6 +396,9 @@ Error DbiStreamBuilder::commit(const msf::MSFLayout &Layout, if (auto EC = Writer.writeStreamRef(FileInfoBuffer)) return EC; + if (auto EC = ECNamesBuilder.commit(Writer)) + return EC; + for (auto &Stream : DbgStreams) if (auto EC = Writer.writeInteger(Stream.StreamNumber)) return EC; diff --git a/lib/DebugInfo/PDB/Native/NamedStreamMap.cpp b/lib/DebugInfo/PDB/Native/NamedStreamMap.cpp index 354b8c0e07ff5..6cdf6dde04d9f 100644 --- a/lib/DebugInfo/PDB/Native/NamedStreamMap.cpp +++ b/lib/DebugInfo/PDB/Native/NamedStreamMap.cpp @@ -86,7 +86,8 @@ Error NamedStreamMap::commit(BinaryStreamWriter &Writer) const { for (const auto &Name : OrderedStreamNames) { auto Item = Mapping.find(Name); - assert(Item != Mapping.end()); + if (Item == Mapping.end()) + continue; if (auto EC = Writer.writeCString(Item->getKey())) return EC; } @@ -108,7 +109,8 @@ uint32_t NamedStreamMap::finalize() { for (const auto &Name : OrderedStreamNames) { auto Item = Mapping.find(Name); - assert(Item != Mapping.end()); + if (Item == Mapping.end()) + continue; FinalizedHashTable.set(FinalizedInfo->StringDataBytes, Item->getValue()); FinalizedInfo->StringDataBytes += Item->getKeyLength() + 1; } diff --git a/lib/DebugInfo/PDB/Native/NativeBuiltinSymbol.cpp b/lib/DebugInfo/PDB/Native/NativeBuiltinSymbol.cpp new file mode 100644 index 0000000000000..60416f69e137c --- /dev/null +++ b/lib/DebugInfo/PDB/Native/NativeBuiltinSymbol.cpp @@ -0,0 +1,48 @@ +//===- NativeBuiltinSymbol.cpp ------------------------------------ C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/PDB/Native/NativeBuiltinSymbol.h" + +#include "llvm/DebugInfo/PDB/Native/NativeSession.h" + +namespace llvm { +namespace pdb { + +NativeBuiltinSymbol::NativeBuiltinSymbol(NativeSession &PDBSession, + SymIndexId Id, PDB_BuiltinType T, + uint64_t L) + : NativeRawSymbol(PDBSession, Id), Session(PDBSession), Type(T), Length(L) { +} + +NativeBuiltinSymbol::~NativeBuiltinSymbol() {} + +std::unique_ptr<NativeRawSymbol> NativeBuiltinSymbol::clone() const { + return llvm::make_unique<NativeBuiltinSymbol>(Session, SymbolId, Type, Length); +} + +void NativeBuiltinSymbol::dump(raw_ostream &OS, int Indent) const { + // TODO: Apparently nothing needs this yet. +} + +PDB_SymType NativeBuiltinSymbol::getSymTag() const { + return PDB_SymType::BuiltinType; +} + +PDB_BuiltinType NativeBuiltinSymbol::getBuiltinType() const { return Type; } + +bool NativeBuiltinSymbol::isConstType() const { return false; } + +uint64_t NativeBuiltinSymbol::getLength() const { return Length; } + +bool NativeBuiltinSymbol::isUnalignedType() const { return false; } + +bool NativeBuiltinSymbol::isVolatileType() const { return false; } + +} // namespace pdb +} // namespace llvm diff --git a/lib/DebugInfo/PDB/Native/NativeCompilandSymbol.cpp b/lib/DebugInfo/PDB/Native/NativeCompilandSymbol.cpp index 180c169ec209c..7132a99a9f160 100644 --- a/lib/DebugInfo/PDB/Native/NativeCompilandSymbol.cpp +++ b/lib/DebugInfo/PDB/Native/NativeCompilandSymbol.cpp @@ -15,7 +15,7 @@ namespace llvm { namespace pdb { NativeCompilandSymbol::NativeCompilandSymbol(NativeSession &Session, - uint32_t SymbolId, + SymIndexId SymbolId, DbiModuleDescriptor MI) : NativeRawSymbol(Session, SymbolId), Module(MI) {} diff --git a/lib/DebugInfo/PDB/Native/NativeExeSymbol.cpp b/lib/DebugInfo/PDB/Native/NativeExeSymbol.cpp index 6206155b9fb64..cb0830f453c8c 100644 --- a/lib/DebugInfo/PDB/Native/NativeExeSymbol.cpp +++ b/lib/DebugInfo/PDB/Native/NativeExeSymbol.cpp @@ -18,7 +18,7 @@ namespace llvm { namespace pdb { -NativeExeSymbol::NativeExeSymbol(NativeSession &Session, uint32_t SymbolId) +NativeExeSymbol::NativeExeSymbol(NativeSession &Session, SymIndexId SymbolId) : NativeRawSymbol(Session, SymbolId), File(Session.getPDBFile()) {} std::unique_ptr<NativeRawSymbol> NativeExeSymbol::clone() const { diff --git a/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp b/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp index b4f5c96ce66be..92612bcea4ac4 100644 --- a/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp +++ b/lib/DebugInfo/PDB/Native/NativeRawSymbol.cpp @@ -13,7 +13,7 @@ using namespace llvm; using namespace llvm::pdb; -NativeRawSymbol::NativeRawSymbol(NativeSession &PDBSession, uint32_t SymbolId) +NativeRawSymbol::NativeRawSymbol(NativeSession &PDBSession, SymIndexId SymbolId) : Session(PDBSession), SymbolId(SymbolId) {} void NativeRawSymbol::dump(raw_ostream &OS, int Indent) const {} diff --git a/lib/DebugInfo/PDB/Native/NativeSession.cpp b/lib/DebugInfo/PDB/Native/NativeSession.cpp index 93d43d9ef341f..76de0d8f9e7ef 100644 --- a/lib/DebugInfo/PDB/Native/NativeSession.cpp +++ b/lib/DebugInfo/PDB/Native/NativeSession.cpp @@ -10,9 +10,11 @@ #include "llvm/DebugInfo/PDB/Native/NativeSession.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/DebugInfo/CodeView/TypeIndex.h" #include "llvm/DebugInfo/PDB/GenericError.h" #include "llvm/DebugInfo/PDB/IPDBEnumChildren.h" #include "llvm/DebugInfo/PDB/IPDBSourceFile.h" +#include "llvm/DebugInfo/PDB/Native/NativeBuiltinSymbol.h" 
#include "llvm/DebugInfo/PDB/Native/NativeCompilandSymbol.h" #include "llvm/DebugInfo/PDB/Native/NativeExeSymbol.h" #include "llvm/DebugInfo/PDB/Native/PDBFile.h" @@ -33,6 +35,28 @@ using namespace llvm; using namespace llvm::msf; using namespace llvm::pdb; +namespace { +// Maps codeview::SimpleTypeKind of a built-in type to the parameters necessary +// to instantiate a NativeBuiltinSymbol for that type. +static const struct BuiltinTypeEntry { + codeview::SimpleTypeKind Kind; + PDB_BuiltinType Type; + uint32_t Size; +} BuiltinTypes[] = { + {codeview::SimpleTypeKind::Int32, PDB_BuiltinType::Int, 4}, + {codeview::SimpleTypeKind::UInt32, PDB_BuiltinType::UInt, 4}, + {codeview::SimpleTypeKind::UInt32Long, PDB_BuiltinType::UInt, 4}, + {codeview::SimpleTypeKind::UInt64Quad, PDB_BuiltinType::UInt, 8}, + {codeview::SimpleTypeKind::NarrowCharacter, PDB_BuiltinType::Char, 1}, + {codeview::SimpleTypeKind::SignedCharacter, PDB_BuiltinType::Char, 1}, + {codeview::SimpleTypeKind::UnsignedCharacter, PDB_BuiltinType::UInt, 1}, + {codeview::SimpleTypeKind::UInt16Short, PDB_BuiltinType::UInt, 2}, + {codeview::SimpleTypeKind::Boolean8, PDB_BuiltinType::Bool, 1} + // This table can be grown as necessary, but these are the only types we've + // needed so far. +}; +} // namespace + NativeSession::NativeSession(std::unique_ptr<PDBFile> PdbFile, std::unique_ptr<BumpPtrAllocator> Allocator) : Pdb(std::move(PdbFile)), Allocator(std::move(Allocator)) {} @@ -71,19 +95,51 @@ Error NativeSession::createFromExe(StringRef Path, std::unique_ptr<PDBSymbolCompiland> NativeSession::createCompilandSymbol(DbiModuleDescriptor MI) { - const auto Id = static_cast<uint32_t>(SymbolCache.size()); + const auto Id = static_cast<SymIndexId>(SymbolCache.size()); SymbolCache.push_back( llvm::make_unique<NativeCompilandSymbol>(*this, Id, MI)); return llvm::make_unique<PDBSymbolCompiland>( *this, std::unique_ptr<IPDBRawSymbol>(SymbolCache[Id]->clone())); } +SymIndexId NativeSession::findSymbolByTypeIndex(codeview::TypeIndex Index) { + // First see if it's already in our cache. + const auto Entry = TypeIndexToSymbolId.find(Index); + if (Entry != TypeIndexToSymbolId.end()) + return Entry->second; + + // Symbols for built-in types are created on the fly. + if (Index.isSimple()) { + // FIXME: We will eventually need to handle pointers to other simple types, + // which are still simple types in the world of CodeView TypeIndexes. 
+ if (Index.getSimpleMode() != codeview::SimpleTypeMode::Direct) + return 0; + const auto Kind = Index.getSimpleKind(); + const auto It = + std::find_if(std::begin(BuiltinTypes), std::end(BuiltinTypes), + [Kind](const BuiltinTypeEntry &Builtin) { + return Builtin.Kind == Kind; + }); + if (It == std::end(BuiltinTypes)) + return 0; + SymIndexId Id = SymbolCache.size(); + SymbolCache.emplace_back( + llvm::make_unique<NativeBuiltinSymbol>(*this, Id, It->Type, It->Size)); + TypeIndexToSymbolId[Index] = Id; + return Id; + } + + // TODO: Look up PDB type by type index + + return 0; +} + uint64_t NativeSession::getLoadAddress() const { return 0; } void NativeSession::setLoadAddress(uint64_t Address) {} std::unique_ptr<PDBSymbolExe> NativeSession::getGlobalScope() { - const auto Id = static_cast<uint32_t>(SymbolCache.size()); + const auto Id = static_cast<SymIndexId>(SymbolCache.size()); SymbolCache.push_back(llvm::make_unique<NativeExeSymbol>(*this, Id)); auto RawSymbol = SymbolCache[Id]->clone(); auto PdbSymbol(PDBSymbol::create(*this, std::move(RawSymbol))); diff --git a/lib/DebugInfo/PDB/Native/PDBFile.cpp b/lib/DebugInfo/PDB/Native/PDBFile.cpp index 4f6ebb0cb3428..0b6492efc70f3 100644 --- a/lib/DebugInfo/PDB/Native/PDBFile.cpp +++ b/lib/DebugInfo/PDB/Native/PDBFile.cpp @@ -385,8 +385,11 @@ bool PDBFile::hasPDBDbiStream() const { return StreamDBI < getNumStreams(); } bool PDBFile::hasPDBGlobalsStream() { auto DbiS = getPDBDbiStream(); - if (!DbiS) + if (!DbiS) { + consumeError(DbiS.takeError()); return false; + } + return DbiS->getGlobalSymbolStreamIndex() < getNumStreams(); } @@ -396,8 +399,10 @@ bool PDBFile::hasPDBIpiStream() const { return StreamIPI < getNumStreams(); } bool PDBFile::hasPDBPublicsStream() { auto DbiS = getPDBDbiStream(); - if (!DbiS) + if (!DbiS) { + consumeError(DbiS.takeError()); return false; + } return DbiS->getPublicSymbolStreamIndex() < getNumStreams(); } diff --git a/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp b/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp index 12b0c3b36c1dd..9f35fd73629cd 100644 --- a/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp +++ b/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp @@ -18,6 +18,7 @@ #include "llvm/DebugInfo/PDB/Native/InfoStream.h" #include "llvm/DebugInfo/PDB/Native/InfoStreamBuilder.h" #include "llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h" +#include "llvm/DebugInfo/PDB/Native/PublicsStreamBuilder.h" #include "llvm/DebugInfo/PDB/Native/RawError.h" #include "llvm/DebugInfo/PDB/Native/TpiStream.h" #include "llvm/DebugInfo/PDB/Native/TpiStreamBuilder.h" @@ -33,6 +34,8 @@ using namespace llvm::support; PDBFileBuilder::PDBFileBuilder(BumpPtrAllocator &Allocator) : Allocator(Allocator) {} +PDBFileBuilder::~PDBFileBuilder() {} + Error PDBFileBuilder::initialize(uint32_t BlockSize) { auto ExpectedMsf = MSFBuilder::create(Allocator, BlockSize); if (!ExpectedMsf) @@ -71,6 +74,12 @@ PDBStringTableBuilder &PDBFileBuilder::getStringTableBuilder() { return Strings; } +PublicsStreamBuilder &PDBFileBuilder::getPublicsBuilder() { + if (!Publics) + Publics = llvm::make_unique<PublicsStreamBuilder>(*Msf); + return *Publics; +} + Error PDBFileBuilder::addNamedStream(StringRef Name, uint32_t Size) { auto ExpectedStream = Msf->addStream(Size); if (!ExpectedStream) @@ -96,8 +105,6 @@ Expected<msf::MSFLayout> PDBFileBuilder::finalizeMsfLayout() { return std::move(EC); if (auto EC = addNamedStream("/LinkInfo", 0)) return std::move(EC); - if (auto EC = addNamedStream("/src/headerblock", 0)) - return std::move(EC); if (Info) { if (auto EC = 
Info->finalizeMsfLayout()) @@ -115,6 +122,14 @@ Expected<msf::MSFLayout> PDBFileBuilder::finalizeMsfLayout() { if (auto EC = Ipi->finalizeMsfLayout()) return std::move(EC); } + if (Publics) { + if (auto EC = Publics->finalizeMsfLayout()) + return std::move(EC); + if (Dbi) { + Dbi->setPublicsStreamIndex(Publics->getStreamIndex()); + Dbi->setSymbolRecordStreamIndex(Publics->getRecordStreamIdx()); + } + } return Msf->build(); } @@ -194,5 +209,13 @@ Error PDBFileBuilder::commit(StringRef Filename) { return EC; } + if (Publics) { + auto PS = WritableMappedBlockStream::createIndexedStream( + Layout, Buffer, Publics->getStreamIndex(), Allocator); + BinaryStreamWriter PSWriter(*PS); + if (auto EC = Publics->commit(PSWriter)) + return EC; + } + return Buffer.commit(); } diff --git a/lib/DebugInfo/PDB/Native/PDBStringTable.cpp b/lib/DebugInfo/PDB/Native/PDBStringTable.cpp index f9f8ac219d357..acd45f7a62192 100644 --- a/lib/DebugInfo/PDB/Native/PDBStringTable.cpp +++ b/lib/DebugInfo/PDB/Native/PDBStringTable.cpp @@ -21,7 +21,7 @@ using namespace llvm; using namespace llvm::support; using namespace llvm::pdb; -uint32_t PDBStringTable::getByteSize() const { return ByteSize; } +uint32_t PDBStringTable::getByteSize() const { return Header->ByteSize; } uint32_t PDBStringTable::getNameCount() const { return NameCount; } uint32_t PDBStringTable::getHashVersion() const { return Header->HashVersion; } uint32_t PDBStringTable::getSignature() const { return Header->Signature; } diff --git a/lib/DebugInfo/PDB/Native/PublicsStream.cpp b/lib/DebugInfo/PDB/Native/PublicsStream.cpp index 8f3474b9ce190..9c3e654f808ba 100644 --- a/lib/DebugInfo/PDB/Native/PublicsStream.cpp +++ b/lib/DebugInfo/PDB/Native/PublicsStream.cpp @@ -41,19 +41,6 @@ using namespace llvm::msf; using namespace llvm::support; using namespace llvm::pdb; -// This is PSGSIHDR struct defined in -// https://github.com/Microsoft/microsoft-pdb/blob/master/PDB/dbi/gsi.h -struct PublicsStream::HeaderInfo { - ulittle32_t SymHash; - ulittle32_t AddrMap; - ulittle32_t NumThunks; - ulittle32_t SizeOfThunk; - ulittle16_t ISectThunkTable; - char Padding[2]; - ulittle32_t OffThunkTable; - ulittle32_t NumSections; -}; - PublicsStream::PublicsStream(PDBFile &File, std::unique_ptr<MappedBlockStream> Stream) : Pdb(File), Stream(std::move(Stream)) {} @@ -72,7 +59,8 @@ Error PublicsStream::reload() { BinaryStreamReader Reader(*Stream); // Check stream size. - if (Reader.bytesRemaining() < sizeof(HeaderInfo) + sizeof(GSIHashHeader)) + if (Reader.bytesRemaining() < + sizeof(PublicsStreamHeader) + sizeof(GSIHashHeader)) return make_error<RawError>(raw_error_code::corrupt_file, "Publics Stream does not contain a header."); diff --git a/lib/DebugInfo/PDB/Native/PublicsStreamBuilder.cpp b/lib/DebugInfo/PDB/Native/PublicsStreamBuilder.cpp new file mode 100644 index 0000000000000..28c4a8fc35d92 --- /dev/null +++ b/lib/DebugInfo/PDB/Native/PublicsStreamBuilder.cpp @@ -0,0 +1,89 @@ +//===- PublicsStreamBuilder.cpp - PDB Publics Stream Creation ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/PDB/Native/PublicsStreamBuilder.h" + +#include "llvm/DebugInfo/MSF/MSFBuilder.h" +#include "llvm/DebugInfo/MSF/MSFCommon.h" +#include "llvm/DebugInfo/MSF/MappedBlockStream.h" + +#include "GSI.h" + +using namespace llvm; +using namespace llvm::msf; +using namespace llvm::pdb; + +PublicsStreamBuilder::PublicsStreamBuilder(msf::MSFBuilder &Msf) : Msf(Msf) {} + +PublicsStreamBuilder::~PublicsStreamBuilder() {} + +uint32_t PublicsStreamBuilder::calculateSerializedLength() const { + uint32_t Size = 0; + Size += sizeof(PublicsStreamHeader); + Size += sizeof(GSIHashHeader); + Size += HashRecords.size() * sizeof(PSHashRecord); + size_t BitmapSizeInBits = alignTo(IPHR_HASH + 1, 32); + uint32_t NumBitmapEntries = BitmapSizeInBits / 8; + Size += NumBitmapEntries; + + // FIXME: Account for hash buckets. For now, since we write a zero-bitmap + // indicating that no hash buckets are valid, we also write zero bytes of hash + // bucket data. + Size += 0; + return Size; +} + +Error PublicsStreamBuilder::finalizeMsfLayout() { + Expected<uint32_t> Idx = Msf.addStream(calculateSerializedLength()); + if (!Idx) + return Idx.takeError(); + StreamIdx = *Idx; + + Expected<uint32_t> RecordIdx = Msf.addStream(0); + if (!RecordIdx) + return RecordIdx.takeError(); + RecordStreamIdx = *RecordIdx; + return Error::success(); +} + +Error PublicsStreamBuilder::commit(BinaryStreamWriter &PublicsWriter) { + PublicsStreamHeader PSH; + GSIHashHeader GSH; + + // FIXME: Figure out what to put for these values. + PSH.AddrMap = 0; + PSH.ISectThunkTable = 0; + PSH.NumSections = 0; + PSH.NumThunks = 0; + PSH.OffThunkTable = 0; + PSH.SizeOfThunk = 0; + PSH.SymHash = 0; + + GSH.VerSignature = GSIHashHeader::HdrSignature; + GSH.VerHdr = GSIHashHeader::HdrVersion; + GSH.HrSize = 0; + GSH.NumBuckets = 0; + + if (auto EC = PublicsWriter.writeObject(PSH)) + return EC; + if (auto EC = PublicsWriter.writeObject(GSH)) + return EC; + if (auto EC = PublicsWriter.writeArray(makeArrayRef(HashRecords))) + return EC; + + size_t BitmapSizeInBits = alignTo(IPHR_HASH + 1, 32); + uint32_t NumBitmapEntries = BitmapSizeInBits / 8; + std::vector<uint8_t> BitmapData(NumBitmapEntries); + // FIXME: Build an actual bitmap + if (auto EC = PublicsWriter.writeBytes(makeArrayRef(BitmapData))) + return EC; + + // FIXME: Write actual hash buckets. 
+ return Error::success(); +} diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.cpp b/lib/ExecutionEngine/MCJIT/MCJIT.cpp index ff8749fbfed48..1164d60ffc104 100644 --- a/lib/ExecutionEngine/MCJIT/MCJIT.cpp +++ b/lib/ExecutionEngine/MCJIT/MCJIT.cpp @@ -317,7 +317,13 @@ uint64_t MCJIT::getSymbolAddress(const std::string &Name, raw_string_ostream MangledNameStream(MangledName); Mangler::getNameWithPrefix(MangledNameStream, Name, getDataLayout()); } - return findSymbol(MangledName, CheckFunctionsOnly).getAddress(); + if (auto Sym = findSymbol(MangledName, CheckFunctionsOnly)) { + if (auto AddrOrErr = Sym.getAddress()) + return *AddrOrErr; + else + report_fatal_error(AddrOrErr.takeError()); + } else + report_fatal_error(Sym.takeError()); } JITSymbol MCJIT::findSymbol(const std::string &Name, @@ -599,11 +605,12 @@ GenericValue MCJIT::runFunction(Function *F, ArrayRef<GenericValue> ArgValues) { void *MCJIT::getPointerToNamedFunction(StringRef Name, bool AbortOnFailure) { if (!isSymbolSearchingDisabled()) { - void *ptr = - reinterpret_cast<void*>( - static_cast<uintptr_t>(Resolver.findSymbol(Name).getAddress())); - if (ptr) - return ptr; + if (auto Sym = Resolver.findSymbol(Name)) { + if (auto AddrOrErr = Sym.getAddress()) + return reinterpret_cast<void*>( + static_cast<uintptr_t>(*AddrOrErr)); + } else if (auto Err = Sym.takeError()) + report_fatal_error(std::move(Err)); } /// If a LazyFunctionCreator is installed, use it to get/create the function. diff --git a/lib/ExecutionEngine/Orc/OrcCBindings.cpp b/lib/ExecutionEngine/Orc/OrcCBindings.cpp index 5fe259f80b6fb..de80cb1d0dd4c 100644 --- a/lib/ExecutionEngine/Orc/OrcCBindings.cpp +++ b/lib/ExecutionEngine/Orc/OrcCBindings.cpp @@ -60,12 +60,13 @@ void LLVMOrcGetMangledSymbol(LLVMOrcJITStackRef JITStack, char **MangledName, void LLVMOrcDisposeMangledSymbol(char *MangledName) { delete[] MangledName; } -LLVMOrcTargetAddress +LLVMOrcErrorCode LLVMOrcCreateLazyCompileCallback(LLVMOrcJITStackRef JITStack, + LLVMOrcTargetAddress *RetAddr, LLVMOrcLazyCompileCallbackFn Callback, void *CallbackCtx) { OrcCBindingsStack &J = *unwrap(JITStack); - return J.createLazyCompileCallback(Callback, CallbackCtx); + return J.createLazyCompileCallback(*RetAddr, Callback, CallbackCtx); } LLVMOrcErrorCode LLVMOrcCreateIndirectStub(LLVMOrcJITStackRef JITStack, @@ -82,38 +83,44 @@ LLVMOrcErrorCode LLVMOrcSetIndirectStubPointer(LLVMOrcJITStackRef JITStack, return J.setIndirectStubPointer(StubName, NewAddr); } -LLVMOrcModuleHandle +LLVMOrcErrorCode LLVMOrcAddEagerlyCompiledIR(LLVMOrcJITStackRef JITStack, + LLVMOrcModuleHandle *RetHandle, LLVMSharedModuleRef Mod, LLVMOrcSymbolResolverFn SymbolResolver, void *SymbolResolverCtx) { OrcCBindingsStack &J = *unwrap(JITStack); std::shared_ptr<Module> *M(unwrap(Mod)); - return J.addIRModuleEager(*M, SymbolResolver, SymbolResolverCtx); + return J.addIRModuleEager(*RetHandle, *M, SymbolResolver, SymbolResolverCtx); } -LLVMOrcModuleHandle +LLVMOrcErrorCode LLVMOrcAddLazilyCompiledIR(LLVMOrcJITStackRef JITStack, + LLVMOrcModuleHandle *RetHandle, LLVMSharedModuleRef Mod, LLVMOrcSymbolResolverFn SymbolResolver, void *SymbolResolverCtx) { OrcCBindingsStack &J = *unwrap(JITStack); std::shared_ptr<Module> *M(unwrap(Mod)); - return J.addIRModuleLazy(*M, SymbolResolver, SymbolResolverCtx); + return J.addIRModuleLazy(*RetHandle, *M, SymbolResolver, SymbolResolverCtx); } -void LLVMOrcRemoveModule(LLVMOrcJITStackRef JITStack, LLVMOrcModuleHandle H) { +LLVMOrcErrorCode LLVMOrcRemoveModule(LLVMOrcJITStackRef JITStack, + LLVMOrcModuleHandle H) { 
OrcCBindingsStack &J = *unwrap(JITStack); - J.removeModule(H); + return J.removeModule(H); } -LLVMOrcTargetAddress LLVMOrcGetSymbolAddress(LLVMOrcJITStackRef JITStack, - const char *SymbolName) { +LLVMOrcErrorCode LLVMOrcGetSymbolAddress(LLVMOrcJITStackRef JITStack, + LLVMOrcTargetAddress *RetAddr, + const char *SymbolName) { OrcCBindingsStack &J = *unwrap(JITStack); - auto Sym = J.findSymbol(SymbolName, true); - return Sym.getAddress(); + return J.findSymbolAddress(*RetAddr, SymbolName, true); } -void LLVMOrcDisposeInstance(LLVMOrcJITStackRef JITStack) { - delete unwrap(JITStack); +LLVMOrcErrorCode LLVMOrcDisposeInstance(LLVMOrcJITStackRef JITStack) { + auto *J = unwrap(JITStack); + auto Err = J->shutdown(); + delete J; + return Err; } diff --git a/lib/ExecutionEngine/Orc/OrcCBindingsStack.h b/lib/ExecutionEngine/Orc/OrcCBindingsStack.h index 931d0a9eb2ade..e38decf94f3e9 100644 --- a/lib/ExecutionEngine/Orc/OrcCBindingsStack.h +++ b/lib/ExecutionEngine/Orc/OrcCBindingsStack.h @@ -70,7 +70,7 @@ private: virtual JITSymbol findSymbolIn(const std::string &Name, bool ExportedSymbolsOnly) = 0; - virtual void removeModule() = 0; + virtual Error removeModule() = 0; }; template <typename LayerT> class GenericHandleImpl : public GenericHandle { @@ -83,7 +83,7 @@ private: return Layer.findSymbolIn(Handle, Name, ExportedSymbolsOnly); } - void removeModule() override { return Layer.removeModule(Handle); } + Error removeModule() override { return Layer.removeModule(Handle); } private: LayerT &Layer; @@ -105,6 +105,10 @@ public: IndirectStubsManagerBuilder IndirectStubsMgrBuilder) : DL(TM.createDataLayout()), IndirectStubsMgr(IndirectStubsMgrBuilder()), CCMgr(std::move(CCMgr)), + ObjectLayer( + []() { + return std::make_shared<SectionMemoryManager>(); + }), CompileLayer(ObjectLayer, orc::SimpleCompiler(TM)), CODLayer(CompileLayer, [](Function &F) { return std::set<Function *>({&F}); }, @@ -112,12 +116,14 @@ public: CXXRuntimeOverrides( [this](const std::string &S) { return mangle(S); }) {} - ~OrcCBindingsStack() { + LLVMOrcErrorCode shutdown() { // Run any destructors registered with __cxa_atexit. CXXRuntimeOverrides.runDestructors(); // Run any IR destructors. for (auto &DtorRunner : IRStaticDestructorRunners) - DtorRunner.runViaLayer(*this); + if (auto Err = DtorRunner.runViaLayer(*this)) + return mapError(std::move(Err)); + return LLVMOrcErrSuccess; } std::string mangle(StringRef Name) { @@ -134,14 +140,17 @@ public: return reinterpret_cast<PtrTy>(static_cast<uintptr_t>(Addr)); } - JITTargetAddress - createLazyCompileCallback(LLVMOrcLazyCompileCallbackFn Callback, + + LLVMOrcErrorCode + createLazyCompileCallback(JITTargetAddress &RetAddr, + LLVMOrcLazyCompileCallbackFn Callback, void *CallbackCtx) { auto CCInfo = CCMgr->getCompileCallback(); CCInfo.setCompileAction([=]() -> JITTargetAddress { return Callback(wrap(this), CallbackCtx); }); - return CCInfo.getAddress(); + RetAddr = CCInfo.getAddress(); + return LLVMOrcErrSuccess; } LLVMOrcErrorCode createIndirectStub(StringRef StubName, @@ -155,12 +164,12 @@ public: return mapError(IndirectStubsMgr->updatePointer(Name, Addr)); } - std::unique_ptr<JITSymbolResolver> + std::shared_ptr<JITSymbolResolver> createResolver(LLVMOrcSymbolResolverFn ExternalResolver, void *ExternalResolverCtx) { return orc::createLambdaResolver( [this, ExternalResolver, ExternalResolverCtx](const std::string &Name) - -> JITSymbol { + -> JITSymbol { // Search order: // 1. JIT'd symbols. // 2. Runtime overrides. 
@@ -168,6 +177,9 @@ public: if (auto Sym = CODLayer.findSymbol(Name, true)) return Sym; + else if (auto Err = Sym.takeError()) + return Sym.takeError(); + if (auto Sym = CXXRuntimeOverrides.searchOverrides(Name)) return Sym; @@ -178,16 +190,19 @@ public: return JITSymbol(nullptr); }, - [](const std::string &Name) { + [](const std::string &Name) -> JITSymbol { return JITSymbol(nullptr); }); } template <typename LayerT> - ModuleHandleT addIRModule(LayerT &Layer, std::shared_ptr<Module> M, - std::unique_ptr<RuntimeDyld::MemoryManager> MemMgr, - LLVMOrcSymbolResolverFn ExternalResolver, - void *ExternalResolverCtx) { + LLVMOrcErrorCode + addIRModule(ModuleHandleT &RetHandle, LayerT &Layer, + std::shared_ptr<Module> M, + std::unique_ptr<RuntimeDyld::MemoryManager> MemMgr, + LLVMOrcSymbolResolverFn ExternalResolver, + void *ExternalResolverCtx) { + // Attach a data-layout if one isn't already present. if (M->getDataLayout().isDefault()) M->setDataLayout(DL); @@ -204,43 +219,52 @@ public: auto Resolver = createResolver(ExternalResolver, ExternalResolverCtx); // Add the module to the JIT. - auto LH = Layer.addModule(std::move(M), std::move(MemMgr), - std::move(Resolver)); - ModuleHandleT H = createHandle(Layer, LH); + ModuleHandleT H; + if (auto LHOrErr = Layer.addModule(std::move(M), std::move(Resolver))) + H = createHandle(Layer, *LHOrErr); + else + return mapError(LHOrErr.takeError()); // Run the static constructors, and save the static destructor runner for // execution when the JIT is torn down. orc::CtorDtorRunner<OrcCBindingsStack> CtorRunner(std::move(CtorNames), H); - CtorRunner.runViaLayer(*this); + if (auto Err = CtorRunner.runViaLayer(*this)) + return mapError(std::move(Err)); IRStaticDestructorRunners.emplace_back(std::move(DtorNames), H); - return H; + RetHandle = H; + return LLVMOrcErrSuccess; } - ModuleHandleT addIRModuleEager(std::shared_ptr<Module> M, - LLVMOrcSymbolResolverFn ExternalResolver, - void *ExternalResolverCtx) { - return addIRModule(CompileLayer, std::move(M), + LLVMOrcErrorCode addIRModuleEager(ModuleHandleT &RetHandle, + std::shared_ptr<Module> M, + LLVMOrcSymbolResolverFn ExternalResolver, + void *ExternalResolverCtx) { + return addIRModule(RetHandle, CompileLayer, std::move(M), llvm::make_unique<SectionMemoryManager>(), std::move(ExternalResolver), ExternalResolverCtx); } - ModuleHandleT addIRModuleLazy(std::shared_ptr<Module> M, - LLVMOrcSymbolResolverFn ExternalResolver, - void *ExternalResolverCtx) { - return addIRModule(CODLayer, std::move(M), + LLVMOrcErrorCode addIRModuleLazy(ModuleHandleT &RetHandle, + std::shared_ptr<Module> M, + LLVMOrcSymbolResolverFn ExternalResolver, + void *ExternalResolverCtx) { + return addIRModule(RetHandle, CODLayer, std::move(M), llvm::make_unique<SectionMemoryManager>(), std::move(ExternalResolver), ExternalResolverCtx); } - void removeModule(ModuleHandleT H) { - GenericHandles[H]->removeModule(); + LLVMOrcErrorCode removeModule(ModuleHandleT H) { + if (auto Err = GenericHandles[H]->removeModule()) + return mapError(std::move(Err)); GenericHandles[H] = nullptr; FreeHandleIndexes.push_back(H); + return LLVMOrcErrSuccess; } - JITSymbol findSymbol(const std::string &Name, bool ExportedSymbolsOnly) { + JITSymbol findSymbol(const std::string &Name, + bool ExportedSymbolsOnly) { if (auto Sym = IndirectStubsMgr->findStub(Name, ExportedSymbolsOnly)) return Sym; return CODLayer.findSymbol(mangle(Name), ExportedSymbolsOnly); @@ -251,6 +275,26 @@ public: return GenericHandles[H]->findSymbolIn(Name, ExportedSymbolsOnly); } + 
LLVMOrcErrorCode findSymbolAddress(JITTargetAddress &RetAddr, + const std::string &Name, + bool ExportedSymbolsOnly) { + RetAddr = 0; + if (auto Sym = findSymbol(Name, ExportedSymbolsOnly)) { + // Successful lookup, non-null symbol: + if (auto AddrOrErr = Sym.getAddress()) { + RetAddr = *AddrOrErr; + return LLVMOrcErrSuccess; + } else + return mapError(AddrOrErr.takeError()); + } else if (auto Err = Sym.takeError()) { + // Lookup failure - report error. + return mapError(std::move(Err)); + } + // Otherwise we had a successful lookup but got a null result. We already + // set RetAddr to '0' above, so just return success. + return LLVMOrcErrSuccess; + } + const std::string &getErrorMessage() const { return ErrMsg; } private: diff --git a/lib/ExecutionEngine/Orc/OrcError.cpp b/lib/ExecutionEngine/Orc/OrcError.cpp index 9e70c4ac1dbff..df2d320e0f7aa 100644 --- a/lib/ExecutionEngine/Orc/OrcError.cpp +++ b/lib/ExecutionEngine/Orc/OrcError.cpp @@ -45,6 +45,8 @@ public: return "Could not negotiate RPC function"; case OrcErrorCode::RPCResponseAbandoned: return "RPC response abandoned"; + case OrcErrorCode::JITSymbolNotFound: + return "JIT symbol not found"; case OrcErrorCode::UnexpectedRPCCall: return "Unexpected RPC call"; case OrcErrorCode::UnexpectedRPCResponse: @@ -63,10 +65,29 @@ static ManagedStatic<OrcErrorCategory> OrcErrCat; namespace llvm { namespace orc { +char JITSymbolNotFound::ID = 0; + std::error_code orcError(OrcErrorCode ErrCode) { typedef std::underlying_type<OrcErrorCode>::type UT; return std::error_code(static_cast<UT>(ErrCode), *OrcErrCat); } +JITSymbolNotFound::JITSymbolNotFound(std::string SymbolName) + : SymbolName(std::move(SymbolName)) {} + +std::error_code JITSymbolNotFound::convertToErrorCode() const { + typedef std::underlying_type<OrcErrorCode>::type UT; + return std::error_code(static_cast<UT>(OrcErrorCode::JITSymbolNotFound), + *OrcErrCat); +} + +void JITSymbolNotFound::log(raw_ostream &OS) const { + OS << "Could not find symbol '" << SymbolName << "'"; +} + +const std::string &JITSymbolNotFound::getSymbolName() const { + return SymbolName; +} + } } diff --git a/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h b/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h index 690276232a6f8..346a40405ff18 100644 --- a/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h +++ b/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h @@ -172,10 +172,13 @@ public: std::shared_ptr<JITSymbolResolver> ClientResolver, std::unique_ptr<TargetMachine> TM) : ExecutionEngine(TM->createDataLayout()), TM(std::move(TM)), - MemMgr(*this, std::move(MemMgr)), Resolver(*this), + MemMgr(std::make_shared<MCJITReplacementMemMgr>(*this, + std::move(MemMgr))), + Resolver(std::make_shared<LinkingResolver>(*this)), ClientResolver(std::move(ClientResolver)), NotifyObjectLoaded(*this), NotifyFinalized(*this), - ObjectLayer(NotifyObjectLoaded, NotifyFinalized), + ObjectLayer([this]() { return this->MemMgr; }, NotifyObjectLoaded, + NotifyFinalized), CompileLayer(ObjectLayer, SimpleCompiler(*this->TM)), LazyEmitLayer(CompileLayer) {} @@ -199,20 +202,20 @@ public: delete Mod; }; LocalModules.push_back(std::shared_ptr<Module>(MPtr, std::move(Deleter))); - LazyEmitLayer.addModule(LocalModules.back(), &MemMgr, &Resolver); + cantFail(LazyEmitLayer.addModule(LocalModules.back(), Resolver)); } void addObjectFile(std::unique_ptr<object::ObjectFile> O) override { auto Obj = std::make_shared<object::OwningBinary<object::ObjectFile>>(std::move(O), nullptr); - ObjectLayer.addObject(std::move(Obj), &MemMgr, &Resolver); + 
cantFail(ObjectLayer.addObject(std::move(Obj), Resolver)); } void addObjectFile(object::OwningBinary<object::ObjectFile> O) override { auto Obj = std::make_shared<object::OwningBinary<object::ObjectFile>>(std::move(O)); - ObjectLayer.addObject(std::move(Obj), &MemMgr, &Resolver); + cantFail(ObjectLayer.addObject(std::move(Obj), Resolver)); } void addArchive(object::OwningBinary<object::Archive> A) override { @@ -231,7 +234,7 @@ public: } uint64_t getSymbolAddress(StringRef Name) { - return findSymbol(Name).getAddress(); + return cantFail(findSymbol(Name).getAddress()); } JITSymbol findSymbol(StringRef Name) { @@ -320,7 +323,7 @@ private: auto Obj = std::make_shared<object::OwningBinary<object::ObjectFile>>( std::move(ChildObj), nullptr); - ObjectLayer.addObject(std::move(Obj), &MemMgr, &Resolver); + cantFail(ObjectLayer.addObject(std::move(Obj), Resolver)); if (auto Sym = ObjectLayer.findSymbol(Name, true)) return Sym; } @@ -341,7 +344,7 @@ private: const LoadedObjectInfo &Info) const { M.UnfinalizedSections[H] = std::move(M.SectionsAllocatedSinceLastLoad); M.SectionsAllocatedSinceLastLoad = SectionAddrSet(); - M.MemMgr.notifyObjectLoaded(&M, *Obj->getBinary()); + M.MemMgr->notifyObjectLoaded(&M, *Obj->getBinary()); } private: OrcMCJITReplacement &M; @@ -373,8 +376,8 @@ private: using LazyEmitLayerT = LazyEmittingLayer<CompileLayerT>; std::unique_ptr<TargetMachine> TM; - MCJITReplacementMemMgr MemMgr; - LinkingResolver Resolver; + std::shared_ptr<MCJITReplacementMemMgr> MemMgr; + std::shared_ptr<LinkingResolver> Resolver; std::shared_ptr<JITSymbolResolver> ClientResolver; Mangler Mang; diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp index 2b69f1a0269fd..8198836f7a0c9 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp @@ -128,7 +128,10 @@ void RuntimeDyldImpl::resolveRelocations() { ); // First, resolve relocations associated with external symbols. - resolveExternalSymbols(); + if (auto Err = resolveExternalSymbols()) { + HasError = true; + ErrorStr = toString(std::move(Err)); + } // Iterate over all outstanding relocations for (auto it = Relocations.begin(), e = Relocations.end(); it != e; ++it) { @@ -243,9 +246,11 @@ RuntimeDyldImpl::loadObjectImpl(const object::ObjectFile &Obj) { continue; // Then check the symbol resolver to see if there's a definition // elsewhere in this logical dylib. - if (auto Sym = Resolver.findSymbolInLogicalDylib(Name)) + if (auto Sym = Resolver.findSymbolInLogicalDylib(Name)) { if (Sym.getFlags().isStrongDefinition()) continue; + } else if (auto Err = Sym.takeError()) + return std::move(Err); // else JITSymFlags &= ~JITSymbolFlags::Weak; } @@ -953,7 +958,7 @@ void RuntimeDyldImpl::resolveRelocationList(const RelocationList &Relocs, } } -void RuntimeDyldImpl::resolveExternalSymbols() { +Error RuntimeDyldImpl::resolveExternalSymbols() { while (!ExternalSymbolRelocations.empty()) { StringMap<RelocationList>::iterator i = ExternalSymbolRelocations.begin(); @@ -971,10 +976,24 @@ void RuntimeDyldImpl::resolveExternalSymbols() { // This is an external symbol, try to get its address from the symbol // resolver. // First search for the symbol in this logical dylib. 
- Addr = Resolver.findSymbolInLogicalDylib(Name.data()).getAddress(); + if (auto Sym = Resolver.findSymbolInLogicalDylib(Name.data())) { + if (auto AddrOrErr = Sym.getAddress()) + Addr = *AddrOrErr; + else + return AddrOrErr.takeError(); + } else if (auto Err = Sym.takeError()) + return Err; + // If that fails, try searching for an external symbol. - if (!Addr) - Addr = Resolver.findSymbol(Name.data()).getAddress(); + if (!Addr) { + if (auto Sym = Resolver.findSymbol(Name.data())) { + if (auto AddrOrErr = Sym.getAddress()) + Addr = *AddrOrErr; + else + return AddrOrErr.takeError(); + } else if (auto Err = Sym.takeError()) + return Err; + } // The call to getSymbolAddress may have caused additional modules to // be loaded, which may have added new entries to the // ExternalSymbolRelocations map. Consequently, we need to update our @@ -1009,6 +1028,8 @@ ExternalSymbolRelocations.erase(i); } + + return Error::success(); } //===----------------------------------------------------------------------===// diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp index 1bd28ef37ed1c..1c54ad6fb03f8 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp @@ -27,9 +27,12 @@ using namespace llvm::object; namespace { class LoadedCOFFObjectInfo final - : public RuntimeDyld::LoadedObjectInfoHelper<LoadedCOFFObjectInfo> { + : public LoadedObjectInfoHelper<LoadedCOFFObjectInfo, + RuntimeDyld::LoadedObjectInfo> { public: - LoadedCOFFObjectInfo(RuntimeDyldImpl &RTDyld, ObjSectionToIDMap ObjSecToIDMap) + LoadedCOFFObjectInfo( + RuntimeDyldImpl &RTDyld, + RuntimeDyld::LoadedObjectInfo::ObjSectionToIDMap ObjSecToIDMap) : LoadedObjectInfoHelper(RTDyld, std::move(ObjSecToIDMap)) {} OwningBinary<ObjectFile> diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp index e45fdc7aee18a..5bc7434e703f6 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp @@ -742,7 +742,7 @@ uint64_t RuntimeDyldCheckerImpl::getSymbolLocalAddr(StringRef Symbol) const { uint64_t RuntimeDyldCheckerImpl::getSymbolRemoteAddr(StringRef Symbol) const { if (auto InternalSymbol = getRTDyld().getSymbol(Symbol)) return InternalSymbol.getAddress(); - return getRTDyld().Resolver.findSymbol(Symbol).getAddress(); + return cantFail(getRTDyld().Resolver.findSymbol(Symbol).getAddress()); } uint64_t RuntimeDyldCheckerImpl::readMemoryAtAddr(uint64_t SrcAddr, diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp index 8b6f9bef66df9..77c968401c160 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp @@ -123,7 +123,8 @@ void DyldELFObject<ELFT>::updateSymbolAddress(const SymbolRef &SymRef, } class LoadedELFObjectInfo final - : public RuntimeDyld::LoadedObjectInfoHelper<LoadedELFObjectInfo> { + : public LoadedObjectInfoHelper<LoadedELFObjectInfo, + RuntimeDyld::LoadedObjectInfo> { public: LoadedELFObjectInfo(RuntimeDyldImpl &RTDyld, ObjSectionToIDMap ObjSecToIDMap) : LoadedObjectInfoHelper(RTDyld, std::move(ObjSecToIDMap)) {} diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h index 5268bc5a18684..95b04fd932511 100644 ---
a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h @@ -417,7 +417,7 @@ protected: StubMap &Stubs) = 0; /// \brief Resolve relocations to external symbols. - void resolveExternalSymbols(); + Error resolveExternalSymbols(); // \brief Compute an upper bound of the memory that is required to load all // sections diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp index 00541e8c06fea..80e9c7ac18aac 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp @@ -27,7 +27,8 @@ using namespace llvm::object; namespace { class LoadedMachOObjectInfo final - : public RuntimeDyld::LoadedObjectInfoHelper<LoadedMachOObjectInfo> { + : public LoadedObjectInfoHelper<LoadedMachOObjectInfo, + RuntimeDyld::LoadedObjectInfo> { public: LoadedMachOObjectInfo(RuntimeDyldImpl &RTDyld, ObjSectionToIDMap ObjSecToIDMap) diff --git a/lib/Fuzzer/CMakeLists.txt b/lib/Fuzzer/CMakeLists.txt index b886021aee3fd..fa743c280e861 100644 --- a/lib/Fuzzer/CMakeLists.txt +++ b/lib/Fuzzer/CMakeLists.txt @@ -13,6 +13,7 @@ if( APPLE ) endif() endif() +set(LIBFUZZER_FLAGS_BASE "${CMAKE_CXX_FLAGS}") if( LLVM_USE_SANITIZE_COVERAGE ) if(NOT "${LLVM_USE_SANITIZER}" STREQUAL "Address") message(FATAL_ERROR @@ -20,7 +21,6 @@ if( LLVM_USE_SANITIZE_COVERAGE ) "LLVM_USE_SANITIZE_COVERAGE=YES to be set." ) endif() - set(LIBFUZZER_FLAGS_BASE "${CMAKE_CXX_FLAGS}") # Disable the coverage and sanitizer instrumentation for the fuzzer itself. set(CMAKE_CXX_FLAGS "${LIBFUZZER_FLAGS_BASE} -fno-sanitize-coverage=trace-pc-guard,edge,trace-cmp,indirect-calls,8bit-counters -Werror") diff --git a/lib/Fuzzer/FuzzerCorpus.h b/lib/Fuzzer/FuzzerCorpus.h index 0f0573994a035..218ae5b6ac4d4 100644 --- a/lib/Fuzzer/FuzzerCorpus.h +++ b/lib/Fuzzer/FuzzerCorpus.h @@ -34,6 +34,7 @@ struct InputInfo { size_t NumExecutedMutations = 0; size_t NumSuccessfullMutations = 0; bool MayDeleteFile = false; + std::vector<uint32_t> FeatureSet; }; class InputCorpus { @@ -68,24 +69,84 @@ class InputCorpus { } bool empty() const { return Inputs.empty(); } const Unit &operator[] (size_t Idx) const { return Inputs[Idx]->U; } - void AddToCorpus(const Unit &U, size_t NumFeatures, - bool MayDeleteFile = false) { + void AddToCorpus(const Unit &U, size_t NumFeatures, bool MayDeleteFile, + const std::vector<uint32_t> &FeatureSet) { assert(!U.empty()); - uint8_t Hash[kSHA1NumBytes]; if (FeatureDebug) Printf("ADD_TO_CORPUS %zd NF %zd\n", Inputs.size(), NumFeatures); - ComputeSHA1(U.data(), U.size(), Hash); - Hashes.insert(Sha1ToString(Hash)); Inputs.push_back(new InputInfo()); InputInfo &II = *Inputs.back(); II.U = U; II.NumFeatures = NumFeatures; II.MayDeleteFile = MayDeleteFile; - memcpy(II.Sha1, Hash, kSHA1NumBytes); + II.FeatureSet = FeatureSet; + ComputeSHA1(U.data(), U.size(), II.Sha1); + Hashes.insert(Sha1ToString(II.Sha1)); UpdateCorpusDistribution(); + PrintCorpus(); // ValidateFeatureSet(); } + // Debug-only + void PrintUnit(const Unit &U) { + if (!FeatureDebug) return; + for (uint8_t C : U) { + if (C != 'F' && C != 'U' && C != 'Z') + C = '.'; + Printf("%c", C); + } + } + + // Debug-only + void PrintFeatureSet(const std::vector<uint32_t> &FeatureSet) { + if (!FeatureDebug) return; + Printf("{"); + for (uint32_t Feature: FeatureSet) + Printf("%u,", Feature); + Printf("}"); + } + + // Debug-only + void PrintCorpus() { + if (!FeatureDebug) return; + Printf("======= CORPUS:\n"); + int i = 0; + for 
(auto II : Inputs) { + if (std::find(II->U.begin(), II->U.end(), 'F') != II->U.end()) { + Printf("[%2d] ", i); + Printf("%s sz=%zd ", Sha1ToString(II->Sha1).c_str(), II->U.size()); + PrintUnit(II->U); + Printf(" "); + PrintFeatureSet(II->FeatureSet); + Printf("\n"); + } + i++; + } + } + + // If FeatureSet is the same as in II, replace II->U with {Data,Size}. + bool TryToReplace(InputInfo *II, const uint8_t *Data, size_t Size, + const std::vector<uint32_t> &FeatureSet) { + if (II->U.size() > Size && II->FeatureSet.size() && + II->FeatureSet == FeatureSet) { + if (FeatureDebug) + Printf("Replace: %zd => %zd\n", II->U.size(), Size); + Replace(II, {Data, Data + Size}); + PrintCorpus(); + return true; + } + return false; + } + + void Replace(InputInfo *II, const Unit &U) { + assert(II->U.size()); + Hashes.erase(Sha1ToString(II->Sha1)); + DeleteFile(*II); + ComputeSHA1(U.data(), U.size(), II->Sha1); + Hashes.insert(Sha1ToString(II->Sha1)); + II->U = U; + } + bool HasUnit(const Unit &U) { return Hashes.count(Hash(U)); } bool HasUnit(const std::string &H) { return Hashes.count(H); } InputInfo &ChooseUnitToMutate(Random &Rand) { @@ -124,10 +185,14 @@ class InputCorpus { Printf("\n"); } - void DeleteInput(size_t Idx) { - InputInfo &II = *Inputs[Idx]; + void DeleteFile(const InputInfo &II) { if (!OutputCorpus.empty() && II.MayDeleteFile) RemoveFile(DirPlusFile(OutputCorpus, Sha1ToString(II.Sha1))); + } + + void DeleteInput(size_t Idx) { + InputInfo &II = *Inputs[Idx]; + DeleteFile(II); Unit().swap(II.U); if (FeatureDebug) Printf("EVICTED %zd\n", Idx); diff --git a/lib/Fuzzer/FuzzerDriver.cpp b/lib/Fuzzer/FuzzerDriver.cpp index 0453a7f443b53..87968893853e4 100644 --- a/lib/Fuzzer/FuzzerDriver.cpp +++ b/lib/Fuzzer/FuzzerDriver.cpp @@ -265,7 +265,7 @@ int RunOneTest(Fuzzer *F, const char *InputFilePath, size_t MaxLen) { Unit U = FileToVector(InputFilePath); if (MaxLen && MaxLen < U.size()) U.resize(MaxLen); - F->RunOne(U.data(), U.size()); + F->ExecuteCallback(U.data(), U.size()); F->TryDetectingAMemoryLeak(U.data(), U.size(), true); return 0; } @@ -441,7 +441,6 @@ int MinimizeCrashInputInternalStep(Fuzzer *F, InputCorpus *Corpus) { Printf("INFO: The input is small enough, exiting\n"); exit(0); } - Corpus->AddToCorpus(U, 0); F->SetMaxInputLen(U.size()); F->SetMaxMutationLen(U.size() - 1); F->MinimizeCrashLoop(U); @@ -572,6 +571,7 @@ int FuzzerDriver(int *argc, char ***argv, UserCallback Callback) { Options.UseCmp = Flags.use_cmp; Options.UseValueProfile = Flags.use_value_profile; Options.Shrink = Flags.shrink; + Options.ReduceInputs = Flags.reduce_inputs; Options.ShuffleAtStartUp = Flags.shuffle; Options.PreferSmall = Flags.prefer_small; Options.ReloadIntervalSec = Flags.reload; @@ -657,7 +657,7 @@ int FuzzerDriver(int *argc, char ***argv, UserCallback Callback) { size_t Size = SMR.ReadByteArraySize(); SMR.WriteByteArray(nullptr, 0); const Unit tmp(SMR.GetByteArray(), SMR.GetByteArray() + Size); - F->RunOne(tmp.data(), tmp.size()); + F->ExecuteCallback(tmp.data(), tmp.size()); SMR.PostServer(); } return 0; } diff --git a/lib/Fuzzer/FuzzerExtFunctionsWeak.cpp b/lib/Fuzzer/FuzzerExtFunctionsWeak.cpp index 7b02b6f0b701b..503f0395cf8f8 100644 --- a/lib/Fuzzer/FuzzerExtFunctionsWeak.cpp +++ b/lib/Fuzzer/FuzzerExtFunctionsWeak.cpp @@ -41,7 +41,8 @@ namespace fuzzer { ExternalFunctions::ExternalFunctions() { #define EXT_FUNC(NAME, RETURN_TYPE, FUNC_SIG, WARN) \ this->NAME = ::NAME; \ - CheckFnPtr((void *)::NAME, #NAME, WARN); + CheckFnPtr(reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(::NAME)), \ + #NAME, 
WARN); #include "FuzzerExtFunctions.def" diff --git a/lib/Fuzzer/FuzzerFlags.def b/lib/Fuzzer/FuzzerFlags.def index 7ff196c8fa960..5e70cbad3cf1f 100644 --- a/lib/Fuzzer/FuzzerFlags.def +++ b/lib/Fuzzer/FuzzerFlags.def @@ -65,7 +65,9 @@ FUZZER_FLAG_INT(use_memmem, 1, FUZZER_FLAG_INT(use_value_profile, 0, "Experimental. Use value profile to guide fuzzing.") FUZZER_FLAG_INT(use_cmp, 1, "Use CMP traces to guide mutations") -FUZZER_FLAG_INT(shrink, 0, "Experimental. Try to shrink corpus elements.") +FUZZER_FLAG_INT(shrink, 0, "Experimental. Try to shrink corpus inputs.") +FUZZER_FLAG_INT(reduce_inputs, 0, "Experimental. " + "Try to reduce the size of inputs while preserving their full feature sets") FUZZER_FLAG_UNSIGNED(jobs, 0, "Number of jobs to run. If jobs >= 1 we spawn" " this number of jobs in separate worker processes" " with stdout/stderr redirected to fuzz-JOB.log.") diff --git a/lib/Fuzzer/FuzzerIOWindows.cpp b/lib/Fuzzer/FuzzerIOWindows.cpp index 75d4e3a06071e..742520267b73f 100644 --- a/lib/Fuzzer/FuzzerIOWindows.cpp +++ b/lib/Fuzzer/FuzzerIOWindows.cpp @@ -182,7 +182,7 @@ static size_t ParseFileName(const std::string &FileName, const size_t Offset) { return Pos - Offset; } -// Parse a directory ending in separator, like: SomeDir\ +// Parse a directory ending in separator, like: `SomeDir\` // Returns number of characters considered if successful. static size_t ParseDir(const std::string &FileName, const size_t Offset) { size_t Pos = Offset; @@ -197,7 +197,7 @@ static size_t ParseDir(const std::string &FileName, const size_t Offset) { return Pos - Offset; } -// Parse a servername and share, like: SomeServer\SomeShare\ +// Parse a servername and share, like: `SomeServer\SomeShare\` // Returns number of characters considered if successful. static size_t ParseServerAndShare(const std::string &FileName, const size_t Offset) { diff --git a/lib/Fuzzer/FuzzerInternal.h b/lib/Fuzzer/FuzzerInternal.h index 5f184c2316e2a..a732f895375ed 100644 --- a/lib/Fuzzer/FuzzerInternal.h +++ b/lib/Fuzzer/FuzzerInternal.h @@ -65,7 +65,8 @@ public: static void StaticFileSizeExceedCallback(); void ExecuteCallback(const uint8_t *Data, size_t Size); - size_t RunOne(const uint8_t *Data, size_t Size); + bool RunOne(const uint8_t *Data, size_t Size, bool MayDeleteFile = false, + InputInfo *II = nullptr); // Merge Corpora[1:] into Corpora[0]. void Merge(const std::vector<std::string> &Corpora); @@ -95,13 +96,12 @@ private: void InterruptCallback(); void MutateAndTestOne(); void ReportNewCoverage(InputInfo *II, const Unit &U); - size_t RunOne(const Unit &U) { return RunOne(U.data(), U.size()); } + void PrintPulseAndReportSlowInput(const uint8_t *Data, size_t Size); void WriteToOutputCorpus(const Unit &U); void WriteUnitToFileWithPrefix(const Unit &U, const char *Prefix); void PrintStats(const char *Where, const char *End = "\n", size_t Units = 0); void PrintStatusForNewUnit(const Unit &U); void ShuffleCorpus(UnitVector *V); - void AddToCorpus(const Unit &U); void CheckExitOnSrcPosOrItem(); // Trace-based fuzzing: we run a unit with some kind of tracing @@ -142,6 +142,8 @@ private: size_t MaxInputLen = 0; size_t MaxMutationLen = 0; + std::vector<uint32_t> FeatureSetTmp; + // Need to know our own thread.
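To make the new reduce_inputs policy concrete: a candidate may replace an existing corpus entry only when it is strictly smaller and reproduces exactly the same feature set, which is what TryToReplace above implements. A minimal standalone sketch of that rule follows; the Entry and tryReplace names are illustrative, not libFuzzer's.

#include <cstddef>
#include <cstdint>
#include <vector>

struct Entry {
  std::vector<uint8_t> Data;       // the unit's bytes
  std::vector<uint32_t> Features;  // features observed when running Data
};

// Replace E with the candidate only if it is smaller and feature-equivalent.
static bool tryReplace(Entry &E, const uint8_t *Data, size_t Size,
                       const std::vector<uint32_t> &Features) {
  if (Size >= E.Data.size())
    return false;  // only ever shrink
  if (E.Features.empty() || E.Features != Features)
    return false;  // the full feature set must be preserved
  E.Data.assign(Data, Data + Size);
  return true;
}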
static thread_local bool IsMyThread; }; diff --git a/lib/Fuzzer/FuzzerLoop.cpp b/lib/Fuzzer/FuzzerLoop.cpp index fbf18357ede65..6816f3af8a6f7 100644 --- a/lib/Fuzzer/FuzzerLoop.cpp +++ b/lib/Fuzzer/FuzzerLoop.cpp @@ -22,9 +22,6 @@ #include <set> #if defined(__has_include) -#if __has_include(<sanitizer / coverage_interface.h>) -#include <sanitizer/coverage_interface.h> -#endif #if __has_include(<sanitizer / lsan_interface.h>) #include <sanitizer/lsan_interface.h> #endif @@ -348,11 +345,8 @@ void Fuzzer::RereadOutputCorpus(size_t MaxSize) { if (U.size() > MaxSize) U.resize(MaxSize); if (!Corpus.HasUnit(U)) { - if (size_t NumFeatures = RunOne(U)) { - CheckExitOnSrcPosOrItem(); - Corpus.AddToCorpus(U, NumFeatures); + if (RunOne(U.data(), U.size())) Reloaded = true; - } } } if (Reloaded) @@ -377,10 +371,7 @@ void Fuzzer::ShuffleAndMinimize(UnitVector *InitialCorpus) { ExecuteCallback(&dummy, 0); for (const auto &U : *InitialCorpus) { - if (size_t NumFeatures = RunOne(U)) { - CheckExitOnSrcPosOrItem(); - Corpus.AddToCorpus(U, NumFeatures); - } + RunOne(U.data(), U.size()); TryDetectingAMemoryLeak(U.data(), U.size(), /*DuringInitialCorpusExecution*/ true); } @@ -392,18 +383,7 @@ void Fuzzer::ShuffleAndMinimize(UnitVector *InitialCorpus) { } } -size_t Fuzzer::RunOne(const uint8_t *Data, size_t Size) { - if (!Size) return 0; - TotalNumberOfRuns++; - - ExecuteCallback(Data, Size); - - size_t NumUpdatesBefore = Corpus.NumFeatureUpdates(); - TPC.CollectFeatures([&](size_t Feature) { - Corpus.AddFeature(Feature, Size, Options.Shrink); - }); - size_t NumUpdatesAfter = Corpus.NumFeatureUpdates(); - +void Fuzzer::PrintPulseAndReportSlowInput(const uint8_t *Data, size_t Size) { auto TimeOfUnit = duration_cast<seconds>(UnitStopTime - UnitStartTime).count(); if (!(TotalNumberOfRuns & (TotalNumberOfRuns - 1)) && @@ -415,7 +395,34 @@ size_t Fuzzer::RunOne(const uint8_t *Data, size_t Size) { Printf("Slowest unit: %zd s:\n", TimeOfLongestUnitInSeconds); WriteUnitToFileWithPrefix({Data, Data + Size}, "slow-unit-"); } - return NumUpdatesAfter - NumUpdatesBefore; +} + +bool Fuzzer::RunOne(const uint8_t *Data, size_t Size, bool MayDeleteFile, + InputInfo *II) { + if (!Size) return false; + + ExecuteCallback(Data, Size); + + FeatureSetTmp.clear(); + size_t NumUpdatesBefore = Corpus.NumFeatureUpdates(); + TPC.CollectFeatures([&](size_t Feature) { + Corpus.AddFeature(Feature, Size, Options.Shrink); + if (Options.ReduceInputs) + FeatureSetTmp.push_back(Feature); + }); + PrintPulseAndReportSlowInput(Data, Size); + size_t NumNewFeatures = Corpus.NumFeatureUpdates() - NumUpdatesBefore; + if (NumNewFeatures) { + Corpus.AddToCorpus({Data, Data + Size}, NumNewFeatures, MayDeleteFile, + FeatureSetTmp); + CheckExitOnSrcPosOrItem(); + return true; + } + if (II && Corpus.TryToReplace(II, Data, Size, FeatureSetTmp)) { + CheckExitOnSrcPosOrItem(); + return true; + } + return false; } size_t Fuzzer::GetCurrentUnitInFuzzingThead(const uint8_t **Data) const { @@ -443,6 +450,7 @@ static bool LooseMemeq(const uint8_t *A, const uint8_t *B, size_t Size) { } void Fuzzer::ExecuteCallback(const uint8_t *Data, size_t Size) { + TotalNumberOfRuns++; assert(InFuzzingThread()); if (SMR.IsClient()) SMR.WriteByteArray(Data, Size); @@ -595,12 +603,9 @@ void Fuzzer::MutateAndTestOne() { if (i == 0) StartTraceRecording(); II.NumExecutedMutations++; - if (size_t NumFeatures = RunOne(CurrentUnitData, Size)) { - Corpus.AddToCorpus({CurrentUnitData, CurrentUnitData + Size}, NumFeatures, - /*MayDeleteFile=*/true); + if (RunOne(CurrentUnitData, Size, 
/*MayDeleteFile=*/true, &II)) ReportNewCoverage(&II, {CurrentUnitData, CurrentUnitData + Size}); - CheckExitOnSrcPosOrItem(); - } + StopTraceRecording(); TryDetectingAMemoryLeak(CurrentUnitData, Size, /*DuringInitialCorpusExecution*/ false); @@ -638,7 +643,8 @@ void Fuzzer::MinimizeCrashLoop(const Unit &U) { for (int i = 0; i < Options.MutateDepth; i++) { size_t NewSize = MD.Mutate(CurrentUnitData, U.size(), MaxMutationLen); assert(NewSize > 0 && NewSize <= MaxMutationLen); - RunOne(CurrentUnitData, NewSize); + ExecuteCallback(CurrentUnitData, NewSize); + PrintPulseAndReportSlowInput(CurrentUnitData, NewSize); TryDetectingAMemoryLeak(CurrentUnitData, NewSize, /*DuringInitialCorpusExecution*/ false); } diff --git a/lib/Fuzzer/FuzzerOptions.h b/lib/Fuzzer/FuzzerOptions.h index b1366789be007..9500235e2b1f3 100644 --- a/lib/Fuzzer/FuzzerOptions.h +++ b/lib/Fuzzer/FuzzerOptions.h @@ -32,6 +32,7 @@ struct FuzzingOptions { bool UseCmp = false; bool UseValueProfile = false; bool Shrink = false; + bool ReduceInputs = false; int ReloadIntervalSec = 1; bool ShuffleAtStartUp = true; bool PreferSmall = true; diff --git a/lib/Fuzzer/FuzzerUtilDarwin.cpp b/lib/Fuzzer/FuzzerUtilDarwin.cpp index 9674368c355ee..2df4872a92069 100644 --- a/lib/Fuzzer/FuzzerUtilDarwin.cpp +++ b/lib/Fuzzer/FuzzerUtilDarwin.cpp @@ -15,6 +15,8 @@ #include <mutex> #include <signal.h> #include <spawn.h> +#include <stdlib.h> +#include <string.h> #include <sys/wait.h> // There is no header for this on macOS so declare here @@ -97,11 +99,16 @@ int ExecuteCommand(const std::string &Command) { pid_t Pid; char **Environ = environ; // Read from global const char *CommandCStr = Command.c_str(); - const char *Argv[] = {"sh", "-c", CommandCStr, NULL}; + char *const Argv[] = { + strdup("sh"), + strdup("-c"), + strdup(CommandCStr), + NULL + }; int ErrorCode = 0, ProcessStatus = 0; // FIXME: We probably shouldn't hardcode the shell path. ErrorCode = posix_spawn(&Pid, "/bin/sh", NULL, &SpawnAttributes, - (char *const *)Argv, Environ); + Argv, Environ); (void)posix_spawnattr_destroy(&SpawnAttributes); if (!ErrorCode) { pid_t SavedPid = Pid; @@ -120,6 +127,8 @@ int ExecuteCommand(const std::string &Command) { // Shell execution failure. ProcessStatus = W_EXITCODE(127, 0); } + for (unsigned i = 0, n = sizeof(Argv) / sizeof(Argv[0]); i < n; ++i) + free(Argv[i]); // Restore the signal handlers of the current process when the last thread // using this function finishes. diff --git a/lib/Fuzzer/test/CMakeLists.txt b/lib/Fuzzer/test/CMakeLists.txt index 1cf6c9502a2b5..30566bdc87aed 100644 --- a/lib/Fuzzer/test/CMakeLists.txt +++ b/lib/Fuzzer/test/CMakeLists.txt @@ -118,6 +118,7 @@ set(Tests SingleStrncmpTest SpamyTest ShrinkControlFlowTest + ShrinkControlFlowSimpleTest ShrinkValueProfileTest StrcmpTest StrncmpOOBTest @@ -271,5 +272,5 @@ add_lit_testsuite(check-fuzzer "Running Fuzzer tests" # Don't add dependencies on Windows. The linker step would fail on Windows, # since cmake will use link.exe for linking and won't include compiler-rt libs. if(NOT MSVC) - add_dependencies(check-fuzzer FileCheck sancov not) + add_dependencies(check-fuzzer FileCheck sancov not llvm-symbolizer) endif() diff --git a/lib/Fuzzer/test/FuzzerUnittest.cpp b/lib/Fuzzer/test/FuzzerUnittest.cpp index 812894fd947f9..1053c28527bfd 100644 --- a/lib/Fuzzer/test/FuzzerUnittest.cpp +++ b/lib/Fuzzer/test/FuzzerUnittest.cpp @@ -5,6 +5,9 @@ // with ASan) involving C++ standard library types when using libcxx. #define _LIBCPP_HAS_NO_ASAN +// Do not attempt to use LLVM ostream from gtest. 
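An aside on the FuzzerUtilDarwin.cpp hunk above: posix_spawn takes char *const argv[], so the old cast of an array of string literals discarded const; the patch instead strdup's the arguments and frees them after the wait. A reduced sketch of that ownership pattern (RunShell is an illustrative name, error handling trimmed):

#include <errno.h>
#include <spawn.h>
#include <stdlib.h>
#include <string.h>
#include <sys/wait.h>

extern char **environ;

static int RunShell(const char *Cmd) {
  // Duplicate the strings so the array really is char *const [].
  char *const Argv[] = {strdup("sh"), strdup("-c"), strdup(Cmd), NULL};
  pid_t Pid;
  int Status = -1;
  if (posix_spawn(&Pid, "/bin/sh", NULL, NULL, Argv, environ) == 0)
    while (waitpid(Pid, &Status, 0) == -1 && errno == EINTR)
      ;  // retry if interrupted by a signal
  for (size_t I = 0; I < sizeof(Argv) / sizeof(Argv[0]); ++I)
    free(Argv[I]);  // free(NULL) for the terminator is harmless
  return Status;
}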
+#define GTEST_NO_LLVM_RAW_OSTREAM 1 + #include "FuzzerCorpus.h" #include "FuzzerDictionary.h" #include "FuzzerInternal.h" @@ -590,7 +593,7 @@ TEST(Corpus, Distribution) { size_t N = 10; size_t TriesPerUnit = 1<<16; for (size_t i = 0; i < N; i++) - C->AddToCorpus(Unit{ static_cast<uint8_t>(i) }, 0); + C->AddToCorpus(Unit{ static_cast<uint8_t>(i) }, 0, false, {}); std::vector<size_t> Hist(N); for (size_t i = 0; i < N * TriesPerUnit; i++) { diff --git a/lib/Fuzzer/test/ShrinkControlFlowSimpleTest.cpp b/lib/Fuzzer/test/ShrinkControlFlowSimpleTest.cpp new file mode 100644 index 0000000000000..0afd26df23a0f --- /dev/null +++ b/lib/Fuzzer/test/ShrinkControlFlowSimpleTest.cpp @@ -0,0 +1,19 @@ +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. + +// Test that we can find the minimal item in the corpus (3 bytes: "FUZ"). +#include <cstddef> +#include <cstdint> +#include <cstdio> +#include <cstdlib> +#include <cstring> + +static volatile int Sink; + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { + if (Size < 2) return 0; + if (Data[0] == 'F' && Data[Size / 2] == 'U' && Data[Size - 1] == 'Z') + Sink++; + return 0; +} + diff --git a/lib/Fuzzer/test/reduce_inputs.test b/lib/Fuzzer/test/reduce_inputs.test new file mode 100644 index 0000000000000..a4a5c57123d3f --- /dev/null +++ b/lib/Fuzzer/test/reduce_inputs.test @@ -0,0 +1,13 @@ +# Test -reduce_inputs=1 + +RUN: rm -rf %t/C +RUN: mkdir -p %t/C +RUN: LLVMFuzzer-ShrinkControlFlowSimpleTest -exit_on_item=0eb8e4ed029b774d80f2b66408203801cb982a60 -reduce_inputs=1 -runs=1000000 %t/C 2>&1 | FileCheck %s +CHECK: INFO: found item with checksum '0eb8e4ed029b774d80f2b66408203801cb982a60' + +# Test that reduce_inputs deletes redundant files in the corpus. +RUN: LLVMFuzzer-ShrinkControlFlowSimpleTest -runs=0 %t/C 2>&1 | FileCheck %s --check-prefix=COUNT +COUNT: READ units: 3 + + + diff --git a/lib/IR/AsmWriter.cpp b/lib/IR/AsmWriter.cpp index c7f112887a306..80371780fb6d9 100644 --- a/lib/IR/AsmWriter.cpp +++ b/lib/IR/AsmWriter.cpp @@ -2119,6 +2119,8 @@ class AssemblyWriter { bool ShouldPreserveUseListOrder; UseListOrderStack UseListOrders; SmallVector<StringRef, 8> MDNames; + /// Synchronization scope names registered with LLVMContext. 
+ SmallVector<StringRef, 8> SSNs; public: /// Construct an AssemblyWriter with an external SlotTracker @@ -2134,10 +2136,15 @@ public: void writeOperand(const Value *Op, bool PrintType); void writeParamOperand(const Value *Operand, AttributeSet Attrs); void writeOperandBundles(ImmutableCallSite CS); - void writeAtomic(AtomicOrdering Ordering, SynchronizationScope SynchScope); - void writeAtomicCmpXchg(AtomicOrdering SuccessOrdering, + void writeSyncScope(const LLVMContext &Context, + SyncScope::ID SSID); + void writeAtomic(const LLVMContext &Context, + AtomicOrdering Ordering, + SyncScope::ID SSID); + void writeAtomicCmpXchg(const LLVMContext &Context, + AtomicOrdering SuccessOrdering, AtomicOrdering FailureOrdering, - SynchronizationScope SynchScope); + SyncScope::ID SSID); void writeAllMDNodes(); void writeMDNode(unsigned Slot, const MDNode *Node); @@ -2199,30 +2206,42 @@ void AssemblyWriter::writeOperand(const Value *Operand, bool PrintType) { WriteAsOperandInternal(Out, Operand, &TypePrinter, &Machine, TheModule); } -void AssemblyWriter::writeAtomic(AtomicOrdering Ordering, - SynchronizationScope SynchScope) { - if (Ordering == AtomicOrdering::NotAtomic) - return; +void AssemblyWriter::writeSyncScope(const LLVMContext &Context, + SyncScope::ID SSID) { + switch (SSID) { + case SyncScope::System: { + break; + } + default: { + if (SSNs.empty()) + Context.getSyncScopeNames(SSNs); - switch (SynchScope) { - case SingleThread: Out << " singlethread"; break; - case CrossThread: break; + Out << " syncscope(\""; + PrintEscapedString(SSNs[SSID], Out); + Out << "\")"; + break; + } } +} + +void AssemblyWriter::writeAtomic(const LLVMContext &Context, + AtomicOrdering Ordering, + SyncScope::ID SSID) { + if (Ordering == AtomicOrdering::NotAtomic) + return; + writeSyncScope(Context, SSID); Out << " " << toIRString(Ordering); } -void AssemblyWriter::writeAtomicCmpXchg(AtomicOrdering SuccessOrdering, +void AssemblyWriter::writeAtomicCmpXchg(const LLVMContext &Context, + AtomicOrdering SuccessOrdering, AtomicOrdering FailureOrdering, - SynchronizationScope SynchScope) { + SyncScope::ID SSID) { assert(SuccessOrdering != AtomicOrdering::NotAtomic && FailureOrdering != AtomicOrdering::NotAtomic); - switch (SynchScope) { - case SingleThread: Out << " singlethread"; break; - case CrossThread: break; - } - + writeSyncScope(Context, SSID); Out << " " << toIRString(SuccessOrdering); Out << " " << toIRString(FailureOrdering); } @@ -3215,21 +3234,22 @@ void AssemblyWriter::printInstruction(const Instruction &I) { // Print atomic ordering/alignment for memory operations if (const LoadInst *LI = dyn_cast<LoadInst>(&I)) { if (LI->isAtomic()) - writeAtomic(LI->getOrdering(), LI->getSynchScope()); + writeAtomic(LI->getContext(), LI->getOrdering(), LI->getSyncScopeID()); if (LI->getAlignment()) Out << ", align " << LI->getAlignment(); } else if (const StoreInst *SI = dyn_cast<StoreInst>(&I)) { if (SI->isAtomic()) - writeAtomic(SI->getOrdering(), SI->getSynchScope()); + writeAtomic(SI->getContext(), SI->getOrdering(), SI->getSyncScopeID()); if (SI->getAlignment()) Out << ", align " << SI->getAlignment(); } else if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(&I)) { - writeAtomicCmpXchg(CXI->getSuccessOrdering(), CXI->getFailureOrdering(), - CXI->getSynchScope()); + writeAtomicCmpXchg(CXI->getContext(), CXI->getSuccessOrdering(), + CXI->getFailureOrdering(), CXI->getSyncScopeID()); } else if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(&I)) { - writeAtomic(RMWI->getOrdering(), RMWI->getSynchScope()); 
+ writeAtomic(RMWI->getContext(), RMWI->getOrdering(), + RMWI->getSyncScopeID()); } else if (const FenceInst *FI = dyn_cast<FenceInst>(&I)) { - writeAtomic(FI->getOrdering(), FI->getSynchScope()); + writeAtomic(FI->getContext(), FI->getOrdering(), FI->getSyncScopeID()); } // Print Metadata info. diff --git a/lib/IR/CMakeLists.txt b/lib/IR/CMakeLists.txt index 11259cbe18152..1cc229d68bfce 100644 --- a/lib/IR/CMakeLists.txt +++ b/lib/IR/CMakeLists.txt @@ -43,6 +43,7 @@ add_llvm_library(LLVMCore Pass.cpp PassManager.cpp PassRegistry.cpp + SafepointIRVerifier.cpp ProfileSummary.cpp Statepoint.cpp Type.cpp diff --git a/lib/IR/ConstantFold.cpp b/lib/IR/ConstantFold.cpp index 3469026ad7ed6..23ccd8d4cf424 100644 --- a/lib/IR/ConstantFold.cpp +++ b/lib/IR/ConstantFold.cpp @@ -242,7 +242,7 @@ static Constant *ExtractConstantBytes(Constant *C, unsigned ByteStart, // X | -1 -> -1. if (ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS)) - if (RHSC->isAllOnesValue()) + if (RHSC->isMinusOne()) return RHSC; Constant *LHS = ExtractConstantBytes(CE->getOperand(0), ByteStart,ByteSize); @@ -1015,33 +1015,33 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, if (ConstantInt *CI2 = dyn_cast<ConstantInt>(C2)) { switch (Opcode) { case Instruction::Add: - if (CI2->equalsInt(0)) return C1; // X + 0 == X + if (CI2->isZero()) return C1; // X + 0 == X break; case Instruction::Sub: - if (CI2->equalsInt(0)) return C1; // X - 0 == X + if (CI2->isZero()) return C1; // X - 0 == X break; case Instruction::Mul: - if (CI2->equalsInt(0)) return C2; // X * 0 == 0 - if (CI2->equalsInt(1)) + if (CI2->isZero()) return C2; // X * 0 == 0 + if (CI2->isOne()) return C1; // X * 1 == X break; case Instruction::UDiv: case Instruction::SDiv: - if (CI2->equalsInt(1)) + if (CI2->isOne()) return C1; // X / 1 == X - if (CI2->equalsInt(0)) + if (CI2->isZero()) return UndefValue::get(CI2->getType()); // X / 0 == undef break; case Instruction::URem: case Instruction::SRem: - if (CI2->equalsInt(1)) + if (CI2->isOne()) return Constant::getNullValue(CI2->getType()); // X % 1 == 0 - if (CI2->equalsInt(0)) + if (CI2->isZero()) return UndefValue::get(CI2->getType()); // X % 0 == undef break; case Instruction::And: if (CI2->isZero()) return C2; // X & 0 == 0 - if (CI2->isAllOnesValue()) + if (CI2->isMinusOne()) return C1; // X & -1 == X if (ConstantExpr *CE1 = dyn_cast<ConstantExpr>(C1)) { @@ -1078,12 +1078,12 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, } break; case Instruction::Or: - if (CI2->equalsInt(0)) return C1; // X | 0 == X - if (CI2->isAllOnesValue()) + if (CI2->isZero()) return C1; // X | 0 == X + if (CI2->isMinusOne()) return C2; // X | -1 == -1 break; case Instruction::Xor: - if (CI2->equalsInt(0)) return C1; // X ^ 0 == X + if (CI2->isZero()) return C1; // X ^ 0 == X if (ConstantExpr *CE1 = dyn_cast<ConstantExpr>(C1)) { switch (CE1->getOpcode()) { @@ -1091,7 +1091,7 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, case Instruction::ICmp: case Instruction::FCmp: // cmp pred ^ true -> cmp !pred - assert(CI2->equalsInt(1)); + assert(CI2->isOne()); CmpInst::Predicate pred = (CmpInst::Predicate)CE1->getPredicate(); pred = CmpInst::getInversePredicate(pred); return ConstantExpr::getCompare(pred, CE1->getOperand(0), @@ -1126,18 +1126,18 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, case Instruction::Mul: return ConstantInt::get(CI1->getContext(), C1V * C2V); case Instruction::UDiv: - assert(!CI2->isNullValue() && "Div by zero handled above"); + assert(!CI2->isZero() && "Div by 
zero handled above"); return ConstantInt::get(CI1->getContext(), C1V.udiv(C2V)); case Instruction::SDiv: - assert(!CI2->isNullValue() && "Div by zero handled above"); + assert(!CI2->isZero() && "Div by zero handled above"); if (C2V.isAllOnesValue() && C1V.isMinSignedValue()) return UndefValue::get(CI1->getType()); // MIN_INT / -1 -> undef return ConstantInt::get(CI1->getContext(), C1V.sdiv(C2V)); case Instruction::URem: - assert(!CI2->isNullValue() && "Div by zero handled above"); + assert(!CI2->isZero() && "Div by zero handled above"); return ConstantInt::get(CI1->getContext(), C1V.urem(C2V)); case Instruction::SRem: - assert(!CI2->isNullValue() && "Div by zero handled above"); + assert(!CI2->isZero() && "Div by zero handled above"); if (C2V.isAllOnesValue() && C1V.isMinSignedValue()) return UndefValue::get(CI1->getType()); // MIN_INT % -1 -> undef return ConstantInt::get(CI1->getContext(), C1V.srem(C2V)); @@ -1170,7 +1170,7 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, case Instruction::LShr: case Instruction::AShr: case Instruction::Shl: - if (CI1->equalsInt(0)) return C1; + if (CI1->isZero()) return C1; break; default: break; diff --git a/lib/IR/Constants.cpp b/lib/IR/Constants.cpp index d387a6f0ecb9a..e31779c83e3a3 100644 --- a/lib/IR/Constants.cpp +++ b/lib/IR/Constants.cpp @@ -512,7 +512,7 @@ ConstantInt *ConstantInt::getFalse(LLVMContext &Context) { } Constant *ConstantInt::getTrue(Type *Ty) { - assert(Ty->getScalarType()->isIntegerTy(1) && "Type not i1 or vector of i1."); + assert(Ty->isIntOrIntVectorTy(1) && "Type not i1 or vector of i1."); ConstantInt *TrueC = ConstantInt::getTrue(Ty->getContext()); if (auto *VTy = dyn_cast<VectorType>(Ty)) return ConstantVector::getSplat(VTy->getNumElements(), TrueC); @@ -520,7 +520,7 @@ Constant *ConstantInt::getTrue(Type *Ty) { } Constant *ConstantInt::getFalse(Type *Ty) { - assert(Ty->getScalarType()->isIntegerTy(1) && "Type not i1 or vector of i1."); + assert(Ty->isIntOrIntVectorTy(1) && "Type not i1 or vector of i1."); ConstantInt *FalseC = ConstantInt::getFalse(Ty->getContext()); if (auto *VTy = dyn_cast<VectorType>(Ty)) return ConstantVector::getSplat(VTy->getNumElements(), FalseC); @@ -1635,9 +1635,9 @@ Constant *ConstantExpr::getFPToSI(Constant *C, Type *Ty, bool OnlyIfReduced) { Constant *ConstantExpr::getPtrToInt(Constant *C, Type *DstTy, bool OnlyIfReduced) { - assert(C->getType()->getScalarType()->isPointerTy() && + assert(C->getType()->isPtrOrPtrVectorTy() && "PtrToInt source must be pointer or pointer vector"); - assert(DstTy->getScalarType()->isIntegerTy() && + assert(DstTy->isIntOrIntVectorTy() && "PtrToInt destination must be integer or integer vector"); assert(isa<VectorType>(C->getType()) == isa<VectorType>(DstTy)); if (isa<VectorType>(C->getType())) @@ -1648,9 +1648,9 @@ Constant *ConstantExpr::getPtrToInt(Constant *C, Type *DstTy, Constant *ConstantExpr::getIntToPtr(Constant *C, Type *DstTy, bool OnlyIfReduced) { - assert(C->getType()->getScalarType()->isIntegerTy() && + assert(C->getType()->isIntOrIntVectorTy() && "IntToPtr source must be integer or integer vector"); - assert(DstTy->getScalarType()->isPointerTy() && + assert(DstTy->isPtrOrPtrVectorTy() && "IntToPtr destination must be a pointer or pointer vector"); assert(isa<VectorType>(C->getType()) == isa<VectorType>(DstTy)); if (isa<VectorType>(C->getType())) @@ -1914,8 +1914,8 @@ Constant *ConstantExpr::getGetElementPtr(Type *Ty, Constant *C, Constant *ConstantExpr::getICmp(unsigned short pred, Constant *LHS, Constant *RHS, bool OnlyIfReduced) { 
assert(LHS->getType() == RHS->getType()); - assert(pred >= ICmpInst::FIRST_ICMP_PREDICATE && - pred <= ICmpInst::LAST_ICMP_PREDICATE && "Invalid ICmp Predicate"); + assert(CmpInst::isIntPredicate((CmpInst::Predicate)pred) && + "Invalid ICmp Predicate"); if (Constant *FC = ConstantFoldCompareInstruction(pred, LHS, RHS)) return FC; // Fold a few common cases... @@ -1939,7 +1939,8 @@ Constant *ConstantExpr::getICmp(unsigned short pred, Constant *LHS, Constant *ConstantExpr::getFCmp(unsigned short pred, Constant *LHS, Constant *RHS, bool OnlyIfReduced) { assert(LHS->getType() == RHS->getType()); - assert(pred <= FCmpInst::LAST_FCMP_PREDICATE && "Invalid FCmp Predicate"); + assert(CmpInst::isFPPredicate((CmpInst::Predicate)pred) && + "Invalid FCmp Predicate"); if (Constant *FC = ConstantFoldCompareInstruction(pred, LHS, RHS)) return FC; // Fold a few common cases... @@ -2379,32 +2380,32 @@ void ConstantDataSequential::destroyConstantImpl() { Constant *ConstantDataArray::get(LLVMContext &Context, ArrayRef<uint8_t> Elts) { Type *Ty = ArrayType::get(Type::getInt8Ty(Context), Elts.size()); const char *Data = reinterpret_cast<const char *>(Elts.data()); - return getImpl(StringRef(const_cast<char *>(Data), Elts.size()*1), Ty); + return getImpl(StringRef(Data, Elts.size() * 1), Ty); } Constant *ConstantDataArray::get(LLVMContext &Context, ArrayRef<uint16_t> Elts){ Type *Ty = ArrayType::get(Type::getInt16Ty(Context), Elts.size()); const char *Data = reinterpret_cast<const char *>(Elts.data()); - return getImpl(StringRef(const_cast<char *>(Data), Elts.size()*2), Ty); + return getImpl(StringRef(Data, Elts.size() * 2), Ty); } Constant *ConstantDataArray::get(LLVMContext &Context, ArrayRef<uint32_t> Elts){ Type *Ty = ArrayType::get(Type::getInt32Ty(Context), Elts.size()); const char *Data = reinterpret_cast<const char *>(Elts.data()); - return getImpl(StringRef(const_cast<char *>(Data), Elts.size()*4), Ty); + return getImpl(StringRef(Data, Elts.size() * 4), Ty); } Constant *ConstantDataArray::get(LLVMContext &Context, ArrayRef<uint64_t> Elts){ Type *Ty = ArrayType::get(Type::getInt64Ty(Context), Elts.size()); const char *Data = reinterpret_cast<const char *>(Elts.data()); - return getImpl(StringRef(const_cast<char *>(Data), Elts.size()*8), Ty); + return getImpl(StringRef(Data, Elts.size() * 8), Ty); } Constant *ConstantDataArray::get(LLVMContext &Context, ArrayRef<float> Elts) { Type *Ty = ArrayType::get(Type::getFloatTy(Context), Elts.size()); const char *Data = reinterpret_cast<const char *>(Elts.data()); - return getImpl(StringRef(const_cast<char *>(Data), Elts.size()*4), Ty); + return getImpl(StringRef(Data, Elts.size() * 4), Ty); } Constant *ConstantDataArray::get(LLVMContext &Context, ArrayRef<double> Elts) { Type *Ty = ArrayType::get(Type::getDoubleTy(Context), Elts.size()); const char *Data = reinterpret_cast<const char *>(Elts.data()); - return getImpl(StringRef(const_cast<char *>(Data), Elts.size() * 8), Ty); + return getImpl(StringRef(Data, Elts.size() * 8), Ty); } /// getFP() constructors - Return a constant with array type with an element @@ -2416,27 +2417,26 @@ Constant *ConstantDataArray::getFP(LLVMContext &Context, ArrayRef<uint16_t> Elts) { Type *Ty = ArrayType::get(Type::getHalfTy(Context), Elts.size()); const char *Data = reinterpret_cast<const char *>(Elts.data()); - return getImpl(StringRef(const_cast<char *>(Data), Elts.size() * 2), Ty); + return getImpl(StringRef(Data, Elts.size() * 2), Ty); } Constant *ConstantDataArray::getFP(LLVMContext &Context, ArrayRef<uint32_t> Elts) { Type 
*Ty = ArrayType::get(Type::getFloatTy(Context), Elts.size()); const char *Data = reinterpret_cast<const char *>(Elts.data()); - return getImpl(StringRef(const_cast<char *>(Data), Elts.size() * 4), Ty); + return getImpl(StringRef(Data, Elts.size() * 4), Ty); } Constant *ConstantDataArray::getFP(LLVMContext &Context, ArrayRef<uint64_t> Elts) { Type *Ty = ArrayType::get(Type::getDoubleTy(Context), Elts.size()); const char *Data = reinterpret_cast<const char *>(Elts.data()); - return getImpl(StringRef(const_cast<char *>(Data), Elts.size() * 8), Ty); + return getImpl(StringRef(Data, Elts.size() * 8), Ty); } Constant *ConstantDataArray::getString(LLVMContext &Context, StringRef Str, bool AddNull) { if (!AddNull) { const uint8_t *Data = reinterpret_cast<const uint8_t *>(Str.data()); - return get(Context, makeArrayRef(const_cast<uint8_t *>(Data), - Str.size())); + return get(Context, makeArrayRef(Data, Str.size())); } SmallVector<uint8_t, 64> ElementVals; @@ -2451,32 +2451,32 @@ Constant *ConstantDataArray::getString(LLVMContext &Context, Constant *ConstantDataVector::get(LLVMContext &Context, ArrayRef<uint8_t> Elts){ Type *Ty = VectorType::get(Type::getInt8Ty(Context), Elts.size()); const char *Data = reinterpret_cast<const char *>(Elts.data()); - return getImpl(StringRef(const_cast<char *>(Data), Elts.size()*1), Ty); + return getImpl(StringRef(Data, Elts.size() * 1), Ty); } Constant *ConstantDataVector::get(LLVMContext &Context, ArrayRef<uint16_t> Elts){ Type *Ty = VectorType::get(Type::getInt16Ty(Context), Elts.size()); const char *Data = reinterpret_cast<const char *>(Elts.data()); - return getImpl(StringRef(const_cast<char *>(Data), Elts.size()*2), Ty); + return getImpl(StringRef(Data, Elts.size() * 2), Ty); } Constant *ConstantDataVector::get(LLVMContext &Context, ArrayRef<uint32_t> Elts){ Type *Ty = VectorType::get(Type::getInt32Ty(Context), Elts.size()); const char *Data = reinterpret_cast<const char *>(Elts.data()); - return getImpl(StringRef(const_cast<char *>(Data), Elts.size()*4), Ty); + return getImpl(StringRef(Data, Elts.size() * 4), Ty); } Constant *ConstantDataVector::get(LLVMContext &Context, ArrayRef<uint64_t> Elts){ Type *Ty = VectorType::get(Type::getInt64Ty(Context), Elts.size()); const char *Data = reinterpret_cast<const char *>(Elts.data()); - return getImpl(StringRef(const_cast<char *>(Data), Elts.size()*8), Ty); + return getImpl(StringRef(Data, Elts.size() * 8), Ty); } Constant *ConstantDataVector::get(LLVMContext &Context, ArrayRef<float> Elts) { Type *Ty = VectorType::get(Type::getFloatTy(Context), Elts.size()); const char *Data = reinterpret_cast<const char *>(Elts.data()); - return getImpl(StringRef(const_cast<char *>(Data), Elts.size()*4), Ty); + return getImpl(StringRef(Data, Elts.size() * 4), Ty); } Constant *ConstantDataVector::get(LLVMContext &Context, ArrayRef<double> Elts) { Type *Ty = VectorType::get(Type::getDoubleTy(Context), Elts.size()); const char *Data = reinterpret_cast<const char *>(Elts.data()); - return getImpl(StringRef(const_cast<char *>(Data), Elts.size() * 8), Ty); + return getImpl(StringRef(Data, Elts.size() * 8), Ty); } /// getFP() constructors - Return a constant with vector type with an element @@ -2488,19 +2488,19 @@ Constant *ConstantDataVector::getFP(LLVMContext &Context, ArrayRef<uint16_t> Elts) { Type *Ty = VectorType::get(Type::getHalfTy(Context), Elts.size()); const char *Data = reinterpret_cast<const char *>(Elts.data()); - return getImpl(StringRef(const_cast<char *>(Data), Elts.size() * 2), Ty); + return getImpl(StringRef(Data, 
Elts.size() * 2), Ty); } Constant *ConstantDataVector::getFP(LLVMContext &Context, ArrayRef<uint32_t> Elts) { Type *Ty = VectorType::get(Type::getFloatTy(Context), Elts.size()); const char *Data = reinterpret_cast<const char *>(Elts.data()); - return getImpl(StringRef(const_cast<char *>(Data), Elts.size() * 4), Ty); + return getImpl(StringRef(Data, Elts.size() * 4), Ty); } Constant *ConstantDataVector::getFP(LLVMContext &Context, ArrayRef<uint64_t> Elts) { Type *Ty = VectorType::get(Type::getDoubleTy(Context), Elts.size()); const char *Data = reinterpret_cast<const char *>(Elts.data()); - return getImpl(StringRef(const_cast<char *>(Data), Elts.size() * 8), Ty); + return getImpl(StringRef(Data, Elts.size() * 8), Ty); } Constant *ConstantDataVector::getSplat(unsigned NumElts, Constant *V) { @@ -2555,13 +2555,13 @@ uint64_t ConstantDataSequential::getElementAsInteger(unsigned Elt) const { switch (getElementType()->getIntegerBitWidth()) { default: llvm_unreachable("Invalid bitwidth for CDS"); case 8: - return *const_cast<uint8_t *>(reinterpret_cast<const uint8_t *>(EltPtr)); + return *reinterpret_cast<const uint8_t *>(EltPtr); case 16: - return *const_cast<uint16_t *>(reinterpret_cast<const uint16_t *>(EltPtr)); + return *reinterpret_cast<const uint16_t *>(EltPtr); case 32: - return *const_cast<uint32_t *>(reinterpret_cast<const uint32_t *>(EltPtr)); + return *reinterpret_cast<const uint32_t *>(EltPtr); case 64: - return *const_cast<uint64_t *>(reinterpret_cast<const uint64_t *>(EltPtr)); + return *reinterpret_cast<const uint64_t *>(EltPtr); } } @@ -2589,16 +2589,13 @@ APFloat ConstantDataSequential::getElementAsAPFloat(unsigned Elt) const { float ConstantDataSequential::getElementAsFloat(unsigned Elt) const { assert(getElementType()->isFloatTy() && "Accessor can only be used when element is a 'float'"); - const float *EltPtr = reinterpret_cast<const float *>(getElementPointer(Elt)); - return *const_cast<float *>(EltPtr); + return *reinterpret_cast<const float *>(getElementPointer(Elt)); } double ConstantDataSequential::getElementAsDouble(unsigned Elt) const { assert(getElementType()->isDoubleTy() && "Accessor can only be used when element is a 'float'"); - const double *EltPtr = - reinterpret_cast<const double *>(getElementPointer(Elt)); - return *const_cast<double *>(EltPtr); + return *reinterpret_cast<const double *>(getElementPointer(Elt)); } Constant *ConstantDataSequential::getElementAsConstant(unsigned Elt) const { diff --git a/lib/IR/Core.cpp b/lib/IR/Core.cpp index 4ff0261a7f08f..2165ae5a94702 100644 --- a/lib/IR/Core.cpp +++ b/lib/IR/Core.cpp @@ -50,6 +50,7 @@ void llvm::initializeCore(PassRegistry &Registry) { initializePrintModulePassWrapperPass(Registry); initializePrintFunctionPassWrapperPass(Registry); initializePrintBasicBlockPassPass(Registry); + initializeSafepointIRVerifierPass(Registry); initializeVerifierLegacyPassPass(Registry); } @@ -2755,11 +2756,14 @@ static LLVMAtomicOrdering mapToLLVMOrdering(AtomicOrdering Ordering) { llvm_unreachable("Invalid AtomicOrdering value!"); } +// TODO: Should this and other atomic instructions support building with +// "syncscope"? LLVMValueRef LLVMBuildFence(LLVMBuilderRef B, LLVMAtomicOrdering Ordering, LLVMBool isSingleThread, const char *Name) { return wrap( unwrap(B)->CreateFence(mapFromLLVMOrdering(Ordering), - isSingleThread ? SingleThread : CrossThread, + isSingleThread ? 
SyncScope::SingleThread + : SyncScope::System, Name)); } @@ -3041,7 +3045,8 @@ LLVMValueRef LLVMBuildAtomicRMW(LLVMBuilderRef B,LLVMAtomicRMWBinOp op, case LLVMAtomicRMWBinOpUMin: intop = AtomicRMWInst::UMin; break; } return wrap(unwrap(B)->CreateAtomicRMW(intop, unwrap(PTR), unwrap(Val), - mapFromLLVMOrdering(ordering), singleThread ? SingleThread : CrossThread)); + mapFromLLVMOrdering(ordering), singleThread ? SyncScope::SingleThread + : SyncScope::System)); } LLVMValueRef LLVMBuildAtomicCmpXchg(LLVMBuilderRef B, LLVMValueRef Ptr, @@ -3053,7 +3058,7 @@ LLVMValueRef LLVMBuildAtomicCmpXchg(LLVMBuilderRef B, LLVMValueRef Ptr, return wrap(unwrap(B)->CreateAtomicCmpXchg(unwrap(Ptr), unwrap(Cmp), unwrap(New), mapFromLLVMOrdering(SuccessOrdering), mapFromLLVMOrdering(FailureOrdering), - singleThread ? SingleThread : CrossThread)); + singleThread ? SyncScope::SingleThread : SyncScope::System)); } @@ -3061,17 +3066,18 @@ LLVMBool LLVMIsAtomicSingleThread(LLVMValueRef AtomicInst) { Value *P = unwrap<Value>(AtomicInst); if (AtomicRMWInst *I = dyn_cast<AtomicRMWInst>(P)) - return I->getSynchScope() == SingleThread; - return cast<AtomicCmpXchgInst>(P)->getSynchScope() == SingleThread; + return I->getSyncScopeID() == SyncScope::SingleThread; + return cast<AtomicCmpXchgInst>(P)->getSyncScopeID() == + SyncScope::SingleThread; } void LLVMSetAtomicSingleThread(LLVMValueRef AtomicInst, LLVMBool NewValue) { Value *P = unwrap<Value>(AtomicInst); - SynchronizationScope Sync = NewValue ? SingleThread : CrossThread; + SyncScope::ID SSID = NewValue ? SyncScope::SingleThread : SyncScope::System; if (AtomicRMWInst *I = dyn_cast<AtomicRMWInst>(P)) - return I->setSynchScope(Sync); - return cast<AtomicCmpXchgInst>(P)->setSynchScope(Sync); + return I->setSyncScopeID(SSID); + return cast<AtomicCmpXchgInst>(P)->setSyncScopeID(SSID); } LLVMAtomicOrdering LLVMGetCmpXchgSuccessOrdering(LLVMValueRef CmpXchgInst) { diff --git a/lib/IR/Instruction.cpp b/lib/IR/Instruction.cpp index 3dd653d2d0473..365cb019aec43 100644 --- a/lib/IR/Instruction.cpp +++ b/lib/IR/Instruction.cpp @@ -362,13 +362,13 @@ static bool haveSameSpecialState(const Instruction *I1, const Instruction *I2, (LI->getAlignment() == cast<LoadInst>(I2)->getAlignment() || IgnoreAlignment) && LI->getOrdering() == cast<LoadInst>(I2)->getOrdering() && - LI->getSynchScope() == cast<LoadInst>(I2)->getSynchScope(); + LI->getSyncScopeID() == cast<LoadInst>(I2)->getSyncScopeID(); if (const StoreInst *SI = dyn_cast<StoreInst>(I1)) return SI->isVolatile() == cast<StoreInst>(I2)->isVolatile() && (SI->getAlignment() == cast<StoreInst>(I2)->getAlignment() || IgnoreAlignment) && SI->getOrdering() == cast<StoreInst>(I2)->getOrdering() && - SI->getSynchScope() == cast<StoreInst>(I2)->getSynchScope(); + SI->getSyncScopeID() == cast<StoreInst>(I2)->getSyncScopeID(); if (const CmpInst *CI = dyn_cast<CmpInst>(I1)) return CI->getPredicate() == cast<CmpInst>(I2)->getPredicate(); if (const CallInst *CI = dyn_cast<CallInst>(I1)) @@ -386,7 +386,7 @@ static bool haveSameSpecialState(const Instruction *I1, const Instruction *I2, return EVI->getIndices() == cast<ExtractValueInst>(I2)->getIndices(); if (const FenceInst *FI = dyn_cast<FenceInst>(I1)) return FI->getOrdering() == cast<FenceInst>(I2)->getOrdering() && - FI->getSynchScope() == cast<FenceInst>(I2)->getSynchScope(); + FI->getSyncScopeID() == cast<FenceInst>(I2)->getSyncScopeID(); if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(I1)) return CXI->isVolatile() == cast<AtomicCmpXchgInst>(I2)->isVolatile() && CXI->isWeak() 
== cast<AtomicCmpXchgInst>(I2)->isWeak() && @@ -394,12 +394,13 @@ static bool haveSameSpecialState(const Instruction *I1, const Instruction *I2, cast<AtomicCmpXchgInst>(I2)->getSuccessOrdering() && CXI->getFailureOrdering() == cast<AtomicCmpXchgInst>(I2)->getFailureOrdering() && - CXI->getSynchScope() == cast<AtomicCmpXchgInst>(I2)->getSynchScope(); + CXI->getSyncScopeID() == + cast<AtomicCmpXchgInst>(I2)->getSyncScopeID(); if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(I1)) return RMWI->getOperation() == cast<AtomicRMWInst>(I2)->getOperation() && RMWI->isVolatile() == cast<AtomicRMWInst>(I2)->isVolatile() && RMWI->getOrdering() == cast<AtomicRMWInst>(I2)->getOrdering() && - RMWI->getSynchScope() == cast<AtomicRMWInst>(I2)->getSynchScope(); + RMWI->getSyncScopeID() == cast<AtomicRMWInst>(I2)->getSyncScopeID(); return true; } diff --git a/lib/IR/Instructions.cpp b/lib/IR/Instructions.cpp index a79b00be4ffe8..2c49564e328bd 100644 --- a/lib/IR/Instructions.cpp +++ b/lib/IR/Instructions.cpp @@ -1304,34 +1304,34 @@ LoadInst::LoadInst(Value *Ptr, const Twine &Name, bool isVolatile, LoadInst::LoadInst(Type *Ty, Value *Ptr, const Twine &Name, bool isVolatile, unsigned Align, Instruction *InsertBef) : LoadInst(Ty, Ptr, Name, isVolatile, Align, AtomicOrdering::NotAtomic, - CrossThread, InsertBef) {} + SyncScope::System, InsertBef) {} LoadInst::LoadInst(Value *Ptr, const Twine &Name, bool isVolatile, unsigned Align, BasicBlock *InsertAE) : LoadInst(Ptr, Name, isVolatile, Align, AtomicOrdering::NotAtomic, - CrossThread, InsertAE) {} + SyncScope::System, InsertAE) {} LoadInst::LoadInst(Type *Ty, Value *Ptr, const Twine &Name, bool isVolatile, unsigned Align, AtomicOrdering Order, - SynchronizationScope SynchScope, Instruction *InsertBef) + SyncScope::ID SSID, Instruction *InsertBef) : UnaryInstruction(Ty, Load, Ptr, InsertBef) { assert(Ty == cast<PointerType>(Ptr->getType())->getElementType()); setVolatile(isVolatile); setAlignment(Align); - setAtomic(Order, SynchScope); + setAtomic(Order, SSID); AssertOK(); setName(Name); } LoadInst::LoadInst(Value *Ptr, const Twine &Name, bool isVolatile, unsigned Align, AtomicOrdering Order, - SynchronizationScope SynchScope, + SyncScope::ID SSID, BasicBlock *InsertAE) : UnaryInstruction(cast<PointerType>(Ptr->getType())->getElementType(), Load, Ptr, InsertAE) { setVolatile(isVolatile); setAlignment(Align); - setAtomic(Order, SynchScope); + setAtomic(Order, SSID); AssertOK(); setName(Name); } @@ -1419,16 +1419,16 @@ StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile, StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile, unsigned Align, Instruction *InsertBefore) : StoreInst(val, addr, isVolatile, Align, AtomicOrdering::NotAtomic, - CrossThread, InsertBefore) {} + SyncScope::System, InsertBefore) {} StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile, unsigned Align, BasicBlock *InsertAtEnd) : StoreInst(val, addr, isVolatile, Align, AtomicOrdering::NotAtomic, - CrossThread, InsertAtEnd) {} + SyncScope::System, InsertAtEnd) {} StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile, unsigned Align, AtomicOrdering Order, - SynchronizationScope SynchScope, + SyncScope::ID SSID, Instruction *InsertBefore) : Instruction(Type::getVoidTy(val->getContext()), Store, OperandTraits<StoreInst>::op_begin(this), @@ -1438,13 +1438,13 @@ StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile, Op<1>() = addr; setVolatile(isVolatile); setAlignment(Align); - setAtomic(Order, SynchScope); + setAtomic(Order, SSID); AssertOK(); } 
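To illustrate the API shape after this rename (a sketch, not part of the patch): scopes are now plain SyncScope::ID values interned on the LLVMContext, with SyncScope::System and SyncScope::SingleThread pre-registered in place of the old CrossThread/SingleThread enum, and targets may intern additional named scopes. The scope name "agent" below is an arbitrary example:

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"

using namespace llvm;

static void emitFences(IRBuilder<> &B, LLVMContext &Ctx) {
  // The two pre-registered scopes keep the old semantics.
  B.CreateFence(AtomicOrdering::SequentiallyConsistent, SyncScope::System);
  B.CreateFence(AtomicOrdering::Release, SyncScope::SingleThread);
  // A named scope; AsmWriter prints this as: fence syncscope("agent") acquire
  SyncScope::ID AgentSSID = Ctx.getOrInsertSyncScopeID("agent");
  B.CreateFence(AtomicOrdering::Acquire, AgentSSID);
}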
StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile, unsigned Align, AtomicOrdering Order, - SynchronizationScope SynchScope, + SyncScope::ID SSID, BasicBlock *InsertAtEnd) : Instruction(Type::getVoidTy(val->getContext()), Store, OperandTraits<StoreInst>::op_begin(this), @@ -1454,7 +1454,7 @@ StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile, Op<1>() = addr; setVolatile(isVolatile); setAlignment(Align); - setAtomic(Order, SynchScope); + setAtomic(Order, SSID); AssertOK(); } @@ -1474,13 +1474,13 @@ void StoreInst::setAlignment(unsigned Align) { void AtomicCmpXchgInst::Init(Value *Ptr, Value *Cmp, Value *NewVal, AtomicOrdering SuccessOrdering, AtomicOrdering FailureOrdering, - SynchronizationScope SynchScope) { + SyncScope::ID SSID) { Op<0>() = Ptr; Op<1>() = Cmp; Op<2>() = NewVal; setSuccessOrdering(SuccessOrdering); setFailureOrdering(FailureOrdering); - setSynchScope(SynchScope); + setSyncScopeID(SSID); assert(getOperand(0) && getOperand(1) && getOperand(2) && "All operands must be non-null!"); @@ -1507,25 +1507,25 @@ void AtomicCmpXchgInst::Init(Value *Ptr, Value *Cmp, Value *NewVal, AtomicCmpXchgInst::AtomicCmpXchgInst(Value *Ptr, Value *Cmp, Value *NewVal, AtomicOrdering SuccessOrdering, AtomicOrdering FailureOrdering, - SynchronizationScope SynchScope, + SyncScope::ID SSID, Instruction *InsertBefore) : Instruction( StructType::get(Cmp->getType(), Type::getInt1Ty(Cmp->getContext())), AtomicCmpXchg, OperandTraits<AtomicCmpXchgInst>::op_begin(this), OperandTraits<AtomicCmpXchgInst>::operands(this), InsertBefore) { - Init(Ptr, Cmp, NewVal, SuccessOrdering, FailureOrdering, SynchScope); + Init(Ptr, Cmp, NewVal, SuccessOrdering, FailureOrdering, SSID); } AtomicCmpXchgInst::AtomicCmpXchgInst(Value *Ptr, Value *Cmp, Value *NewVal, AtomicOrdering SuccessOrdering, AtomicOrdering FailureOrdering, - SynchronizationScope SynchScope, + SyncScope::ID SSID, BasicBlock *InsertAtEnd) : Instruction( StructType::get(Cmp->getType(), Type::getInt1Ty(Cmp->getContext())), AtomicCmpXchg, OperandTraits<AtomicCmpXchgInst>::op_begin(this), OperandTraits<AtomicCmpXchgInst>::operands(this), InsertAtEnd) { - Init(Ptr, Cmp, NewVal, SuccessOrdering, FailureOrdering, SynchScope); + Init(Ptr, Cmp, NewVal, SuccessOrdering, FailureOrdering, SSID); } //===----------------------------------------------------------------------===// @@ -1534,12 +1534,12 @@ AtomicCmpXchgInst::AtomicCmpXchgInst(Value *Ptr, Value *Cmp, Value *NewVal, void AtomicRMWInst::Init(BinOp Operation, Value *Ptr, Value *Val, AtomicOrdering Ordering, - SynchronizationScope SynchScope) { + SyncScope::ID SSID) { Op<0>() = Ptr; Op<1>() = Val; setOperation(Operation); setOrdering(Ordering); - setSynchScope(SynchScope); + setSyncScopeID(SSID); assert(getOperand(0) && getOperand(1) && "All operands must be non-null!"); @@ -1554,24 +1554,24 @@ void AtomicRMWInst::Init(BinOp Operation, Value *Ptr, Value *Val, AtomicRMWInst::AtomicRMWInst(BinOp Operation, Value *Ptr, Value *Val, AtomicOrdering Ordering, - SynchronizationScope SynchScope, + SyncScope::ID SSID, Instruction *InsertBefore) : Instruction(Val->getType(), AtomicRMW, OperandTraits<AtomicRMWInst>::op_begin(this), OperandTraits<AtomicRMWInst>::operands(this), InsertBefore) { - Init(Operation, Ptr, Val, Ordering, SynchScope); + Init(Operation, Ptr, Val, Ordering, SSID); } AtomicRMWInst::AtomicRMWInst(BinOp Operation, Value *Ptr, Value *Val, AtomicOrdering Ordering, - SynchronizationScope SynchScope, + SyncScope::ID SSID, BasicBlock *InsertAtEnd) : Instruction(Val->getType(), AtomicRMW, 
OperandTraits<AtomicRMWInst>::op_begin(this), OperandTraits<AtomicRMWInst>::operands(this), InsertAtEnd) { - Init(Operation, Ptr, Val, Ordering, SynchScope); + Init(Operation, Ptr, Val, Ordering, SSID); } //===----------------------------------------------------------------------===// @@ -1579,19 +1579,19 @@ AtomicRMWInst::AtomicRMWInst(BinOp Operation, Value *Ptr, Value *Val, //===----------------------------------------------------------------------===// FenceInst::FenceInst(LLVMContext &C, AtomicOrdering Ordering, - SynchronizationScope SynchScope, + SyncScope::ID SSID, Instruction *InsertBefore) : Instruction(Type::getVoidTy(C), Fence, nullptr, 0, InsertBefore) { setOrdering(Ordering); - setSynchScope(SynchScope); + setSyncScopeID(SSID); } FenceInst::FenceInst(LLVMContext &C, AtomicOrdering Ordering, - SynchronizationScope SynchScope, + SyncScope::ID SSID, BasicBlock *InsertAtEnd) : Instruction(Type::getVoidTy(C), Fence, nullptr, 0, InsertAtEnd) { setOrdering(Ordering); - setSynchScope(SynchScope); + setSyncScopeID(SSID); } //===----------------------------------------------------------------------===// @@ -3064,16 +3064,14 @@ CastInst::castIsValid(Instruction::CastOps op, Value *S, Type *DstTy) { if (VectorType *VT = dyn_cast<VectorType>(SrcTy)) if (VT->getNumElements() != cast<VectorType>(DstTy)->getNumElements()) return false; - return SrcTy->getScalarType()->isPointerTy() && - DstTy->getScalarType()->isIntegerTy(); + return SrcTy->isPtrOrPtrVectorTy() && DstTy->isIntOrIntVectorTy(); case Instruction::IntToPtr: if (isa<VectorType>(SrcTy) != isa<VectorType>(DstTy)) return false; if (VectorType *VT = dyn_cast<VectorType>(SrcTy)) if (VT->getNumElements() != cast<VectorType>(DstTy)->getNumElements()) return false; - return SrcTy->getScalarType()->isIntegerTy() && - DstTy->getScalarType()->isPointerTy(); + return SrcTy->isIntOrIntVectorTy() && DstTy->isPtrOrPtrVectorTy(); case Instruction::BitCast: { PointerType *SrcPtrTy = dyn_cast<PointerType>(SrcTy->getScalarType()); PointerType *DstPtrTy = dyn_cast<PointerType>(DstTy->getScalarType()); @@ -3797,12 +3795,12 @@ AllocaInst *AllocaInst::cloneImpl() const { LoadInst *LoadInst::cloneImpl() const { return new LoadInst(getOperand(0), Twine(), isVolatile(), - getAlignment(), getOrdering(), getSynchScope()); + getAlignment(), getOrdering(), getSyncScopeID()); } StoreInst *StoreInst::cloneImpl() const { return new StoreInst(getOperand(0), getOperand(1), isVolatile(), - getAlignment(), getOrdering(), getSynchScope()); + getAlignment(), getOrdering(), getSyncScopeID()); } @@ -3810,7 +3808,7 @@ AtomicCmpXchgInst *AtomicCmpXchgInst::cloneImpl() const { AtomicCmpXchgInst *Result = new AtomicCmpXchgInst(getOperand(0), getOperand(1), getOperand(2), getSuccessOrdering(), getFailureOrdering(), - getSynchScope()); + getSyncScopeID()); Result->setVolatile(isVolatile()); Result->setWeak(isWeak()); return Result; @@ -3818,14 +3816,14 @@ AtomicCmpXchgInst *AtomicCmpXchgInst::cloneImpl() const { AtomicRMWInst *AtomicRMWInst::cloneImpl() const { AtomicRMWInst *Result = - new AtomicRMWInst(getOperation(),getOperand(0), getOperand(1), - getOrdering(), getSynchScope()); + new AtomicRMWInst(getOperation(), getOperand(0), getOperand(1), + getOrdering(), getSyncScopeID()); Result->setVolatile(isVolatile()); return Result; } FenceInst *FenceInst::cloneImpl() const { - return new FenceInst(getContext(), getOrdering(), getSynchScope()); + return new FenceInst(getContext(), getOrdering(), getSyncScopeID()); } TruncInst *TruncInst::cloneImpl() const { diff --git 
a/lib/IR/LLVMContext.cpp b/lib/IR/LLVMContext.cpp index 2e13f362344d8..c58459d6d5f5e 100644 --- a/lib/IR/LLVMContext.cpp +++ b/lib/IR/LLVMContext.cpp @@ -81,6 +81,18 @@ LLVMContext::LLVMContext() : pImpl(new LLVMContextImpl(*this)) { assert(GCTransitionEntry->second == LLVMContext::OB_gc_transition && "gc-transition operand bundle id drifted!"); (void)GCTransitionEntry; + + SyncScope::ID SingleThreadSSID = + pImpl->getOrInsertSyncScopeID("singlethread"); + assert(SingleThreadSSID == SyncScope::SingleThread && + "singlethread synchronization scope ID drifted!"); + (void)SingleThreadSSID; + + SyncScope::ID SystemSSID = + pImpl->getOrInsertSyncScopeID(""); + assert(SystemSSID == SyncScope::System && + "system synchronization scope ID drifted!"); + (void)SystemSSID; } LLVMContext::~LLVMContext() { delete pImpl; } @@ -255,6 +267,14 @@ uint32_t LLVMContext::getOperandBundleTagID(StringRef Tag) const { return pImpl->getOperandBundleTagID(Tag); } +SyncScope::ID LLVMContext::getOrInsertSyncScopeID(StringRef SSN) { + return pImpl->getOrInsertSyncScopeID(SSN); +} + +void LLVMContext::getSyncScopeNames(SmallVectorImpl<StringRef> &SSNs) const { + pImpl->getSyncScopeNames(SSNs); +} + void LLVMContext::setGC(const Function &Fn, std::string GCName) { auto It = pImpl->GCNames.find(&Fn); diff --git a/lib/IR/LLVMContextImpl.cpp b/lib/IR/LLVMContextImpl.cpp index c19e1be44fdc7..57dd08b36fe70 100644 --- a/lib/IR/LLVMContextImpl.cpp +++ b/lib/IR/LLVMContextImpl.cpp @@ -205,6 +205,20 @@ uint32_t LLVMContextImpl::getOperandBundleTagID(StringRef Tag) const { return I->second; } +SyncScope::ID LLVMContextImpl::getOrInsertSyncScopeID(StringRef SSN) { + auto NewSSID = SSC.size(); + assert(NewSSID < std::numeric_limits<SyncScope::ID>::max() && + "Hit the maximum number of synchronization scopes allowed!"); + return SSC.insert(std::make_pair(SSN, SyncScope::ID(NewSSID))).first->second; +} + +void LLVMContextImpl::getSyncScopeNames( + SmallVectorImpl<StringRef> &SSNs) const { + SSNs.resize(SSC.size()); + for (const auto &SSE : SSC) + SSNs[SSE.second] = SSE.first(); +} + /// Singleton instance of the OptBisect class. /// /// This singleton is accessed via the LLVMContext::getOptBisect() function. It diff --git a/lib/IR/LLVMContextImpl.h b/lib/IR/LLVMContextImpl.h index 395beb57fe373..e413a4f344329 100644 --- a/lib/IR/LLVMContextImpl.h +++ b/lib/IR/LLVMContextImpl.h @@ -1297,6 +1297,20 @@ public: void getOperandBundleTags(SmallVectorImpl<StringRef> &Tags) const; uint32_t getOperandBundleTagID(StringRef Tag) const; + /// A set of interned synchronization scopes. The StringMap maps + /// synchronization scope names to their respective synchronization scope IDs. + StringMap<SyncScope::ID> SSC; + + /// getOrInsertSyncScopeID - Maps synchronization scope name to + /// synchronization scope ID. Every synchronization scope registered with + /// LLVMContext has a unique ID, except for the pre-defined ones. + SyncScope::ID getOrInsertSyncScopeID(StringRef SSN); + + /// getSyncScopeNames - Populates the client-supplied SmallVector with + /// synchronization scope names registered with LLVMContext. Synchronization + /// scope names are ordered by increasing synchronization scope IDs. + void getSyncScopeNames(SmallVectorImpl<StringRef> &SSNs) const; + /// Maintain the GC name for each function.
/// /// This saves allocating an additional word in Function for programs which diff --git a/lib/IR/Module.cpp b/lib/IR/Module.cpp index f8853ed169c5d..fdc7de6eaa34e 100644 --- a/lib/IR/Module.cpp +++ b/lib/IR/Module.cpp @@ -88,7 +88,7 @@ Module::~Module() { delete static_cast<StringMap<NamedMDNode *> *>(NamedMDSymTab); } -RandomNumberGenerator *Module::createRNG(const Pass* P) const { +std::unique_ptr<RandomNumberGenerator> Module::createRNG(const Pass* P) const { SmallString<32> Salt(P->getPassName()); // This RNG is guaranteed to produce the same random stream only @@ -103,7 +103,7 @@ RandomNumberGenerator *Module::createRNG(const Pass* P) const { // store salt metadata from the Module constructor. Salt += sys::path::filename(getModuleIdentifier()); - return new RandomNumberGenerator(Salt); + return std::unique_ptr<RandomNumberGenerator>{new RandomNumberGenerator(Salt)}; } /// getNamedValue - Return the first global value in the module with diff --git a/lib/IR/SafepointIRVerifier.cpp b/lib/IR/SafepointIRVerifier.cpp new file mode 100644 index 0000000000000..8b328c221da32 --- /dev/null +++ b/lib/IR/SafepointIRVerifier.cpp @@ -0,0 +1,437 @@ +//===-- SafepointIRVerifier.cpp - Verify gc.statepoint invariants ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Run a sanity check on the IR to ensure that Safepoints - if they've been +// inserted - were inserted correctly. In particular, look for use of +// non-relocated values after a safepoint. Its primary use is to check the +// correctness of safepoint insertion immediately after insertion, but it can +// also be used to verify that later transforms have not found a way to break +// safepoint semantics. +// +// In its current form, this verifier checks a property which is sufficient, but +// not necessary for correctness. There are some cases where an unrelocated +// pointer can be used after the safepoint. Consider this example: +// +// a = ... +// b = ... +// (a',b') = safepoint(a,b) +// c = cmp eq a b +// br c, ..., .... +// +// Because it is valid to reorder 'c' above the safepoint, this is legal. In +// practice, this is a somewhat uncommon transform, but CodeGenPrep does create +// idioms like this. The verifier knows about these cases and avoids reporting +// false positives. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/SetOperations.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Value.h" +#include "llvm/IR/SafepointIRVerifier.h" +#include "llvm/IR/Statepoint.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/raw_ostream.h" + +#define DEBUG_TYPE "safepoint-ir-verifier" + +using namespace llvm; + +/// This option is used for writing test cases. Instead of crashing the program +/// when verification fails, report a message to the console (for FileCheck +/// usage) and continue execution as if nothing happened.
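For orientation, the new verifier is reachable two ways: as the legacy pass registered below under the name "verify-safepoint-ir", and via the verifySafepointIR(Function &) helper this file defines. A minimal sketch of the direct entry point (checkAfterRewrite is an illustrative wrapper, not part of the patch):

#include "llvm/IR/Function.h"
#include "llvm/IR/SafepointIRVerifier.h"

// Verify one function right after a GC-related rewrite. Under the
// -safepoint-ir-verifier-print-only option declared just below, failures
// are printed for FileCheck instead of aborting the process.
static void checkAfterRewrite(llvm::Function &F) {
  llvm::verifySafepointIR(F);
}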
+static cl::opt<bool> PrintOnly("safepoint-ir-verifier-print-only", + cl::init(false)); + +static void Verify(const Function &F, const DominatorTree &DT); + +struct SafepointIRVerifier : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + DominatorTree DT; + SafepointIRVerifier() : FunctionPass(ID) { + initializeSafepointIRVerifierPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override { + DT.recalculate(F); + Verify(F, DT); + return false; // no modifications + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + } + + StringRef getPassName() const override { return "safepoint verifier"; } +}; + +void llvm::verifySafepointIR(Function &F) { + SafepointIRVerifier pass; + pass.runOnFunction(F); +} + +char SafepointIRVerifier::ID = 0; + +FunctionPass *llvm::createSafepointIRVerifierPass() { + return new SafepointIRVerifier(); +} + +INITIALIZE_PASS_BEGIN(SafepointIRVerifier, "verify-safepoint-ir", + "Safepoint IR Verifier", false, true) +INITIALIZE_PASS_END(SafepointIRVerifier, "verify-safepoint-ir", + "Safepoint IR Verifier", false, true) + +static bool isGCPointerType(Type *T) { + if (auto *PT = dyn_cast<PointerType>(T)) + // For the sake of this example GC, we arbitrarily pick addrspace(1) as our + // GC managed heap. We know that a pointer into this heap needs to be + // updated and that no other pointer does. + return (1 == PT->getAddressSpace()); + return false; +} + +static bool containsGCPtrType(Type *Ty) { + if (isGCPointerType(Ty)) + return true; + if (VectorType *VT = dyn_cast<VectorType>(Ty)) + return isGCPointerType(VT->getScalarType()); + if (ArrayType *AT = dyn_cast<ArrayType>(Ty)) + return containsGCPtrType(AT->getElementType()); + if (StructType *ST = dyn_cast<StructType>(Ty)) + return std::any_of(ST->subtypes().begin(), ST->subtypes().end(), + containsGCPtrType); + return false; +} + +// Debugging aid -- prints a [Begin, End) range of values. +template<typename IteratorTy> +static void PrintValueSet(raw_ostream &OS, IteratorTy Begin, IteratorTy End) { + OS << "[ "; + while (Begin != End) { + OS << **Begin << " "; + ++Begin; + } + OS << "]"; +} + +/// The verifier algorithm is phrased in terms of availability. The set of +/// values "available" at a given point in the control flow graph is the set of +/// correctly relocated values at that point, and is a subset of the set of +/// definitions dominating that point. + +/// State we compute and track per basic block. +struct BasicBlockState { + // Set of values available coming in, before the phi nodes + DenseSet<const Value *> AvailableIn; + + // Set of values available going out + DenseSet<const Value *> AvailableOut; + + // AvailableOut minus AvailableIn. + // All elements are Instructions + DenseSet<const Value *> Contribution; + + // True if this block contains a safepoint and thus AvailableIn does not + // contribute to AvailableOut. + bool Cleared = false; +}; + + +/// Gather all the definitions dominating the start of BB into Result. This is +/// simply the Defs introduced by every dominating basic block and the function +/// arguments.
+static void GatherDominatingDefs(const BasicBlock *BB, + DenseSet<const Value *> &Result, + const DominatorTree &DT, + DenseMap<const BasicBlock *, BasicBlockState *> &BlockMap) { + DomTreeNode *DTN = DT[const_cast<BasicBlock *>(BB)]; + + while (DTN->getIDom()) { + DTN = DTN->getIDom(); + const auto &Defs = BlockMap[DTN->getBlock()]->Contribution; + Result.insert(Defs.begin(), Defs.end()); + // If this block is 'Cleared', then nothing LiveIn to this block can be + // available after this block completes. Note: This turns out to be + // really important for reducing memory consumption of the initial available + // sets and thus peak memory usage by this verifier. + if (BlockMap[DTN->getBlock()]->Cleared) + return; + } + + for (const Argument &A : BB->getParent()->args()) + if (containsGCPtrType(A.getType())) + Result.insert(&A); +} + +/// Model the effect of an instruction on the set of available values. +static void TransferInstruction(const Instruction &I, bool &Cleared, + DenseSet<const Value *> &Available) { + if (isStatepoint(I)) { + Cleared = true; + Available.clear(); + } else if (containsGCPtrType(I.getType())) + Available.insert(&I); +} + +/// Compute the AvailableOut set for BB from its BasicBlockState BBS. FirstPass +/// is set when the verifier first computes the AvailableOut set for BB. +static void TransferBlock(const BasicBlock *BB, + BasicBlockState &BBS, bool FirstPass) { + + const DenseSet<const Value *> &AvailableIn = BBS.AvailableIn; + DenseSet<const Value *> &AvailableOut = BBS.AvailableOut; + + if (BBS.Cleared) { + // AvailableOut does not change no matter how the input changes, just + // leave it be. We need to force this calculation the first time so that + // we have an AvailableOut at all. + if (FirstPass) { + AvailableOut = BBS.Contribution; + } + } else { + // Otherwise, we need to reduce the AvailableOut set by things which are no + // longer in our AvailableIn + DenseSet<const Value *> Temp = BBS.Contribution; + set_union(Temp, AvailableIn); + AvailableOut = std::move(Temp); + } + + DEBUG(dbgs() << "Transferred block " << BB->getName() << " from "; + PrintValueSet(dbgs(), AvailableIn.begin(), AvailableIn.end()); + dbgs() << " to "; + PrintValueSet(dbgs(), AvailableOut.begin(), AvailableOut.end()); + dbgs() << "\n";); +} + +/// A given derived pointer can have multiple base pointers through phi/selects. +/// This type indicates when the base pointer is exclusively constant +/// (ExclusivelySomeConstant), and if that constant is proven to be exclusively +/// null, we record that as ExclusivelyNull. In all other cases, the BaseType is +/// NonConstant. +enum BaseType { + NonConstant = 1, // Base pointer is not exclusively constant. + ExclusivelyNull, + ExclusivelySomeConstant // Base pointers for a given derived pointer come from + // a set of constants, but they are not exclusively + // null. +}; + +/// Return the BaseType for Val, which states whether Val is exclusively +/// derived from constant/null, or not exclusively derived from constant. +/// Val is exclusively derived from a constant base when all operands of phis +/// and selects are derived from a constant base. +static enum BaseType getBaseType(const Value *Val) { + + SmallVector<const Value *, 32> Worklist; + DenseSet<const Value *> Visited; + bool isExclusivelyDerivedFromNull = true; + Worklist.push_back(Val); + // Strip through all the bitcasts and geps to get the base pointer.
Also check for + // the exclusive value when there can be multiple base pointers (through phis + // or selects). + while (!Worklist.empty()) { + const Value *V = Worklist.pop_back_val(); + if (!Visited.insert(V).second) + continue; + + if (const auto *CI = dyn_cast<CastInst>(V)) { + Worklist.push_back(CI->stripPointerCasts()); + continue; + } + if (const auto *GEP = dyn_cast<GetElementPtrInst>(V)) { + Worklist.push_back(GEP->getPointerOperand()); + continue; + } + // Push all the incoming values of phi node into the worklist for + // processing. + if (const auto *PN = dyn_cast<PHINode>(V)) { + for (Value *InV : PN->incoming_values()) + Worklist.push_back(InV); + continue; + } + if (const auto *SI = dyn_cast<SelectInst>(V)) { + // Push in the true and false values + Worklist.push_back(SI->getTrueValue()); + Worklist.push_back(SI->getFalseValue()); + continue; + } + if (isa<Constant>(V)) { + // We found at least one base pointer which is non-null, so this derived + // pointer is not exclusively derived from null. + if (V != Constant::getNullValue(V->getType())) + isExclusivelyDerivedFromNull = false; + // Continue processing the remaining values to make sure the base is + // exclusively constant. + continue; + } + // At this point, we know that the base pointer is not exclusively + // constant. + return BaseType::NonConstant; + } + // Now, we know that the base pointer is exclusively constant, but we need to + // differentiate between exclusive null constant and non-null constant. + return isExclusivelyDerivedFromNull ? BaseType::ExclusivelyNull + : BaseType::ExclusivelySomeConstant; +} + +static void Verify(const Function &F, const DominatorTree &DT) { + SpecificBumpPtrAllocator<BasicBlockState> BSAllocator; + DenseMap<const BasicBlock *, BasicBlockState *> BlockMap; + + DEBUG(dbgs() << "Verifying gc pointers in function: " << F.getName() << "\n"); + if (PrintOnly) + dbgs() << "Verifying gc pointers in function: " << F.getName() << "\n"; + + + for (const BasicBlock &BB : F) { + BasicBlockState *BBS = new (BSAllocator.Allocate()) BasicBlockState; + for (const auto &I : BB) + TransferInstruction(I, BBS->Cleared, BBS->Contribution); + BlockMap[&BB] = BBS; + } + + for (auto &BBI : BlockMap) { + GatherDominatingDefs(BBI.first, BBI.second->AvailableIn, DT, BlockMap); + TransferBlock(BBI.first, *BBI.second, true); + } + + SetVector<const BasicBlock *> Worklist; + for (auto &BBI : BlockMap) + Worklist.insert(BBI.first); + + // This loop iterates the AvailableIn and AvailableOut sets to a fixed point. + // The AvailableIn and AvailableOut sets decrease as we iterate. + while (!Worklist.empty()) { + const BasicBlock *BB = Worklist.pop_back_val(); + BasicBlockState *BBS = BlockMap[BB]; + + size_t OldInCount = BBS->AvailableIn.size(); + for (const BasicBlock *PBB : predecessors(BB)) + set_intersect(BBS->AvailableIn, BlockMap[PBB]->AvailableOut); + + if (OldInCount == BBS->AvailableIn.size()) + continue; + + assert(OldInCount > BBS->AvailableIn.size() && "invariant!"); + + size_t OldOutCount = BBS->AvailableOut.size(); + TransferBlock(BB, *BBS, false); + if (OldOutCount != BBS->AvailableOut.size()) { + assert(OldOutCount > BBS->AvailableOut.size() && "invariant!"); + Worklist.insert(succ_begin(BB), succ_end(BB)); + } + } + + // We now have all the information we need to decide if the use of a heap + // reference is legal or not, given our safepoint semantics.
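As an aside, the loop above is a classic shrinking dataflow: AvailableIn(BB) is the intersection of AvailableOut over BB's predecessors, so the sets only ever shrink and the fixed point must be reached. A standalone miniature of the meet step, with std::set of ints standing in for DenseSet of Values (purely illustrative):

    #include <set>

    using ValueSet = std::set<int>;

    // Mirrors llvm::set_intersect: drop from In anything absent from PredOut.
    static void intersectInto(ValueSet &In, const ValueSet &PredOut) {
      for (auto It = In.begin(); It != In.end();) {
        if (PredOut.count(*It))
          ++It;
        else
          It = In.erase(It);
      }
    }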
+ + bool AnyInvalidUses = false; + + auto ReportInvalidUse = [&AnyInvalidUses](const Value &V, + const Instruction &I) { + errs() << "Illegal use of unrelocated value found!\n"; + errs() << "Def: " << V << "\n"; + errs() << "Use: " << I << "\n"; + if (!PrintOnly) + abort(); + AnyInvalidUses = true; + }; + + auto isNotExclusivelyConstantDerived = [](const Value *V) { + return getBaseType(V) == BaseType::NonConstant; + }; + + for (const BasicBlock &BB : F) { + // We destructively modify AvailableIn as we traverse the block instruction + // by instruction. + DenseSet<const Value *> &AvailableSet = BlockMap[&BB]->AvailableIn; + for (const Instruction &I : BB) { + if (const PHINode *PN = dyn_cast<PHINode>(&I)) { + if (containsGCPtrType(PN->getType())) + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + const BasicBlock *InBB = PN->getIncomingBlock(i); + const Value *InValue = PN->getIncomingValue(i); + + if (isNotExclusivelyConstantDerived(InValue) && + !BlockMap[InBB]->AvailableOut.count(InValue)) + ReportInvalidUse(*InValue, *PN); + } + } else if (isa<CmpInst>(I) && + containsGCPtrType(I.getOperand(0)->getType())) { + Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); + enum BaseType baseTyLHS = getBaseType(LHS), + baseTyRHS = getBaseType(RHS); + + // Returns true if LHS and RHS are unrelocated pointers and they are + // valid unrelocated uses. + auto hasValidUnrelocatedUse = [&AvailableSet, baseTyLHS, baseTyRHS, &LHS, &RHS] () { + // A cmp instruction has valid unrelocated pointer operands only if + // both operands are unrelocated pointers. + // In the comparison between two pointers, if one is an unrelocated + // use, the other *should be* an unrelocated use, for this + // instruction to contain valid unrelocated uses. This unrelocated + // use can be a null constant as well, or another unrelocated + // pointer. + if (AvailableSet.count(LHS) || AvailableSet.count(RHS)) + return false; + // Constant pointers (that are not exclusively null) may have + // meaning in different VMs, so we cannot reorder the compare + // against constant pointers before the safepoint. In other words, + // comparison of an unrelocated use against a non-null constant + // may be invalid. + if ((baseTyLHS == BaseType::ExclusivelySomeConstant && + baseTyRHS == BaseType::NonConstant) || + (baseTyLHS == BaseType::NonConstant && + baseTyRHS == BaseType::ExclusivelySomeConstant)) + return false; + // All other cases are the valid cases enumerated below: + // 1. Comparison between an exclusively derived null pointer and a + // constant base pointer. + // 2. Comparison between an exclusively derived null pointer and a + // non-constant unrelocated base pointer. + // 3. Comparison between two unrelocated pointers. + return true; + }; + if (!hasValidUnrelocatedUse()) { + // Print out all non-constant derived pointers that are unrelocated + // uses, which are invalid.
+ if (baseTyLHS == BaseType::NonConstant && !AvailableSet.count(LHS)) + ReportInvalidUse(*LHS, I); + if (baseTyRHS == BaseType::NonConstant && !AvailableSet.count(RHS)) + ReportInvalidUse(*RHS, I); + } + } else { + for (const Value *V : I.operands()) + if (containsGCPtrType(V->getType()) && + isNotExclusivelyConstantDerived(V) && !AvailableSet.count(V)) + ReportInvalidUse(*V, I); + } + + bool Cleared = false; + TransferInstruction(I, Cleared, AvailableSet); + (void)Cleared; + } + } + + if (PrintOnly && !AnyInvalidUses) { + dbgs() << "No illegal uses found by SafepointIRVerifier in: " << F.getName() + << "\n"; + } +} diff --git a/lib/IR/Type.cpp b/lib/IR/Type.cpp index 44fe5e48c720c..20e9c2b5fff25 100644 --- a/lib/IR/Type.cpp +++ b/lib/IR/Type.cpp @@ -538,7 +538,7 @@ bool CompositeType::indexValid(const Value *V) const { if (auto *STy = dyn_cast<StructType>(this)) { // Structure indexes require (vectors of) 32-bit integer constants. In the // vector case all of the indices must be equal. - if (!V->getType()->getScalarType()->isIntegerTy(32)) + if (!V->getType()->isIntOrIntVectorTy(32)) return false; const Constant *C = dyn_cast<Constant>(V); if (C && V->getType()->isVectorTy()) diff --git a/lib/IR/Verifier.cpp b/lib/IR/Verifier.cpp index 819f63520c744..454a56a769230 100644 --- a/lib/IR/Verifier.cpp +++ b/lib/IR/Verifier.cpp @@ -2504,15 +2504,13 @@ void Verifier::visitPtrToIntInst(PtrToIntInst &I) { Type *SrcTy = I.getOperand(0)->getType(); Type *DestTy = I.getType(); - Assert(SrcTy->getScalarType()->isPointerTy(), - "PtrToInt source must be pointer", &I); + Assert(SrcTy->isPtrOrPtrVectorTy(), "PtrToInt source must be pointer", &I); if (auto *PTy = dyn_cast<PointerType>(SrcTy->getScalarType())) Assert(!DL.isNonIntegralPointerType(PTy), "ptrtoint not supported for non-integral pointers"); - Assert(DestTy->getScalarType()->isIntegerTy(), - "PtrToInt result must be integral", &I); + Assert(DestTy->isIntOrIntVectorTy(), "PtrToInt result must be integral", &I); Assert(SrcTy->isVectorTy() == DestTy->isVectorTy(), "PtrToInt type mismatch", &I); @@ -2531,10 +2529,9 @@ void Verifier::visitIntToPtrInst(IntToPtrInst &I) { Type *SrcTy = I.getOperand(0)->getType(); Type *DestTy = I.getType(); - Assert(SrcTy->getScalarType()->isIntegerTy(), + Assert(SrcTy->isIntOrIntVectorTy(), "IntToPtr source must be an integral", &I); - Assert(DestTy->getScalarType()->isPointerTy(), - "IntToPtr result must be a pointer", &I); + Assert(DestTy->isPtrOrPtrVectorTy(), "IntToPtr result must be a pointer", &I); if (auto *PTy = dyn_cast<PointerType>(DestTy->getScalarType())) Assert(!DL.isNonIntegralPointerType(PTy), @@ -2952,11 +2949,10 @@ void Verifier::visitICmpInst(ICmpInst &IC) { Assert(Op0Ty == Op1Ty, "Both operands to ICmp instruction are not of the same type!", &IC); // Check that the operands are the right type - Assert(Op0Ty->isIntOrIntVectorTy() || Op0Ty->getScalarType()->isPointerTy(), + Assert(Op0Ty->isIntOrIntVectorTy() || Op0Ty->isPtrOrPtrVectorTy(), "Invalid operand types for ICmp instruction", &IC); // Check that the predicate is valid. - Assert(IC.getPredicate() >= CmpInst::FIRST_ICMP_PREDICATE && - IC.getPredicate() <= CmpInst::LAST_ICMP_PREDICATE, + Assert(IC.isIntPredicate(), "Invalid predicate in ICmp instruction!", &IC); visitInstruction(IC); @@ -2972,8 +2968,7 @@ void Verifier::visitFCmpInst(FCmpInst &FC) { Assert(Op0Ty->isFPOrFPVectorTy(), "Invalid operand types for FCmp instruction", &FC); // Check that the predicate is valid. 
- Assert(FC.getPredicate() >= CmpInst::FIRST_FCMP_PREDICATE && - FC.getPredicate() <= CmpInst::LAST_FCMP_PREDICATE, + Assert(FC.isFPPredicate(), "Invalid predicate in FCmp instruction!", &FC); visitInstruction(FC); @@ -3011,7 +3006,7 @@ void Verifier::visitGetElementPtrInst(GetElementPtrInst &GEP) { GetElementPtrInst::getIndexedType(GEP.getSourceElementType(), Idxs); Assert(ElTy, "Invalid indices for GEP pointer type!", &GEP); - Assert(GEP.getType()->getScalarType()->isPointerTy() && + Assert(GEP.getType()->isPtrOrPtrVectorTy() && GEP.getResultElementType() == ElTy, "GEP is not of right type for indices!", &GEP, ElTy); @@ -3027,7 +3022,7 @@ void Verifier::visitGetElementPtrInst(GetElementPtrInst &GEP) { unsigned IndexWidth = IndexTy->getVectorNumElements(); Assert(IndexWidth == GEPWidth, "Invalid GEP index vector width", &GEP); } - Assert(IndexTy->getScalarType()->isIntegerTy(), + Assert(IndexTy->isIntOrIntVectorTy(), "All GEP indices should be of integer type"); } } @@ -3113,7 +3108,7 @@ void Verifier::visitLoadInst(LoadInst &LI) { ElTy, &LI); checkAtomicMemAccessSize(ElTy, &LI); } else { - Assert(LI.getSynchScope() == CrossThread, + Assert(LI.getSyncScopeID() == SyncScope::System, "Non-atomic load cannot have SynchronizationScope specified", &LI); } @@ -3142,7 +3137,7 @@ void Verifier::visitStoreInst(StoreInst &SI) { ElTy, &SI); checkAtomicMemAccessSize(ElTy, &SI); } else { - Assert(SI.getSynchScope() == CrossThread, + Assert(SI.getSyncScopeID() == SyncScope::System, "Non-atomic store cannot have SynchronizationScope specified", &SI); } visitInstruction(SI); @@ -4049,6 +4044,73 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) { "incorrect alignment of the source argument", CS); break; } + case Intrinsic::memmove_element_unordered_atomic: { + auto *MI = cast<ElementUnorderedAtomicMemMoveInst>(CS.getInstruction()); + + ConstantInt *ElementSizeCI = + dyn_cast<ConstantInt>(MI->getRawElementSizeInBytes()); + Assert(ElementSizeCI, + "element size of the element-wise unordered atomic memory " + "intrinsic must be a constant int", + CS); + const APInt &ElementSizeVal = ElementSizeCI->getValue(); + Assert(ElementSizeVal.isPowerOf2(), + "element size of the element-wise atomic memory intrinsic " + "must be a power of 2", + CS); + + if (auto *LengthCI = dyn_cast<ConstantInt>(MI->getLength())) { + uint64_t Length = LengthCI->getZExtValue(); + uint64_t ElementSize = MI->getElementSizeInBytes(); + Assert((Length % ElementSize) == 0, + "constant length must be a multiple of the element size in the " + "element-wise atomic memory intrinsic", + CS); + } + + auto IsValidAlignment = [&](uint64_t Alignment) { + return isPowerOf2_64(Alignment) && ElementSizeVal.ule(Alignment); + }; + uint64_t DstAlignment = CS.getParamAlignment(0), + SrcAlignment = CS.getParamAlignment(1); + Assert(IsValidAlignment(DstAlignment), + "incorrect alignment of the destination argument", CS); + Assert(IsValidAlignment(SrcAlignment), + "incorrect alignment of the source argument", CS); + break; + } + case Intrinsic::memset_element_unordered_atomic: { + auto *MI = cast<ElementUnorderedAtomicMemSetInst>(CS.getInstruction()); + + ConstantInt *ElementSizeCI = + dyn_cast<ConstantInt>(MI->getRawElementSizeInBytes()); + Assert(ElementSizeCI, + "element size of the element-wise unordered atomic memory " + "intrinsic must be a constant int", + CS); + const APInt &ElementSizeVal = ElementSizeCI->getValue(); + Assert(ElementSizeVal.isPowerOf2(), + "element size of the element-wise atomic memory intrinsic " + "must be a 
power of 2", + CS); + + if (auto *LengthCI = dyn_cast<ConstantInt>(MI->getLength())) { + uint64_t Length = LengthCI->getZExtValue(); + uint64_t ElementSize = MI->getElementSizeInBytes(); + Assert((Length % ElementSize) == 0, + "constant length must be a multiple of the element size in the " + "element-wise atomic memory intrinsic", + CS); + } + + auto IsValidAlignment = [&](uint64_t Alignment) { + return isPowerOf2_64(Alignment) && ElementSizeVal.ule(Alignment); + }; + uint64_t DstAlignment = CS.getParamAlignment(0); + Assert(IsValidAlignment(DstAlignment), + "incorrect alignment of the destination argument", CS); + break; + } case Intrinsic::gcroot: case Intrinsic::gcwrite: case Intrinsic::gcread: @@ -4253,7 +4315,7 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) { // relocated pointer. It can be casted to the correct type later if it's // desired. However, they must have the same address space and 'vectorness' GCRelocateInst &Relocate = cast<GCRelocateInst>(*CS.getInstruction()); - Assert(Relocate.getDerivedPtr()->getType()->getScalarType()->isPointerTy(), + Assert(Relocate.getDerivedPtr()->getType()->isPtrOrPtrVectorTy(), "gc.relocate: relocated value must be a gc pointer", CS); auto ResultType = CS.getType(); diff --git a/lib/LTO/LTO.cpp b/lib/LTO/LTO.cpp index 68b8c9fcb939f..19973946ac5a6 100644 --- a/lib/LTO/LTO.cpp +++ b/lib/LTO/LTO.cpp @@ -665,6 +665,15 @@ Error LTO::addThinLTO(BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms, auto GUID = GlobalValue::getGUID(GlobalValue::getGlobalIdentifier( Sym.getIRName(), GlobalValue::ExternalLinkage, "")); ThinLTO.PrevailingModuleForGUID[GUID] = BM.getModuleIdentifier(); + + // For linker redefined symbols (via --wrap or --defsym) we want to + // switch the linkage to `weak` to prevent IPOs from happening. + // Find the summary in the module for this very GV and record the new + // linkage so that we can switch it when we import the GV. + if (Res.LinkerRedefined) + if (auto S = ThinLTO.CombinedIndex.findSummaryInModule( + GUID, BM.getModuleIdentifier())) + S->setLinkage(GlobalValue::WeakAnyLinkage); } } } @@ -1021,7 +1030,7 @@ Error LTO::runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache, // Collect for each module the list of function it defines (GUID -> // Summary). - StringMap<std::map<GlobalValue::GUID, GlobalValueSummary *>> + StringMap<GVSummaryMapTy> ModuleToDefinedGVSummaries(ThinLTO.ModuleMap.size()); ThinLTO.CombinedIndex.collectDefinedGVSummariesPerModule( ModuleToDefinedGVSummaries); diff --git a/lib/Linker/IRMover.cpp b/lib/Linker/IRMover.cpp index defad1904989d..f486e525b5e76 100644 --- a/lib/Linker/IRMover.cpp +++ b/lib/Linker/IRMover.cpp @@ -1256,6 +1256,18 @@ Error IRLinker::linkModuleFlagsMetadata() { return Error::success(); } +/// Return InlineAsm adjusted with target-specific directives if required. +/// For ARM and Thumb, we have to add directives to select the appropriate ISA +/// to support mixing module-level inline assembly from ARM and Thumb modules. +static std::string adjustInlineAsm(const std::string &InlineAsm, + const Triple &Triple) { + if (Triple.getArch() == Triple::thumb || Triple.getArch() == Triple::thumbeb) + return ".text\n.balign 2\n.thumb\n" + InlineAsm; + if (Triple.getArch() == Triple::arm || Triple.getArch() == Triple::armeb) + return ".text\n.balign 4\n.arm\n" + InlineAsm; + return InlineAsm; +} + Error IRLinker::run() { // Ensure metadata materialized before value mapping. 
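A minimal illustration of what the adjustInlineAsm helper above produces for a Thumb module (the module-level asm string here is hypothetical):

    #include <iostream>
    #include <string>

    int main() {
      // Mirrors the Thumb branch of adjustInlineAsm: prefix directives that
      // re-select the Thumb ISA state before the module's own asm text.
      std::string ModuleAsm = ".globl f\nf: bx lr"; // hypothetical input
      std::cout << ".text\n.balign 2\n.thumb\n" + ModuleAsm << "\n";
    }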
if (SrcM->getMaterializer()) @@ -1293,11 +1305,13 @@ Error IRLinker::run() { // Append the module inline asm string. if (!IsPerformingImport && !SrcM->getModuleInlineAsm().empty()) { + std::string SrcModuleInlineAsm = adjustInlineAsm(SrcM->getModuleInlineAsm(), + SrcTriple); if (DstM.getModuleInlineAsm().empty()) - DstM.setModuleInlineAsm(SrcM->getModuleInlineAsm()); + DstM.setModuleInlineAsm(SrcModuleInlineAsm); else DstM.setModuleInlineAsm(DstM.getModuleInlineAsm() + "\n" + - SrcM->getModuleInlineAsm()); + SrcModuleInlineAsm); } // Loop over all of the linked values to compute type mappings. diff --git a/lib/MC/ELFObjectWriter.cpp b/lib/MC/ELFObjectWriter.cpp index 30f357826805a..c8dd630119439 100644 --- a/lib/MC/ELFObjectWriter.cpp +++ b/lib/MC/ELFObjectWriter.cpp @@ -19,6 +19,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" #include "llvm/BinaryFormat/ELF.h" +#include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCAssembler.h" @@ -26,6 +27,7 @@ #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixup.h" +#include "llvm/MC/MCFixupKindInfo.h" #include "llvm/MC/MCFragment.h" #include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCSection.h" @@ -204,8 +206,7 @@ public: void recordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, const MCFixup &Fixup, - MCValue Target, bool &IsPCRel, - uint64_t &FixedValue) override; + MCValue Target, uint64_t &FixedValue) override; // Map from a signature symbol to the group section index using RevGroupMapTy = DenseMap<const MCSymbol *, unsigned>; @@ -626,7 +627,10 @@ void ELFObjectWriter::recordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target, - bool &IsPCRel, uint64_t &FixedValue) { + uint64_t &FixedValue) { + MCAsmBackend &Backend = Asm.getBackend(); + bool IsPCRel = Backend.getFixupKindInfo(Fixup.getKind()).Flags & + MCFixupKindInfo::FKF_IsPCRel; const MCSectionELF &FixupSection = cast<MCSectionELF>(*Fragment->getParent()); uint64_t C = Target.getConstant(); uint64_t FixupOffset = Layout.getFragmentOffset(Fragment) + Fixup.getOffset(); diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp index 0318d916aa49f..eaf6f19326eb4 100644 --- a/lib/MC/MCAssembler.cpp +++ b/lib/MC/MCAssembler.cpp @@ -653,16 +653,14 @@ MCAssembler::handleFixup(const MCAsmLayout &Layout, MCFragment &F, // Evaluate the fixup. MCValue Target; uint64_t FixedValue; - bool IsPCRel = Backend.getFixupKindInfo(Fixup.getKind()).Flags & - MCFixupKindInfo::FKF_IsPCRel; - if (!evaluateFixup(Layout, Fixup, &F, Target, FixedValue)) { + bool IsResolved = evaluateFixup(Layout, Fixup, &F, Target, FixedValue); + if (!IsResolved) { // The fixup was unresolved, we need a relocation. Inform the object // writer of the relocation, and give it an opportunity to adjust the // fixup value if need be. 
- getWriter().recordRelocation(*this, Layout, &F, Fixup, Target, IsPCRel, - FixedValue); + getWriter().recordRelocation(*this, Layout, &F, Fixup, Target, FixedValue); } - return std::make_tuple(Target, FixedValue, IsPCRel); + return std::make_tuple(Target, FixedValue, IsResolved); } void MCAssembler::layout(MCAsmLayout &Layout) { @@ -738,12 +736,12 @@ void MCAssembler::layout(MCAsmLayout &Layout) { llvm_unreachable("Unknown fragment with fixups!"); for (const MCFixup &Fixup : Fixups) { uint64_t FixedValue; - bool IsPCRel; + bool IsResolved; MCValue Target; - std::tie(Target, FixedValue, IsPCRel) = + std::tie(Target, FixedValue, IsResolved) = handleFixup(Layout, Frag, Fixup); getBackend().applyFixup(*this, Fixup, Target, Contents, FixedValue, - IsPCRel); + IsResolved); } } } diff --git a/lib/MC/MachObjectWriter.cpp b/lib/MC/MachObjectWriter.cpp index c4e7cdbe095e7..62bf0a58fdfa9 100644 --- a/lib/MC/MachObjectWriter.cpp +++ b/lib/MC/MachObjectWriter.cpp @@ -449,7 +449,7 @@ void MachObjectWriter::recordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target, - bool &IsPCRel, uint64_t &FixedValue) { + uint64_t &FixedValue) { TargetObjectWriter->recordRelocation(this, Asm, Layout, Fragment, Fixup, Target, FixedValue); } diff --git a/lib/MC/WasmObjectWriter.cpp b/lib/MC/WasmObjectWriter.cpp index 82352cb50c70c..0d31f65c49d9f 100644 --- a/lib/MC/WasmObjectWriter.cpp +++ b/lib/MC/WasmObjectWriter.cpp @@ -36,8 +36,7 @@ using namespace llvm; -#undef DEBUG_TYPE -#define DEBUG_TYPE "reloc-info" +#define DEBUG_TYPE "mc" namespace { @@ -153,7 +152,7 @@ struct WasmRelocationEntry { } void print(raw_ostream &Out) const { - Out << "Off=" << Offset << ", Sym=" << Symbol << ", Addend=" << Addend + Out << "Off=" << Offset << ", Sym=" << *Symbol << ", Addend=" << Addend << ", Type=" << Type << ", FixupSection=" << FixupSection; } @@ -199,6 +198,7 @@ class WasmObjectWriter : public MCObjectWriter { DenseMap<WasmFunctionType, int32_t, WasmFunctionTypeDenseMapInfo> FunctionTypeIndices; + SmallVector<WasmFunctionType, 4> FunctionTypes; // TargetObjectWriter wrappers. 
bool is64Bit() const { return TargetObjectWriter->is64Bit(); } @@ -224,6 +224,7 @@ private: SymbolIndices.clear(); IndirectSymbolIndices.clear(); FunctionTypeIndices.clear(); + FunctionTypes.clear(); MCObjectWriter::reset(); } @@ -231,8 +232,7 @@ private: void recordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, const MCFixup &Fixup, - MCValue Target, bool &IsPCRel, - uint64_t &FixedValue) override; + MCValue Target, uint64_t &FixedValue) override; void executePostLayoutBinding(MCAssembler &Asm, const MCAsmLayout &Layout) override; @@ -276,6 +276,8 @@ private: void writeRelocations(ArrayRef<WasmRelocationEntry> Relocations, uint64_t HeaderSize); uint32_t getRelocationIndexValue(const WasmRelocationEntry &RelEntry); + uint32_t getFunctionType(const MCSymbolWasm& Symbol); + uint32_t registerFunctionType(const MCSymbolWasm& Symbol); }; } // end anonymous namespace @@ -350,7 +352,10 @@ void WasmObjectWriter::recordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target, - bool &IsPCRel, uint64_t &FixedValue) { + uint64_t &FixedValue) { + MCAsmBackend &Backend = Asm.getBackend(); + bool IsPCRel = Backend.getFixupKindInfo(Fixup.getKind()).Flags & + MCFixupKindInfo::FKF_IsPCRel; const auto &FixupSection = cast<MCSectionWasm>(*Fragment->getParent()); uint64_t C = Target.getConstant(); uint64_t FixupOffset = Layout.getFragmentOffset(Fragment) + Fixup.getOffset(); @@ -401,15 +406,11 @@ void WasmObjectWriter::recordRelocation(MCAssembler &Asm, const MCSymbolRefExpr *RefA = Target.getSymA(); const auto *SymA = RefA ? cast<MCSymbolWasm>(&RefA->getSymbol()) : nullptr; - bool ViaWeakRef = false; if (SymA && SymA->isVariable()) { const MCExpr *Expr = SymA->getVariableValue(); - if (const auto *Inner = dyn_cast<MCSymbolRefExpr>(Expr)) { - if (Inner->getKind() == MCSymbolRefExpr::VK_WEAKREF) { - SymA = cast<MCSymbolWasm>(&Inner->getSymbol()); - ViaWeakRef = true; - } - } + const auto *Inner = cast<MCSymbolRefExpr>(Expr); + if (Inner->getKind() == MCSymbolRefExpr::VK_WEAKREF) + llvm_unreachable("weakref used in reloc not yet implemented"); } // Put any constant offset in an addend. Offsets can be negative, and @@ -417,12 +418,8 @@ void WasmObjectWriter::recordRelocation(MCAssembler &Asm, // be negative and don't wrap. 
FixedValue = 0; - if (SymA) { - if (ViaWeakRef) - llvm_unreachable("weakref used in reloc not yet implemented"); - else - SymA->setUsedInReloc(); - } + if (SymA) + SymA->setUsedInReloc(); assert(!IsPCRel); assert(SymA); @@ -493,7 +490,7 @@ uint32_t WasmObjectWriter::getRelocationIndexValue( case wasm::R_WEBASSEMBLY_TABLE_INDEX_SLEB: case wasm::R_WEBASSEMBLY_TABLE_INDEX_I32: if (!IndirectSymbolIndices.count(RelEntry.Symbol)) - report_fatal_error("symbol not found table index space:" + + report_fatal_error("symbol not found table index space: " + RelEntry.Symbol->getName()); return IndirectSymbolIndices[RelEntry.Symbol]; case wasm::R_WEBASSEMBLY_FUNCTION_INDEX_LEB: @@ -502,12 +499,12 @@ uint32_t WasmObjectWriter::getRelocationIndexValue( case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_SLEB: case wasm::R_WEBASSEMBLY_GLOBAL_ADDR_I32: if (!SymbolIndices.count(RelEntry.Symbol)) - report_fatal_error("symbol not found function/global index space:" + + report_fatal_error("symbol not found function/global index space: " + RelEntry.Symbol->getName()); return SymbolIndices[RelEntry.Symbol]; case wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB: if (!TypeIndices.count(RelEntry.Symbol)) - report_fatal_error("symbol not found in type index space:" + + report_fatal_error("symbol not found in type index space: " + RelEntry.Symbol->getName()); return TypeIndices[RelEntry.Symbol]; default: @@ -913,6 +910,38 @@ void WasmObjectWriter::writeLinkingMetaDataSection( endSection(Section); } +uint32_t WasmObjectWriter::getFunctionType(const MCSymbolWasm& Symbol) { + assert(Symbol.isFunction()); + assert(TypeIndices.count(&Symbol)); + return TypeIndices[&Symbol]; +} + +uint32_t WasmObjectWriter::registerFunctionType(const MCSymbolWasm& Symbol) { + assert(Symbol.isFunction()); + + WasmFunctionType F; + if (Symbol.isVariable()) { + const MCExpr *Expr = Symbol.getVariableValue(); + auto *Inner = cast<MCSymbolRefExpr>(Expr); + const auto *ResolvedSym = cast<MCSymbolWasm>(&Inner->getSymbol()); + F.Returns = ResolvedSym->getReturns(); + F.Params = ResolvedSym->getParams(); + } else { + F.Returns = Symbol.getReturns(); + F.Params = Symbol.getParams(); + } + + auto Pair = + FunctionTypeIndices.insert(std::make_pair(F, FunctionTypes.size())); + if (Pair.second) + FunctionTypes.push_back(F); + TypeIndices[&Symbol] = Pair.first->second; + + DEBUG(dbgs() << "registerFunctionType: " << Symbol << " new:" << Pair.second << "\n"); + DEBUG(dbgs() << " -> type index: " << Pair.first->second << "\n"); + return Pair.first->second; +} + void WasmObjectWriter::writeObject(MCAssembler &Asm, const MCAsmLayout &Layout) { DEBUG(dbgs() << "WasmObjectWriter::writeObject\n"); @@ -920,7 +949,6 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm, wasm::ValType PtrType = is64Bit() ? wasm::ValType::I64 : wasm::ValType::I32; // Collect information from the available symbols. - SmallVector<WasmFunctionType, 4> FunctionTypes; SmallVector<WasmFunction, 4> Functions; SmallVector<uint32_t, 4> TableElems; SmallVector<WasmGlobal, 4> Globals; @@ -960,37 +988,27 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm, // Populate the Imports set. for (const MCSymbol &S : Asm.symbols()) { const auto &WS = static_cast<const MCSymbolWasm &>(S); - int32_t Type; - if (WS.isFunction()) { - // Prepare the function's type, if we haven't seen it yet. 
- WasmFunctionType F; - F.Returns = WS.getReturns(); - F.Params = WS.getParams(); - auto Pair = - FunctionTypeIndices.insert(std::make_pair(F, FunctionTypes.size())); - if (Pair.second) - FunctionTypes.push_back(F); - - Type = Pair.first->second; - } else { - Type = int32_t(PtrType); - } + if (WS.isTemporary()) + continue; + + if (WS.isFunction()) + registerFunctionType(WS); // If the symbol is not defined in this translation unit, import it. - if (!WS.isTemporary() && !WS.isDefined(/*SetUsed=*/false)) { + if (!WS.isDefined(/*SetUsed=*/false) || WS.isVariable()) { WasmImport Import; Import.ModuleName = WS.getModuleName(); Import.FieldName = WS.getName(); if (WS.isFunction()) { Import.Kind = wasm::WASM_EXTERNAL_FUNCTION; - Import.Type = Type; + Import.Type = getFunctionType(WS); SymbolIndices[&WS] = NumFuncImports; ++NumFuncImports; } else { Import.Kind = wasm::WASM_EXTERNAL_GLOBAL; - Import.Type = Type; + Import.Type = int32_t(PtrType); SymbolIndices[&WS] = NumGlobalImports; ++NumGlobalImports; } @@ -1082,10 +1100,6 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm, if (S.isTemporary() && S.getName().empty()) continue; - // Variable references (weak references) are handled in a second pass - if (S.isVariable()) - continue; - const auto &WS = static_cast<const MCSymbolWasm &>(S); DEBUG(dbgs() << "MCSymbol: '" << S << "'" << " isDefined=" << S.isDefined() << " isExternal=" @@ -1097,20 +1111,12 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm, if (WS.isWeak()) WeakSymbols.push_back(WS.getName()); + if (WS.isVariable()) + continue; + unsigned Index; if (WS.isFunction()) { - // Prepare the function's type, if we haven't seen it yet. - WasmFunctionType F; - F.Returns = WS.getReturns(); - F.Params = WS.getParams(); - auto Pair = - FunctionTypeIndices.insert(std::make_pair(F, FunctionTypes.size())); - if (Pair.second) - FunctionTypes.push_back(F); - - int32_t Type = Pair.first->second; - if (WS.isDefined(/*SetUsed=*/false)) { if (WS.getOffset() != 0) report_fatal_error( @@ -1125,21 +1131,21 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm, // Prepare the function. WasmFunction Func; - Func.Type = Type; + Func.Type = getFunctionType(WS); Func.Sym = &WS; SymbolIndices[&WS] = Index; Functions.push_back(Func); } else { - // Should be no such thing as weak undefined symbol - assert(!WS.isVariable()); - // An import; the index was assigned above. Index = SymbolIndices.find(&WS)->second; } + DEBUG(dbgs() << " -> function index: " << Index << "\n"); + // If needed, prepare the function to be called indirectly. 
- if (IsAddressTaken.count(&WS)) { + if (IsAddressTaken.count(&WS) != 0) { IndirectSymbolIndices[&WS] = TableElems.size(); + DEBUG(dbgs() << " -> adding to table: " << TableElems.size() << "\n"); TableElems.push_back(Index); } } else { @@ -1185,7 +1191,7 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm, Align->getMaxBytesToEmit()); DataBytes.resize(Size, Value); } else if (auto *Fill = dyn_cast<MCFillFragment>(&Frag)) { - DataBytes.insert(DataBytes.end(), Size, Fill->getValue()); + DataBytes.insert(DataBytes.end(), Fill->getSize(), Fill->getValue()); } else { const auto &DataFrag = cast<MCDataFragment>(Frag); const SmallVectorImpl<char> &Contents = DataFrag.getContents(); @@ -1205,11 +1211,12 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm, Global.InitialValue = DataSection.getSectionOffset(); Global.ImportIndex = 0; SymbolIndices[&WS] = Index; + DEBUG(dbgs() << " -> global index: " << Index << "\n"); Globals.push_back(Global); } // If the symbol is visible outside this translation unit, export it. - if (WS.isExternal() && WS.isDefined(/*SetUsed=*/false)) { + if ((WS.isExternal() && WS.isDefined(/*SetUsed=*/false))) { WasmExport Export; Export.FieldName = WS.getName(); Export.Index = Index; @@ -1217,26 +1224,28 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm, Export.Kind = wasm::WASM_EXTERNAL_FUNCTION; else Export.Kind = wasm::WASM_EXTERNAL_GLOBAL; + DEBUG(dbgs() << " -> export " << Exports.size() << "\n"); Exports.push_back(Export); } } - // Handle weak aliases + // Handle weak aliases. We need to process these in a separate pass because + // we need to have processed the target of the alias before the alias itself + // and the symbols are not necessarily ordered in this way. for (const MCSymbol &S : Asm.symbols()) { if (!S.isVariable()) continue; - assert(S.isExternal()); assert(S.isDefined(/*SetUsed=*/false)); const auto &WS = static_cast<const MCSymbolWasm &>(S); - - // Find the target symbol of this weak alias + // Find the target symbol of this weak alias and export that index const MCExpr *Expr = WS.getVariableValue(); - auto *Inner = dyn_cast<MCSymbolRefExpr>(Expr); + auto *Inner = cast<MCSymbolRefExpr>(Expr); const auto *ResolvedSym = cast<MCSymbolWasm>(&Inner->getSymbol()); + DEBUG(dbgs() << WS.getName() << ": weak alias of '" << *ResolvedSym << "'\n"); + assert(SymbolIndices.count(ResolvedSym) > 0); uint32_t Index = SymbolIndices.find(ResolvedSym)->second; - DEBUG(dbgs() << "Weak alias: '" << WS << "' -> '" << ResolvedSym << "' = " << Index << "\n"); - SymbolIndices[&WS] = Index; + DEBUG(dbgs() << " -> index:" << Index << "\n"); WasmExport Export; Export.FieldName = WS.getName(); @@ -1245,7 +1254,7 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm, Export.Kind = wasm::WASM_EXTERNAL_FUNCTION; else Export.Kind = wasm::WASM_EXTERNAL_GLOBAL; - WeakSymbols.push_back(Export.FieldName); + DEBUG(dbgs() << " -> export " << Exports.size() << "\n"); Exports.push_back(Export); } @@ -1254,15 +1263,7 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm, if (Fixup.Type != wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB) continue; - WasmFunctionType F; - F.Returns = Fixup.Symbol->getReturns(); - F.Params = Fixup.Symbol->getParams(); - auto Pair = - FunctionTypeIndices.insert(std::make_pair(F, FunctionTypes.size())); - if (Pair.second) - FunctionTypes.push_back(F); - - TypeIndices[Fixup.Symbol] = Pair.first->second; + registerFunctionType(*Fixup.Symbol); } // Write out the Wasm header. 
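Before moving on to the COFF writer, one aside: registerFunctionType above is the usual insert-or-get interning idiom. In miniature, with std::map standing in for the DenseMap and a string signature standing in for WasmFunctionType:

    #include <cstdint>
    #include <map>
    #include <string>
    #include <vector>

    // Returns a stable index for Sig, appending to Types only on first sight.
    static uint32_t internType(const std::string &Sig,
                               std::map<std::string, uint32_t> &Indices,
                               std::vector<std::string> &Types) {
      auto Pair = Indices.insert({Sig, static_cast<uint32_t>(Types.size())});
      if (Pair.second) // newly inserted: remember the type itself too
        Types.push_back(Sig);
      return Pair.first->second;
    }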
diff --git a/lib/MC/WinCOFFObjectWriter.cpp b/lib/MC/WinCOFFObjectWriter.cpp index fc5234950391b..956ae70b38d19 100644 --- a/lib/MC/WinCOFFObjectWriter.cpp +++ b/lib/MC/WinCOFFObjectWriter.cpp @@ -197,8 +197,7 @@ public: void recordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, const MCFixup &Fixup, - MCValue Target, bool &IsPCRel, - uint64_t &FixedValue) override; + MCValue Target, uint64_t &FixedValue) override; void createFileSymbols(MCAssembler &Asm); void assignSectionNumbers(); @@ -708,9 +707,11 @@ bool WinCOFFObjectWriter::isSymbolRefDifferenceFullyResolvedImpl( InSet, IsPCRel); } -void WinCOFFObjectWriter::recordRelocation( - MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, - const MCFixup &Fixup, MCValue Target, bool &IsPCRel, uint64_t &FixedValue) { +void WinCOFFObjectWriter::recordRelocation(MCAssembler &Asm, + const MCAsmLayout &Layout, + const MCFragment *Fragment, + const MCFixup &Fixup, MCValue Target, + uint64_t &FixedValue) { assert(Target.getSymA() && "Relocation must reference a symbol!"); const MCSymbol &A = Target.getSymA()->getSymbol(); diff --git a/lib/Object/WasmObjectFile.cpp b/lib/Object/WasmObjectFile.cpp index fff497ba55647..7f80bf0b83a0a 100644 --- a/lib/Object/WasmObjectFile.cpp +++ b/lib/Object/WasmObjectFile.cpp @@ -567,20 +567,16 @@ Error WasmObjectFile::parseExportSection(const uint8_t *Ptr, const uint8_t *End) Ex.Name = readString(Ptr); Ex.Kind = readUint8(Ptr); Ex.Index = readVaruint32(Ptr); + WasmSymbol::SymbolType ExportType; + bool MakeSymbol = false; switch (Ex.Kind) { case wasm::WASM_EXTERNAL_FUNCTION: - SymbolMap.try_emplace(Ex.Name, Symbols.size()); - Symbols.emplace_back(Ex.Name, WasmSymbol::SymbolType::FUNCTION_EXPORT, - Sections.size(), i); - DEBUG(dbgs() << "Adding export: " << Symbols.back() - << " sym index:" << Symbols.size() << "\n"); + ExportType = WasmSymbol::SymbolType::FUNCTION_EXPORT; + MakeSymbol = true; break; case wasm::WASM_EXTERNAL_GLOBAL: - SymbolMap.try_emplace(Ex.Name, Symbols.size()); - Symbols.emplace_back(Ex.Name, WasmSymbol::SymbolType::GLOBAL_EXPORT, - Sections.size(), i); - DEBUG(dbgs() << "Adding export: " << Symbols.back() - << " sym index:" << Symbols.size() << "\n"); + ExportType = WasmSymbol::SymbolType::GLOBAL_EXPORT; + MakeSymbol = true; break; case wasm::WASM_EXTERNAL_MEMORY: case wasm::WASM_EXTERNAL_TABLE: @@ -589,6 +585,20 @@ Error WasmObjectFile::parseExportSection(const uint8_t *Ptr, const uint8_t *End) return make_error<GenericBinaryError>( "Unexpected export kind", object_error::parse_failed); } + if (MakeSymbol) { + auto Pair = SymbolMap.try_emplace(Ex.Name, Symbols.size()); + if (Pair.second) { + Symbols.emplace_back(Ex.Name, ExportType, + Sections.size(), i); + DEBUG(dbgs() << "Adding export: " << Symbols.back() + << " sym index:" << Symbols.size() << "\n"); + } else { + uint32_t SymIndex = Pair.first->second; + Symbols[SymIndex] = WasmSymbol(Ex.Name, ExportType, Sections.size(), i); + DEBUG(dbgs() << "Replacing existing symbol: " << Symbols[SymIndex] + << " sym index:" << SymIndex << "\n"); + } + } Exports.push_back(Ex); } if (Ptr != End) @@ -665,15 +675,17 @@ Error WasmObjectFile::parseElemSection(const uint8_t *Ptr, const uint8_t *End) { } Error WasmObjectFile::parseDataSection(const uint8_t *Ptr, const uint8_t *End) { + const uint8_t *Start = Ptr; uint32_t Count = readVaruint32(Ptr); DataSegments.reserve(Count); while (Count--) { - wasm::WasmDataSegment Segment; - Segment.Index = readVaruint32(Ptr); - if (Error Err = 
readInitExpr(Segment.Offset, Ptr)) + WasmSegment Segment; + Segment.Data.MemoryIndex = readVaruint32(Ptr); + if (Error Err = readInitExpr(Segment.Data.Offset, Ptr)) return Err; uint32_t Size = readVaruint32(Ptr); - Segment.Content = ArrayRef<uint8_t>(Ptr, Size); + Segment.Data.Content = ArrayRef<uint8_t>(Ptr, Size); + Segment.SectionOffset = Ptr - Start; Ptr += Size; DataSegments.push_back(Segment); } diff --git a/lib/Object/WindowsResource.cpp b/lib/Object/WindowsResource.cpp index 1371eacdf8f2d..246eee5ddb311 100644 --- a/lib/Object/WindowsResource.cpp +++ b/lib/Object/WindowsResource.cpp @@ -609,8 +609,8 @@ void WindowsResourceCOFFWriter::writeDirectoryTree() { for (auto const &Child : StringChildren) { auto *Entry = reinterpret_cast<coff_resource_dir_entry *>(BufferStart + CurrentOffset); - Entry->Identifier.NameOffset = - StringTableOffsets[Child.second->getStringIndex()]; + Entry->Identifier.setNameOffset( + StringTableOffsets[Child.second->getStringIndex()]); if (Child.second->checkIsDataNode()) { Entry->Offset.DataEntryOffset = NextLevelOffset; NextLevelOffset += sizeof(coff_resource_data_entry); diff --git a/lib/ObjectYAML/WasmYAML.cpp b/lib/ObjectYAML/WasmYAML.cpp index 2040efdc9d117..6a68cd265ad84 100644 --- a/lib/ObjectYAML/WasmYAML.cpp +++ b/lib/ObjectYAML/WasmYAML.cpp @@ -345,7 +345,8 @@ void MappingTraits<wasm::WasmInitExpr>::mapping(IO &IO, void MappingTraits<WasmYAML::DataSegment>::mapping( IO &IO, WasmYAML::DataSegment &Segment) { - IO.mapRequired("Index", Segment.Index); + IO.mapOptional("SectionOffset", Segment.SectionOffset); + IO.mapRequired("MemoryIndex", Segment.MemoryIndex); IO.mapRequired("Offset", Segment.Offset); IO.mapRequired("Content", Segment.Content); } diff --git a/lib/Option/OptTable.cpp b/lib/Option/OptTable.cpp index acb9e8d015bce..bcd365236e46e 100644 --- a/lib/Option/OptTable.cpp +++ b/lib/Option/OptTable.cpp @@ -225,11 +225,15 @@ OptTable::suggestValueCompletions(StringRef Option, StringRef Arg) const { return {}; } -std::vector<std::string> OptTable::findByPrefix(StringRef Cur) const { +std::vector<std::string> +OptTable::findByPrefix(StringRef Cur, unsigned short DisableFlags) const { std::vector<std::string> Ret; for (const Info &In : OptionInfos.slice(FirstSearchableIndex)) { - if (!In.Prefixes) + if (!In.Prefixes || (!In.HelpText && !In.GroupID)) + continue; + if (In.Flags & DisableFlags) continue; + for (int I = 0; In.Prefixes[I]; I++) { std::string S = std::string(In.Prefixes[I]) + std::string(In.Name); if (StringRef(S).startswith(Cur)) diff --git a/lib/Passes/PassBuilder.cpp b/lib/Passes/PassBuilder.cpp index 0380bd991d717..9e0cf27aa17b5 100644 --- a/lib/Passes/PassBuilder.cpp +++ b/lib/Passes/PassBuilder.cpp @@ -281,33 +281,52 @@ AnalysisKey NoOpLoopAnalysis::Key; } // End anonymous namespace. 
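The PassBuilder changes that follow thread client-registered callbacks through analysis registration and the default pipelines. A hedged sketch of the client side (the registration method name is assumed from the matching PassBuilder.h change; MyPeepholePass is hypothetical):

    using namespace llvm;

    void registerMyExtensions(PassBuilder &PB) {
      // Run a custom peephole pass wherever the built-in pipelines invoke
      // the PeepholeEPCallbacks list below.
      PB.registerPeepholeEPCallback(
          [](FunctionPassManager &FPM, PassBuilder::OptimizationLevel Level) {
            FPM.addPass(MyPeepholePass()); // hypothetical pass
          });
    }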
+void PassBuilder::invokePeepholeEPCallbacks( + FunctionPassManager &FPM, PassBuilder::OptimizationLevel Level) { + for (auto &C : PeepholeEPCallbacks) + C(FPM, Level); +} + void PassBuilder::registerModuleAnalyses(ModuleAnalysisManager &MAM) { #define MODULE_ANALYSIS(NAME, CREATE_PASS) \ MAM.registerPass([&] { return CREATE_PASS; }); #include "PassRegistry.def" + + for (auto &C : ModuleAnalysisRegistrationCallbacks) + C(MAM); } void PassBuilder::registerCGSCCAnalyses(CGSCCAnalysisManager &CGAM) { #define CGSCC_ANALYSIS(NAME, CREATE_PASS) \ CGAM.registerPass([&] { return CREATE_PASS; }); #include "PassRegistry.def" + + for (auto &C : CGSCCAnalysisRegistrationCallbacks) + C(CGAM); } void PassBuilder::registerFunctionAnalyses(FunctionAnalysisManager &FAM) { #define FUNCTION_ANALYSIS(NAME, CREATE_PASS) \ FAM.registerPass([&] { return CREATE_PASS; }); #include "PassRegistry.def" + + for (auto &C : FunctionAnalysisRegistrationCallbacks) + C(FAM); } void PassBuilder::registerLoopAnalyses(LoopAnalysisManager &LAM) { #define LOOP_ANALYSIS(NAME, CREATE_PASS) \ LAM.registerPass([&] { return CREATE_PASS; }); #include "PassRegistry.def" + + for (auto &C : LoopAnalysisRegistrationCallbacks) + C(LAM); } FunctionPassManager PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, - bool DebugLogging) { + bool DebugLogging, + bool PrepareForThinLTO) { assert(Level != O0 && "Must request optimizations!"); FunctionPassManager FPM(DebugLogging); @@ -340,6 +359,8 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, if (!isOptimizingForSize(Level)) FPM.addPass(LibCallsShrinkWrapPass()); + invokePeepholeEPCallbacks(FPM, Level); + FPM.addPass(TailCallElimPass()); FPM.addPass(SimplifyCFGPass()); @@ -363,11 +384,19 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, LPM1.addPass(SimpleLoopUnswitchPass()); LPM2.addPass(IndVarSimplifyPass()); LPM2.addPass(LoopIdiomRecognizePass()); + + for (auto &C : LateLoopOptimizationsEPCallbacks) + C(LPM2, Level); + LPM2.addPass(LoopDeletionPass()); - // FIXME: The old pass manager has a hack to disable loop unrolling during - // ThinLTO when using sample PGO. Need to either fix it or port some - // workaround. - LPM2.addPass(LoopUnrollPass::createFull(Level)); + // Do not enable unrolling in the PrepareForThinLTO phase during sample PGO, + // because it changes the IR in ways that make profile annotation in the + // backend compile inaccurate. + if (!PrepareForThinLTO || !PGOOpt || PGOOpt->SampleProfileFile.empty()) + LPM2.addPass(LoopUnrollPass::createFull(Level)); + + for (auto &C : LoopOptimizerEndEPCallbacks) + C(LPM2, Level); // We provide the opt remark emitter pass for LICM to use. We only need to do // this once as it is immutable. @@ -403,6 +432,7 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, // Run instcombine after redundancy and dead bit elimination to exploit // opportunities opened up by them. FPM.addPass(InstCombinePass()); + invokePeepholeEPCallbacks(FPM, Level); // Re-consider control flow based optimizations after redundancy elimination, // redo DCE, etc. @@ -411,19 +441,24 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, FPM.addPass(DSEPass()); FPM.addPass(createFunctionToLoopPassAdaptor(LICMPass())); + for (auto &C : ScalarOptimizerLateEPCallbacks) + C(FPM, Level); + // Finally, do an expensive DCE pass to catch all the dead code exposed by // the simplifications and basic cleanup after all the simplifications.
FPM.addPass(ADCEPass()); FPM.addPass(SimplifyCFGPass()); FPM.addPass(InstCombinePass()); + invokePeepholeEPCallbacks(FPM, Level); return FPM; } -static void addPGOInstrPasses(ModulePassManager &MPM, bool DebugLogging, - PassBuilder::OptimizationLevel Level, - bool RunProfileGen, std::string ProfileGenFile, - std::string ProfileUseFile) { +void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM, bool DebugLogging, + PassBuilder::OptimizationLevel Level, + bool RunProfileGen, + std::string ProfileGenFile, + std::string ProfileUseFile) { // Generally running simplification passes and the inliner with a high // threshold results in smaller executables, but there may be cases where // the size grows, so let's be conservative here and skip this simplification @@ -448,9 +483,8 @@ static void addPGOInstrPasses(ModulePassManager &MPM, bool DebugLogging, FPM.addPass(EarlyCSEPass()); // Catch trivial redundancies. FPM.addPass(SimplifyCFGPass()); // Merge & remove basic blocks. FPM.addPass(InstCombinePass()); // Combine silly sequences. + invokePeepholeEPCallbacks(FPM, Level); - // FIXME: Here the old pass manager inserts peephole extensions. - // Add them when they're supported. CGPipeline.addPass(createCGSCCToFunctionPassAdaptor(std::move(FPM))); MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPipeline))); @@ -490,7 +524,8 @@ getInlineParamsFromOptLevel(PassBuilder::OptimizationLevel Level) { ModulePassManager PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level, - bool DebugLogging) { + bool DebugLogging, + bool PrepareForThinLTO) { ModulePassManager MPM(DebugLogging); // Do basic inference of function attributes from known properties of system @@ -530,6 +565,8 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level, // optimizations. FunctionPassManager GlobalCleanupPM(DebugLogging); GlobalCleanupPM.addPass(InstCombinePass()); + invokePeepholeEPCallbacks(GlobalCleanupPM, Level); + GlobalCleanupPM.addPass(SimplifyCFGPass()); MPM.addPass(createModuleToFunctionPassAdaptor(std::move(GlobalCleanupPM))); @@ -544,8 +581,11 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level, MPM.addPass(SampleProfileLoaderPass(PGOOpt->SampleProfileFile)); // Indirect call promotion that promotes intra-module targets only. - MPM.addPass(PGOIndirectCallPromotion( - false, PGOOpt && !PGOOpt->SampleProfileFile.empty())); + // Do not enable it in the PrepareForThinLTO phase during sample PGO, because + // it changes the IR in ways that make profile annotation in the backend + // compile inaccurate. + if (!PrepareForThinLTO || PGOOpt->SampleProfileFile.empty()) + MPM.addPass(PGOIndirectCallPromotion( + false, PGOOpt && !PGOOpt->SampleProfileFile.empty())); } // Require the GlobalsAA analysis for the module so we can query it within @@ -570,7 +610,12 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level, // Run the inliner first. The theory is that we are walking bottom-up and so // the callees have already been fully optimized, and we want to inline them // into the callers so that our optimizations can reflect that. - MainCGPipeline.addPass(InlinerPass(getInlineParamsFromOptLevel(Level))); + // For the PrepareForThinLTO phase, we disable the hot-caller heuristic for + // sample PGO because it makes profile annotation in the backend inaccurate.
+ InlineParams IP = getInlineParamsFromOptLevel(Level); + if (PrepareForThinLTO && PGOOpt && !PGOOpt->SampleProfileFile.empty()) + IP.HotCallSiteThreshold = 0; + MainCGPipeline.addPass(InlinerPass(IP)); // Now deduce any function attributes based on the current code. MainCGPipeline.addPass(PostOrderFunctionAttrsPass()); @@ -583,7 +628,11 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level, // Lastly, add the core function simplification pipeline nested inside the // CGSCC walk. MainCGPipeline.addPass(createCGSCCToFunctionPassAdaptor( - buildFunctionSimplificationPipeline(Level, DebugLogging))); + buildFunctionSimplificationPipeline(Level, DebugLogging, + PrepareForThinLTO))); + + for (auto &C : CGSCCOptimizerLateEPCallbacks) + C(MainCGPipeline, Level); // We wrap the CGSCC pipeline in a devirtualization repeater. This will try // to detect when we devirtualize indirect calls and iterate the SCC passes @@ -643,6 +692,9 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level, // rather than on each loop in an inside-out manner, and so they are actually // function passes. + for (auto &C : VectorizerStartEPCallbacks) + C(OptimizePM, Level); + // First rotate loops that may have been un-rotated by prior passes. OptimizePM.addPass(createFunctionToLoopPassAdaptor(LoopRotatePass())); @@ -726,7 +778,8 @@ PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level, MPM.addPass(ForceFunctionAttrsPass()); // Add the core simplification pipeline. - MPM.addPass(buildModuleSimplificationPipeline(Level, DebugLogging)); + MPM.addPass(buildModuleSimplificationPipeline(Level, DebugLogging, + /*PrepareForThinLTO=*/false)); // Now add the optimization pipeline. MPM.addPass(buildModuleOptimizationPipeline(Level, DebugLogging)); @@ -747,7 +800,8 @@ PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level, // If we are planning to perform ThinLTO later, we don't bloat the code with // unrolling/vectorization/... now. Just simplify the module as much as we // can. - MPM.addPass(buildModuleSimplificationPipeline(Level, DebugLogging)); + MPM.addPass(buildModuleSimplificationPipeline(Level, DebugLogging, + /*PrepareForThinLTO=*/true)); // Run partial inlining pass to partially inline functions that have // large bodies. @@ -785,7 +839,8 @@ PassBuilder::buildThinLTODefaultPipeline(OptimizationLevel Level, !PGOOpt->ProfileUseFile.empty())); // Add the core simplification pipeline. - MPM.addPass(buildModuleSimplificationPipeline(Level, DebugLogging)); + MPM.addPass(buildModuleSimplificationPipeline(Level, DebugLogging, + /*PrepareForThinLTO=*/false)); // Now add the optimization pipeline. MPM.addPass(buildModuleOptimizationPipeline(Level, DebugLogging)); @@ -868,8 +923,11 @@ ModulePassManager PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, // simplification opportunities, and both can propagate functions through // function pointers. When this happens, we often have to resolve varargs // calls, etc, so let instcombine do this. - // FIXME: add peephole extensions here as the legacy PM does. - MPM.addPass(createModuleToFunctionPassAdaptor(InstCombinePass())); + FunctionPassManager PeepholeFPM(DebugLogging); + PeepholeFPM.addPass(InstCombinePass()); + invokePeepholeEPCallbacks(PeepholeFPM, Level); + + MPM.addPass(createModuleToFunctionPassAdaptor(std::move(PeepholeFPM))); // Note: historically, the PruneEH pass was run first to deduce nounwind and // generally clean up exception handling overhead.
It isn't clear this is @@ -887,10 +945,10 @@ ModulePassManager PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, MPM.addPass(GlobalDCEPass()); FunctionPassManager FPM(DebugLogging); - // The IPO Passes may leave cruft around. Clean up after them. - // FIXME: add peephole extensions here as the legacy PM does. FPM.addPass(InstCombinePass()); + invokePeepholeEPCallbacks(FPM, Level); + FPM.addPass(JumpThreadingPass()); // Break up allocas @@ -937,8 +995,11 @@ ModulePassManager PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, MainFPM.add(AlignmentFromAssumptionsPass()); #endif - // FIXME: add peephole extensions to the PM here. + // FIXME: Conditionally run LoadCombine here, after it's ported + // (in case we still have this pass, given its questionable usefulness). + MainFPM.addPass(InstCombinePass()); + invokePeepholeEPCallbacks(MainFPM, Level); MainFPM.addPass(JumpThreadingPass()); MPM.addPass(createModuleToFunctionPassAdaptor(std::move(MainFPM))); @@ -1021,7 +1082,27 @@ static bool startsWithDefaultPipelineAliasPrefix(StringRef Name) { Name.startswith("lto"); } -static bool isModulePassName(StringRef Name) { +/// Tests whether registered callbacks will accept a given pass name. +/// +/// When parsing a pipeline text, the type of the outermost pipeline may be +/// omitted, in which case the type is automatically determined from the first +/// pass name in the text. This may be a name that is handled through one of the +/// callbacks. We check this through the ordinary parsing callbacks by setting +/// up a dummy PassManager in order to not force the client to also handle this +/// type of query. +template <typename PassManagerT, typename CallbacksT> +static bool callbacksAcceptPassName(StringRef Name, CallbacksT &Callbacks) { + if (!Callbacks.empty()) { + PassManagerT DummyPM; + for (auto &CB : Callbacks) + if (CB(Name, DummyPM, {})) + return true; + } + return false; +} + +template <typename CallbacksT> +static bool isModulePassName(StringRef Name, CallbacksT &Callbacks) { // Manually handle aliases for pre-configured pipeline fragments. if (startsWithDefaultPipelineAliasPrefix(Name)) return DefaultAliasRegex.match(Name); @@ -1046,10 +1127,11 @@ return true; #include "PassRegistry.def" - return false; + return callbacksAcceptPassName<ModulePassManager>(Name, Callbacks); } -static bool isCGSCCPassName(StringRef Name) { +template <typename CallbacksT> +static bool isCGSCCPassName(StringRef Name, CallbacksT &Callbacks) { // Explicitly handle pass manager names. if (Name == "cgscc") return true; @@ -1070,10 +1152,11 @@ return true; #include "PassRegistry.def" - return false; + return callbacksAcceptPassName<CGSCCPassManager>(Name, Callbacks); } -static bool isFunctionPassName(StringRef Name) { +template <typename CallbacksT> +static bool isFunctionPassName(StringRef Name, CallbacksT &Callbacks) { // Explicitly handle pass manager names. if (Name == "function") return true; @@ -1092,10 +1175,11 @@ return true; #include "PassRegistry.def" - return false; + return callbacksAcceptPassName<FunctionPassManager>(Name, Callbacks); } -static bool isLoopPassName(StringRef Name) { +template <typename CallbacksT> +static bool isLoopPassName(StringRef Name, CallbacksT &Callbacks) { // Explicitly handle pass manager names.
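An aside on the parsing side: the same callback lists let a client teach the textual pipeline parser a new pass name, which these predicates now consult (registration method name assumed from the matching PassBuilder.h change; "my-pass" and MyPass are hypothetical):

    PB.registerPipelineParsingCallback(
        [](StringRef Name, FunctionPassManager &FPM,
           ArrayRef<PassBuilder::PipelineElement>) {
          if (Name != "my-pass")
            return false; // not ours; another callback may still claim it
          FPM.addPass(MyPass()); // hypothetical pass
          return true;
        });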
if (Name == "loop") return true; @@ -1112,7 +1196,7 @@ static bool isLoopPassName(StringRef Name) { return true; #include "PassRegistry.def" - return false; + return callbacksAcceptPassName<LoopPassManager>(Name, Callbacks); } Optional<std::vector<PassBuilder::PipelineElement>> @@ -1213,6 +1297,11 @@ bool PassBuilder::parseModulePass(ModulePassManager &MPM, MPM.addPass(createRepeatedPass(*Count, std::move(NestedMPM))); return true; } + + for (auto &C : ModulePipelineParsingCallbacks) + if (C(Name, MPM, InnerPipeline)) + return true; + // Normal passes can't have pipelines. return false; } @@ -1225,12 +1314,12 @@ bool PassBuilder::parseModulePass(ModulePassManager &MPM, assert(Matches.size() == 3 && "Must capture two matched strings!"); OptimizationLevel L = StringSwitch<OptimizationLevel>(Matches[2]) - .Case("O0", O0) - .Case("O1", O1) - .Case("O2", O2) - .Case("O3", O3) - .Case("Os", Os) - .Case("Oz", Oz); + .Case("O0", O0) + .Case("O1", O1) + .Case("O2", O2) + .Case("O3", O3) + .Case("Os", Os) + .Case("Oz", Oz); if (L == O0) // At O0 we do nothing at all! return true; @@ -1270,6 +1359,9 @@ bool PassBuilder::parseModulePass(ModulePassManager &MPM, } #include "PassRegistry.def" + for (auto &C : ModulePipelineParsingCallbacks) + if (C(Name, MPM, InnerPipeline)) + return true; return false; } @@ -1317,11 +1409,16 @@ bool PassBuilder::parseCGSCCPass(CGSCCPassManager &CGPM, *MaxRepetitions, DebugLogging)); return true; } + + for (auto &C : CGSCCPipelineParsingCallbacks) + if (C(Name, CGPM, InnerPipeline)) + return true; + // Normal passes can't have pipelines. return false; } - // Now expand the basic registered passes from the .inc file. +// Now expand the basic registered passes from the .inc file. #define CGSCC_PASS(NAME, CREATE_PASS) \ if (Name == NAME) { \ CGPM.addPass(CREATE_PASS); \ @@ -1342,6 +1439,9 @@ bool PassBuilder::parseCGSCCPass(CGSCCPassManager &CGPM, } #include "PassRegistry.def" + for (auto &C : CGSCCPipelineParsingCallbacks) + if (C(Name, CGPM, InnerPipeline)) + return true; return false; } @@ -1379,11 +1479,16 @@ bool PassBuilder::parseFunctionPass(FunctionPassManager &FPM, FPM.addPass(createRepeatedPass(*Count, std::move(NestedFPM))); return true; } + + for (auto &C : FunctionPipelineParsingCallbacks) + if (C(Name, FPM, InnerPipeline)) + return true; + // Normal passes can't have pipelines. return false; } - // Now expand the basic registered passes from the .inc file. +// Now expand the basic registered passes from the .inc file. #define FUNCTION_PASS(NAME, CREATE_PASS) \ if (Name == NAME) { \ FPM.addPass(CREATE_PASS); \ @@ -1403,6 +1508,9 @@ bool PassBuilder::parseFunctionPass(FunctionPassManager &FPM, } #include "PassRegistry.def" + for (auto &C : FunctionPipelineParsingCallbacks) + if (C(Name, FPM, InnerPipeline)) + return true; return false; } @@ -1430,11 +1538,16 @@ bool PassBuilder::parseLoopPass(LoopPassManager &LPM, const PipelineElement &E, LPM.addPass(createRepeatedPass(*Count, std::move(NestedLPM))); return true; } + + for (auto &C : LoopPipelineParsingCallbacks) + if (C(Name, LPM, InnerPipeline)) + return true; + // Normal passes can't have pipelines. return false; } - // Now expand the basic registered passes from the .inc file. +// Now expand the basic registered passes from the .inc file. 
#define LOOP_PASS(NAME, CREATE_PASS) \ if (Name == NAME) { \ LPM.addPass(CREATE_PASS); \ @@ -1455,6 +1568,9 @@ bool PassBuilder::parseLoopPass(LoopPassManager &LPM, const PipelineElement &E, } #include "PassRegistry.def" + for (auto &C : LoopPipelineParsingCallbacks) + if (C(Name, LPM, InnerPipeline)) + return true; return false; } @@ -1473,6 +1589,9 @@ bool PassBuilder::parseAAPassName(AAManager &AA, StringRef Name) { } #include "PassRegistry.def" + for (auto &C : AAParsingCallbacks) + if (C(Name, AA)) + return true; return false; } @@ -1539,7 +1658,7 @@ bool PassBuilder::parseModulePassPipeline(ModulePassManager &MPM, return true; } -// Primary pass pipeline description parsing routine. +// Primary pass pipeline description parsing routine for a \c ModulePassManager // FIXME: Should this routine accept a TargetMachine or require the caller to // pre-populate the analysis managers with target-specific stuff? bool PassBuilder::parsePassPipeline(ModulePassManager &MPM, @@ -1553,21 +1672,70 @@ bool PassBuilder::parsePassPipeline(ModulePassManager &MPM, // automatically. StringRef FirstName = Pipeline->front().Name; - if (!isModulePassName(FirstName)) { - if (isCGSCCPassName(FirstName)) + if (!isModulePassName(FirstName, ModulePipelineParsingCallbacks)) { + if (isCGSCCPassName(FirstName, CGSCCPipelineParsingCallbacks)) { Pipeline = {{"cgscc", std::move(*Pipeline)}}; - else if (isFunctionPassName(FirstName)) + } else if (isFunctionPassName(FirstName, + FunctionPipelineParsingCallbacks)) { Pipeline = {{"function", std::move(*Pipeline)}}; - else if (isLoopPassName(FirstName)) + } else if (isLoopPassName(FirstName, LoopPipelineParsingCallbacks)) { Pipeline = {{"function", {{"loop", std::move(*Pipeline)}}}}; - else + } else { + for (auto &C : TopLevelPipelineParsingCallbacks) + if (C(MPM, *Pipeline, VerifyEachPass, DebugLogging)) + return true; + // Unknown pass name! 
return false; + } } return parseModulePassPipeline(MPM, *Pipeline, VerifyEachPass, DebugLogging); } +// Primary pass pipeline description parsing routine for a \c CGSCCPassManager +bool PassBuilder::parsePassPipeline(CGSCCPassManager &CGPM, + StringRef PipelineText, bool VerifyEachPass, + bool DebugLogging) { + auto Pipeline = parsePipelineText(PipelineText); + if (!Pipeline || Pipeline->empty()) + return false; + + StringRef FirstName = Pipeline->front().Name; + if (!isCGSCCPassName(FirstName, CGSCCPipelineParsingCallbacks)) + return false; + + return parseCGSCCPassPipeline(CGPM, *Pipeline, VerifyEachPass, DebugLogging); +} + +// Primary pass pipeline description parsing routine for a \c +// FunctionPassManager +bool PassBuilder::parsePassPipeline(FunctionPassManager &FPM, + StringRef PipelineText, bool VerifyEachPass, + bool DebugLogging) { + auto Pipeline = parsePipelineText(PipelineText); + if (!Pipeline || Pipeline->empty()) + return false; + + StringRef FirstName = Pipeline->front().Name; + if (!isFunctionPassName(FirstName, FunctionPipelineParsingCallbacks)) + return false; + + return parseFunctionPassPipeline(FPM, *Pipeline, VerifyEachPass, + DebugLogging); +} + +// Primary pass pipeline description parsing routine for a \c LoopPassManager +bool PassBuilder::parsePassPipeline(LoopPassManager &CGPM, + StringRef PipelineText, bool VerifyEachPass, + bool DebugLogging) { + auto Pipeline = parsePipelineText(PipelineText); + if (!Pipeline || Pipeline->empty()) + return false; + + return parseLoopPassPipeline(CGPM, *Pipeline, VerifyEachPass, DebugLogging); +} + bool PassBuilder::parseAAPipeline(AAManager &AA, StringRef PipelineText) { // If the pipeline just consists of the word 'default' just replace the AA // manager with our default one. diff --git a/lib/ProfileData/InstrProf.cpp b/lib/ProfileData/InstrProf.cpp index a1d18724fcd57..48c1643cb13c8 100644 --- a/lib/ProfileData/InstrProf.cpp +++ b/lib/ProfileData/InstrProf.cpp @@ -460,9 +460,9 @@ Error readPGOFuncNameStrings(StringRef NameStrings, InstrProfSymtab &Symtab) { return Error::success(); } -void InstrProfValueSiteRecord::merge(SoftInstrProfErrors &SIPE, - InstrProfValueSiteRecord &Input, - uint64_t Weight) { +void InstrProfValueSiteRecord::merge(InstrProfValueSiteRecord &Input, + uint64_t Weight, + function_ref<void(instrprof_error)> Warn) { this->sortByTargetValues(); Input.sortByTargetValues(); auto I = ValueData.begin(); @@ -475,7 +475,7 @@ void InstrProfValueSiteRecord::merge(SoftInstrProfErrors &SIPE, bool Overflowed; I->Count = SaturatingMultiplyAdd(J->Count, Weight, I->Count, &Overflowed); if (Overflowed) - SIPE.addError(instrprof_error::counter_overflow); + Warn(instrprof_error::counter_overflow); ++I; continue; } @@ -483,25 +483,25 @@ void InstrProfValueSiteRecord::merge(SoftInstrProfErrors &SIPE, } } -void InstrProfValueSiteRecord::scale(SoftInstrProfErrors &SIPE, - uint64_t Weight) { +void InstrProfValueSiteRecord::scale(uint64_t Weight, + function_ref<void(instrprof_error)> Warn) { for (auto I = ValueData.begin(), IE = ValueData.end(); I != IE; ++I) { bool Overflowed; I->Count = SaturatingMultiply(I->Count, Weight, &Overflowed); if (Overflowed) - SIPE.addError(instrprof_error::counter_overflow); + Warn(instrprof_error::counter_overflow); } } // Merge Value Profile data from Src record to this record for ValueKind. // Scale merged value counts by \p Weight. 
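Before the rewrite of mergeValueProfData below, a note on the shape of the new API: the SoftInstrProfErrors accumulator is gone, and mismatches or counter overflows are reported the moment they occur through a caller-supplied function_ref<void(instrprof_error)>. A caller-side sketch (diagnostic wording illustrative):

#include "llvm/ProfileData/InstrProf.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

static void mergeRecords(InstrProfRecord &Dst, InstrProfRecord &Src) {
  Dst.merge(Src, /*Weight=*/1, [](instrprof_error E) {
    // Soft failure: render the enum via InstrProfError and keep merging.
    errs() << "warning: " << toString(make_error<InstrProfError>(E)) << "\n";
  });
}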
-void InstrProfRecord::mergeValueProfData(uint32_t ValueKind, - InstrProfRecord &Src, - uint64_t Weight) { +void InstrProfRecord::mergeValueProfData( + uint32_t ValueKind, InstrProfRecord &Src, uint64_t Weight, + function_ref<void(instrprof_error)> Warn) { uint32_t ThisNumValueSites = getNumValueSites(ValueKind); uint32_t OtherNumValueSites = Src.getNumValueSites(ValueKind); if (ThisNumValueSites != OtherNumValueSites) { - SIPE.addError(instrprof_error::value_site_count_mismatch); + Warn(instrprof_error::value_site_count_mismatch); return; } if (!ThisNumValueSites) @@ -511,14 +511,15 @@ void InstrProfRecord::mergeValueProfData(uint32_t ValueKind, MutableArrayRef<InstrProfValueSiteRecord> OtherSiteRecords = Src.getValueSitesForKind(ValueKind); for (uint32_t I = 0; I < ThisNumValueSites; I++) - ThisSiteRecords[I].merge(SIPE, OtherSiteRecords[I], Weight); + ThisSiteRecords[I].merge(OtherSiteRecords[I], Weight, Warn); } -void InstrProfRecord::merge(InstrProfRecord &Other, uint64_t Weight) { +void InstrProfRecord::merge(InstrProfRecord &Other, uint64_t Weight, + function_ref<void(instrprof_error)> Warn) { // If the number of counters doesn't match we either have bad data // or a hash collision. if (Counts.size() != Other.Counts.size()) { - SIPE.addError(instrprof_error::count_mismatch); + Warn(instrprof_error::count_mismatch); return; } @@ -527,27 +528,30 @@ void InstrProfRecord::merge(InstrProfRecord &Other, uint64_t Weight) { Counts[I] = SaturatingMultiplyAdd(Other.Counts[I], Weight, Counts[I], &Overflowed); if (Overflowed) - SIPE.addError(instrprof_error::counter_overflow); + Warn(instrprof_error::counter_overflow); } for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) - mergeValueProfData(Kind, Other, Weight); + mergeValueProfData(Kind, Other, Weight, Warn); } -void InstrProfRecord::scaleValueProfData(uint32_t ValueKind, uint64_t Weight) { +void InstrProfRecord::scaleValueProfData( + uint32_t ValueKind, uint64_t Weight, + function_ref<void(instrprof_error)> Warn) { for (auto &R : getValueSitesForKind(ValueKind)) - R.scale(SIPE, Weight); + R.scale(Weight, Warn); } -void InstrProfRecord::scale(uint64_t Weight) { +void InstrProfRecord::scale(uint64_t Weight, + function_ref<void(instrprof_error)> Warn) { for (auto &Count : this->Counts) { bool Overflowed; Count = SaturatingMultiply(Count, Weight, &Overflowed); if (Overflowed) - SIPE.addError(instrprof_error::counter_overflow); + Warn(instrprof_error::counter_overflow); } for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) - scaleValueProfData(Kind, Weight); + scaleValueProfData(Kind, Weight, Warn); } // Map indirect call target name hash to name string. diff --git a/lib/ProfileData/InstrProfReader.cpp b/lib/ProfileData/InstrProfReader.cpp index 1ed1fb8b6f0b5..1b39a0695aac6 100644 --- a/lib/ProfileData/InstrProfReader.cpp +++ b/lib/ProfileData/InstrProfReader.cpp @@ -221,7 +221,7 @@ TextInstrProfReader::readValueProfileData(InstrProfRecord &Record) { #undef VP_READ_ADVANCE } -Error TextInstrProfReader::readNextRecord(InstrProfRecord &Record) { +Error TextInstrProfReader::readNextRecord(NamedInstrProfRecord &Record) { // Skip empty lines and comments. 
while (!Line.is_at_end() && (Line->empty() || Line->startswith("#"))) ++Line; @@ -377,13 +377,13 @@ Error RawInstrProfReader<IntPtrT>::readHeader( } template <class IntPtrT> -Error RawInstrProfReader<IntPtrT>::readName(InstrProfRecord &Record) { +Error RawInstrProfReader<IntPtrT>::readName(NamedInstrProfRecord &Record) { Record.Name = getName(Data->NameRef); return success(); } template <class IntPtrT> -Error RawInstrProfReader<IntPtrT>::readFuncHash(InstrProfRecord &Record) { +Error RawInstrProfReader<IntPtrT>::readFuncHash(NamedInstrProfRecord &Record) { Record.Hash = swap(Data->FuncHash); return success(); } @@ -445,7 +445,7 @@ Error RawInstrProfReader<IntPtrT>::readValueProfilingData( } template <class IntPtrT> -Error RawInstrProfReader<IntPtrT>::readNextRecord(InstrProfRecord &Record) { +Error RawInstrProfReader<IntPtrT>::readNextRecord(NamedInstrProfRecord &Record) { if (atEnd()) // At this point, ValueDataStart field points to the next header. if (Error E = readNextHeader(getNextHeaderPos())) @@ -550,7 +550,7 @@ data_type InstrProfLookupTrait::ReadData(StringRef K, const unsigned char *D, template <typename HashTableImpl> Error InstrProfReaderIndex<HashTableImpl>::getRecords( - StringRef FuncName, ArrayRef<InstrProfRecord> &Data) { + StringRef FuncName, ArrayRef<NamedInstrProfRecord> &Data) { auto Iter = HashTable->find(FuncName); if (Iter == HashTable->end()) return make_error<InstrProfError>(instrprof_error::unknown_function); @@ -564,7 +564,7 @@ Error InstrProfReaderIndex<HashTableImpl>::getRecords( template <typename HashTableImpl> Error InstrProfReaderIndex<HashTableImpl>::getRecords( - ArrayRef<InstrProfRecord> &Data) { + ArrayRef<NamedInstrProfRecord> &Data) { if (atEnd()) return make_error<InstrProfError>(instrprof_error::eof); @@ -644,7 +644,7 @@ IndexedInstrProfReader::readSummary(IndexedInstrProf::ProfVersion Version, InstrProfSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs); // FIXME: This only computes an empty summary. Need to call addRecord for - // all InstrProfRecords to get the correct summary. + // all NamedInstrProfRecords to get the correct summary. 
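An aside on the pervasive InstrProfRecord-to-NamedInstrProfRecord renaming in these readers: the Name and Hash fields have been split out into the NamedInstrProfRecord subclass, so only code that actually identifies functions carries them. Reader loops look the same as before, just with the named type; a sketch (error reporting trimmed):

#include "llvm/ProfileData/InstrProfReader.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

static void listFunctions(StringRef Path) {
  auto ReaderOrErr = InstrProfReader::create(Path);
  if (!ReaderOrErr) {
    consumeError(ReaderOrErr.takeError()); // illustrative; report for real
    return;
  }
  auto Reader = std::move(*ReaderOrErr);
  for (const auto &Func : *Reader) // iteration now yields NamedInstrProfRecords
    outs() << Func.Name << " (hash " << Func.Hash << "): "
           << Func.Counts.size() << " counters\n";
}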
this->Summary = Builder.getSummary(); return Cur; } @@ -707,7 +707,7 @@ InstrProfSymtab &IndexedInstrProfReader::getSymtab() { Expected<InstrProfRecord> IndexedInstrProfReader::getInstrProfRecord(StringRef FuncName, uint64_t FuncHash) { - ArrayRef<InstrProfRecord> Data; + ArrayRef<NamedInstrProfRecord> Data; Error Err = Index->getRecords(FuncName, Data); if (Err) return std::move(Err); @@ -732,10 +732,10 @@ Error IndexedInstrProfReader::getFunctionCounts(StringRef FuncName, return success(); } -Error IndexedInstrProfReader::readNextRecord(InstrProfRecord &Record) { +Error IndexedInstrProfReader::readNextRecord(NamedInstrProfRecord &Record) { static unsigned RecordIndex = 0; - ArrayRef<InstrProfRecord> Data; + ArrayRef<NamedInstrProfRecord> Data; Error E = Index->getRecords(Data); if (E) diff --git a/lib/ProfileData/InstrProfWriter.cpp b/lib/ProfileData/InstrProfWriter.cpp index 9efea78ed2a89..ce3f8806e12e7 100644 --- a/lib/ProfileData/InstrProfWriter.cpp +++ b/lib/ProfileData/InstrProfWriter.cpp @@ -176,38 +176,46 @@ void InstrProfWriter::setOutputSparse(bool Sparse) { this->Sparse = Sparse; } -Error InstrProfWriter::addRecord(InstrProfRecord &&I, uint64_t Weight) { - auto &ProfileDataMap = FunctionData[I.Name]; +void InstrProfWriter::addRecord(NamedInstrProfRecord &&I, uint64_t Weight, + function_ref<void(Error)> Warn) { + auto Name = I.Name; + auto Hash = I.Hash; + addRecord(Name, Hash, std::move(I), Weight, Warn); +} + +void InstrProfWriter::addRecord(StringRef Name, uint64_t Hash, + InstrProfRecord &&I, uint64_t Weight, + function_ref<void(Error)> Warn) { + auto &ProfileDataMap = FunctionData[Name]; bool NewFunc; ProfilingData::iterator Where; std::tie(Where, NewFunc) = - ProfileDataMap.insert(std::make_pair(I.Hash, InstrProfRecord())); + ProfileDataMap.insert(std::make_pair(Hash, InstrProfRecord())); InstrProfRecord &Dest = Where->second; + auto MapWarn = [&](instrprof_error E) { + Warn(make_error<InstrProfError>(E)); + }; + if (NewFunc) { // We've never seen a function with this name and hash, add it. Dest = std::move(I); - // Fix up the name to avoid dangling reference. - Dest.Name = FunctionData.find(Dest.Name)->getKey(); if (Weight > 1) - Dest.scale(Weight); + Dest.scale(Weight, MapWarn); } else { // We're updating a function we've seen before. 
- Dest.merge(I, Weight); + Dest.merge(I, Weight, MapWarn); } Dest.sortValueData(); - - return Dest.takeError(); } -Error InstrProfWriter::mergeRecordsFromWriter(InstrProfWriter &&IPW) { +void InstrProfWriter::mergeRecordsFromWriter(InstrProfWriter &&IPW, + function_ref<void(Error)> Warn) { for (auto &I : IPW.FunctionData) for (auto &Func : I.getValue()) - if (Error E = addRecord(std::move(Func.second), 1)) - return E; - return Error::success(); + addRecord(I.getKey(), Func.first, std::move(Func.second), 1, Warn); } bool InstrProfWriter::shouldEncodeData(const ProfilingData &PD) { @@ -323,11 +331,12 @@ static const char *ValueProfKindStr[] = { #include "llvm/ProfileData/InstrProfData.inc" }; -void InstrProfWriter::writeRecordInText(const InstrProfRecord &Func, +void InstrProfWriter::writeRecordInText(StringRef Name, uint64_t Hash, + const InstrProfRecord &Func, InstrProfSymtab &Symtab, raw_fd_ostream &OS) { - OS << Func.Name << "\n"; - OS << "# Func Hash:\n" << Func.Hash << "\n"; + OS << Name << "\n"; + OS << "# Func Hash:\n" << Hash << "\n"; OS << "# Num Counters:\n" << Func.Counts.size() << "\n"; OS << "# Counter Values:\n"; for (uint64_t Count : Func.Counts) @@ -375,6 +384,6 @@ Error InstrProfWriter::writeText(raw_fd_ostream &OS) { for (const auto &I : FunctionData) if (shouldEncodeData(I.getValue())) for (const auto &Func : I.getValue()) - writeRecordInText(Func.second, Symtab, OS); + writeRecordInText(I.getKey(), Func.first, Func.second, Symtab, OS); return Error::success(); } diff --git a/lib/Support/CommandLine.cpp b/lib/Support/CommandLine.cpp index 0345a5e3d2a1d..50173f5256bff 100644 --- a/lib/Support/CommandLine.cpp +++ b/lib/Support/CommandLine.cpp @@ -1236,7 +1236,7 @@ bool CommandLineParser::ParseCommandLineOptions(int argc, << ": Not enough positional command line arguments specified!\n" << "Must specify at least " << NumPositionalRequired << " positional argument" << (NumPositionalRequired > 1 ? "s" : "") - << ": See: " << argv[0] << " - help\n"; + << ": See: " << argv[0] << " -help\n"; ErrorParsing = true; } else if (!HasUnlimitedPositionals && diff --git a/lib/Support/DynamicLibrary.cpp b/lib/Support/DynamicLibrary.cpp index 9398789cea871..d8422115eae81 100644 --- a/lib/Support/DynamicLibrary.cpp +++ b/lib/Support/DynamicLibrary.cpp @@ -14,6 +14,7 @@ #include "llvm/Support/DynamicLibrary.h" #include "llvm-c/Support.h" #include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringMap.h" #include "llvm/Config/config.h" #include "llvm/Support/ManagedStatic.h" @@ -73,19 +74,37 @@ public: return true; } - void *Lookup(const char *Symbol) { - // Process handle gets first try. + void *LibLookup(const char *Symbol, DynamicLibrary::SearchOrdering Order) { + if (Order & SO_LoadOrder) { + for (void *Handle : Handles) { + if (void *Ptr = DLSym(Handle, Symbol)) + return Ptr; + } + } else { + for (void *Handle : llvm::reverse(Handles)) { + if (void *Ptr = DLSym(Handle, Symbol)) + return Ptr; + } + } + return nullptr; + } + + void *Lookup(const char *Symbol, DynamicLibrary::SearchOrdering Order) { + assert(!((Order & SO_LoadedFirst) && (Order & SO_LoadedLast)) && + "Invalid Ordering"); + + if (!Process || (Order & SO_LoadedFirst)) { + if (void *Ptr = LibLookup(Symbol, Order)) + return Ptr; + } if (Process) { + // Use OS facilities to search the current binary and all loaded libs. 
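The restructuring of HandleSet::Lookup here serves the new public DynamicLibrary::SearchOrder setting: a client can now choose whether explicitly loaded libraries shadow the process image or the other way around. Typical use (library path illustrative):

#include "llvm/Support/DynamicLibrary.h"
#include <string>

using namespace llvm::sys;

void *findPluginSymbol(const char *Sym) {
  std::string Err;
  DynamicLibrary::getPermanentLibrary("./plugin.so", &Err);
  // Let plugin symbols win over identically named process symbols.
  DynamicLibrary::SearchOrder = DynamicLibrary::SO_LoadedFirst;
  return DynamicLibrary::SearchForAddressOfSymbol(Sym);
}

SO_Linker, the default, keeps the historical behavior of deferring to the dynamic linker's own resolution order.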
if (void *Ptr = DLSym(Process, Symbol)) return Ptr; -#ifndef NDEBUG - for (void *Handle : Handles) - assert(!DLSym(Handle, Symbol) && "Symbol exists in non process handle"); -#endif - } else { - // Iterate in reverse, so newer libraries/symbols override older. - for (auto &&I = Handles.rbegin(), E = Handles.rend(); I != E; ++I) { - if (void *Ptr = DLSym(*I, Symbol)) + + // Search any libs that might have been skipped because of RTLD_LOCAL. + if (Order & SO_LoadedLast) { + if (void *Ptr = LibLookup(Symbol, Order)) return Ptr; } } @@ -113,6 +132,8 @@ static llvm::ManagedStatic<llvm::sys::SmartMutex<true>> SymbolsMutex; #endif char DynamicLibrary::Invalid; +DynamicLibrary::SearchOrdering DynamicLibrary::SearchOrder = + DynamicLibrary::SO_Linker; namespace llvm { void *SearchForAddressOfSpecialSymbol(const char *SymbolName) { @@ -170,7 +191,7 @@ void *DynamicLibrary::SearchForAddressOfSymbol(const char *SymbolName) { // Now search the libraries. if (OpenedHandles.isConstructed()) { - if (void *Ptr = OpenedHandles->Lookup(SymbolName)) + if (void *Ptr = OpenedHandles->Lookup(SymbolName, SearchOrder)) return Ptr; } } diff --git a/lib/Support/ErrorHandling.cpp b/lib/Support/ErrorHandling.cpp index a7d3a18003eee..fe69151665c68 100644 --- a/lib/Support/ErrorHandling.cpp +++ b/lib/Support/ErrorHandling.cpp @@ -20,15 +20,14 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/Errc.h" #include "llvm/Support/Error.h" -#include "llvm/Support/ManagedStatic.h" -#include "llvm/Support/Mutex.h" -#include "llvm/Support/MutexGuard.h" #include "llvm/Support/Signals.h" #include "llvm/Support/Threading.h" #include "llvm/Support/WindowsError.h" #include "llvm/Support/raw_ostream.h" #include <cassert> #include <cstdlib> +#include <mutex> +#include <new> #if defined(HAVE_UNISTD_H) # include <unistd.h> @@ -43,18 +42,25 @@ using namespace llvm; static fatal_error_handler_t ErrorHandler = nullptr; static void *ErrorHandlerUserData = nullptr; -static ManagedStatic<sys::Mutex> ErrorHandlerMutex; +static fatal_error_handler_t BadAllocErrorHandler = nullptr; +static void *BadAllocErrorHandlerUserData = nullptr; + +// Mutexes to synchronize installing error handlers and calling error handlers. +// Do not use ManagedStatic, or that may allocate memory while attempting to +// report an OOM. +static std::mutex ErrorHandlerMutex; +static std::mutex BadAllocErrorHandlerMutex; void llvm::install_fatal_error_handler(fatal_error_handler_t handler, void *user_data) { - llvm::MutexGuard Lock(*ErrorHandlerMutex); + std::lock_guard<std::mutex> Lock(ErrorHandlerMutex); assert(!ErrorHandler && "Error handler already registered!\n"); ErrorHandler = handler; ErrorHandlerUserData = user_data; } void llvm::remove_fatal_error_handler() { - llvm::MutexGuard Lock(*ErrorHandlerMutex); + std::lock_guard<std::mutex> Lock(ErrorHandlerMutex); ErrorHandler = nullptr; ErrorHandlerUserData = nullptr; } @@ -77,7 +83,7 @@ void llvm::report_fatal_error(const Twine &Reason, bool GenCrashDiag) { { // Only acquire the mutex while reading the handler, so as not to invoke a // user-supplied callback under a lock. 
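The additions just below introduce a dedicated out-of-memory hook. Unlike the fatal-error path, everything on this route has to avoid allocating, which is why plain std::mutex replaces ManagedStatic here. A sketch of installing a handler, assuming the fatal_error_handler_t signature of this era (message text and exit strategy illustrative; POSIX write keeps the handler allocation-free):

#include "llvm/Support/ErrorHandling.h"
#include <cstring>
#include <string>
#include <unistd.h>

// Must not allocate: write straight to stderr and terminate.
static void onBadAlloc(void *UserData, const std::string &Reason,
                       bool GenCrashDiag) {
  const char Msg[] = "mytool: out of memory\n";
  (void)::write(2, Msg, strlen(Msg));
  ::_exit(1);
}

int main() {
  llvm::install_bad_alloc_error_handler(onBadAlloc);
  // ... normal tool logic ...
  return 0;
}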
- llvm::MutexGuard Lock(*ErrorHandlerMutex); + std::lock_guard<std::mutex> Lock(ErrorHandlerMutex); handler = ErrorHandler; handlerData = ErrorHandlerUserData; } @@ -104,6 +110,48 @@ void llvm::report_fatal_error(const Twine &Reason, bool GenCrashDiag) { exit(1); } +void llvm::install_bad_alloc_error_handler(fatal_error_handler_t handler, + void *user_data) { + std::lock_guard<std::mutex> Lock(BadAllocErrorHandlerMutex); + assert(!ErrorHandler && "Bad alloc error handler already registered!\n"); + BadAllocErrorHandler = handler; + BadAllocErrorHandlerUserData = user_data; +} + +void llvm::remove_bad_alloc_error_handler() { + std::lock_guard<std::mutex> Lock(BadAllocErrorHandlerMutex); + BadAllocErrorHandler = nullptr; + BadAllocErrorHandlerUserData = nullptr; +} + +void llvm::report_bad_alloc_error(const char *Reason, bool GenCrashDiag) { + fatal_error_handler_t Handler = nullptr; + void *HandlerData = nullptr; + { + // Only acquire the mutex while reading the handler, so as not to invoke a + // user-supplied callback under a lock. + std::lock_guard<std::mutex> Lock(BadAllocErrorHandlerMutex); + Handler = BadAllocErrorHandler; + HandlerData = BadAllocErrorHandlerUserData; + } + + if (Handler) { + Handler(HandlerData, Reason, GenCrashDiag); + llvm_unreachable("bad alloc handler should not return"); + } + +#ifdef LLVM_ENABLE_EXCEPTIONS + // If exceptions are enabled, make OOM in malloc look like OOM in new. + throw std::bad_alloc(); +#else + // Don't call the normal error handler. It may allocate memory. Directly write + // an OOM to stderr and abort. + char OOMMessage[] = "LLVM ERROR: out of memory\n"; + (void)::write(2, OOMMessage, strlen(OOMMessage)); + abort(); +#endif +} + void llvm::llvm_unreachable_internal(const char *msg, const char *file, unsigned line) { // This code intentionally doesn't call the ErrorHandler callback, because diff --git a/lib/Support/Host.cpp b/lib/Support/Host.cpp index 232efe648b030..9f22f89b3c9ef 100644 --- a/lib/Support/Host.cpp +++ b/lib/Support/Host.cpp @@ -281,11 +281,17 @@ enum ProcessorVendors { }; enum ProcessorTypes { - INTEL_ATOM = 1, + INTEL_BONNELL = 1, INTEL_CORE2, INTEL_COREI7, AMDFAM10H, AMDFAM15H, + INTEL_SILVERMONT, + INTEL_KNL, + AMD_BTVER1, + AMD_BTVER2, + AMDFAM17H, + // Entries below this are not in libgcc/compiler-rt. INTEL_i386, INTEL_i486, INTEL_PENTIUM, @@ -295,16 +301,13 @@ enum ProcessorTypes { INTEL_PENTIUM_IV, INTEL_PENTIUM_M, INTEL_CORE_DUO, - INTEL_XEONPHI, INTEL_X86_64, INTEL_NOCONA, INTEL_PRESCOTT, AMD_i486, AMDPENTIUM, AMDATHLON, - AMDFAM14H, - AMDFAM16H, - AMDFAM17H, + INTEL_GOLDMONT, CPU_TYPE_MAX }; @@ -317,34 +320,26 @@ enum ProcessorSubtypes { AMDFAM10H_ISTANBUL, AMDFAM15H_BDVER1, AMDFAM15H_BDVER2, - INTEL_PENTIUM_MMX, - INTEL_CORE2_65, - INTEL_CORE2_45, + AMDFAM15H_BDVER3, + AMDFAM15H_BDVER4, + AMDFAM17H_ZNVER1, INTEL_COREI7_IVYBRIDGE, INTEL_COREI7_HASWELL, INTEL_COREI7_BROADWELL, INTEL_COREI7_SKYLAKE, INTEL_COREI7_SKYLAKE_AVX512, - INTEL_ATOM_BONNELL, - INTEL_ATOM_SILVERMONT, - INTEL_ATOM_GOLDMONT, - INTEL_KNIGHTS_LANDING, + // Entries below this are not in libgcc/compiler-rt. 
+ INTEL_PENTIUM_MMX, + INTEL_CORE2_65, + INTEL_CORE2_45, AMDPENTIUM_K6, AMDPENTIUM_K62, AMDPENTIUM_K63, AMDPENTIUM_GEODE, - AMDATHLON_TBIRD, - AMDATHLON_MP, + AMDATHLON_CLASSIC, AMDATHLON_XP, + AMDATHLON_K8, AMDATHLON_K8SSE3, - AMDATHLON_OPTERON, - AMDATHLON_FX, - AMDATHLON_64, - AMD_BTVER1, - AMD_BTVER2, - AMDFAM15H_BDVER3, - AMDFAM15H_BDVER4, - AMDFAM17H_ZNVER1, CPU_SUBTYPE_MAX }; @@ -360,9 +355,28 @@ enum ProcessorFeatures { FEATURE_SSE4_2, FEATURE_AVX, FEATURE_AVX2, - FEATURE_AVX512, - FEATURE_AVX512SAVE, - FEATURE_MOVBE, + FEATURE_SSE4_A, + FEATURE_FMA4, + FEATURE_XOP, + FEATURE_FMA, + FEATURE_AVX512F, + FEATURE_BMI, + FEATURE_BMI2, + FEATURE_AES, + FEATURE_PCLMUL, + FEATURE_AVX512VL, + FEATURE_AVX512BW, + FEATURE_AVX512DQ, + FEATURE_AVX512CD, + FEATURE_AVX512ER, + FEATURE_AVX512PF, + FEATURE_AVX512VBMI, + FEATURE_AVX512IFMA, + FEATURE_AVX5124VNNIW, + FEATURE_AVX5124FMAPS, + FEATURE_AVX512VPOPCNTDQ, + // Only one bit free left in the first 32 features. + FEATURE_MOVBE = 32, FEATURE_ADX, FEATURE_EM64T }; @@ -406,7 +420,6 @@ static bool isCpuIdSupported() { /// the specified arguments. If we can't run cpuid on the host, return true. static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX, unsigned *rECX, unsigned *rEDX) { -#if defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER) #if defined(__GNUC__) || defined(__clang__) #if defined(__x86_64__) // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually. @@ -416,14 +429,16 @@ static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX, "xchgq\t%%rbx, %%rsi\n\t" : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) : "a"(value)); + return false; #elif defined(__i386__) __asm__("movl\t%%ebx, %%esi\n\t" "cpuid\n\t" "xchgl\t%%ebx, %%esi\n\t" : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) : "a"(value)); + return false; #else - assert(0 && "This method is defined only for x86."); + return true; #endif #elif defined(_MSC_VER) // The MSVC intrinsic is portable across x86 and x64. @@ -433,7 +448,6 @@ static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX, *rEBX = registers[1]; *rECX = registers[2]; *rEDX = registers[3]; -#endif return false; #else return true; @@ -446,16 +460,16 @@ static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX, static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf, unsigned *rEAX, unsigned *rEBX, unsigned *rECX, unsigned *rEDX) { -#if defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER) #if defined(__x86_64__) || defined(_M_X64) #if defined(__GNUC__) || defined(__clang__) - // gcc doesn't know cpuid would clobber ebx/rbx. Preseve it manually. + // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually. // FIXME: should we save this for Clang? 
__asm__("movq\t%%rbx, %%rsi\n\t" "cpuid\n\t" "xchgq\t%%rbx, %%rsi\n\t" : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) : "a"(value), "c"(subleaf)); + return false; #elif defined(_MSC_VER) int registers[4]; __cpuidex(registers, value, subleaf); @@ -463,6 +477,9 @@ static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf, *rEBX = registers[1]; *rECX = registers[2]; *rEDX = registers[3]; + return false; +#else + return true; #endif #elif defined(__i386__) || defined(_M_IX86) #if defined(__GNUC__) || defined(__clang__) @@ -471,6 +488,7 @@ static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf, "xchgl\t%%ebx, %%esi\n\t" : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) : "a"(value), "c"(subleaf)); + return false; #elif defined(_MSC_VER) __asm { mov eax,value @@ -485,16 +503,16 @@ static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf, mov esi,rEDX mov dword ptr [esi],edx } -#endif + return false; #else - assert(0 && "This method is defined only for x86."); + return true; #endif - return false; #else return true; #endif } +// Read control register 0 (XCR0). Used to detect features such as AVX. static bool getX86XCR0(unsigned *rEAX, unsigned *rEDX) { #if defined(__GNUC__) || defined(__clang__) // Check xgetbv; this uses a .byte sequence instead of the instruction @@ -526,9 +544,10 @@ static void detectX86FamilyModel(unsigned EAX, unsigned *Family, } static void -getIntelProcessorTypeAndSubtype(unsigned int Family, unsigned int Model, - unsigned int Brand_id, unsigned int Features, - unsigned *Type, unsigned *Subtype) { +getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model, + unsigned Brand_id, unsigned Features, + unsigned Features2, unsigned *Type, + unsigned *Subtype) { if (Brand_id != 0) return; switch (Family) { @@ -681,12 +700,7 @@ getIntelProcessorTypeAndSubtype(unsigned int Family, unsigned int Model, // Skylake Xeon: case 0x55: *Type = INTEL_COREI7; - // Check that we really have AVX512 - if (Features & (1 << FEATURE_AVX512)) { - *Subtype = INTEL_COREI7_SKYLAKE_AVX512; // "skylake-avx512" - } else { - *Subtype = INTEL_COREI7_SKYLAKE; // "skylake" - } + *Subtype = INTEL_COREI7_SKYLAKE_AVX512; // "skylake-avx512" break; case 0x1c: // Most 45 nm Intel Atom processors @@ -694,8 +708,7 @@ getIntelProcessorTypeAndSubtype(unsigned int Family, unsigned int Model, case 0x27: // 32 nm Atom Medfield case 0x35: // 32 nm Atom Midview case 0x36: // 32 nm Atom Midview - *Type = INTEL_ATOM; - *Subtype = INTEL_ATOM_BONNELL; + *Type = INTEL_BONNELL; break; // "bonnell" // Atom Silvermont codes from the Intel software optimization guide. @@ -705,27 +718,23 @@ getIntelProcessorTypeAndSubtype(unsigned int Family, unsigned int Model, case 0x5a: case 0x5d: case 0x4c: // really airmont - *Type = INTEL_ATOM; - *Subtype = INTEL_ATOM_SILVERMONT; + *Type = INTEL_SILVERMONT; break; // "silvermont" // Goldmont: case 0x5c: case 0x5f: - *Type = INTEL_ATOM; - *Subtype = INTEL_ATOM_GOLDMONT; + *Type = INTEL_GOLDMONT; break; // "goldmont" case 0x57: - *Type = INTEL_XEONPHI; // knl - *Subtype = INTEL_KNIGHTS_LANDING; + *Type = INTEL_KNL; // knl break; default: // Unknown family 6 CPU, try to guess. 
- if (Features & (1 << FEATURE_AVX512)) { - *Type = INTEL_XEONPHI; // knl - *Subtype = INTEL_KNIGHTS_LANDING; + if (Features & (1 << FEATURE_AVX512F)) { + *Type = INTEL_KNL; // knl break; } - if (Features & (1 << FEATURE_ADX)) { + if (Features2 & (1 << (FEATURE_ADX - 32))) { *Type = INTEL_COREI7; *Subtype = INTEL_COREI7_BROADWELL; break; @@ -741,9 +750,8 @@ getIntelProcessorTypeAndSubtype(unsigned int Family, unsigned int Model, break; } if (Features & (1 << FEATURE_SSE4_2)) { - if (Features & (1 << FEATURE_MOVBE)) { - *Type = INTEL_ATOM; - *Subtype = INTEL_ATOM_SILVERMONT; + if (Features2 & (1 << (FEATURE_MOVBE - 32))) { + *Type = INTEL_SILVERMONT; } else { *Type = INTEL_COREI7; *Subtype = INTEL_COREI7_NEHALEM; @@ -756,16 +764,15 @@ getIntelProcessorTypeAndSubtype(unsigned int Family, unsigned int Model, break; } if (Features & (1 << FEATURE_SSSE3)) { - if (Features & (1 << FEATURE_MOVBE)) { - *Type = INTEL_ATOM; - *Subtype = INTEL_ATOM_BONNELL; // "bonnell" + if (Features2 & (1 << (FEATURE_MOVBE - 32))) { + *Type = INTEL_BONNELL; // "bonnell" } else { *Type = INTEL_CORE2; // "core2" *Subtype = INTEL_CORE2_65; } break; } - if (Features & (1 << FEATURE_EM64T)) { + if (Features2 & (1 << (FEATURE_EM64T - 32))) { *Type = INTEL_X86_64; break; // x86-64 } @@ -796,8 +803,8 @@ getIntelProcessorTypeAndSubtype(unsigned int Family, unsigned int Model, // Intel Xeon processor, Intel Xeon processor MP, Intel Celeron // processor, and Mobile Intel Celeron processor. All processors // are model 02h and manufactured using the 0.13 micron process. - *Type = - ((Features & (1 << FEATURE_EM64T)) ? INTEL_X86_64 : INTEL_PENTIUM_IV); + *Type = ((Features2 & (1 << (FEATURE_EM64T - 32))) ? INTEL_X86_64 + : INTEL_PENTIUM_IV); break; case 3: // Pentium 4 processor, Intel Xeon processor, Intel Celeron D @@ -811,13 +818,13 @@ getIntelProcessorTypeAndSubtype(unsigned int Family, unsigned int Model, // Extreme Edition, Intel Xeon processor, Intel Xeon processor // MP, Intel Celeron D processor. All processors are model 06h // and manufactured using the 65 nm process. - *Type = - ((Features & (1 << FEATURE_EM64T)) ? INTEL_NOCONA : INTEL_PRESCOTT); + *Type = ((Features2 & (1 << (FEATURE_EM64T - 32))) ? INTEL_NOCONA + : INTEL_PRESCOTT); break; default: - *Type = - ((Features & (1 << FEATURE_EM64T)) ? INTEL_X86_64 : INTEL_PENTIUM_IV); + *Type = ((Features2 & (1 << (FEATURE_EM64T - 32))) ? INTEL_X86_64 + : INTEL_PENTIUM_IV); break; } break; @@ -827,10 +834,8 @@ getIntelProcessorTypeAndSubtype(unsigned int Family, unsigned int Model, } } -static void getAMDProcessorTypeAndSubtype(unsigned int Family, - unsigned int Model, - unsigned int Features, - unsigned *Type, +static void getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model, + unsigned Features, unsigned *Type, unsigned *Subtype) { // FIXME: this poorly matches the generated SubtargetFeatureKV table. 
There // appears to be no way to generate the wide variety of AMD-specific targets @@ -860,38 +865,20 @@ static void getAMDProcessorTypeAndSubtype(unsigned int Family, break; case 6: *Type = AMDATHLON; - switch (Model) { - case 4: - *Subtype = AMDATHLON_TBIRD; - break; // "athlon-tbird" - case 6: - case 7: - case 8: - *Subtype = AMDATHLON_MP; - break; // "athlon-mp" - case 10: + if (Features & (1 << FEATURE_SSE)) { *Subtype = AMDATHLON_XP; break; // "athlon-xp" } - break; + *Subtype = AMDATHLON_CLASSIC; + break; // "athlon" case 15: *Type = AMDATHLON; if (Features & (1 << FEATURE_SSE3)) { *Subtype = AMDATHLON_K8SSE3; break; // "k8-sse3" } - switch (Model) { - case 1: - *Subtype = AMDATHLON_OPTERON; - break; // "opteron" - case 5: - *Subtype = AMDATHLON_FX; - break; // "athlon-fx"; also opteron - default: - *Subtype = AMDATHLON_64; - break; // "athlon64" - } - break; + *Subtype = AMDATHLON_K8; + break; // "k8" case 16: *Type = AMDFAM10H; // "amdfam10" switch (Model) { @@ -907,19 +894,13 @@ static void getAMDProcessorTypeAndSubtype(unsigned int Family, } break; case 20: - *Type = AMDFAM14H; - *Subtype = AMD_BTVER1; + *Type = AMD_BTVER1; break; // "btver1"; case 21: *Type = AMDFAM15H; - if (!(Features & - (1 << FEATURE_AVX))) { // If no AVX support, provide a sane fallback. - *Subtype = AMD_BTVER1; - break; // "btver1" - } - if (Model >= 0x50 && Model <= 0x6f) { + if (Model >= 0x60 && Model <= 0x7f) { *Subtype = AMDFAM15H_BDVER4; - break; // "bdver4"; 50h-6Fh: Excavator + break; // "bdver4"; 60h-7Fh: Excavator } if (Model >= 0x30 && Model <= 0x3f) { *Subtype = AMDFAM15H_BDVER3; @@ -935,39 +916,52 @@ static void getAMDProcessorTypeAndSubtype(unsigned int Family, } break; case 22: - *Type = AMDFAM16H; - if (!(Features & - (1 << FEATURE_AVX))) { // If no AVX support provide a sane fallback. 
- *Subtype = AMD_BTVER1; - break; // "btver1"; - } - *Subtype = AMD_BTVER2; + *Type = AMD_BTVER2; break; // "btver2" case 23: *Type = AMDFAM17H; - if (Features & (1 << FEATURE_ADX)) { - *Subtype = AMDFAM17H_ZNVER1; - break; // "znver1" - } - *Subtype = AMD_BTVER1; + *Subtype = AMDFAM17H_ZNVER1; break; default: break; // "generic" } } -static unsigned getAvailableFeatures(unsigned int ECX, unsigned int EDX, - unsigned MaxLeaf) { +static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf, + unsigned *FeaturesOut, + unsigned *Features2Out) { unsigned Features = 0; - unsigned int EAX, EBX; - Features |= (((EDX >> 23) & 1) << FEATURE_MMX); - Features |= (((EDX >> 25) & 1) << FEATURE_SSE); - Features |= (((EDX >> 26) & 1) << FEATURE_SSE2); - Features |= (((ECX >> 0) & 1) << FEATURE_SSE3); - Features |= (((ECX >> 9) & 1) << FEATURE_SSSE3); - Features |= (((ECX >> 19) & 1) << FEATURE_SSE4_1); - Features |= (((ECX >> 20) & 1) << FEATURE_SSE4_2); - Features |= (((ECX >> 22) & 1) << FEATURE_MOVBE); + unsigned Features2 = 0; + unsigned EAX, EBX; + + if ((EDX >> 15) & 1) + Features |= 1 << FEATURE_CMOV; + if ((EDX >> 23) & 1) + Features |= 1 << FEATURE_MMX; + if ((EDX >> 25) & 1) + Features |= 1 << FEATURE_SSE; + if ((EDX >> 26) & 1) + Features |= 1 << FEATURE_SSE2; + + if ((ECX >> 0) & 1) + Features |= 1 << FEATURE_SSE3; + if ((ECX >> 1) & 1) + Features |= 1 << FEATURE_PCLMUL; + if ((ECX >> 9) & 1) + Features |= 1 << FEATURE_SSSE3; + if ((ECX >> 12) & 1) + Features |= 1 << FEATURE_FMA; + if ((ECX >> 19) & 1) + Features |= 1 << FEATURE_SSE4_1; + if ((ECX >> 20) & 1) + Features |= 1 << FEATURE_SSE4_2; + if ((ECX >> 23) & 1) + Features |= 1 << FEATURE_POPCNT; + if ((ECX >> 25) & 1) + Features |= 1 << FEATURE_AES; + + if ((ECX >> 22) & 1) + Features2 |= 1 << (FEATURE_MOVBE - 32); // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV // indicates that the AVX registers will be saved and restored on context @@ -976,20 +970,65 @@ static unsigned getAvailableFeatures(unsigned int ECX, unsigned int EDX, bool HasAVX = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) && ((EAX & 0x6) == 0x6); bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0); + + if (HasAVX) + Features |= 1 << FEATURE_AVX; + bool HasLeaf7 = MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX); - bool HasADX = HasLeaf7 && ((EBX >> 19) & 1); - bool HasAVX2 = HasAVX && HasLeaf7 && (EBX & 0x20); - bool HasAVX512 = HasLeaf7 && HasAVX512Save && ((EBX >> 16) & 1); - Features |= (HasAVX << FEATURE_AVX); - Features |= (HasAVX2 << FEATURE_AVX2); - Features |= (HasAVX512 << FEATURE_AVX512); - Features |= (HasAVX512Save << FEATURE_AVX512SAVE); - Features |= (HasADX << FEATURE_ADX); - - getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX); - Features |= (((EDX >> 29) & 0x1) << FEATURE_EM64T); - return Features; + + if (HasLeaf7 && ((EBX >> 3) & 1)) + Features |= 1 << FEATURE_BMI; + if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVX) + Features |= 1 << FEATURE_AVX2; + if (HasLeaf7 && ((EBX >> 9) & 1)) + Features |= 1 << FEATURE_BMI2; + if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save) + Features |= 1 << FEATURE_AVX512F; + if (HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save) + Features |= 1 << FEATURE_AVX512DQ; + if (HasLeaf7 && ((EBX >> 19) & 1)) + Features2 |= 1 << (FEATURE_ADX - 32); + if (HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save) + Features |= 1 << FEATURE_AVX512IFMA; + if (HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save) + Features |= 1 << FEATURE_AVX512PF; + if (HasLeaf7 && ((EBX >> 27) & 
1) && HasAVX512Save) + Features |= 1 << FEATURE_AVX512ER; + if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save) + Features |= 1 << FEATURE_AVX512CD; + if (HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save) + Features |= 1 << FEATURE_AVX512BW; + if (HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save) + Features |= 1 << FEATURE_AVX512VL; + + if (HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save) + Features |= 1 << FEATURE_AVX512VBMI; + if (HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save) + Features |= 1 << FEATURE_AVX512VPOPCNTDQ; + + if (HasLeaf7 && ((EDX >> 2) & 1) && HasAVX512Save) + Features |= 1 << FEATURE_AVX5124VNNIW; + if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save) + Features |= 1 << FEATURE_AVX5124FMAPS; + + unsigned MaxExtLevel; + getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX); + + bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 && + !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX); + if (HasExtLeaf1 && ((ECX >> 6) & 1)) + Features |= 1 << FEATURE_SSE4_A; + if (HasExtLeaf1 && ((ECX >> 11) & 1)) + Features |= 1 << FEATURE_XOP; + if (HasExtLeaf1 && ((ECX >> 16) & 1)) + Features |= 1 << FEATURE_FMA4; + + if (HasExtLeaf1 && ((EDX >> 29) & 1)) + Features2 |= 1 << (FEATURE_EM64T - 32); + + *FeaturesOut = Features; + *Features2Out = Features2; } StringRef sys::getHostCPUName() { @@ -1004,23 +1043,22 @@ StringRef sys::getHostCPUName() { if(!isCpuIdSupported()) return "generic"; #endif - if (getX86CpuIDAndInfo(0, &MaxLeaf, &Vendor, &ECX, &EDX)) - return "generic"; - if (getX86CpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX)) + if (getX86CpuIDAndInfo(0, &MaxLeaf, &Vendor, &ECX, &EDX) || MaxLeaf < 1) return "generic"; + getX86CpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX); unsigned Brand_id = EBX & 0xff; unsigned Family = 0, Model = 0; - unsigned Features = 0; + unsigned Features = 0, Features2 = 0; detectX86FamilyModel(EAX, &Family, &Model); - Features = getAvailableFeatures(ECX, EDX, MaxLeaf); + getAvailableFeatures(ECX, EDX, MaxLeaf, &Features, &Features2); unsigned Type; unsigned Subtype; if (Vendor == SIG_INTEL) { - getIntelProcessorTypeAndSubtype(Family, Model, Brand_id, Features, &Type, - &Subtype); + getIntelProcessorTypeAndSubtype(Family, Model, Brand_id, Features, + Features2, &Type, &Subtype); switch (Type) { case INTEL_i386: return "i386"; @@ -1049,7 +1087,7 @@ StringRef sys::getHostCPUName() { case INTEL_CORE2_45: return "penryn"; default: - return "core2"; + llvm_unreachable("Unexpected subtype!"); } case INTEL_COREI7: switch (Subtype) { @@ -1070,21 +1108,16 @@ StringRef sys::getHostCPUName() { case INTEL_COREI7_SKYLAKE_AVX512: return "skylake-avx512"; default: - return "corei7"; + llvm_unreachable("Unexpected subtype!"); } - case INTEL_ATOM: - switch (Subtype) { - case INTEL_ATOM_BONNELL: - return "bonnell"; - case INTEL_ATOM_GOLDMONT: - return "goldmont"; - case INTEL_ATOM_SILVERMONT: - return "silvermont"; - default: - return "atom"; - } - case INTEL_XEONPHI: - return "knl"; /*update for more variants added*/ + case INTEL_BONNELL: + return "bonnell"; + case INTEL_SILVERMONT: + return "silvermont"; + case INTEL_GOLDMONT: + return "goldmont"; + case INTEL_KNL: + return "knl"; case INTEL_X86_64: return "x86-64"; case INTEL_NOCONA: @@ -1092,7 +1125,7 @@ StringRef sys::getHostCPUName() { case INTEL_PRESCOTT: return "prescott"; default: - return "generic"; + break; } } else if (Vendor == SIG_AMD) { getAMDProcessorTypeAndSubtype(Family, Model, Features, &Type, &Subtype); @@ -1114,31 +1147,24 @@ StringRef sys::getHostCPUName() { } case AMDATHLON: switch (Subtype) { - case 
AMDATHLON_TBIRD: - return "athlon-tbird"; - case AMDATHLON_MP: - return "athlon-mp"; + case AMDATHLON_CLASSIC: + return "athlon"; case AMDATHLON_XP: return "athlon-xp"; + case AMDATHLON_K8: + return "k8"; case AMDATHLON_K8SSE3: return "k8-sse3"; - case AMDATHLON_OPTERON: - return "opteron"; - case AMDATHLON_FX: - return "athlon-fx"; - case AMDATHLON_64: - return "athlon64"; default: - return "athlon"; + llvm_unreachable("Unexpected subtype!"); } case AMDFAM10H: - if(Subtype == AMDFAM10H_BARCELONA) - return "barcelona"; return "amdfam10"; - case AMDFAM14H: + case AMD_BTVER1: return "btver1"; case AMDFAM15H: switch (Subtype) { + default: // There are gaps in the subtype detection. case AMDFAM15H_BDVER1: return "bdver1"; case AMDFAM15H_BDVER2: @@ -1147,31 +1173,13 @@ StringRef sys::getHostCPUName() { return "bdver3"; case AMDFAM15H_BDVER4: return "bdver4"; - case AMD_BTVER1: - return "btver1"; - default: - return "amdfam15"; - } - case AMDFAM16H: - switch (Subtype) { - case AMD_BTVER1: - return "btver1"; - case AMD_BTVER2: - return "btver2"; - default: - return "amdfam16"; } + case AMD_BTVER2: + return "btver2"; case AMDFAM17H: - switch (Subtype) { - case AMD_BTVER1: - return "btver1"; - case AMDFAM17H_ZNVER1: - return "znver1"; - default: - return "amdfam17"; - } + return "znver1"; default: - return "generic"; + break; } } return "generic"; @@ -1494,7 +1502,8 @@ bool sys::getHostCPUFeatures(StringMap<bool> &Features) { return false; } #endif std::string sys::getProcessTriple() { - Triple PT(Triple::normalize(LLVM_HOST_TRIPLE)); + std::string TargetTripleString = updateTripleOSVersion(LLVM_HOST_TRIPLE); + Triple PT(Triple::normalize(TargetTripleString)); if (sizeof(void *) == 8 && PT.isArch32Bit()) PT = PT.get64BitArchVariant(); diff --git a/lib/Support/Mutex.cpp b/lib/Support/Mutex.cpp index bdd02105f6f0e..b1d5e7c0d9912 100644 --- a/lib/Support/Mutex.cpp +++ b/lib/Support/Mutex.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/Mutex.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Config/config.h" //===----------------------------------------------------------------------===// @@ -47,6 +48,10 @@ MutexImpl::MutexImpl( bool recursive) // Declare the pthread_mutex data structures pthread_mutex_t* mutex = static_cast<pthread_mutex_t*>(malloc(sizeof(pthread_mutex_t))); + + if (mutex == nullptr) + report_bad_alloc_error("Mutex allocation failed"); + pthread_mutexattr_t attr; // Initialize the mutex attributes diff --git a/lib/Support/Unix/DynamicLibrary.inc b/lib/Support/Unix/DynamicLibrary.inc index aad77f19c35a5..f05103ccd1ebe 100644 --- a/lib/Support/Unix/DynamicLibrary.inc +++ b/lib/Support/Unix/DynamicLibrary.inc @@ -20,6 +20,9 @@ DynamicLibrary::HandleSet::~HandleSet() { ::dlclose(Handle); if (Process) ::dlclose(Process); + + // llvm_shutdown called, Return to default + DynamicLibrary::SearchOrder = DynamicLibrary::SO_Linker; } void *DynamicLibrary::HandleSet::DLOpen(const char *File, std::string *Err) { diff --git a/lib/Support/Unix/Host.inc b/lib/Support/Unix/Host.inc index 0ba6a25aa198d..5580e63893c6d 100644 --- a/lib/Support/Unix/Host.inc +++ b/lib/Support/Unix/Host.inc @@ -34,18 +34,31 @@ static std::string getOSVersion() { return info.release; } -std::string sys::getDefaultTargetTriple() { - std::string TargetTripleString(LLVM_DEFAULT_TARGET_TRIPLE); - - // On darwin, we want to update the version to match that of the - // target. 
+static std::string updateTripleOSVersion(std::string TargetTripleString) { + // On darwin, we want to update the version to match that of the target. std::string::size_type DarwinDashIdx = TargetTripleString.find("-darwin"); if (DarwinDashIdx != std::string::npos) { TargetTripleString.resize(DarwinDashIdx + strlen("-darwin")); TargetTripleString += getOSVersion(); + return TargetTripleString; + } + std::string::size_type MacOSDashIdx = TargetTripleString.find("-macos"); + if (MacOSDashIdx != std::string::npos) { + TargetTripleString.resize(MacOSDashIdx); + // Reset the OS to darwin as the OS version from `uname` doesn't use the + // macOS version scheme. + TargetTripleString += "-darwin"; + TargetTripleString += getOSVersion(); } + return TargetTripleString; +} + +std::string sys::getDefaultTargetTriple() { + std::string TargetTripleString = + updateTripleOSVersion(LLVM_DEFAULT_TARGET_TRIPLE); - // Override the default target with an environment variable named by LLVM_TARGET_TRIPLE_ENV. + // Override the default target with an environment variable named by + // LLVM_TARGET_TRIPLE_ENV. #if defined(LLVM_TARGET_TRIPLE_ENV) if (const char *EnvTriple = std::getenv(LLVM_TARGET_TRIPLE_ENV)) TargetTripleString = EnvTriple; diff --git a/lib/Support/Unix/Program.inc b/lib/Support/Unix/Program.inc index 1704fa4799428..c866d5b5a84ef 100644 --- a/lib/Support/Unix/Program.inc +++ b/lib/Support/Unix/Program.inc @@ -40,9 +40,6 @@ #include <unistd.h> #endif #ifdef HAVE_POSIX_SPAWN -#ifdef __sun__ -#define _RESTRICT_KYWD -#endif #include <spawn.h> #if defined(__APPLE__) diff --git a/lib/Support/Windows/DynamicLibrary.inc b/lib/Support/Windows/DynamicLibrary.inc index caf1a0a658de0..083ea902eeb29 100644 --- a/lib/Support/Windows/DynamicLibrary.inc +++ b/lib/Support/Windows/DynamicLibrary.inc @@ -28,6 +28,8 @@ DynamicLibrary::HandleSet::~HandleSet() { // 'Process' should not be released on Windows. assert((!Process || Process==this) && "Bad Handle"); + // llvm_shutdown called, Return to default + DynamicLibrary::SearchOrder = DynamicLibrary::SO_Linker; } void *DynamicLibrary::HandleSet::DLOpen(const char *File, std::string *Err) { diff --git a/lib/Support/Windows/Host.inc b/lib/Support/Windows/Host.inc index 7e196cf0ce18a..90a6fb316703f 100644 --- a/lib/Support/Windows/Host.inc +++ b/lib/Support/Windows/Host.inc @@ -17,6 +17,10 @@ using namespace llvm; +static std::string updateTripleOSVersion(std::string Triple) { + return Triple; +} + std::string sys::getDefaultTargetTriple() { const char *Triple = LLVM_DEFAULT_TARGET_TRIPLE; diff --git a/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp b/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp index 4a7e0b2b803ee..db1fbe069f4d2 100644 --- a/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp +++ b/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp @@ -509,7 +509,7 @@ int AArch64A57FPLoadBalancing::scavengeRegister(Chain *G, Color C, assert(ChainBegin != ChainEnd && "Chain should contain instructions"); do { --I; - Units.accumulateBackward(*I); + Units.accumulate(*I); } while (I != ChainBegin); // Make sure we allocate in-order, to get the cheapest registers first. 
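Stepping back to the Unix/Host.inc change above: updateTripleOSVersion now also rewrites -macos triples to -darwin plus the uname-derived version (the uname number follows the Darwin scheme, not the macOS one), and getProcessTriple in Host.cpp routes LLVM_HOST_TRIPLE through the same helper. The effect is observable from the two public entry points:

#include "llvm/Support/Host.h"
#include "llvm/Support/raw_ostream.h"

int main() {
  // On a Darwin host both strings now end in a darwinNN.N.N suffix, even
  // when the configured triple was spelled with -macos.
  llvm::outs() << llvm::sys::getDefaultTargetTriple() << "\n";
  llvm::outs() << llvm::sys::getProcessTriple() << "\n";
  return 0;
}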
diff --git a/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp b/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp index 6f8dd3e3ac0ca..b3b738584b409 100644 --- a/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp +++ b/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp @@ -113,7 +113,7 @@ struct LDTLSCleanup : public MachineFunctionPass { return Copy; } - // Create a virtal register in *TLSBaseAddrReg, and populate it by + // Create a virtual register in *TLSBaseAddrReg, and populate it by // inserting a copy instruction after I. Returns the new instruction. MachineInstr *setRegister(MachineInstr &I, unsigned *TLSBaseAddrReg) { MachineFunction *MF = I.getParent()->getParent(); diff --git a/lib/Target/AArch64/AArch64CondBrTuning.cpp b/lib/Target/AArch64/AArch64CondBrTuning.cpp index 0a948812ff337..51700f9059799 100644 --- a/lib/Target/AArch64/AArch64CondBrTuning.cpp +++ b/lib/Target/AArch64/AArch64CondBrTuning.cpp @@ -167,6 +167,7 @@ bool AArch64CondBrTuning::tryToTuneBranch(MachineInstr &MI, case AArch64::SUBWrs: case AArch64::SUBWrx: IsFlagSetting = false; + LLVM_FALLTHROUGH; case AArch64::ADDSWri: case AArch64::ADDSWrr: case AArch64::ADDSWrs: @@ -226,6 +227,7 @@ bool AArch64CondBrTuning::tryToTuneBranch(MachineInstr &MI, case AArch64::SUBXrs: case AArch64::SUBXrx: IsFlagSetting = false; + LLVM_FALLTHROUGH; case AArch64::ADDSXri: case AArch64::ADDSXrr: case AArch64::ADDSXrs: diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp index 7bf2097c17cee..3682b62d2b84d 100644 --- a/lib/Target/AArch64/AArch64FastISel.cpp +++ b/lib/Target/AArch64/AArch64FastISel.cpp @@ -2114,7 +2114,7 @@ bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr, switch (VT.SimpleTy) { default: llvm_unreachable("Unexpected value type."); - case MVT::i1: VTIsi1 = true; + case MVT::i1: VTIsi1 = true; LLVM_FALLTHROUGH; case MVT::i8: Opc = OpcTable[Idx][0]; break; case MVT::i16: Opc = OpcTable[Idx][1]; break; case MVT::i32: Opc = OpcTable[Idx][2]; break; diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index aaf32a499bc3c..60fde5caa3393 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -8364,9 +8364,9 @@ static bool findEXTRHalf(SDValue N, SDValue &Src, uint32_t &ShiftAmount, /// EXTR instruction extracts a contiguous chunk of bits from two existing /// registers viewed as a high/low pair. This function looks for the pattern: -/// (or (shl VAL1, #N), (srl VAL2, #RegWidth-N)) and replaces it with an -/// EXTR. Can't quite be done in TableGen because the two immediates aren't -/// independent. +/// <tt>(or (shl VAL1, \#N), (srl VAL2, \#RegWidth-N))</tt> and replaces it +/// with an EXTR. Can't quite be done in TableGen because the two immediates +/// aren't independent. static SDValue tryCombineToEXTR(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { SelectionDAG &DAG = DCI.DAG; @@ -9531,7 +9531,7 @@ static SDValue performPostLD1Combine(SDNode *N, return SDValue(); } -/// Simplify \Addr given that the top byte of it is ignored by HW during +/// Simplify ``Addr`` given that the top byte of it is ignored by HW during /// address translation. 
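The LLVM_FALLTHROUGH annotations added to the AArch64 switches above mark fall-throughs that were always intentional; the macro (from llvm/Support/Compiler.h) expands to [[clang::fallthrough]] where available and keeps -Wimplicit-fallthrough clean. The shape of the fix, reduced to a stub with illustrative opcodes:

#include "llvm/Support/Compiler.h"

static bool classifyAddSub(unsigned Opc, bool &IsFlagSetting) {
  switch (Opc) {
  case 0: // SUB-like form: record that flags are untouched...
    IsFlagSetting = false;
    LLVM_FALLTHROUGH; // ...then share the rest of the handling.
  case 1: // ADDS-like form
    return true;
  default:
    return false;
  }
}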
static bool performTBISimplification(SDValue Addr, TargetLowering::DAGCombinerInfo &DCI, diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp index 314e89bbca863..dba3e4bdf82f1 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -1282,6 +1282,7 @@ static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC) { case AArch64CC::HI: // Z clear and C set case AArch64CC::LS: // Z set or C clear UsedFlags.Z = true; + LLVM_FALLTHROUGH; case AArch64CC::HS: // C set case AArch64CC::LO: // C clear UsedFlags.C = true; @@ -1300,6 +1301,7 @@ static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC) { case AArch64CC::GT: // Z clear, N and V the same case AArch64CC::LE: // Z set, N and V differ UsedFlags.Z = true; + LLVM_FALLTHROUGH; case AArch64CC::GE: // N and V the same case AArch64CC::LT: // N and V differ UsedFlags.N = true; @@ -3669,12 +3671,17 @@ enum class FMAInstKind { Default, Indexed, Accumulator }; /// F|MUL I=A,B,0 /// F|ADD R,I,C /// ==> F|MADD R,A,B,C +/// \param MF Containing MachineFunction +/// \param MRI Register information +/// \param TII Target information /// \param Root is the F|ADD instruction /// \param [out] InsInstrs is a vector of machine instructions and will /// contain the generated madd instruction /// \param IdxMulOpd is index of operand in Root that is the result of /// the F|MUL. In the example above IdxMulOpd is 1. /// \param MaddOpc the opcode fo the f|madd instruction +/// \param RC Register class of operands +/// \param kind of fma instruction (addressing mode) to be generated static MachineInstr * genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, @@ -3733,6 +3740,9 @@ genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI, /// ADD R,I,Imm /// ==> ORR V, ZR, Imm /// ==> MADD R,A,B,V +/// \param MF Containing MachineFunction +/// \param MRI Register information +/// \param TII Target information /// \param Root is the ADD instruction /// \param [out] InsInstrs is a vector of machine instructions and will /// contain the generated madd instruction @@ -3741,6 +3751,7 @@ genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI, /// \param MaddOpc the opcode fo the madd instruction /// \param VR is a virtual register that holds the value of an ADD operand /// (V in the example above). +/// \param RC Register class of operands static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs, @@ -4216,26 +4227,36 @@ void AArch64InstrInfo::genAlternativeCodeSequence( /// \brief Replace csincr-branch sequence by simple conditional branch /// /// Examples: -/// 1. +/// 1. \code /// csinc w9, wzr, wzr, <condition code> /// tbnz w9, #0, 0x44 +/// \endcode /// to +/// \code /// b.<inverted condition code> +/// \endcode /// -/// 2. +/// 2. \code /// csinc w9, wzr, wzr, <condition code> /// tbz w9, #0, 0x44 +/// \endcode /// to +/// \code /// b.<condition code> +/// \endcode /// /// Replace compare and branch sequence by TBZ/TBNZ instruction when the /// compare's constant operand is power of 2. 
/// /// Examples: +/// \code /// and w8, w8, #0x400 /// cbnz w8, L1 +/// \endcode /// to +/// \code /// tbnz w8, #10, L1 +/// \endcode /// /// \param MI Conditional Branch /// \return True when the simple conditional branch is generated @@ -4409,6 +4430,13 @@ AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const { return makeArrayRef(TargetFlags); } +ArrayRef<std::pair<MachineMemOperand::Flags, const char *>> +AArch64InstrInfo::getSerializableMachineMemOperandTargetFlags() const { + static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] = + {{MOSuppressPair, "aarch64-suppress-pair"}}; + return makeArrayRef(TargetFlags); +} + unsigned AArch64InstrInfo::getOutliningBenefit(size_t SequenceSize, size_t Occurrences, bool CanBeTailCall) const { diff --git a/lib/Target/AArch64/AArch64InstrInfo.h b/lib/Target/AArch64/AArch64InstrInfo.h index 58e9ce583d44c..0809ede4df2a5 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.h +++ b/lib/Target/AArch64/AArch64InstrInfo.h @@ -263,8 +263,8 @@ public: /// \param Pattern - combiner pattern bool isThroughputPattern(MachineCombinerPattern Pattern) const override; /// Return true when there is potentially a faster code sequence - /// for an instruction chain ending in <Root>. All potential patterns are - /// listed in the <Patterns> array. + /// for an instruction chain ending in ``Root``. All potential patterns are + /// listed in the ``Patterns`` array. bool getMachineCombinerPatterns(MachineInstr &Root, SmallVectorImpl<MachineCombinerPattern> &Patterns) const override; @@ -289,6 +289,8 @@ public: getSerializableDirectMachineOperandTargetFlags() const override; ArrayRef<std::pair<unsigned, const char *>> getSerializableBitmaskMachineOperandTargetFlags() const override; + ArrayRef<std::pair<MachineMemOperand::Flags, const char *>> + getSerializableMachineMemOperandTargetFlags() const override; bool isFunctionSafeToOutlineFrom(MachineFunction &MF) const override; unsigned getOutliningBenefit(size_t SequenceSize, size_t Occurrences, diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td index 6cb723d187af5..0be14673eb20b 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.td +++ b/lib/Target/AArch64/AArch64InstrInfo.td @@ -313,9 +313,6 @@ def AArch64umaxv : SDNode<"AArch64ISD::UMAXV", SDT_AArch64UnaryVec>; //===----------------------------------------------------------------------===// // AArch64 Instruction Predicate Definitions. -def IsDarwin : Predicate<"Subtarget->isTargetDarwin()">; -def IsNotDarwin: Predicate<"!Subtarget->isTargetDarwin()">; - // We could compute these on a per-module basis but doing so requires accessing // the Function object through the <Target>Subtarget and objections were raised // to that (see post-commit review comments for r301750). 
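The comment churn in AArch64InstrInfo above is Doxygen hygiene rather than a behavior change: bare #N immediates in running text need escaping, \Addr was not a valid command, and multi-line assembly examples only render correctly inside \code/\endcode fences. Distilled into one documentation pattern (the declaration exists only to carry the comment):

namespace llvm { class MachineInstr; }

/// \brief Replace a csinc-and-branch sequence with one conditional branch.
///
/// Example (registers illustrative):
/// \code
///   csinc w9, wzr, wzr, <cc>
///   tbnz  w9, #0, 0x44
/// \endcode
/// becomes
/// \code
///   b.<inverted cc>
/// \endcode
///
/// \param MI the conditional branch under inspection
/// \return true when the simpler branch was generated
bool tryToTuneBranch(llvm::MachineInstr &MI);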
@@ -714,10 +711,10 @@ def : InstAlias<"negs $dst, $src$shift", defm UDIV : Div<0, "udiv", udiv>; defm SDIV : Div<1, "sdiv", sdiv>; -def : Pat<(int_aarch64_udiv GPR32:$Rn, GPR32:$Rm), (UDIVWr $Rn, $Rm)>; -def : Pat<(int_aarch64_udiv GPR64:$Rn, GPR64:$Rm), (UDIVXr $Rn, $Rm)>; -def : Pat<(int_aarch64_sdiv GPR32:$Rn, GPR32:$Rm), (SDIVWr $Rn, $Rm)>; -def : Pat<(int_aarch64_sdiv GPR64:$Rn, GPR64:$Rm), (SDIVXr $Rn, $Rm)>; +def : Pat<(int_aarch64_udiv GPR32:$Rn, GPR32:$Rm), (UDIVWr GPR32:$Rn, GPR32:$Rm)>; +def : Pat<(int_aarch64_udiv GPR64:$Rn, GPR64:$Rm), (UDIVXr GPR64:$Rn, GPR64:$Rm)>; +def : Pat<(int_aarch64_sdiv GPR32:$Rn, GPR32:$Rm), (SDIVWr GPR32:$Rn, GPR32:$Rm)>; +def : Pat<(int_aarch64_sdiv GPR64:$Rn, GPR64:$Rm), (SDIVXr GPR64:$Rn, GPR64:$Rm)>; // Variable shift defm ASRV : Shift<0b10, "asr", sra>; diff --git a/lib/Target/AArch64/AArch64InstructionSelector.cpp b/lib/Target/AArch64/AArch64InstructionSelector.cpp index 07ce0e863c5e0..7e275e4d2f463 100644 --- a/lib/Target/AArch64/AArch64InstructionSelector.cpp +++ b/lib/Target/AArch64/AArch64InstructionSelector.cpp @@ -33,6 +33,8 @@ #define DEBUG_TYPE "aarch64-isel" +#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h" + using namespace llvm; #ifndef LLVM_BUILD_GLOBAL_ISEL @@ -212,6 +214,7 @@ static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID, return GenericOpc; } } + break; case AArch64::FPRRegBankID: switch (OpSize) { case 32: @@ -243,7 +246,8 @@ static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID, return GenericOpc; } } - }; + break; + } return GenericOpc; } @@ -267,6 +271,7 @@ static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID, case 64: return isStore ? AArch64::STRXui : AArch64::LDRXui; } + break; case AArch64::FPRRegBankID: switch (OpSize) { case 8: @@ -278,7 +283,8 @@ static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID, case 64: return isStore ? AArch64::STRDui : AArch64::LDRDui; } - }; + break; + } return GenericOpc; } @@ -1319,6 +1325,9 @@ bool AArch64InstructionSelector::select(MachineInstr &I) const { case TargetOpcode::G_VASTART: return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI) : selectVaStartAAPCS(I, MF, MRI); + case TargetOpcode::G_IMPLICIT_DEF: + I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF)); + return true; } return false; diff --git a/lib/Target/AArch64/AArch64LegalizerInfo.cpp b/lib/Target/AArch64/AArch64LegalizerInfo.cpp index 4b568f3fba2ba..4a0a7c36baf8b 100644 --- a/lib/Target/AArch64/AArch64LegalizerInfo.cpp +++ b/lib/Target/AArch64/AArch64LegalizerInfo.cpp @@ -291,11 +291,10 @@ bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI, unsigned DstPtr; if (Align > PtrSize) { // Realign the list to the actual required alignment. 
- unsigned AlignMinus1 = MRI.createGenericVirtualRegister(IntPtrTy); - MIRBuilder.buildConstant(AlignMinus1, Align - 1); + auto AlignMinus1 = MIRBuilder.buildConstant(IntPtrTy, Align - 1); unsigned ListTmp = MRI.createGenericVirtualRegister(PtrTy); - MIRBuilder.buildGEP(ListTmp, List, AlignMinus1); + MIRBuilder.buildGEP(ListTmp, List, AlignMinus1->getOperand(0).getReg()); DstPtr = MRI.createGenericVirtualRegister(PtrTy); MIRBuilder.buildPtrMask(DstPtr, ListTmp, Log2_64(Align)); diff --git a/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp b/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp index f3c8e7e9bdc2b..4e65c0ab6011b 100644 --- a/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp +++ b/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp @@ -163,6 +163,7 @@ AArch64RedundantCopyElimination::knownRegValInBlock( case AArch64::ADDSWri: case AArch64::ADDSXri: IsCMN = true; + LLVM_FALLTHROUGH; // CMP is an alias for SUBS with a dead destination register. case AArch64::SUBSWri: case AArch64::SUBSXri: { diff --git a/lib/Target/AArch64/AArch64Subtarget.cpp b/lib/Target/AArch64/AArch64Subtarget.cpp index a9a9d5ce84297..a3238cf3b60f0 100644 --- a/lib/Target/AArch64/AArch64Subtarget.cpp +++ b/lib/Target/AArch64/AArch64Subtarget.cpp @@ -81,6 +81,7 @@ void AArch64Subtarget::initializeProperties() { break; case CortexA57: MaxInterleaveFactor = 4; + PrefFunctionAlignment = 4; break; case ExynosM1: MaxInterleaveFactor = 4; @@ -130,7 +131,9 @@ void AArch64Subtarget::initializeProperties() { break; case CortexA35: break; case CortexA53: break; - case CortexA72: break; + case CortexA72: + PrefFunctionAlignment = 4; + break; case CortexA73: break; case Others: break; } diff --git a/lib/Target/AArch64/AArch64Subtarget.h b/lib/Target/AArch64/AArch64Subtarget.h index 7933e58c49eed..db53946cbc77f 100644 --- a/lib/Target/AArch64/AArch64Subtarget.h +++ b/lib/Target/AArch64/AArch64Subtarget.h @@ -218,6 +218,13 @@ public: bool hasArithmeticCbzFusion() const { return HasArithmeticCbzFusion; } bool hasFuseAES() const { return HasFuseAES; } bool hasFuseLiterals() const { return HasFuseLiterals; } + + /// \brief Return true if the CPU supports any kind of instruction fusion. 
+ bool hasFusion() const { + return hasArithmeticBccFusion() || hasArithmeticCbzFusion() || + hasFuseAES() || hasFuseLiterals(); + } + bool useRSqrt() const { return UseRSqrt; } unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; } unsigned getVectorInsertExtractBaseCost() const { diff --git a/lib/Target/AArch64/AArch64TargetMachine.cpp b/lib/Target/AArch64/AArch64TargetMachine.cpp index 1252f9403812e..6237b8f3e7b9b 100644 --- a/lib/Target/AArch64/AArch64TargetMachine.cpp +++ b/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -277,17 +277,19 @@ public: ScheduleDAGInstrs * createMachineScheduler(MachineSchedContext *C) const override { + const AArch64Subtarget &ST = C->MF->getSubtarget<AArch64Subtarget>(); ScheduleDAGMILive *DAG = createGenericSchedLive(C); DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI)); DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI)); - DAG->addMutation(createAArch64MacroFusionDAGMutation()); + if (ST.hasFusion()) + DAG->addMutation(createAArch64MacroFusionDAGMutation()); return DAG; } ScheduleDAGInstrs * createPostMachineScheduler(MachineSchedContext *C) const override { const AArch64Subtarget &ST = C->MF->getSubtarget<AArch64Subtarget>(); - if (ST.hasFuseAES() || ST.hasFuseLiterals()) { + if (ST.hasFusion()) { // Run the Macro Fusion after RA again since literals are expanded from // pseudos then (v. addPreSched2()). ScheduleDAGMI *DAG = createGenericSchedPostRA(C); diff --git a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp index 0d860a7eef794..7870dce5c9c0f 100644 --- a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp +++ b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp @@ -756,7 +756,7 @@ static DecodeStatus DecodeThreeAddrSRegInstruction(llvm::MCInst &Inst, // if shift == '11' then ReservedValue() if (shiftHi == 0x3) return Fail; - // Deliberate fallthrough + LLVM_FALLTHROUGH; case AArch64::ANDWrs: case AArch64::ANDSWrs: case AArch64::BICWrs: @@ -780,7 +780,7 @@ static DecodeStatus DecodeThreeAddrSRegInstruction(llvm::MCInst &Inst, // if shift == '11' then ReservedValue() if (shiftHi == 0x3) return Fail; - // Deliberate fallthrough + LLVM_FALLTHROUGH; case AArch64::ANDXrs: case AArch64::ANDSXrs: case AArch64::BICXrs: diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp index 475f91016840a..a7a7daf4b4a55 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp @@ -73,7 +73,7 @@ public: void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, const MCValue &Target, MutableArrayRef<char> Data, - uint64_t Value, bool IsPCRel) const override; + uint64_t Value, bool IsResolved) const override; bool mayNeedRelaxation(const MCInst &Inst) const override; bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, @@ -264,7 +264,7 @@ unsigned AArch64AsmBackend::getFixupKindContainereSizeInBytes(unsigned Kind) con void AArch64AsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, const MCValue &Target, MutableArrayRef<char> Data, uint64_t Value, - bool IsPCRel) const { + bool IsResolved) const { unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind()); if (!Value) return; // Doesn't change encoding. 
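Several hunks in this commit replace "// Deliberate fallthrough" comments with LLVM_FALLTHROUGH, which expands to the compiler's fallthrough attribute and keeps -Wimplicit-fallthrough quiet where the fall-through is intended. A reduced model of the getUsedNZCV pattern (simplified flag set, not the real AArch64 types):

#include "llvm/Support/Compiler.h" // provides LLVM_FALLTHROUGH

struct UsedFlagsModel { bool Z = false, C = false; };

// Conditions that read Z here also read C, so the Z case deliberately
// falls into the C case; the macro documents that intent to the compiler.
static UsedFlagsModel usedFlags(int CC) {
  UsedFlagsModel U;
  switch (CC) {
  case 0: // e.g. HI/LS: Z and C
    U.Z = true;
    LLVM_FALLTHROUGH;
  case 1: // e.g. HS/LO: C only
    U.C = true;
    break;
  }
  return U;
}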
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
index fc808ee0cdd64..c25bd8c8f6cc9 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
@@ -103,4 +103,6 @@ AArch64MCAsmInfoELF::AArch64MCAsmInfoELF(const Triple &T) {
 
 AArch64MCAsmInfoCOFF::AArch64MCAsmInfoCOFF() {
   CommentString = ";";
+  PrivateGlobalPrefix = ".L";
+  PrivateLabelPrefix = ".L";
 }
diff --git a/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp b/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
index 6f002860044c0..ed5370826647f 100644
--- a/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
+++ b/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
@@ -108,10 +108,11 @@ bool AMDGPUAnnotateUniformValues::isClobberedInFunction(LoadInst * Load) {
   DFS(Start, Checklist);
   for (auto &BB : Checklist) {
     BasicBlock::iterator StartIt = (!L && (BB == Load->getParent())) ?
-      BasicBlock::iterator(Load) : BB->end();
-    if (MDR->getPointerDependencyFrom(MemoryLocation(Ptr),
-                                      true, StartIt, BB, Load).isClobber())
-      return true;
+      BasicBlock::iterator(Load) : BB->end();
+    auto Q = MDR->getPointerDependencyFrom(MemoryLocation(Ptr), true,
+                                           StartIt, BB, Load);
+    if (Q.isClobber() || Q.isUnknown())
+      return true;
   }
   return false;
 }
diff --git a/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index b312dbc8d14d6..31ee9206ae27b 100644
--- a/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -380,7 +380,9 @@ bool AMDGPUCodeGenPrepare::visitFDiv(BinaryOperator &FDiv) {
   FastMathFlags FMF = FPOp->getFastMathFlags();
   bool UnsafeDiv = HasUnsafeFPMath || FMF.unsafeAlgebra() ||
                    FMF.allowReciprocal();
-  if (ST->hasFP32Denormals() && !UnsafeDiv)
+
+  // With UnsafeDiv the node will be optimized to just rcp and mul.
+ if (ST->hasFP32Denormals() || UnsafeDiv) return false; IRBuilder<> Builder(FDiv.getParent(), std::next(FDiv.getIterator()), FPMath); diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 96f819fd0e684..2553cf4da0feb 100644 --- a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -2651,8 +2651,11 @@ SDValue AMDGPUTargetLowering::performShlCombine(SDNode *N, SDValue Shl = DAG.getNode(ISD::SHL, SL, XVT, X, SDValue(RHS, 0)); return DAG.getZExtOrTrunc(Shl, SL, VT); } - case ISD::OR: if (!isOrEquivalentToAdd(DAG, LHS)) break; - case ISD::ADD: { // Fall through from above + case ISD::OR: + if (!isOrEquivalentToAdd(DAG, LHS)) + break; + LLVM_FALLTHROUGH; + case ISD::ADD: { // shl (or|add x, c2), c1 => or|add (shl x, c1), (c2 << c1) if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(LHS->getOperand(1))) { SDValue Shl = DAG.getNode(ISD::SHL, SL, VT, LHS->getOperand(0), diff --git a/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp b/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp index 846e7dff5f8cc..7e0e9802c0e6d 100644 --- a/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp +++ b/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp @@ -10,6 +10,7 @@ #include "AMDGPU.h" #include "AMDGPUSubtarget.h" #include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" @@ -34,9 +35,14 @@ public: AMDGPULowerIntrinsics() : ModulePass(ID) {} bool runOnModule(Module &M) override; + bool expandMemIntrinsicUses(Function &F); StringRef getPassName() const override { return "AMDGPU Lower Intrinsics"; } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<TargetTransformInfoWrapperPass>(); + } }; } @@ -55,7 +61,7 @@ static bool shouldExpandOperationWithSize(Value *Size) { return !CI || (CI->getZExtValue() > MaxStaticSize); } -static bool expandMemIntrinsicUses(Function &F) { +bool AMDGPULowerIntrinsics::expandMemIntrinsicUses(Function &F) { Intrinsic::ID ID = F.getIntrinsicID(); bool Changed = false; @@ -67,7 +73,10 @@ static bool expandMemIntrinsicUses(Function &F) { case Intrinsic::memcpy: { auto *Memcpy = cast<MemCpyInst>(Inst); if (shouldExpandOperationWithSize(Memcpy->getLength())) { - expandMemCpyAsLoop(Memcpy); + Function *ParentFunc = Memcpy->getParent()->getParent(); + const TargetTransformInfo &TTI = + getAnalysis<TargetTransformInfoWrapperPass>().getTTI(*ParentFunc); + expandMemCpyAsLoop(Memcpy, TTI); Changed = true; Memcpy->eraseFromParent(); } diff --git a/lib/Target/AMDGPU/AMDGPUMacroFusion.cpp b/lib/Target/AMDGPU/AMDGPUMacroFusion.cpp new file mode 100644 index 0000000000000..7263ba73d1550 --- /dev/null +++ b/lib/Target/AMDGPU/AMDGPUMacroFusion.cpp @@ -0,0 +1,64 @@ +//===--- AMDGPUMacroFusion.cpp - AMDGPU Macro Fusion ----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file This file contains the AMDGPU implementation of the DAG scheduling +/// mutation to pair instructions back to back. 
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUMacroFusion.h"
+#include "AMDGPUSubtarget.h"
+#include "SIInstrInfo.h"
+
+#include "llvm/CodeGen/MacroFusion.h"
+
+using namespace llvm;
+
+namespace {
+
+/// \brief Check if the instr pair, FirstMI and SecondMI, should be fused
+/// together. Given SecondMI, when FirstMI is unspecified, then check if
+/// SecondMI may be part of a fused pair at all.
+static bool shouldScheduleAdjacent(const TargetInstrInfo &TII_,
+                                   const TargetSubtargetInfo &TSI,
+                                   const MachineInstr *FirstMI,
+                                   const MachineInstr &SecondMI) {
+  const SIInstrInfo &TII = static_cast<const SIInstrInfo&>(TII_);
+
+  switch (SecondMI.getOpcode()) {
+  case AMDGPU::V_ADDC_U32_e64:
+  case AMDGPU::V_SUBB_U32_e64:
+  case AMDGPU::V_CNDMASK_B32_e64: {
+    // Try to cluster defs of condition registers to their uses. This improves
+    // the chance VCC will be available which will allow shrinking to VOP2
+    // encodings.
+    if (!FirstMI)
+      return true;
+
+    const MachineOperand *Src2 = TII.getNamedOperand(SecondMI,
+                                                     AMDGPU::OpName::src2);
+    return FirstMI->definesRegister(Src2->getReg());
+  }
+  default:
+    return false;
+  }
+
+  return false;
+}
+
+} // end namespace
+
+
+namespace llvm {
+
+std::unique_ptr<ScheduleDAGMutation> createAMDGPUMacroFusionDAGMutation() {
+  return createMacroFusionDAGMutation(shouldScheduleAdjacent);
+}
+
+} // end namespace llvm
diff --git a/lib/Target/AMDGPU/AMDGPUMacroFusion.h b/lib/Target/AMDGPU/AMDGPUMacroFusion.h
new file mode 100644
index 0000000000000..844958580a65b
--- /dev/null
+++ b/lib/Target/AMDGPU/AMDGPUMacroFusion.h
@@ -0,0 +1,19 @@
+//===- AMDGPUMacroFusion.h - AMDGPU Macro Fusion ----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineScheduler.h"
+
+namespace llvm {
+
+/// Note that you have to add:
+/// DAG.addMutation(createAMDGPUMacroFusionDAGMutation());
+/// to AMDGPUPassConfig::createMachineScheduler() to have an effect.
+std::unique_ptr<ScheduleDAGMutation> createAMDGPUMacroFusionDAGMutation(); + +} // llvm diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp index be47b900c6f06..1bc5a52053ecb 100644 --- a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -13,6 +13,14 @@ //===----------------------------------------------------------------------===// #include "AMDGPUSubtarget.h" +#include "AMDGPU.h" +#include "AMDGPUTargetMachine.h" +#ifdef LLVM_BUILD_GLOBAL_ISEL +#include "AMDGPUCallLowering.h" +#include "AMDGPUInstructionSelector.h" +#include "AMDGPULegalizerInfo.h" +#include "AMDGPURegisterBankInfo.h" +#endif #include "SIMachineFunctionInfo.h" #include "llvm/ADT/SmallString.h" #include "llvm/CodeGen/MachineScheduler.h" @@ -72,6 +80,31 @@ AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT, return *this; } +#ifdef LLVM_BUILD_GLOBAL_ISEL +namespace { + +struct SIGISelActualAccessor : public GISelAccessor { + std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo; + std::unique_ptr<InstructionSelector> InstSelector; + std::unique_ptr<LegalizerInfo> Legalizer; + std::unique_ptr<RegisterBankInfo> RegBankInfo; + const AMDGPUCallLowering *getCallLowering() const override { + return CallLoweringInfo.get(); + } + const InstructionSelector *getInstructionSelector() const override { + return InstSelector.get(); + } + const LegalizerInfo *getLegalizerInfo() const override { + return Legalizer.get(); + } + const RegisterBankInfo *getRegBankInfo() const override { + return RegBankInfo.get(); + } +}; + +} // end anonymous namespace +#endif + AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS, const TargetMachine &TM) : AMDGPUGenSubtargetInfo(TT, GPU, FS), @@ -265,18 +298,21 @@ bool AMDGPUSubtarget::makeLIDRangeMetadata(Instruction *I) const { case Intrinsic::amdgcn_workitem_id_x: case Intrinsic::r600_read_tidig_x: IdQuery = true; + LLVM_FALLTHROUGH; case Intrinsic::r600_read_local_size_x: Dim = 0; break; case Intrinsic::amdgcn_workitem_id_y: case Intrinsic::r600_read_tidig_y: IdQuery = true; + LLVM_FALLTHROUGH; case Intrinsic::r600_read_local_size_y: Dim = 1; break; case Intrinsic::amdgcn_workitem_id_z: case Intrinsic::r600_read_tidig_z: IdQuery = true; + LLVM_FALLTHROUGH; case Intrinsic::r600_read_local_size_z: Dim = 2; break; @@ -317,11 +353,23 @@ R600Subtarget::R600Subtarget(const Triple &TT, StringRef GPU, StringRef FS, TLInfo(TM, *this) {} SISubtarget::SISubtarget(const Triple &TT, StringRef GPU, StringRef FS, - const TargetMachine &TM) : - AMDGPUSubtarget(TT, GPU, FS, TM), - InstrInfo(*this), - FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0), - TLInfo(TM, *this) {} + const TargetMachine &TM) + : AMDGPUSubtarget(TT, GPU, FS, TM), InstrInfo(*this), + FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0), + TLInfo(TM, *this) { +#ifndef LLVM_BUILD_GLOBAL_ISEL + GISelAccessor *GISel = new GISelAccessor(); +#else + SIGISelActualAccessor *GISel = new SIGISelActualAccessor(); + GISel->CallLoweringInfo.reset(new AMDGPUCallLowering(*getTargetLowering())); + GISel->Legalizer.reset(new AMDGPULegalizerInfo()); + + GISel->RegBankInfo.reset(new AMDGPURegisterBankInfo(*getRegisterInfo())); + GISel->InstSelector.reset(new AMDGPUInstructionSelector( + *this, *static_cast<AMDGPURegisterBankInfo *>(GISel->RegBankInfo.get()))); +#endif + setGISelAccessor(*GISel); +} void SISubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy, unsigned NumRegionInstrs) const { diff --git 
a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 425fd35d47de6..dc868f010d85c 100644 --- a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -19,9 +19,7 @@ #include "AMDGPUCallLowering.h" #include "AMDGPUInstructionSelector.h" #include "AMDGPULegalizerInfo.h" -#ifdef LLVM_BUILD_GLOBAL_ISEL -#include "AMDGPURegisterBankInfo.h" -#endif +#include "AMDGPUMacroFusion.h" #include "AMDGPUTargetObjectFile.h" #include "AMDGPUTargetTransformInfo.h" #include "GCNIterativeScheduler.h" @@ -85,7 +83,7 @@ static cl::opt<bool> EnableLoadStoreVectorizer( static cl::opt<bool> ScalarizeGlobal( "amdgpu-scalarize-global-loads", cl::desc("Enable global load scalarization"), - cl::init(false), + cl::init(true), cl::Hidden); // Option to run internalize pass. @@ -176,6 +174,7 @@ createGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) { new GCNScheduleDAGMILive(C, make_unique<GCNMaxOccupancySchedStrategy>(C)); DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI)); DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI)); + DAG->addMutation(createAMDGPUMacroFusionDAGMutation()); return DAG; } @@ -389,31 +388,6 @@ const R600Subtarget *R600TargetMachine::getSubtargetImpl( // GCN Target Machine (SI+) //===----------------------------------------------------------------------===// -#ifdef LLVM_BUILD_GLOBAL_ISEL -namespace { - -struct SIGISelActualAccessor : public GISelAccessor { - std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo; - std::unique_ptr<InstructionSelector> InstSelector; - std::unique_ptr<LegalizerInfo> Legalizer; - std::unique_ptr<RegisterBankInfo> RegBankInfo; - const AMDGPUCallLowering *getCallLowering() const override { - return CallLoweringInfo.get(); - } - const InstructionSelector *getInstructionSelector() const override { - return InstSelector.get(); - } - const LegalizerInfo *getLegalizerInfo() const override { - return Legalizer.get(); - } - const RegisterBankInfo *getRegBankInfo() const override { - return RegBankInfo.get(); - } -}; - -} // end anonymous namespace -#endif - GCNTargetMachine::GCNTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, TargetOptions Options, @@ -435,21 +409,6 @@ const SISubtarget *GCNTargetMachine::getSubtargetImpl(const Function &F) const { // function that reside in TargetOptions. 
resetTargetOptions(F); I = llvm::make_unique<SISubtarget>(TargetTriple, GPU, FS, *this); - -#ifndef LLVM_BUILD_GLOBAL_ISEL - GISelAccessor *GISel = new GISelAccessor(); -#else - SIGISelActualAccessor *GISel = new SIGISelActualAccessor(); - GISel->CallLoweringInfo.reset( - new AMDGPUCallLowering(*I->getTargetLowering())); - GISel->Legalizer.reset(new AMDGPULegalizerInfo()); - - GISel->RegBankInfo.reset(new AMDGPURegisterBankInfo(*I->getRegisterInfo())); - GISel->InstSelector.reset(new AMDGPUInstructionSelector(*I, - *static_cast<AMDGPURegisterBankInfo*>(GISel->RegBankInfo.get()))); -#endif - - I->setGISelAccessor(*GISel); } I->setScalarizeGlobalBehavior(ScalarizeGlobal); diff --git a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index 7b8756050b752..e3c90f250600a 100644 --- a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -1058,17 +1058,13 @@ public: OperandMatchResultTy parseOModOperand(OperandVector &Operands); - void cvtId(MCInst &Inst, const OperandVector &Operands); - void cvtVOP3_2_mod(MCInst &Inst, const OperandVector &Operands); - - void cvtVOP3Impl(MCInst &Inst, - const OperandVector &Operands, - OptionalImmIndexMap &OptionalIdx); + void cvtVOP3(MCInst &Inst, const OperandVector &Operands, + OptionalImmIndexMap &OptionalIdx); void cvtVOP3(MCInst &Inst, const OperandVector &Operands); - void cvtVOP3OMod(MCInst &Inst, const OperandVector &Operands); void cvtVOP3P(MCInst &Inst, const OperandVector &Operands); - void cvtMIMG(MCInst &Inst, const OperandVector &Operands); + void cvtMIMG(MCInst &Inst, const OperandVector &Operands, + bool IsAtomic = false); void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands); OperandMatchResultTy parseDPPCtrl(OperandVector &Operands); @@ -3870,13 +3866,19 @@ void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { // mimg //===----------------------------------------------------------------------===// -void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands) { +void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, + bool IsAtomic) { unsigned I = 1; const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); } + if (IsAtomic) { + // Add src, same as dst + ((AMDGPUOperand &)*Operands[I]).addRegOperands(Inst, 1); + } + OptionalImmIndexMap OptionalIdx; for (unsigned E = Operands.size(); I != E; ++I) { @@ -3904,39 +3906,7 @@ void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands) { } void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { - unsigned I = 1; - const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); - for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { - ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); - } - - // Add src, same as dst - ((AMDGPUOperand &)*Operands[I]).addRegOperands(Inst, 1); - - OptionalImmIndexMap OptionalIdx; - - for (unsigned E = Operands.size(); I != E; ++I) { - AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); - - // Add the register arguments - if (Op.isRegOrImm()) { - Op.addRegOrImmOperands(Inst, 1); - continue; - } else if (Op.isImmModifier()) { - OptionalIdx[Op.getImmTy()] = I; - } else { - llvm_unreachable("unexpected operand type"); - } - } - - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); - addOptionalImmOperand(Inst, Operands, OptionalIdx, 
AMDGPUOperand::ImmTyUNorm); - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128); - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); + cvtMIMG(Inst, Operands, true); } AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDMask() const { @@ -4118,25 +4088,6 @@ OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) return MatchOperand_NoMatch; } -void AMDGPUAsmParser::cvtId(MCInst &Inst, const OperandVector &Operands) { - unsigned I = 1; - const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); - for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { - ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); - } - for (unsigned E = Operands.size(); I != E; ++I) - ((AMDGPUOperand &)*Operands[I]).addRegOrImmOperands(Inst, 1); -} - -void AMDGPUAsmParser::cvtVOP3_2_mod(MCInst &Inst, const OperandVector &Operands) { - uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; - if (TSFlags & SIInstrFlags::VOP3) { - cvtVOP3(Inst, Operands); - } else { - cvtId(Inst, Operands); - } -} - static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { // 1. This operand is input modifiers return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS @@ -4148,91 +4099,78 @@ static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; } -void AMDGPUAsmParser::cvtVOP3Impl(MCInst &Inst, const OperandVector &Operands, - OptionalImmIndexMap &OptionalIdx) { +void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, + OptionalImmIndexMap &OptionalIdx) { + unsigned Opc = Inst.getOpcode(); + unsigned I = 1; const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); } - for (unsigned E = Operands.size(); I != E; ++I) { - AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); - if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { - Op.addRegOrImmWithFPInputModsOperands(Inst, 2); - } else if (Op.isImmModifier()) { - OptionalIdx[Op.getImmTy()] = I; - } else if (Op.isRegOrImm()) { - Op.addRegOrImmOperands(Inst, 1); - } else { - llvm_unreachable("unhandled operand type"); + if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) { + // This instruction has src modifiers + for (unsigned E = Operands.size(); I != E; ++I) { + AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); + if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { + Op.addRegOrImmWithFPInputModsOperands(Inst, 2); + } else if (Op.isImmModifier()) { + OptionalIdx[Op.getImmTy()] = I; + } else if (Op.isRegOrImm()) { + Op.addRegOrImmOperands(Inst, 1); + } else { + llvm_unreachable("unhandled operand type"); + } + } + } else { + // No src modifiers + for (unsigned E = Operands.size(); I != E; ++I) { + AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); + if (Op.isMod()) { + OptionalIdx[Op.getImmTy()] = I; + } else { + Op.addRegOrImmOperands(Inst, 1); + } } } -} -void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { - OptionalImmIndexMap OptionalIdx; - - cvtVOP3Impl(Inst, Operands, OptionalIdx); + if 
(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); + } - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); + if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); + } // special case v_mac_{f16, f32}: // it has src2 register operand that is tied to dst operand // we don't allow modifiers for this operand in assembler so src2_modifiers // should be 0 - if (Inst.getOpcode() == AMDGPU::V_MAC_F32_e64_si || - Inst.getOpcode() == AMDGPU::V_MAC_F32_e64_vi || - Inst.getOpcode() == AMDGPU::V_MAC_F16_e64_vi) { + if (Opc == AMDGPU::V_MAC_F32_e64_si || Opc == AMDGPU::V_MAC_F32_e64_vi || + Opc == AMDGPU::V_MAC_F16_e64_vi) { auto it = Inst.begin(); - std::advance( - it, - AMDGPU::getNamedOperandIdx(Inst.getOpcode() == AMDGPU::V_MAC_F16_e64_vi ? - AMDGPU::V_MAC_F16_e64 : - AMDGPU::V_MAC_F32_e64, - AMDGPU::OpName::src2_modifiers)); + std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 ++it; Inst.insert(it, Inst.getOperand(0)); // src2 = dst } } -void AMDGPUAsmParser::cvtVOP3OMod(MCInst &Inst, const OperandVector &Operands) { +void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { OptionalImmIndexMap OptionalIdx; - - unsigned I = 1; - const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); - for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { - ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); - } - - for (unsigned E = Operands.size(); I != E; ++I) { - AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); - if (Op.isMod()) { - OptionalIdx[Op.getImmTy()] = I; - } else { - Op.addRegOrImmOperands(Inst, 1); - } - } - - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); + cvtVOP3(Inst, Operands, OptionalIdx); } void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) { OptionalImmIndexMap OptIdx; - cvtVOP3Impl(Inst, Operands, OptIdx); + cvtVOP3(Inst, Operands, OptIdx); // FIXME: This is messy. 
Parse the modifiers as if it was a normal VOP3 // instruction, and then figure out where to actually put the modifiers int Opc = Inst.getOpcode(); - if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { - addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyClampSI); - } - addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, -1); @@ -4284,7 +4222,7 @@ void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) { int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); - Inst.getOperand(ModIdx).setImm(ModVal); + Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); } } diff --git a/lib/Target/AMDGPU/CMakeLists.txt b/lib/Target/AMDGPU/CMakeLists.txt index 917d9cfa69054..971208c5db847 100644 --- a/lib/Target/AMDGPU/CMakeLists.txt +++ b/lib/Target/AMDGPU/CMakeLists.txt @@ -47,6 +47,7 @@ add_llvm_target(AMDGPUCodeGen AMDGPUIntrinsicInfo.cpp AMDGPUISelDAGToDAG.cpp AMDGPULowerIntrinsics.cpp + AMDGPUMacroFusion.cpp AMDGPUMCInstLower.cpp AMDGPUMachineCFGStructurizer.cpp AMDGPUMachineFunction.cpp diff --git a/lib/Target/AMDGPU/GCNIterativeScheduler.cpp b/lib/Target/AMDGPU/GCNIterativeScheduler.cpp index 8ead480673363..2e7641cda3755 100644 --- a/lib/Target/AMDGPU/GCNIterativeScheduler.cpp +++ b/lib/Target/AMDGPU/GCNIterativeScheduler.cpp @@ -17,7 +17,7 @@ using namespace llvm; -#define DEBUG_TYPE "misched" +#define DEBUG_TYPE "machine-scheduler" namespace llvm { std::vector<const SUnit*> makeMinRegSchedule(ArrayRef<const SUnit*> TopRoots, diff --git a/lib/Target/AMDGPU/GCNMinRegStrategy.cpp b/lib/Target/AMDGPU/GCNMinRegStrategy.cpp index d378df674be9b..0657f67b217de 100644 --- a/lib/Target/AMDGPU/GCNMinRegStrategy.cpp +++ b/lib/Target/AMDGPU/GCNMinRegStrategy.cpp @@ -15,7 +15,7 @@ using namespace llvm; -#define DEBUG_TYPE "misched" +#define DEBUG_TYPE "machine-scheduler" namespace { class GCNMinRegScheduler { diff --git a/lib/Target/AMDGPU/GCNRegPressure.cpp b/lib/Target/AMDGPU/GCNRegPressure.cpp index 390a8286c76a8..1d02c7fdffbf5 100644 --- a/lib/Target/AMDGPU/GCNRegPressure.cpp +++ b/lib/Target/AMDGPU/GCNRegPressure.cpp @@ -16,7 +16,7 @@ using namespace llvm; -#define DEBUG_TYPE "misched" +#define DEBUG_TYPE "machine-scheduler" #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD diff --git a/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/lib/Target/AMDGPU/GCNSchedStrategy.cpp index 8ec46665daf56..155b400ba022b 100644 --- a/lib/Target/AMDGPU/GCNSchedStrategy.cpp +++ b/lib/Target/AMDGPU/GCNSchedStrategy.cpp @@ -20,7 +20,7 @@ #include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/Support/MathExtras.h" -#define DEBUG_TYPE "misched" +#define DEBUG_TYPE "machine-scheduler" using namespace llvm; diff --git a/lib/Target/AMDGPU/GCNSchedStrategy.h b/lib/Target/AMDGPU/GCNSchedStrategy.h index 3ed3cd5b3b1ce..060d2ca72d93d 100644 --- a/lib/Target/AMDGPU/GCNSchedStrategy.h +++ b/lib/Target/AMDGPU/GCNSchedStrategy.h @@ -66,7 +66,7 @@ class GCNScheduleDAGMILive : public ScheduleDAGMILive { const SIMachineFunctionInfo &MFI; - // Occupancy target at the begining of function scheduling cycle. + // Occupancy target at the beginning of function scheduling cycle. unsigned StartingOccupancy; // Minimal real occupancy recorder for the function. 
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp index 2b408ff10caae..a50e3eb8d9cee 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp @@ -32,7 +32,7 @@ public: void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, const MCValue &Target, MutableArrayRef<char> Data, - uint64_t Value, bool IsPCRel) const override; + uint64_t Value, bool IsResolved) const override; bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, const MCRelaxableFragment *DF, const MCAsmLayout &Layout) const override { @@ -100,7 +100,7 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, uint64_t Value, void AMDGPUAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, const MCValue &Target, MutableArrayRef<char> Data, uint64_t Value, - bool IsPCRel) const { + bool IsResolved) const { Value = adjustFixupValue(Fixup, Value, &Asm.getContext()); if (!Value) return; // Doesn't change encoding. diff --git a/lib/Target/AMDGPU/MIMGInstructions.td b/lib/Target/AMDGPU/MIMGInstructions.td index a515eecc222af..06e2c11b01935 100644 --- a/lib/Target/AMDGPU/MIMGInstructions.td +++ b/lib/Target/AMDGPU/MIMGInstructions.td @@ -26,6 +26,7 @@ class MIMG_Helper <dag outs, dag ins, string asm, let isAsmParserOnly = !if(!eq(dns,""), 1, 0); let AsmMatchConverter = "cvtMIMG"; let usesCustomInserter = 1; + let SchedRW = [WriteVMEM]; } class MIMG_NoSampler_Helper <bits<7> op, string asm, diff --git a/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp b/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp index 6993e8a62a9c2..00cbd24b84fbc 100644 --- a/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp +++ b/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp @@ -555,7 +555,7 @@ public: CFStack.pushBranch(AMDGPU::CF_PUSH_EG); } else CFStack.pushBranch(AMDGPU::CF_ALU_PUSH_BEFORE); - + LLVM_FALLTHROUGH; case AMDGPU::CF_ALU: I = MI; AluClauses.push_back(MakeALUClause(MBB, I)); diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp b/lib/Target/AMDGPU/R600ISelLowering.cpp index 215791f4f92dd..69a63b6941ef2 100644 --- a/lib/Target/AMDGPU/R600ISelLowering.cpp +++ b/lib/Target/AMDGPU/R600ISelLowering.cpp @@ -1618,7 +1618,8 @@ EVT R600TargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &, return VT.changeVectorElementTypeToInteger(); } -bool R600TargetLowering::canMergeStoresTo(unsigned AS, EVT MemVT) const { +bool R600TargetLowering::canMergeStoresTo(unsigned AS, EVT MemVT, + const SelectionDAG &DAG) const { // Local and Private addresses do not handle vectors. 
Limit to i32 if ((AS == AMDGPUASI.LOCAL_ADDRESS || AS == AMDGPUASI.PRIVATE_ADDRESS)) { return (MemVT.getSizeInBits() <= 32); diff --git a/lib/Target/AMDGPU/R600ISelLowering.h b/lib/Target/AMDGPU/R600ISelLowering.h index d6a0876a6ee7d..2a774693f02b3 100644 --- a/lib/Target/AMDGPU/R600ISelLowering.h +++ b/lib/Target/AMDGPU/R600ISelLowering.h @@ -44,7 +44,8 @@ public: EVT getSetCCResultType(const DataLayout &DL, LLVMContext &, EVT VT) const override; - bool canMergeStoresTo(unsigned AS, EVT MemVT) const override; + bool canMergeStoresTo(unsigned AS, EVT MemVT, + const SelectionDAG &DAG) const override; bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, unsigned Align, diff --git a/lib/Target/AMDGPU/R600MachineScheduler.cpp b/lib/Target/AMDGPU/R600MachineScheduler.cpp index 47fda1c8fa827..a7e540f9d14d3 100644 --- a/lib/Target/AMDGPU/R600MachineScheduler.cpp +++ b/lib/Target/AMDGPU/R600MachineScheduler.cpp @@ -22,7 +22,7 @@ using namespace llvm; -#define DEBUG_TYPE "misched" +#define DEBUG_TYPE "machine-scheduler" void R600SchedStrategy::initialize(ScheduleDAGMI *dag) { assert(dag->hasVRegLiveness() && "R600SchedStrategy needs vreg liveness"); diff --git a/lib/Target/AMDGPU/SIFoldOperands.cpp b/lib/Target/AMDGPU/SIFoldOperands.cpp index f391f67a241f1..3af242d9ea660 100644 --- a/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -137,6 +137,7 @@ static bool isInlineConstantIfFolded(const SIInstrInfo *TII, = TII->get(IsF32 ? AMDGPU::V_MAD_F32 : AMDGPU::V_MAD_F16); return TII->isInlineConstant(OpToFold, MadDesc.OpInfo[OpNo].OperandType); } + return false; } default: return false; diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp index d39b345bdf032..2ba570b9ebbbc 100644 --- a/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/lib/Target/AMDGPU/SIISelLowering.cpp @@ -547,7 +547,7 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.align = 0; const ConstantInt *Vol = dyn_cast<ConstantInt>(CI.getOperand(4)); - Info.vol = !Vol || !Vol->isNullValue(); + Info.vol = !Vol || !Vol->isZero(); Info.readMem = true; Info.writeMem = true; return true; @@ -713,7 +713,8 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL, } } -bool SITargetLowering::canMergeStoresTo(unsigned AS, EVT MemVT) const { +bool SITargetLowering::canMergeStoresTo(unsigned AS, EVT MemVT, + const SelectionDAG &DAG) const { if (AS == AMDGPUASI.GLOBAL_ADDRESS || AS == AMDGPUASI.FLAT_ADDRESS) { return (MemVT.getSizeInBits() <= 4 * 32); } else if (AS == AMDGPUASI.PRIVATE_ADDRESS) { @@ -2374,20 +2375,16 @@ void SITargetLowering::ReplaceNodeResults(SDNode *N, } case ISD::INTRINSIC_WO_CHAIN: { unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); - switch (IID) { - case Intrinsic::amdgcn_cvt_pkrtz: { + if (IID == Intrinsic::amdgcn_cvt_pkrtz) { SDValue Src0 = N->getOperand(1); SDValue Src1 = N->getOperand(2); SDLoc SL(N); SDValue Cvt = DAG.getNode(AMDGPUISD::CVT_PKRTZ_F16_F32, SL, MVT::i32, Src0, Src1); - Results.push_back(DAG.getNode(ISD::BITCAST, SL, MVT::v2f16, Cvt)); return; } - default: - break; - } + break; } case ISD::SELECT: { SDLoc SL(N); @@ -3736,7 +3733,9 @@ SDValue SITargetLowering::lowerFastUnsafeFDIV(SDValue Op, SDValue LHS = Op.getOperand(0); SDValue RHS = Op.getOperand(1); EVT VT = Op.getValueType(); - bool Unsafe = DAG.getTarget().Options.UnsafeFPMath; + const SDNodeFlags Flags = Op->getFlags(); + bool Unsafe = DAG.getTarget().Options.UnsafeFPMath || + Flags.hasUnsafeAlgebra() || Flags.hasAllowReciprocal(); if 
(!Unsafe && VT == MVT::f32 && Subtarget->hasFP32Denormals())
     return SDValue();
 
@@ -3771,15 +3770,11 @@ SDValue SITargetLowering::lowerFastUnsafeFDIV(SDValue Op,
     }
   }
 
-  const SDNodeFlags Flags = Op->getFlags();
-
-  if (Unsafe || Flags.hasAllowReciprocal()) {
+  if (Unsafe) {
     // Turn into multiply by the reciprocal.
     // x / y -> x * (1.0 / y)
-    SDNodeFlags NewFlags;
-    NewFlags.setUnsafeAlgebra(true);
     SDValue Recip = DAG.getNode(AMDGPUISD::RCP, SL, VT, RHS);
-    return DAG.getNode(ISD::FMUL, SL, VT, LHS, Recip, NewFlags);
+    return DAG.getNode(ISD::FMUL, SL, VT, LHS, Recip, Flags);
   }
 
   return SDValue();
@@ -4622,15 +4617,99 @@ SDValue SITargetLowering::performClassCombine(SDNode *N,
   return SDValue();
 }
 
+static bool isKnownNeverSNan(SelectionDAG &DAG, SDValue Op) {
+  if (!DAG.getTargetLoweringInfo().hasFloatingPointExceptions())
+    return true;
+
+  return DAG.isKnownNeverNaN(Op);
+}
+
+static bool isCanonicalized(SDValue Op, const SISubtarget *ST,
+                            unsigned MaxDepth = 5) {
+  // If the source is a result of another standard FP operation, it is already
+  // in canonical form.
+
+  switch (Op.getOpcode()) {
+  default:
+    break;
+
+  // These will flush denorms if required.
+  case ISD::FADD:
+  case ISD::FSUB:
+  case ISD::FMUL:
+  case ISD::FSQRT:
+  case ISD::FCEIL:
+  case ISD::FFLOOR:
+  case ISD::FMA:
+  case ISD::FMAD:
+
+  case ISD::FCANONICALIZE:
+    return true;
+
+  case ISD::FP_ROUND:
+    return Op.getValueType().getScalarType() != MVT::f16 ||
+           ST->hasFP16Denormals();
+
+  case ISD::FP_EXTEND:
+    return Op.getOperand(0).getValueType().getScalarType() != MVT::f16 ||
+           ST->hasFP16Denormals();
+
+  case ISD::FP16_TO_FP:
+  case ISD::FP_TO_FP16:
+    return ST->hasFP16Denormals();
+
+  // These can/will be lowered or combined as bit operations, so their
+  // inputs need to be checked recursively.
+  case ISD::FNEG:
+  case ISD::FABS:
+    return (MaxDepth > 0) &&
+           isCanonicalized(Op.getOperand(0), ST, MaxDepth - 1);
+
+  case ISD::FSIN:
+  case ISD::FCOS:
+  case ISD::FSINCOS:
+    return Op.getValueType().getScalarType() != MVT::f16;
+
+  // On pre-GFX9 targets V_MIN_F32 and others do not flush denorms.
+  // For such targets we need to check their inputs recursively.
+  // TODO: on GFX9+ we could return true without checking, provided no-NaN
+  // mode is set, since canonicalization is also used to quiet sNaNs.
+  case ISD::FMINNUM:
+  case ISD::FMAXNUM:
+  case ISD::FMINNAN:
+  case ISD::FMAXNAN:
+
+    return (MaxDepth > 0) &&
+           isCanonicalized(Op.getOperand(0), ST, MaxDepth - 1) &&
+           isCanonicalized(Op.getOperand(1), ST, MaxDepth - 1);
+
+  case ISD::ConstantFP: {
+    auto F = cast<ConstantFPSDNode>(Op)->getValueAPF();
+    return !F.isDenormal() && !(F.isNaN() && F.isSignaling());
+  }
+  }
+  return false;
+}
+
 // Constant fold canonicalize.
 SDValue SITargetLowering::performFCanonicalizeCombine(
   SDNode *N,
   DAGCombinerInfo &DCI) const {
+  SelectionDAG &DAG = DCI.DAG;
   ConstantFPSDNode *CFP = isConstOrConstSplatFP(N->getOperand(0));
-  if (!CFP)
+
+  if (!CFP) {
+    SDValue N0 = N->getOperand(0);
+
+    bool IsIEEEMode = Subtarget->enableIEEEBit(DAG.getMachineFunction());
+
+    if ((IsIEEEMode || isKnownNeverSNan(DAG, N0)) &&
+        isCanonicalized(N0, getSubtarget()))
+      return N0;
+
     return SDValue();
+  }
 
-  SelectionDAG &DAG = DCI.DAG;
   const APFloat &C = CFP->getValueAPF();
 
   // Flush denormals to 0 if not enabled.
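With this change, performFCanonicalizeCombine can fold fcanonicalize(x) to x whenever the recursive walk above proves x is already canonical. The shape of that walk, reduced to a self-contained sketch (toy node model, not the real SelectionDAG types):

// Arithmetic results are already flushed/quieted by the hardware; pure
// bit operations such as fneg/fabs preserve canonical form only if their
// input is canonical, hence the depth-bounded recursion.
enum class Op { FAdd, FMul, FNeg, FAbs, ConstFP, Other };

struct Node {
  Op Kind;
  const Node *Operand;  // a single operand is enough for the sketch
  bool DenormalOrSNaN;  // only meaningful for ConstFP
};

static bool isCanonicalizedSketch(const Node *N, unsigned MaxDepth = 5) {
  switch (N->Kind) {
  case Op::FAdd:
  case Op::FMul:
    return true;
  case Op::FNeg:
  case Op::FAbs:
    return MaxDepth > 0 && isCanonicalizedSketch(N->Operand, MaxDepth - 1);
  case Op::ConstFP:
    return !N->DenormalOrSNaN;
  default:
    return false; // conservatively treat everything else as non-canonical
  }
}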
@@ -4723,13 +4802,6 @@ SDValue SITargetLowering::performIntMed3ImmCombine( return DAG.getNode(ISD::TRUNCATE, SL, VT, Med3); } -static bool isKnownNeverSNan(SelectionDAG &DAG, SDValue Op) { - if (!DAG.getTargetLoweringInfo().hasFloatingPointExceptions()) - return true; - - return DAG.isKnownNeverNaN(Op); -} - SDValue SITargetLowering::performFPMed3ImmCombine(SelectionDAG &DAG, const SDLoc &SL, SDValue Op0, diff --git a/lib/Target/AMDGPU/SIISelLowering.h b/lib/Target/AMDGPU/SIISelLowering.h index 24f88e632d38e..83392a7ab1b21 100644 --- a/lib/Target/AMDGPU/SIISelLowering.h +++ b/lib/Target/AMDGPU/SIISelLowering.h @@ -153,7 +153,8 @@ public: bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS) const override; - bool canMergeStoresTo(unsigned AS, EVT MemVT) const override; + bool canMergeStoresTo(unsigned AS, EVT MemVT, + const SelectionDAG &DAG) const override; bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, unsigned Align, diff --git a/lib/Target/AMDGPU/SIInstrInfo.cpp b/lib/Target/AMDGPU/SIInstrInfo.cpp index b6784ec14e9f8..160f8837d49c8 100644 --- a/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -2022,10 +2022,12 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineFunction::iterator &MBB, return nullptr; case AMDGPU::V_MAC_F16_e64: IsF16 = true; + LLVM_FALLTHROUGH; case AMDGPU::V_MAC_F32_e64: break; case AMDGPU::V_MAC_F16_e32: IsF16 = true; + LLVM_FALLTHROUGH; case AMDGPU::V_MAC_F32_e32: { int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0); @@ -4320,6 +4322,24 @@ SIInstrInfo::CreateTargetPostRAHazardRecognizer(const MachineFunction &MF) const return new GCNHazardRecognizer(MF); } +std::pair<unsigned, unsigned> +SIInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const { + return std::make_pair(TF & MO_MASK, TF & ~MO_MASK); +} + +ArrayRef<std::pair<unsigned, const char *>> +SIInstrInfo::getSerializableDirectMachineOperandTargetFlags() const { + static const std::pair<unsigned, const char *> TargetFlags[] = { + { MO_GOTPCREL, "amdgpu-gotprel" }, + { MO_GOTPCREL32_LO, "amdgpu-gotprel32-lo" }, + { MO_GOTPCREL32_HI, "amdgpu-gotprel32-hi" }, + { MO_REL32_LO, "amdgpu-rel32-lo" }, + { MO_REL32_HI, "amdgpu-rel32-hi" } + }; + + return makeArrayRef(TargetFlags); +} + bool SIInstrInfo::isBasicBlockPrologue(const MachineInstr &MI) const { return !MI.isTerminator() && MI.getOpcode() != AMDGPU::COPY && MI.modifiesRegister(AMDGPU::EXEC, &RI); diff --git a/lib/Target/AMDGPU/SIInstrInfo.h b/lib/Target/AMDGPU/SIInstrInfo.h index 74b48c7618087..d00c0d4a7f4ea 100644 --- a/lib/Target/AMDGPU/SIInstrInfo.h +++ b/lib/Target/AMDGPU/SIInstrInfo.h @@ -100,6 +100,8 @@ protected: public: enum TargetOperandFlags { + MO_MASK = 0x7, + MO_NONE = 0, // MO_GOTPCREL -> symbol@GOTPCREL -> R_AMDGPU_GOTPCREL. 
  MO_GOTPCREL = 1,
@@ -781,9 +783,15 @@ public:
   void convertNonUniformLoopRegion(MachineBasicBlock *LoopEntry,
                                    MachineBasicBlock *LoopEnd) const;
 
+  std::pair<unsigned, unsigned>
+  decomposeMachineOperandsTargetFlags(unsigned TF) const override;
+
   ArrayRef<std::pair<int, const char *>>
   getSerializableTargetIndices() const override;
 
+  ArrayRef<std::pair<unsigned, const char *>>
+  getSerializableDirectMachineOperandTargetFlags() const override;
+
   ScheduleHazardRecognizer *
   CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
                                      const ScheduleDAG *DAG) const override;
diff --git a/lib/Target/AMDGPU/SIInstrInfo.td b/lib/Target/AMDGPU/SIInstrInfo.td
index 4a81fb3b463a9..ffb01363e1313 100644
--- a/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/lib/Target/AMDGPU/SIInstrInfo.td
@@ -1502,6 +1502,8 @@ def VOP_B32_F16_F16 : VOPProfile <[i32, f16, f16, untyped]>;
 def VOP_V2F16_V2F16_V2F16_V2F16 : VOPProfile <[v2f16, v2f16, v2f16, v2f16]>;
 def VOP_V2I16_V2I16_V2I16_V2I16 : VOPProfile <[v2i16, v2i16, v2i16, v2i16]>;
 
+def VOP_F32_V2F16_V2F16_V2F16 : VOPProfile <[f32, v2f16, v2f16, v2f16]>;
+
 def VOP_NONE : VOPProfile <[untyped, untyped, untyped, untyped]>;
 
 def VOP_F32_F32 : VOPProfile <[f32, f32, untyped, untyped]>;
diff --git a/lib/Target/AMDGPU/SIMachineScheduler.cpp b/lib/Target/AMDGPU/SIMachineScheduler.cpp
index bb17dbbdfbd62..34886c48f461d 100644
--- a/lib/Target/AMDGPU/SIMachineScheduler.cpp
+++ b/lib/Target/AMDGPU/SIMachineScheduler.cpp
@@ -38,7 +38,7 @@
 
 using namespace llvm;
 
-#define DEBUG_TYPE "misched"
+#define DEBUG_TYPE "machine-scheduler"
 
 // This scheduler implements a different scheduling algorithm than
 // GenericScheduler.
diff --git a/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/lib/Target/AMDGPU/SIShrinkInstructions.cpp
index 96a18544f02ac..874fbadca7f35 100644
--- a/lib/Target/AMDGPU/SIShrinkInstructions.cpp
+++ b/lib/Target/AMDGPU/SIShrinkInstructions.cpp
@@ -110,10 +110,8 @@ static bool canShrink(MachineInstr &MI, const SIInstrInfo *TII,
   }
 
   const MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
-  const MachineOperand *Src1Mod =
-      TII->getNamedOperand(MI, AMDGPU::OpName::src1_modifiers);
-
-  if (Src1 && (!isVGPR(Src1, TRI, MRI) || (Src1Mod && Src1Mod->getImm() != 0)))
+  if (Src1 && (!isVGPR(Src1, TRI, MRI) ||
+               TII->hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers)))
     return false;
 
   // We don't need to check src0, all input types are legal, so just make sure
@@ -122,58 +120,64 @@ static bool canShrink(MachineInstr &MI, const SIInstrInfo *TII,
     return false;
 
   // Check output modifiers
-  if (TII->hasModifiersSet(MI, AMDGPU::OpName::omod))
-    return false;
-
-  return !TII->hasModifiersSet(MI, AMDGPU::OpName::clamp);
+  return !TII->hasModifiersSet(MI, AMDGPU::OpName::omod) &&
+         !TII->hasModifiersSet(MI, AMDGPU::OpName::clamp);
 }
 
 /// \brief This function checks \p MI for operands defined by a move immediate
 /// instruction and then folds the literal constant into the instruction if it
-/// can. This function assumes that \p MI is a VOP1, VOP2, or VOPC instruction
-/// and will only fold literal constants if we are still in SSA.
-static void foldImmediates(MachineInstr &MI, const SIInstrInfo *TII,
+/// can. This function assumes that \p MI is a VOP1, VOP2, or VOPC instruction.
+static bool foldImmediates(MachineInstr &MI, const SIInstrInfo *TII, MachineRegisterInfo &MRI, bool TryToCommute = true) { - - if (!MRI.isSSA()) - return; - assert(TII->isVOP1(MI) || TII->isVOP2(MI) || TII->isVOPC(MI)); int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0); - // Only one literal constant is allowed per instruction, so if src0 is a - // literal constant then we can't do any folding. - if (TII->isLiteralConstant(MI, Src0Idx)) - return; - // Try to fold Src0 MachineOperand &Src0 = MI.getOperand(Src0Idx); - if (Src0.isReg() && MRI.hasOneUse(Src0.getReg())) { + if (Src0.isReg()) { unsigned Reg = Src0.getReg(); - MachineInstr *Def = MRI.getUniqueVRegDef(Reg); - if (Def && Def->isMoveImmediate()) { - MachineOperand &MovSrc = Def->getOperand(1); - bool ConstantFolded = false; - - if (MovSrc.isImm() && (isInt<32>(MovSrc.getImm()) || - isUInt<32>(MovSrc.getImm()))) { - Src0.ChangeToImmediate(MovSrc.getImm()); - ConstantFolded = true; - } - if (ConstantFolded) { - if (MRI.use_empty(Reg)) + if (TargetRegisterInfo::isVirtualRegister(Reg) && MRI.hasOneUse(Reg)) { + MachineInstr *Def = MRI.getUniqueVRegDef(Reg); + if (Def && Def->isMoveImmediate()) { + MachineOperand &MovSrc = Def->getOperand(1); + bool ConstantFolded = false; + + if (MovSrc.isImm() && (isInt<32>(MovSrc.getImm()) || + isUInt<32>(MovSrc.getImm()))) { + // It's possible to have only one component of a super-reg defined by + // a single mov, so we need to clear any subregister flag. + Src0.setSubReg(0); + Src0.ChangeToImmediate(MovSrc.getImm()); + ConstantFolded = true; + } else if (MovSrc.isFI()) { + Src0.setSubReg(0); + Src0.ChangeToFrameIndex(MovSrc.getIndex()); + ConstantFolded = true; + } + + if (ConstantFolded) { + assert(MRI.use_empty(Reg)); Def->eraseFromParent(); - ++NumLiteralConstantsFolded; - return; + ++NumLiteralConstantsFolded; + return true; + } } } } // We have failed to fold src0, so commute the instruction and try again. - if (TryToCommute && MI.isCommutable() && TII->commuteInstruction(MI)) - foldImmediates(MI, TII, MRI, false); + if (TryToCommute && MI.isCommutable()) { + if (TII->commuteInstruction(MI)) { + if (foldImmediates(MI, TII, MRI, false)) + return true; + // Commute back. + TII->commuteInstruction(MI); + } + } + + return false; } // Copy MachineOperand with all flags except setting it as implicit. diff --git a/lib/Target/AMDGPU/TargetInfo/AMDGPUTargetInfo.cpp b/lib/Target/AMDGPU/TargetInfo/AMDGPUTargetInfo.cpp index 9908fc003ce70..92fb762ebd731 100644 --- a/lib/Target/AMDGPU/TargetInfo/AMDGPUTargetInfo.cpp +++ b/lib/Target/AMDGPU/TargetInfo/AMDGPUTargetInfo.cpp @@ -16,7 +16,7 @@ using namespace llvm; -/// \brief The target which suports all AMD GPUs. This will eventually +/// \brief The target which supports all AMD GPUs. This will eventually /// be deprecated and there will be a R600 target and a GCN target. Target &llvm::getTheAMDGPUTarget() { static Target TheAMDGPUTarget; diff --git a/lib/Target/AMDGPU/VOP3PInstructions.td b/lib/Target/AMDGPU/VOP3PInstructions.td index 96d343099132c..f2de1f9957260 100644 --- a/lib/Target/AMDGPU/VOP3PInstructions.td +++ b/lib/Target/AMDGPU/VOP3PInstructions.td @@ -16,12 +16,21 @@ class VOP3PInst<string OpName, VOPProfile P, SDPatternOperator node = null_frag> !if(P.HasModifiers, getVOP3PModPat<P, node>.ret, getVOP3Pat<P, node>.ret) >; -// Non-packed instructions that use the VOP3P encoding. i.e. where -// omod/abs are used. +// Non-packed instructions that use the VOP3P encoding. 
+// VOP3 neg/abs and VOP3P opsel/opsel_hi modifiers are allowed. class VOP3_VOP3PInst<string OpName, VOPProfile P, SDPatternOperator node = null_frag> : - VOP3P_Pseudo<OpName, P, - !if(P.HasModifiers, getVOP3ModPat<P, node>.ret, getVOP3Pat<P, node>.ret) ->; + VOP3P_Pseudo<OpName, P> { + let InOperandList = + (ins + FP32InputMods:$src0_modifiers, VCSrc_f32:$src0, + FP32InputMods:$src1_modifiers, VCSrc_f32:$src1, + FP32InputMods:$src2_modifiers, VCSrc_f32:$src2, + clampmod:$clamp, + op_sel:$op_sel, + op_sel_hi:$op_sel_hi); + let AsmOperands = + " $vdst, $src0_modifiers, $src1_modifiers, $src2_modifiers$op_sel$op_sel_hi$clamp"; +} let isCommutable = 1 in { def V_PK_FMA_F16 : VOP3PInst<"v_pk_fma_f16", VOP3_Profile<VOP_V2F16_V2F16_V2F16_V2F16>, fma>; @@ -46,9 +55,12 @@ def V_PK_ASHRREV_I16 : VOP3PInst<"v_pk_ashrrev_i16", VOP3_Profile<VOP_V2I16_V2I1 def V_PK_LSHRREV_B16 : VOP3PInst<"v_pk_lshrrev_b16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>, lshr_rev>; // XXX - Commutable? -def V_MAD_MIX_F32 : VOP3_VOP3PInst<"v_mad_mix_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>; -def V_MAD_MIXLO_F16 : VOP3_VOP3PInst<"v_mad_mixlo_f16", VOP3_Profile<VOP_F16_F16_F16_F16>>; -def V_MAD_MIXHI_F16 : VOP3_VOP3PInst<"v_mad_mixhi_f16", VOP3_Profile<VOP_F16_F16_F16_F16>>; +// These are VOP3a-like opcodes which accept no omod. +// Size of src arguments (16/32) is controlled by op_sel. +// For 16-bit src arguments their location (hi/lo) are controlled by op_sel_hi. +def V_MAD_MIX_F32 : VOP3_VOP3PInst<"v_mad_mix_f32", VOP3_Profile<VOP_F32_V2F16_V2F16_V2F16>>; +def V_MAD_MIXLO_F16 : VOP3_VOP3PInst<"v_mad_mixlo_f16", VOP3_Profile<VOP_V2F16_V2F16_V2F16_V2F16>>; +def V_MAD_MIXHI_F16 : VOP3_VOP3PInst<"v_mad_mixhi_f16", VOP3_Profile<VOP_V2F16_V2F16_V2F16_V2F16>>; multiclass VOP3P_Real_vi<bits<10> op> { diff --git a/lib/Target/AMDGPU/VOPInstructions.td b/lib/Target/AMDGPU/VOPInstructions.td index e386f21c2ba49..77b7952b22a88 100644 --- a/lib/Target/AMDGPU/VOPInstructions.td +++ b/lib/Target/AMDGPU/VOPInstructions.td @@ -51,12 +51,8 @@ class VOP3Common <dag outs, dag ins, string asm = "", let VOP3 = 1; - let AsmMatchConverter = - !if(!eq(VOP3Only,1), - "cvtVOP3", - !if(!eq(HasMods,1), "cvtVOP3_2_mod", "")); - let AsmVariantName = AMDGPUAsmVariants.VOP3; + let AsmMatchConverter = !if(!eq(HasMods,1), "cvtVOP3", ""); let isCodeGenOnly = 0; @@ -106,13 +102,11 @@ class VOP3_Pseudo <string opName, VOPProfile P, list<dag> pattern = [], let AsmVariantName = AMDGPUAsmVariants.VOP3; let AsmMatchConverter = - !if(!eq(VOP3Only,1), - !if(!and(P.IsPacked, isVOP3P), "cvtVOP3P", "cvtVOP3"), - !if(!eq(P.HasModifiers, 1), - "cvtVOP3_2_mod", - !if(!eq(P.HasOMod, 1), "cvtVOP3OMod", "") - ) - ); + !if(!and(P.IsPacked, isVOP3P), + "cvtVOP3P", + !if(!or(P.HasModifiers, P.HasOMod), + "cvtVOP3", + "")); VOPProfile Pfl = P; } diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index 90f635c812542..582153daebde9 100644 --- a/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -1103,6 +1103,7 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) { case ARM::tPUSH: // Special case here: no src & dst reg, but two extra imp ops. 
       StartOp = 2;
       NumOffset = 2;
+      LLVM_FALLTHROUGH;
     case ARM::STMDB_UPD:
     case ARM::t2STMDB_UPD:
     case ARM::VSTMDDB_UPD:
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 1ec6b24b2ed67..3cf5950a1918d 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -1880,6 +1880,9 @@ isProfitableToIfCvt(MachineBasicBlock &TBB,
     // Diamond: TBB is the block that is branched to, FBB is the fallthrough
     TUnpredCycles = TCycles + TakenBranchCost;
     FUnpredCycles = FCycles + NotTakenBranchCost;
+    // The branch at the end of FBB will disappear when it's predicated, so
+    // discount it from PredCost.
+    PredCost -= 1 * ScalingUpFactor;
   }
   // The total cost is the cost of each path scaled by their probabilities
   unsigned TUnpredCost = Probability.scale(TUnpredCycles * ScalingUpFactor);
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index b4fb292c0116d..e97a7ce5067f9 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -193,10 +193,11 @@ getReservedRegs(const MachineFunction &MF) const {
     for (unsigned R = 0; R < 16; ++R)
       markSuperRegs(Reserved, ARM::D16 + R);
   }
-  const TargetRegisterClass *RC = &ARM::GPRPairRegClass;
-  for(TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); I!=E; ++I)
-    for (MCSubRegIterator SI(*I, this); SI.isValid(); ++SI)
-      if (Reserved.test(*SI)) markSuperRegs(Reserved, *I);
+  const TargetRegisterClass &RC = ARM::GPRPairRegClass;
+  for (unsigned Reg : RC)
+    for (MCSubRegIterator SI(Reg, this); SI.isValid(); ++SI)
+      if (Reserved.test(*SI))
+        markSuperRegs(Reserved, Reg);
 
   assert(checkAllSuperRegsMarked(Reserved));
   return Reserved;
@@ -315,8 +316,7 @@ ARMBaseRegisterInfo::getRegAllocationHints(unsigned VirtReg,
     Hints.push_back(PairedPhys);
 
   // Then prefer even or odd registers.
-  for (unsigned I = 0, E = Order.size(); I != E; ++I) {
-    unsigned Reg = Order[I];
+  for (unsigned Reg : Order) {
     if (Reg == PairedPhys || (getEncodingValue(Reg) & 1) != Odd)
       continue;
     // Don't provide hints that are paired to a reserved register.
@@ -659,11 +659,8 @@ bool ARMBaseRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI, unsigned Ba
   const MCInstrDesc &Desc = MI->getDesc();
   unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
   unsigned i = 0;
-
-  while (!MI->getOperand(i).isFI()) {
-    ++i;
-    assert(i < MI->getNumOperands() &&"Instr doesn't have FrameIndex operand!");
-  }
+  for (; !MI->getOperand(i).isFI(); ++i)
+    assert(i+1 < MI->getNumOperands() && "Instr doesn't have FrameIndex operand!");
 
   // AddrMode4 and AddrMode6 cannot handle any offset.
   if (AddrMode == ARMII::AddrMode4 || AddrMode == ARMII::AddrMode6)
diff --git a/lib/Target/ARM/ARMCallLowering.cpp b/lib/Target/ARM/ARMCallLowering.cpp
index e498f70b820db..051827a6a6a2f 100644
--- a/lib/Target/ARM/ARMCallLowering.cpp
+++ b/lib/Target/ARM/ARMCallLowering.cpp
@@ -321,7 +321,7 @@ struct IncomingValueHandler : public CallLowering::ValueHandler {
     assert(VA.getValVT().getSizeInBits() <= 64 && "Unsupported value size");
     assert(VA.getLocVT().getSizeInBits() <= 64 && "Unsupported location size");
 
-    // The necesary extensions are handled on the other side of the ABI
+    // The necessary extensions are handled on the other side of the ABI
     // boundary.
markPhysRegUsed(PhysReg); MIRBuilder.buildCopy(ValVReg, PhysReg); diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index e42514acd76f0..6ba7593543a92 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -3398,9 +3398,9 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget) { SDLoc dl(Op); - ConstantSDNode *ScopeN = cast<ConstantSDNode>(Op.getOperand(2)); - auto Scope = static_cast<SynchronizationScope>(ScopeN->getZExtValue()); - if (Scope == SynchronizationScope::SingleThread) + ConstantSDNode *SSIDNode = cast<ConstantSDNode>(Op.getOperand(2)); + auto SSID = static_cast<SyncScope::ID>(SSIDNode->getZExtValue()); + if (SSID == SyncScope::SingleThread) return Op; if (!Subtarget->hasDataBarrier()) { @@ -5356,15 +5356,15 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) { // Integer comparisons. switch (SetCCOpcode) { default: llvm_unreachable("Illegal integer comparison"); - case ISD::SETNE: Invert = true; + case ISD::SETNE: Invert = true; LLVM_FALLTHROUGH; case ISD::SETEQ: Opc = ARMISD::VCEQ; break; - case ISD::SETLT: Swap = true; + case ISD::SETLT: Swap = true; LLVM_FALLTHROUGH; case ISD::SETGT: Opc = ARMISD::VCGT; break; - case ISD::SETLE: Swap = true; + case ISD::SETLE: Swap = true; LLVM_FALLTHROUGH; case ISD::SETGE: Opc = ARMISD::VCGE; break; - case ISD::SETULT: Swap = true; + case ISD::SETULT: Swap = true; LLVM_FALLTHROUGH; case ISD::SETUGT: Opc = ARMISD::VCGTU; break; - case ISD::SETULE: Swap = true; + case ISD::SETULE: Swap = true; LLVM_FALLTHROUGH; case ISD::SETUGE: Opc = ARMISD::VCGEU; break; } @@ -13779,7 +13779,9 @@ bool ARMTargetLowering::lowerInterleavedLoad( // Convert the integer vector to pointer vector if the element is pointer. if (EltTy->isPointerTy()) - SubVec = Builder.CreateIntToPtr(SubVec, SV->getType()); + SubVec = Builder.CreateIntToPtr( + SubVec, VectorType::get(SV->getType()->getVectorElementType(), + VecTy->getVectorNumElements())); SubVecs[SV].push_back(SubVec); } diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index 5044134f5b1e2..f05b142552369 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -510,7 +510,8 @@ class InstrItineraryData; bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx, unsigned &Cost) const override; - bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT) const override { + bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT, + const SelectionDAG &DAG) const override { // Do not merge to larger than i32. return (MemVT.getSizeInBits() <= 32); } diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 53db5acbe805c..42eac12e457b2 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -4799,7 +4799,7 @@ def : t2InstAlias<"add${p} $Rd, pc, $imm", // Pseudo instruction ldr Rt, =immediate def t2LDRConstPool : t2AsmPseudo<"ldr${p} $Rt, $immediate", - (ins GPRnopc:$Rt, const_pool_asm_imm:$immediate, pred:$p)>; + (ins GPR:$Rt, const_pool_asm_imm:$immediate, pred:$p)>; // Version w/ the .w suffix. 
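A recurring mechanical change throughout this commit (the LowerVSETCC switch above, and the ARM/AVR/Hexagon parsers further down) is annotating deliberate case fallthroughs with LLVM_FALLTHROUGH instead of bare comments, which keeps fallthrough warnings quiet. In standard C++17 the same annotation is spelled [[fallthrough]]; a minimal sketch:

```cpp
#include <cstdio>

// Minimal stand-in for LLVM_FALLTHROUGH: C++17 spells it [[fallthrough]].
void classify(int c) {
  switch (c) {
  case 0:
    std::puts("zero, also treated as small");
    [[fallthrough]]; // deliberate: zero shares the 'small' handling
  case 1:
  case 2:
    std::puts("small");
    break;
  default:
    std::puts("large");
  }
}

int main() { classify(0); }
```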
def : t2InstAlias<"ldr${p}.w $Rt, $immediate", (t2LDRConstPool GPRnopc:$Rt, diff --git a/lib/Target/ARM/ARMInstructionSelector.cpp b/lib/Target/ARM/ARMInstructionSelector.cpp index 374176d1d7371..29ef69ad0010f 100644 --- a/lib/Target/ARM/ARMInstructionSelector.cpp +++ b/lib/Target/ARM/ARMInstructionSelector.cpp @@ -20,6 +20,8 @@ #define DEBUG_TYPE "arm-isel" +#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h" + using namespace llvm; #ifndef LLVM_BUILD_GLOBAL_ISEL @@ -42,13 +44,32 @@ public: private: bool selectImpl(MachineInstr &I) const; - bool selectICmp(MachineInstrBuilder &MIB, const ARMBaseInstrInfo &TII, - MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, - const RegisterBankInfo &RBI) const; + struct CmpConstants; + struct InsertInfo; + + bool selectCmp(CmpConstants Helper, MachineInstrBuilder &MIB, + MachineRegisterInfo &MRI) const; + + // Helper for inserting a comparison sequence that sets \p ResReg to either 1 + // if \p LHSReg and \p RHSReg are in the relationship defined by \p Cond, or + // \p PrevRes otherwise. In essence, it computes PrevRes OR (LHS Cond RHS). + bool insertComparison(CmpConstants Helper, InsertInfo I, unsigned ResReg, + ARMCC::CondCodes Cond, unsigned LHSReg, unsigned RHSReg, + unsigned PrevRes) const; + + // Set \p DestReg to \p Constant. + void putConstant(InsertInfo I, unsigned DestReg, unsigned Constant) const; + + bool selectSelect(MachineInstrBuilder &MIB, MachineRegisterInfo &MRI) const; + + // Check if the types match and both operands have the expected size and + // register bank. + bool validOpRegPair(MachineRegisterInfo &MRI, unsigned LHS, unsigned RHS, + unsigned ExpectedSize, unsigned ExpectedRegBankID) const; - bool selectSelect(MachineInstrBuilder &MIB, const ARMBaseInstrInfo &TII, - MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, - const RegisterBankInfo &RBI) const; + // Check if the register has the expected size and register bank. + bool validReg(MachineRegisterInfo &MRI, unsigned Reg, unsigned ExpectedSize, + unsigned ExpectedRegBankID) const; const ARMBaseInstrInfo &TII; const ARMBaseRegisterInfo &TRI; @@ -251,120 +272,233 @@ static unsigned selectLoadStoreOpCode(unsigned Opc, unsigned RegBank, return Opc; } -static ARMCC::CondCodes getComparePred(CmpInst::Predicate Pred) { +// When lowering comparisons, we sometimes need to perform two compares instead +// of just one. Get the condition codes for both comparisons. If only one is +// needed, the second member of the pair is ARMCC::AL. +static std::pair<ARMCC::CondCodes, ARMCC::CondCodes> +getComparePreds(CmpInst::Predicate Pred) { + std::pair<ARMCC::CondCodes, ARMCC::CondCodes> Preds = {ARMCC::AL, ARMCC::AL}; switch (Pred) { - // Needs two compares... case CmpInst::FCMP_ONE: + Preds = {ARMCC::GT, ARMCC::MI}; + break; case CmpInst::FCMP_UEQ: - default: - // AL is our "false" for now. The other two need more compares. 
- return ARMCC::AL; + Preds = {ARMCC::EQ, ARMCC::VS}; + break; case CmpInst::ICMP_EQ: case CmpInst::FCMP_OEQ: - return ARMCC::EQ; + Preds.first = ARMCC::EQ; + break; case CmpInst::ICMP_SGT: case CmpInst::FCMP_OGT: - return ARMCC::GT; + Preds.first = ARMCC::GT; + break; case CmpInst::ICMP_SGE: case CmpInst::FCMP_OGE: - return ARMCC::GE; + Preds.first = ARMCC::GE; + break; case CmpInst::ICMP_UGT: case CmpInst::FCMP_UGT: - return ARMCC::HI; + Preds.first = ARMCC::HI; + break; case CmpInst::FCMP_OLT: - return ARMCC::MI; + Preds.first = ARMCC::MI; + break; case CmpInst::ICMP_ULE: case CmpInst::FCMP_OLE: - return ARMCC::LS; + Preds.first = ARMCC::LS; + break; case CmpInst::FCMP_ORD: - return ARMCC::VC; + Preds.first = ARMCC::VC; + break; case CmpInst::FCMP_UNO: - return ARMCC::VS; + Preds.first = ARMCC::VS; + break; case CmpInst::FCMP_UGE: - return ARMCC::PL; + Preds.first = ARMCC::PL; + break; case CmpInst::ICMP_SLT: case CmpInst::FCMP_ULT: - return ARMCC::LT; + Preds.first = ARMCC::LT; + break; case CmpInst::ICMP_SLE: case CmpInst::FCMP_ULE: - return ARMCC::LE; + Preds.first = ARMCC::LE; + break; case CmpInst::FCMP_UNE: case CmpInst::ICMP_NE: - return ARMCC::NE; + Preds.first = ARMCC::NE; + break; case CmpInst::ICMP_UGE: - return ARMCC::HS; + Preds.first = ARMCC::HS; + break; case CmpInst::ICMP_ULT: - return ARMCC::LO; + Preds.first = ARMCC::LO; + break; + default: + break; } + assert(Preds.first != ARMCC::AL && "No comparisons needed?"); + return Preds; } -bool ARMInstructionSelector::selectICmp(MachineInstrBuilder &MIB, - const ARMBaseInstrInfo &TII, - MachineRegisterInfo &MRI, - const TargetRegisterInfo &TRI, - const RegisterBankInfo &RBI) const { - auto &MBB = *MIB->getParent(); - auto InsertBefore = std::next(MIB->getIterator()); - auto &DebugLoc = MIB->getDebugLoc(); - - // Move 0 into the result register. - auto Mov0I = BuildMI(MBB, InsertBefore, DebugLoc, TII.get(ARM::MOVi)) - .addDef(MRI.createVirtualRegister(&ARM::GPRRegClass)) - .addImm(0) - .add(predOps(ARMCC::AL)) - .add(condCodeOp()); - if (!constrainSelectedInstRegOperands(*Mov0I, TII, TRI, RBI)) +struct ARMInstructionSelector::CmpConstants { + CmpConstants(unsigned CmpOpcode, unsigned FlagsOpcode, unsigned OpRegBank, + unsigned OpSize) + : ComparisonOpcode(CmpOpcode), ReadFlagsOpcode(FlagsOpcode), + OperandRegBankID(OpRegBank), OperandSize(OpSize) {} + + // The opcode used for performing the comparison. + const unsigned ComparisonOpcode; + + // The opcode used for reading the flags set by the comparison. May be + // ARM::INSTRUCTION_LIST_END if we don't need to read the flags. + const unsigned ReadFlagsOpcode; + + // The assumed register bank ID for the operands. + const unsigned OperandRegBankID; + + // The assumed size in bits for the operands. 
+ const unsigned OperandSize; +}; + +struct ARMInstructionSelector::InsertInfo { + InsertInfo(MachineInstrBuilder &MIB) + : MBB(*MIB->getParent()), InsertBefore(std::next(MIB->getIterator())), + DbgLoc(MIB->getDebugLoc()) {} + + MachineBasicBlock &MBB; + const MachineBasicBlock::instr_iterator InsertBefore; + const DebugLoc &DbgLoc; +}; + +void ARMInstructionSelector::putConstant(InsertInfo I, unsigned DestReg, + unsigned Constant) const { + (void)BuildMI(I.MBB, I.InsertBefore, I.DbgLoc, TII.get(ARM::MOVi)) + .addDef(DestReg) + .addImm(Constant) + .add(predOps(ARMCC::AL)) + .add(condCodeOp()); +} + +bool ARMInstructionSelector::validOpRegPair(MachineRegisterInfo &MRI, + unsigned LHSReg, unsigned RHSReg, + unsigned ExpectedSize, + unsigned ExpectedRegBankID) const { + return MRI.getType(LHSReg) == MRI.getType(RHSReg) && + validReg(MRI, LHSReg, ExpectedSize, ExpectedRegBankID) && + validReg(MRI, RHSReg, ExpectedSize, ExpectedRegBankID); +} + +bool ARMInstructionSelector::validReg(MachineRegisterInfo &MRI, unsigned Reg, + unsigned ExpectedSize, + unsigned ExpectedRegBankID) const { + if (MRI.getType(Reg).getSizeInBits() != ExpectedSize) { + DEBUG(dbgs() << "Unexpected size for register"); return false; + } - // Perform the comparison. - auto LHSReg = MIB->getOperand(2).getReg(); - auto RHSReg = MIB->getOperand(3).getReg(); - assert(MRI.getType(LHSReg) == MRI.getType(RHSReg) && - MRI.getType(LHSReg).getSizeInBits() == 32 && - MRI.getType(RHSReg).getSizeInBits() == 32 && - "Unsupported types for comparison operation"); - auto CmpI = BuildMI(MBB, InsertBefore, DebugLoc, TII.get(ARM::CMPrr)) - .addUse(LHSReg) - .addUse(RHSReg) - .add(predOps(ARMCC::AL)); - if (!constrainSelectedInstRegOperands(*CmpI, TII, TRI, RBI)) + if (RBI.getRegBank(Reg, MRI, TRI)->getID() != ExpectedRegBankID) { + DEBUG(dbgs() << "Unexpected register bank for register"); return false; + } + + return true; +} + +bool ARMInstructionSelector::selectCmp(CmpConstants Helper, + MachineInstrBuilder &MIB, + MachineRegisterInfo &MRI) const { + const InsertInfo I(MIB); - // Move 1 into the result register if the flags say so. auto ResReg = MIB->getOperand(0).getReg(); + if (!validReg(MRI, ResReg, 1, ARM::GPRRegBankID)) + return false; + auto Cond = static_cast<CmpInst::Predicate>(MIB->getOperand(1).getPredicate()); - auto ARMCond = getComparePred(Cond); - if (ARMCond == ARMCC::AL) + if (Cond == CmpInst::FCMP_TRUE || Cond == CmpInst::FCMP_FALSE) { + putConstant(I, ResReg, Cond == CmpInst::FCMP_TRUE ? 1 : 0); + MIB->eraseFromParent(); + return true; + } + + auto LHSReg = MIB->getOperand(2).getReg(); + auto RHSReg = MIB->getOperand(3).getReg(); + if (!validOpRegPair(MRI, LHSReg, RHSReg, Helper.OperandSize, + Helper.OperandRegBankID)) return false; - auto Mov1I = BuildMI(MBB, InsertBefore, DebugLoc, TII.get(ARM::MOVCCi)) + auto ARMConds = getComparePreds(Cond); + auto ZeroReg = MRI.createVirtualRegister(&ARM::GPRRegClass); + putConstant(I, ZeroReg, 0); + + if (ARMConds.second == ARMCC::AL) { + // Simple case, we only need one comparison and we're done. + if (!insertComparison(Helper, I, ResReg, ARMConds.first, LHSReg, RHSReg, + ZeroReg)) + return false; + } else { + // Not so simple, we need two successive comparisons. 
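The "two successive comparisons" branch above exists because no single ARM condition code covers FCMP_ONE (ordered and not equal) or FCMP_UEQ (unordered or equal); getComparePreds therefore returns {GT, MI} and {EQ, VS} respectively, and the two predicated results are chained. The decomposition is easier to see in portable C++ (a sketch, using the standard unordered test):

```cpp
#include <cmath>
#include <cstdio>

// FCMP_ONE, "ordered and not equal": (a > b) || (a < b).
// The two clauses map to the GT and MI condition codes used above.
static bool fcmpOne(float a, float b) { return (a > b) || (a < b); }

// FCMP_UEQ, "unordered or equal": (a == b) || isunordered(a, b).
// The two clauses map to the EQ and VS condition codes used above.
static bool fcmpUeq(float a, float b) {
  return (a == b) || std::isunordered(a, b);
}

int main() {
  float n = std::nanf("");
  std::printf("one(1,2)=%d ueq(1,2)=%d ueq(nan,1)=%d\n",
              fcmpOne(1, 2), fcmpUeq(1, 2), fcmpUeq(n, 1));
}
```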
+ auto IntermediateRes = MRI.createVirtualRegister(&ARM::GPRRegClass); + if (!insertComparison(Helper, I, IntermediateRes, ARMConds.first, LHSReg, + RHSReg, ZeroReg)) + return false; + if (!insertComparison(Helper, I, ResReg, ARMConds.second, LHSReg, RHSReg, + IntermediateRes)) + return false; + } + + MIB->eraseFromParent(); + return true; +} + +bool ARMInstructionSelector::insertComparison(CmpConstants Helper, InsertInfo I, + unsigned ResReg, + ARMCC::CondCodes Cond, + unsigned LHSReg, unsigned RHSReg, + unsigned PrevRes) const { + // Perform the comparison. + auto CmpI = + BuildMI(I.MBB, I.InsertBefore, I.DbgLoc, TII.get(Helper.ComparisonOpcode)) + .addUse(LHSReg) + .addUse(RHSReg) + .add(predOps(ARMCC::AL)); + if (!constrainSelectedInstRegOperands(*CmpI, TII, TRI, RBI)) + return false; + + // Read the comparison flags (if necessary). + if (Helper.ReadFlagsOpcode != ARM::INSTRUCTION_LIST_END) { + auto ReadI = BuildMI(I.MBB, I.InsertBefore, I.DbgLoc, + TII.get(Helper.ReadFlagsOpcode)) + .add(predOps(ARMCC::AL)); + if (!constrainSelectedInstRegOperands(*ReadI, TII, TRI, RBI)) + return false; + } + + // Select either 1 or the previous result based on the value of the flags. + auto Mov1I = BuildMI(I.MBB, I.InsertBefore, I.DbgLoc, TII.get(ARM::MOVCCi)) .addDef(ResReg) - .addUse(Mov0I->getOperand(0).getReg()) + .addUse(PrevRes) .addImm(1) - .add(predOps(ARMCond, ARM::CPSR)); + .add(predOps(Cond, ARM::CPSR)); if (!constrainSelectedInstRegOperands(*Mov1I, TII, TRI, RBI)) return false; - MIB->eraseFromParent(); return true; } bool ARMInstructionSelector::selectSelect(MachineInstrBuilder &MIB, - const ARMBaseInstrInfo &TII, - MachineRegisterInfo &MRI, - const TargetRegisterInfo &TRI, - const RegisterBankInfo &RBI) const { + MachineRegisterInfo &MRI) const { auto &MBB = *MIB->getParent(); auto InsertBefore = std::next(MIB->getIterator()); - auto &DebugLoc = MIB->getDebugLoc(); + auto &DbgLoc = MIB->getDebugLoc(); // Compare the condition to 0. 
auto CondReg = MIB->getOperand(1).getReg(); - assert(MRI.getType(CondReg).getSizeInBits() == 1 && - RBI.getRegBank(CondReg, MRI, TRI)->getID() == ARM::GPRRegBankID && + assert(validReg(MRI, CondReg, 1, ARM::GPRRegBankID) && "Unsupported types for select operation"); - auto CmpI = BuildMI(MBB, InsertBefore, DebugLoc, TII.get(ARM::CMPri)) + auto CmpI = BuildMI(MBB, InsertBefore, DbgLoc, TII.get(ARM::CMPri)) .addUse(CondReg) .addImm(0) .add(predOps(ARMCC::AL)); @@ -376,13 +510,10 @@ bool ARMInstructionSelector::selectSelect(MachineInstrBuilder &MIB, auto ResReg = MIB->getOperand(0).getReg(); auto TrueReg = MIB->getOperand(2).getReg(); auto FalseReg = MIB->getOperand(3).getReg(); - assert(MRI.getType(ResReg) == MRI.getType(TrueReg) && - MRI.getType(TrueReg) == MRI.getType(FalseReg) && - MRI.getType(FalseReg).getSizeInBits() == 32 && - RBI.getRegBank(TrueReg, MRI, TRI)->getID() == ARM::GPRRegBankID && - RBI.getRegBank(FalseReg, MRI, TRI)->getID() == ARM::GPRRegBankID && + assert(validOpRegPair(MRI, ResReg, TrueReg, 32, ARM::GPRRegBankID) && + validOpRegPair(MRI, TrueReg, FalseReg, 32, ARM::GPRRegBankID) && "Unsupported types for select operation"); - auto Mov1I = BuildMI(MBB, InsertBefore, DebugLoc, TII.get(ARM::MOVCCr)) + auto Mov1I = BuildMI(MBB, InsertBefore, DbgLoc, TII.get(ARM::MOVCCr)) .addDef(ResReg) .addUse(TrueReg) .addUse(FalseReg) @@ -494,10 +625,32 @@ bool ARMInstructionSelector::select(MachineInstr &I) const { I.setDesc(TII.get(COPY)); return selectCopy(I, TII, MRI, TRI, RBI); } - case G_ICMP: - return selectICmp(MIB, TII, MRI, TRI, RBI); case G_SELECT: - return selectSelect(MIB, TII, MRI, TRI, RBI); + return selectSelect(MIB, MRI); + case G_ICMP: { + CmpConstants Helper(ARM::CMPrr, ARM::INSTRUCTION_LIST_END, + ARM::GPRRegBankID, 32); + return selectCmp(Helper, MIB, MRI); + } + case G_FCMP: { + assert(TII.getSubtarget().hasVFP2() && "Can't select fcmp without VFP"); + + unsigned OpReg = I.getOperand(2).getReg(); + unsigned Size = MRI.getType(OpReg).getSizeInBits(); + + if (Size == 64 && TII.getSubtarget().isFPOnlySP()) { + DEBUG(dbgs() << "Subtarget only supports single precision"); + return false; + } + if (Size != 32 && Size != 64) { + DEBUG(dbgs() << "Unsupported size for G_FCMP operand"); + return false; + } + + CmpConstants Helper(Size == 32 ? 
ARM::VCMPS : ARM::VCMPD, ARM::FMSTAT, + ARM::FPRRegBankID, Size); + return selectCmp(Helper, MIB, MRI); + } case G_GEP: I.setDesc(TII.get(ARM::ADDrr)); MIB.add(predOps(ARMCC::AL)).add(condCodeOp()); @@ -510,11 +663,10 @@ bool ARMInstructionSelector::select(MachineInstr &I) const { break; case G_CONSTANT: { unsigned Reg = I.getOperand(0).getReg(); - if (MRI.getType(Reg).getSizeInBits() != 32) + + if (!validReg(MRI, Reg, 32, ARM::GPRRegBankID)) return false; - assert(RBI.getRegBank(Reg, MRI, TRI)->getID() == ARM::GPRRegBankID && - "Expected constant to live in a GPR"); I.setDesc(TII.get(ARM::MOVi)); MIB.add(predOps(ARMCC::AL)).add(condCodeOp()); diff --git a/lib/Target/ARM/ARMLegalizerInfo.cpp b/lib/Target/ARM/ARMLegalizerInfo.cpp index f3e62d09cc30a..f23e62595d2e9 100644 --- a/lib/Target/ARM/ARMLegalizerInfo.cpp +++ b/lib/Target/ARM/ARMLegalizerInfo.cpp @@ -28,6 +28,10 @@ using namespace llvm; #error "You shouldn't build this" #endif +static bool AEABI(const ARMSubtarget &ST) { + return ST.isTargetAEABI() || ST.isTargetGNUAEABI() || ST.isTargetMuslAEABI(); +} + ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) { using namespace TargetOpcode; @@ -66,8 +70,7 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) { for (unsigned Op : {G_SREM, G_UREM}) if (ST.hasDivideInARMMode()) setAction({Op, s32}, Lower); - else if (ST.isTargetAEABI() || ST.isTargetGNUAEABI() || - ST.isTargetMuslAEABI()) + else if (AEABI(ST)) setAction({Op, s32}, Custom); else setAction({Op, s32}, Libcall); @@ -86,6 +89,8 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) { setAction({G_SELECT, 1, s1}, Legal); setAction({G_CONSTANT, s32}, Legal); + for (auto Ty : {s1, s8, s16}) + setAction({G_CONSTANT, Ty}, WidenScalar); setAction({G_ICMP, s1}, Legal); for (auto Ty : {s8, s16}) @@ -99,9 +104,22 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) { setAction({G_LOAD, s64}, Legal); setAction({G_STORE, s64}, Legal); + + setAction({G_FCMP, s1}, Legal); + setAction({G_FCMP, 1, s32}, Legal); + setAction({G_FCMP, 1, s64}, Legal); } else { for (auto Ty : {s32, s64}) setAction({G_FADD, Ty}, Libcall); + + setAction({G_FCMP, s1}, Legal); + setAction({G_FCMP, 1, s32}, Custom); + setAction({G_FCMP, 1, s64}, Custom); + + if (AEABI(ST)) + setFCmpLibcallsAEABI(); + else + setFCmpLibcallsGNU(); } for (unsigned Op : {G_FREM, G_FPOW}) @@ -111,11 +129,120 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) { computeTables(); } +void ARMLegalizerInfo::setFCmpLibcallsAEABI() { + // FCMP_TRUE and FCMP_FALSE don't need libcalls, they should be + // default-initialized. 
+ FCmp32Libcalls.resize(CmpInst::LAST_FCMP_PREDICATE + 1); + FCmp32Libcalls[CmpInst::FCMP_OEQ] = { + {RTLIB::OEQ_F32, CmpInst::BAD_ICMP_PREDICATE}}; + FCmp32Libcalls[CmpInst::FCMP_OGE] = { + {RTLIB::OGE_F32, CmpInst::BAD_ICMP_PREDICATE}}; + FCmp32Libcalls[CmpInst::FCMP_OGT] = { + {RTLIB::OGT_F32, CmpInst::BAD_ICMP_PREDICATE}}; + FCmp32Libcalls[CmpInst::FCMP_OLE] = { + {RTLIB::OLE_F32, CmpInst::BAD_ICMP_PREDICATE}}; + FCmp32Libcalls[CmpInst::FCMP_OLT] = { + {RTLIB::OLT_F32, CmpInst::BAD_ICMP_PREDICATE}}; + FCmp32Libcalls[CmpInst::FCMP_ORD] = {{RTLIB::O_F32, CmpInst::ICMP_EQ}}; + FCmp32Libcalls[CmpInst::FCMP_UGE] = {{RTLIB::OLT_F32, CmpInst::ICMP_EQ}}; + FCmp32Libcalls[CmpInst::FCMP_UGT] = {{RTLIB::OLE_F32, CmpInst::ICMP_EQ}}; + FCmp32Libcalls[CmpInst::FCMP_ULE] = {{RTLIB::OGT_F32, CmpInst::ICMP_EQ}}; + FCmp32Libcalls[CmpInst::FCMP_ULT] = {{RTLIB::OGE_F32, CmpInst::ICMP_EQ}}; + FCmp32Libcalls[CmpInst::FCMP_UNE] = {{RTLIB::UNE_F32, CmpInst::ICMP_EQ}}; + FCmp32Libcalls[CmpInst::FCMP_UNO] = { + {RTLIB::UO_F32, CmpInst::BAD_ICMP_PREDICATE}}; + FCmp32Libcalls[CmpInst::FCMP_ONE] = { + {RTLIB::OGT_F32, CmpInst::BAD_ICMP_PREDICATE}, + {RTLIB::OLT_F32, CmpInst::BAD_ICMP_PREDICATE}}; + FCmp32Libcalls[CmpInst::FCMP_UEQ] = { + {RTLIB::OEQ_F32, CmpInst::BAD_ICMP_PREDICATE}, + {RTLIB::UO_F32, CmpInst::BAD_ICMP_PREDICATE}}; + + FCmp64Libcalls.resize(CmpInst::LAST_FCMP_PREDICATE + 1); + FCmp64Libcalls[CmpInst::FCMP_OEQ] = { + {RTLIB::OEQ_F64, CmpInst::BAD_ICMP_PREDICATE}}; + FCmp64Libcalls[CmpInst::FCMP_OGE] = { + {RTLIB::OGE_F64, CmpInst::BAD_ICMP_PREDICATE}}; + FCmp64Libcalls[CmpInst::FCMP_OGT] = { + {RTLIB::OGT_F64, CmpInst::BAD_ICMP_PREDICATE}}; + FCmp64Libcalls[CmpInst::FCMP_OLE] = { + {RTLIB::OLE_F64, CmpInst::BAD_ICMP_PREDICATE}}; + FCmp64Libcalls[CmpInst::FCMP_OLT] = { + {RTLIB::OLT_F64, CmpInst::BAD_ICMP_PREDICATE}}; + FCmp64Libcalls[CmpInst::FCMP_ORD] = {{RTLIB::O_F64, CmpInst::ICMP_EQ}}; + FCmp64Libcalls[CmpInst::FCMP_UGE] = {{RTLIB::OLT_F64, CmpInst::ICMP_EQ}}; + FCmp64Libcalls[CmpInst::FCMP_UGT] = {{RTLIB::OLE_F64, CmpInst::ICMP_EQ}}; + FCmp64Libcalls[CmpInst::FCMP_ULE] = {{RTLIB::OGT_F64, CmpInst::ICMP_EQ}}; + FCmp64Libcalls[CmpInst::FCMP_ULT] = {{RTLIB::OGE_F64, CmpInst::ICMP_EQ}}; + FCmp64Libcalls[CmpInst::FCMP_UNE] = {{RTLIB::UNE_F64, CmpInst::ICMP_EQ}}; + FCmp64Libcalls[CmpInst::FCMP_UNO] = { + {RTLIB::UO_F64, CmpInst::BAD_ICMP_PREDICATE}}; + FCmp64Libcalls[CmpInst::FCMP_ONE] = { + {RTLIB::OGT_F64, CmpInst::BAD_ICMP_PREDICATE}, + {RTLIB::OLT_F64, CmpInst::BAD_ICMP_PREDICATE}}; + FCmp64Libcalls[CmpInst::FCMP_UEQ] = { + {RTLIB::OEQ_F64, CmpInst::BAD_ICMP_PREDICATE}, + {RTLIB::UO_F64, CmpInst::BAD_ICMP_PREDICATE}}; +} + +void ARMLegalizerInfo::setFCmpLibcallsGNU() { + // FCMP_TRUE and FCMP_FALSE don't need libcalls, they should be + // default-initialized. 
+ FCmp32Libcalls.resize(CmpInst::LAST_FCMP_PREDICATE + 1); + FCmp32Libcalls[CmpInst::FCMP_OEQ] = {{RTLIB::OEQ_F32, CmpInst::ICMP_EQ}}; + FCmp32Libcalls[CmpInst::FCMP_OGE] = {{RTLIB::OGE_F32, CmpInst::ICMP_SGE}}; + FCmp32Libcalls[CmpInst::FCMP_OGT] = {{RTLIB::OGT_F32, CmpInst::ICMP_SGT}}; + FCmp32Libcalls[CmpInst::FCMP_OLE] = {{RTLIB::OLE_F32, CmpInst::ICMP_SLE}}; + FCmp32Libcalls[CmpInst::FCMP_OLT] = {{RTLIB::OLT_F32, CmpInst::ICMP_SLT}}; + FCmp32Libcalls[CmpInst::FCMP_ORD] = {{RTLIB::O_F32, CmpInst::ICMP_EQ}}; + FCmp32Libcalls[CmpInst::FCMP_UGE] = {{RTLIB::OLT_F32, CmpInst::ICMP_SGE}}; + FCmp32Libcalls[CmpInst::FCMP_UGT] = {{RTLIB::OLE_F32, CmpInst::ICMP_SGT}}; + FCmp32Libcalls[CmpInst::FCMP_ULE] = {{RTLIB::OGT_F32, CmpInst::ICMP_SLE}}; + FCmp32Libcalls[CmpInst::FCMP_ULT] = {{RTLIB::OGE_F32, CmpInst::ICMP_SLT}}; + FCmp32Libcalls[CmpInst::FCMP_UNE] = {{RTLIB::UNE_F32, CmpInst::ICMP_NE}}; + FCmp32Libcalls[CmpInst::FCMP_UNO] = {{RTLIB::UO_F32, CmpInst::ICMP_NE}}; + FCmp32Libcalls[CmpInst::FCMP_ONE] = {{RTLIB::OGT_F32, CmpInst::ICMP_SGT}, + {RTLIB::OLT_F32, CmpInst::ICMP_SLT}}; + FCmp32Libcalls[CmpInst::FCMP_UEQ] = {{RTLIB::OEQ_F32, CmpInst::ICMP_EQ}, + {RTLIB::UO_F32, CmpInst::ICMP_NE}}; + + FCmp64Libcalls.resize(CmpInst::LAST_FCMP_PREDICATE + 1); + FCmp64Libcalls[CmpInst::FCMP_OEQ] = {{RTLIB::OEQ_F64, CmpInst::ICMP_EQ}}; + FCmp64Libcalls[CmpInst::FCMP_OGE] = {{RTLIB::OGE_F64, CmpInst::ICMP_SGE}}; + FCmp64Libcalls[CmpInst::FCMP_OGT] = {{RTLIB::OGT_F64, CmpInst::ICMP_SGT}}; + FCmp64Libcalls[CmpInst::FCMP_OLE] = {{RTLIB::OLE_F64, CmpInst::ICMP_SLE}}; + FCmp64Libcalls[CmpInst::FCMP_OLT] = {{RTLIB::OLT_F64, CmpInst::ICMP_SLT}}; + FCmp64Libcalls[CmpInst::FCMP_ORD] = {{RTLIB::O_F64, CmpInst::ICMP_EQ}}; + FCmp64Libcalls[CmpInst::FCMP_UGE] = {{RTLIB::OLT_F64, CmpInst::ICMP_SGE}}; + FCmp64Libcalls[CmpInst::FCMP_UGT] = {{RTLIB::OLE_F64, CmpInst::ICMP_SGT}}; + FCmp64Libcalls[CmpInst::FCMP_ULE] = {{RTLIB::OGT_F64, CmpInst::ICMP_SLE}}; + FCmp64Libcalls[CmpInst::FCMP_ULT] = {{RTLIB::OGE_F64, CmpInst::ICMP_SLT}}; + FCmp64Libcalls[CmpInst::FCMP_UNE] = {{RTLIB::UNE_F64, CmpInst::ICMP_NE}}; + FCmp64Libcalls[CmpInst::FCMP_UNO] = {{RTLIB::UO_F64, CmpInst::ICMP_NE}}; + FCmp64Libcalls[CmpInst::FCMP_ONE] = {{RTLIB::OGT_F64, CmpInst::ICMP_SGT}, + {RTLIB::OLT_F64, CmpInst::ICMP_SLT}}; + FCmp64Libcalls[CmpInst::FCMP_UEQ] = {{RTLIB::OEQ_F64, CmpInst::ICMP_EQ}, + {RTLIB::UO_F64, CmpInst::ICMP_NE}}; +} + +ARMLegalizerInfo::FCmpLibcallsList +ARMLegalizerInfo::getFCmpLibcalls(CmpInst::Predicate Predicate, + unsigned Size) const { + assert(CmpInst::isFPPredicate(Predicate) && "Unsupported FCmp predicate"); + if (Size == 32) + return FCmp32Libcalls[Predicate]; + if (Size == 64) + return FCmp64Libcalls[Predicate]; + llvm_unreachable("Unsupported size for FCmp predicate"); +} + bool ARMLegalizerInfo::legalizeCustom(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder) const { using namespace TargetOpcode; + MIRBuilder.setInstr(MI); + switch (MI.getOpcode()) { default: return false; @@ -137,9 +264,9 @@ bool ARMLegalizerInfo::legalizeCustom(MachineInstr &MI, auto RetVal = MRI.createGenericVirtualRegister( getLLTForType(*RetTy, MIRBuilder.getMF().getDataLayout())); - auto Status = replaceWithLibcall(MI, MIRBuilder, Libcall, {RetVal, RetTy}, - {{MI.getOperand(1).getReg(), ArgTy}, - {MI.getOperand(2).getReg(), ArgTy}}); + auto Status = createLibcall(MIRBuilder, Libcall, {RetVal, RetTy}, + {{MI.getOperand(1).getReg(), ArgTy}, + {MI.getOperand(2).getReg(), ArgTy}}); if (Status != LegalizerHelper::Legalized) 
return false; @@ -149,8 +276,76 @@ bool ARMLegalizerInfo::legalizeCustom(MachineInstr &MI, MIRBuilder.buildUnmerge( {MRI.createGenericVirtualRegister(LLT::scalar(32)), OriginalResult}, RetVal); + break; + } + case G_FCMP: { + assert(MRI.getType(MI.getOperand(2).getReg()) == + MRI.getType(MI.getOperand(3).getReg()) && + "Mismatched operands for G_FCMP"); + auto OpSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits(); + + auto OriginalResult = MI.getOperand(0).getReg(); + auto Predicate = + static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate()); + auto Libcalls = getFCmpLibcalls(Predicate, OpSize); + + if (Libcalls.empty()) { + assert((Predicate == CmpInst::FCMP_TRUE || + Predicate == CmpInst::FCMP_FALSE) && + "Predicate needs libcalls, but none specified"); + MIRBuilder.buildConstant(OriginalResult, + Predicate == CmpInst::FCMP_TRUE ? 1 : 0); + MI.eraseFromParent(); + return true; + } + + auto &Ctx = MIRBuilder.getMF().getFunction()->getContext(); + assert((OpSize == 32 || OpSize == 64) && "Unsupported operand size"); + auto *ArgTy = OpSize == 32 ? Type::getFloatTy(Ctx) : Type::getDoubleTy(Ctx); + auto *RetTy = Type::getInt32Ty(Ctx); + + SmallVector<unsigned, 2> Results; + for (auto Libcall : Libcalls) { + auto LibcallResult = MRI.createGenericVirtualRegister(LLT::scalar(32)); + auto Status = + createLibcall(MIRBuilder, Libcall.LibcallID, {LibcallResult, RetTy}, + {{MI.getOperand(2).getReg(), ArgTy}, + {MI.getOperand(3).getReg(), ArgTy}}); + + if (Status != LegalizerHelper::Legalized) + return false; - return LegalizerHelper::Legalized; + auto ProcessedResult = + Libcalls.size() == 1 + ? OriginalResult + : MRI.createGenericVirtualRegister(MRI.getType(OriginalResult)); + + // We have a result, but we need to transform it into a proper 1-bit 0 or + // 1, taking into account the different peculiarities of the values + // returned by the comparison functions. + CmpInst::Predicate ResultPred = Libcall.Predicate; + if (ResultPred == CmpInst::BAD_ICMP_PREDICATE) { + // We have a nice 0 or 1, and we just need to truncate it back to 1 bit + // to keep the types consistent. + MIRBuilder.buildTrunc(ProcessedResult, LibcallResult); + } else { + // We need to compare against 0. + assert(CmpInst::isIntPredicate(ResultPred) && "Unsupported predicate"); + auto Zero = MRI.createGenericVirtualRegister(LLT::scalar(32)); + MIRBuilder.buildConstant(Zero, 0); + MIRBuilder.buildICmp(ResultPred, ProcessedResult, LibcallResult, Zero); + } + Results.push_back(ProcessedResult); + } + + if (Results.size() != 1) { + assert(Results.size() == 2 && "Unexpected number of results"); + MIRBuilder.buildOr(OriginalResult, Results[0], Results[1]); + } + break; } } + + MI.eraseFromParent(); + return true; } diff --git a/lib/Target/ARM/ARMLegalizerInfo.h b/lib/Target/ARM/ARMLegalizerInfo.h index a9bdd367737e5..78ab9412c04ba 100644 --- a/lib/Target/ARM/ARMLegalizerInfo.h +++ b/lib/Target/ARM/ARMLegalizerInfo.h @@ -14,7 +14,10 @@ #ifndef LLVM_LIB_TARGET_ARM_ARMMACHINELEGALIZER_H #define LLVM_LIB_TARGET_ARM_ARMMACHINELEGALIZER_H +#include "llvm/ADT/IndexedMap.h" #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" +#include "llvm/CodeGen/RuntimeLibcalls.h" +#include "llvm/IR/Instructions.h" namespace llvm { @@ -27,6 +30,36 @@ public: bool legalizeCustom(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder) const override; + +private: + void setFCmpLibcallsGNU(); + void setFCmpLibcallsAEABI(); + + struct FCmpLibcallInfo { + // Which libcall this is. 
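The G_FCMP legalization above leans on this libcall/predicate pairing: AEABI helpers such as __aeabi_fcmpeq already return 0 or 1 (BAD_ICMP_PREDICATE, truncate only), while GNU-style helpers such as __eqsf2 return a three-way value that must still be compared against zero. A standalone sketch of that normalization (the soft-float helper is simulated with hardware floats, and NaN handling is deliberately ignored here):

```cpp
#include <cstdio>

// Simulated GNU-style soft-float comparison: returns <0, 0 or >0 like
// __ltsf2 / __eqsf2. Implemented with hardware floats purely for
// illustration; NaN inputs are not handled in this sketch.
static int cmpsf2(float a, float b) { return a < b ? -1 : (a > b ? 1 : 0); }

// Normalize the three-way result into the 1-bit value G_FCMP produces:
// FCMP_OLT maps to {OLT_F32, ICMP_SLT} above, i.e. "result < 0".
static bool fcmpOlt(float a, float b) { return cmpsf2(a, b) < 0; }

int main() {
  std::printf("olt(1,2)=%d olt(2,1)=%d\n", fcmpOlt(1, 2), fcmpOlt(2, 1));
}
```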
+ RTLIB::Libcall LibcallID; + + // The predicate to be used when comparing the value returned by the + // function with a relevant constant (currently hard-coded to zero). This is + // necessary because often the libcall will return e.g. a value greater than + // 0 to represent 'true' and anything negative to represent 'false', or + // maybe 0 to represent 'true' and non-zero for 'false'. If no comparison is + // needed, this should be CmpInst::BAD_ICMP_PREDICATE. + CmpInst::Predicate Predicate; + }; + using FCmpLibcallsList = SmallVector<FCmpLibcallInfo, 2>; + + // Map from each FCmp predicate to the corresponding libcall infos. A FCmp + // instruction may be lowered to one or two libcalls, which is why we need a + // list. If two libcalls are needed, their results will be OR'ed. + using FCmpLibcallsMapTy = IndexedMap<FCmpLibcallsList>; + + FCmpLibcallsMapTy FCmp32Libcalls; + FCmpLibcallsMapTy FCmp64Libcalls; + + // Get the libcall(s) corresponding to \p Predicate for operands of \p Size + // bits. + FCmpLibcallsList getFCmpLibcalls(CmpInst::Predicate, unsigned Size) const; }; } // End llvm namespace. #endif diff --git a/lib/Target/ARM/ARMRegisterBankInfo.cpp b/lib/Target/ARM/ARMRegisterBankInfo.cpp index 11fb81a4f9fea..c0c09e8c15afd 100644 --- a/lib/Target/ARM/ARMRegisterBankInfo.cpp +++ b/lib/Target/ARM/ARMRegisterBankInfo.cpp @@ -212,8 +212,6 @@ ARMRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { const MachineFunction &MF = *MI.getParent()->getParent(); const MachineRegisterInfo &MRI = MF.getRegInfo(); - LLT Ty = MRI.getType(MI.getOperand(0).getReg()); - unsigned NumOperands = MI.getNumOperands(); const ValueMapping *OperandsMapping = &ARM::ValueMappings[ARM::GPR3OpsIdx]; @@ -236,26 +234,31 @@ ARMRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { OperandsMapping = &ARM::ValueMappings[ARM::GPR3OpsIdx]; break; case G_LOAD: - case G_STORE: + case G_STORE: { + LLT Ty = MRI.getType(MI.getOperand(0).getReg()); OperandsMapping = Ty.getSizeInBits() == 64 ? getOperandsMapping({&ARM::ValueMappings[ARM::DPR3OpsIdx], &ARM::ValueMappings[ARM::GPR3OpsIdx]}) : &ARM::ValueMappings[ARM::GPR3OpsIdx]; break; - case G_FADD: + } + case G_FADD: { + LLT Ty = MRI.getType(MI.getOperand(0).getReg()); assert((Ty.getSizeInBits() == 32 || Ty.getSizeInBits() == 64) && "Unsupported size for G_FADD"); OperandsMapping = Ty.getSizeInBits() == 64 ? &ARM::ValueMappings[ARM::DPR3OpsIdx] : &ARM::ValueMappings[ARM::SPR3OpsIdx]; break; + } case G_CONSTANT: case G_FRAME_INDEX: OperandsMapping = getOperandsMapping({&ARM::ValueMappings[ARM::GPR3OpsIdx], nullptr}); break; case G_SELECT: { + LLT Ty = MRI.getType(MI.getOperand(0).getReg()); LLT Ty2 = MRI.getType(MI.getOperand(1).getReg()); (void)Ty2; assert(Ty.getSizeInBits() == 32 && "Unsupported size for G_SELECT"); @@ -277,9 +280,29 @@ ARMRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { &ARM::ValueMappings[ARM::GPR3OpsIdx]}); break; } + case G_FCMP: { + LLT Ty = MRI.getType(MI.getOperand(0).getReg()); + LLT Ty1 = MRI.getType(MI.getOperand(2).getReg()); + LLT Ty2 = MRI.getType(MI.getOperand(3).getReg()); + (void)Ty2; + assert(Ty.getSizeInBits() == 1 && "Unsupported size for G_FCMP"); + assert(Ty1.getSizeInBits() == Ty2.getSizeInBits() && + "Mismatched operand sizes for G_FCMP"); + + unsigned Size = Ty1.getSizeInBits(); + assert((Size == 32 || Size == 64) && "Unsupported size for G_FCMP"); + + auto FPRValueMapping = Size == 32 ? 
&ARM::ValueMappings[ARM::SPR3OpsIdx] + : &ARM::ValueMappings[ARM::DPR3OpsIdx]; + OperandsMapping = + getOperandsMapping({&ARM::ValueMappings[ARM::GPR3OpsIdx], nullptr, + FPRValueMapping, FPRValueMapping}); + break; + } case G_MERGE_VALUES: { // We only support G_MERGE_VALUES for creating a double precision floating // point value out of two GPRs. + LLT Ty = MRI.getType(MI.getOperand(0).getReg()); LLT Ty1 = MRI.getType(MI.getOperand(1).getReg()); LLT Ty2 = MRI.getType(MI.getOperand(2).getReg()); if (Ty.getSizeInBits() != 64 || Ty1.getSizeInBits() != 32 || @@ -294,6 +317,7 @@ ARMRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case G_UNMERGE_VALUES: { // We only support G_UNMERGE_VALUES for splitting a double precision // floating point value into two GPRs. + LLT Ty = MRI.getType(MI.getOperand(0).getReg()); LLT Ty1 = MRI.getType(MI.getOperand(1).getReg()); LLT Ty2 = MRI.getType(MI.getOperand(2).getReg()); if (Ty.getSizeInBits() != 32 || Ty1.getSizeInBits() != 32 || diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp index 8eb9dbf5f9de6..51b0fedd2b54f 100644 --- a/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -15,6 +15,24 @@ using namespace llvm; #define DEBUG_TYPE "armtti" +bool ARMTTIImpl::areInlineCompatible(const Function *Caller, + const Function *Callee) const { + const TargetMachine &TM = getTLI()->getTargetMachine(); + const FeatureBitset &CallerBits = + TM.getSubtargetImpl(*Caller)->getFeatureBits(); + const FeatureBitset &CalleeBits = + TM.getSubtargetImpl(*Callee)->getFeatureBits(); + + // To inline a callee, all features not in the whitelist must match exactly. + bool MatchExact = (CallerBits & ~InlineFeatureWhitelist) == + (CalleeBits & ~InlineFeatureWhitelist); + // For features in the whitelist, the callee's features must be a subset of + // the callers'. + bool MatchSubset = ((CallerBits & CalleeBits) & InlineFeatureWhitelist) == + (CalleeBits & InlineFeatureWhitelist); + return MatchExact && MatchSubset; +} + int ARMTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) { assert(Ty->isIntegerTy()); diff --git a/lib/Target/ARM/ARMTargetTransformInfo.h b/lib/Target/ARM/ARMTargetTransformInfo.h index 8a1a378638779..0695a4e633467 100644 --- a/lib/Target/ARM/ARMTargetTransformInfo.h +++ b/lib/Target/ARM/ARMTargetTransformInfo.h @@ -33,6 +33,39 @@ class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> { const ARMSubtarget *ST; const ARMTargetLowering *TLI; + // Currently the following features are excluded from InlineFeatureWhitelist. + // ModeThumb, FeatureNoARM, ModeSoftFloat, FeatureVFPOnlySP, FeatureD16 + // Depending on whether they are set or unset, different + // instructions/registers are available. For example, inlining a callee with + // -thumb-mode in a caller with +thumb-mode, may cause the assembler to + // fail if the callee uses ARM only instructions, e.g. in inline asm. 
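areInlineCompatible above splits the subtarget feature bits in two: outside the whitelist, caller and callee must match exactly; inside it, the callee's features must be a subset of the caller's. The same shape with std::bitset (a sketch; a 64-bit toy feature set stands in for LLVM's FeatureBitset):

```cpp
#include <bitset>
#include <cstdio>

using Features = std::bitset<64>;

// Exact match outside the whitelist, subset inside it -- the same shape
// as the MatchExact / MatchSubset pair in the hunk above.
static bool inlineCompatible(const Features &Caller, const Features &Callee,
                             const Features &Whitelist) {
  bool MatchExact = (Caller & ~Whitelist) == (Callee & ~Whitelist);
  bool MatchSubset =
      ((Caller & Callee) & Whitelist) == (Callee & Whitelist);
  return MatchExact && MatchSubset;
}

int main() {
  Features Whitelist, Caller, Callee;
  Whitelist.set(3); // bit 3: a NEON-like optional feature (hypothetical)
  Caller.set(3);    // caller has it, callee does not: still inlinable
  std::printf("%d\n", inlineCompatible(Caller, Callee, Whitelist)); // 1
  Callee.set(7);    // non-whitelisted feature only in the callee
  std::printf("%d\n", inlineCompatible(Caller, Callee, Whitelist)); // 0
}
```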
+ const FeatureBitset InlineFeatureWhitelist = { + ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureNEON, ARM::FeatureThumb2, + ARM::FeatureFP16, ARM::FeatureVFP4, ARM::FeatureFPARMv8, + ARM::FeatureFullFP16, ARM::FeatureHWDivThumb, + ARM::FeatureHWDivARM, ARM::FeatureDB, ARM::FeatureV7Clrex, + ARM::FeatureAcquireRelease, ARM::FeatureSlowFPBrcc, + ARM::FeaturePerfMon, ARM::FeatureTrustZone, ARM::Feature8MSecExt, + ARM::FeatureCrypto, ARM::FeatureCRC, ARM::FeatureRAS, + ARM::FeatureFPAO, ARM::FeatureFuseAES, ARM::FeatureZCZeroing, + ARM::FeatureProfUnpredicate, ARM::FeatureSlowVGETLNi32, + ARM::FeatureSlowVDUP32, ARM::FeaturePreferVMOVSR, + ARM::FeaturePrefISHSTBarrier, ARM::FeatureMuxedUnits, + ARM::FeatureSlowOddRegister, ARM::FeatureSlowLoadDSubreg, + ARM::FeatureDontWidenVMOVS, ARM::FeatureExpandMLx, + ARM::FeatureHasVMLxHazards, ARM::FeatureNEONForFPMovs, + ARM::FeatureNEONForFP, ARM::FeatureCheckVLDnAlign, + ARM::FeatureHasSlowFPVMLx, ARM::FeatureVMLxForwarding, + ARM::FeaturePref32BitThumb, ARM::FeatureAvoidPartialCPSR, + ARM::FeatureCheapPredicableCPSR, ARM::FeatureAvoidMOVsShOp, + ARM::FeatureHasRetAddrStack, ARM::FeatureHasNoBranchPredictor, + ARM::FeatureDSP, ARM::FeatureMP, ARM::FeatureVirtualization, + ARM::FeatureMClass, ARM::FeatureRClass, ARM::FeatureAClass, + ARM::FeatureNaClTrap, ARM::FeatureStrictAlign, ARM::FeatureLongCalls, + ARM::FeatureExecuteOnly, ARM::FeatureReserveR9, ARM::FeatureNoMovt, + ARM::FeatureNoNegativeImmediates + }; + const ARMSubtarget *getST() const { return ST; } const ARMTargetLowering *getTLI() const { return TLI; } @@ -41,6 +74,9 @@ public: : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)), TLI(ST->getTargetLowering()) {} + bool areInlineCompatible(const Function *Caller, + const Function *Callee) const; + bool enableInterleavedAccessVectorization() { return true; } /// Floating-point computation using ARMv8 AArch32 Advanced diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 891b5c60e1fd6..1129826f21f64 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -5249,6 +5249,7 @@ bool ARMAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) { // Fall though for the Identifier case that is not a register or a // special name. 
+ LLVM_FALLTHROUGH; } case AsmToken::LParen: // parenthesized expressions like (_strcmp-4) case AsmToken::Integer: // things like 1f and 2b as a branch targets @@ -8992,6 +8993,8 @@ unsigned ARMAsmParser::MatchInstruction(OperandVector &Operands, MCInst &Inst, return PlainMatchResult; } +std::string ARMMnemonicSpellCheck(StringRef S, uint64_t FBS); + static const char *getSubtargetFeatureName(uint64_t Val); bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, OperandVector &Operands, @@ -9085,9 +9088,13 @@ bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, return Error(ErrorLoc, "invalid operand for instruction"); } - case Match_MnemonicFail: - return Error(IDLoc, "invalid instruction", + case Match_MnemonicFail: { + uint64_t FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); + std::string Suggestion = ARMMnemonicSpellCheck( + ((ARMOperand &)*Operands[0]).getToken(), FBS); + return Error(IDLoc, "invalid instruction" + Suggestion, ((ARMOperand &)*Operands[0]).getLocRange()); + } case Match_RequiresNotITBlock: return Error(IDLoc, "flag setting instruction only valid outside IT block"); case Match_RequiresITBlock: diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp index 22de728fe06e1..a77df7a2598f4 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp @@ -361,9 +361,8 @@ static uint32_t joinHalfWords(uint32_t FirstHalf, uint32_t SecondHalf, unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm, const MCFixup &Fixup, const MCValue &Target, uint64_t Value, - bool IsPCRel, MCContext &Ctx, - bool IsLittleEndian, - bool IsResolved) const { + bool IsResolved, MCContext &Ctx, + bool IsLittleEndian) const { unsigned Kind = Fixup.getKind(); // MachO tries to make .o files that look vaguely pre-linked, so for MOVW/MOVT @@ -392,7 +391,7 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm, case FK_SecRel_4: return Value; case ARM::fixup_arm_movt_hi16: - if (!IsPCRel) + if (IsResolved || !STI->getTargetTriple().isOSBinFormatELF()) Value >>= 16; LLVM_FALLTHROUGH; case ARM::fixup_arm_movw_lo16: { @@ -404,7 +403,7 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm, return Value; } case ARM::fixup_t2_movt_hi16: - if (!IsPCRel) + if (IsResolved || !STI->getTargetTriple().isOSBinFormatELF()) Value >>= 16; LLVM_FALLTHROUGH; case ARM::fixup_t2_movw_lo16: { @@ -885,11 +884,11 @@ static unsigned getFixupKindContainerSizeBytes(unsigned Kind) { void ARMAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, const MCValue &Target, MutableArrayRef<char> Data, uint64_t Value, - bool IsPCRel) const { + bool IsResolved) const { unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind()); MCContext &Ctx = Asm.getContext(); - Value = adjustFixupValue(Asm, Fixup, Target, Value, IsPCRel, Ctx, - IsLittleEndian, true); + Value = adjustFixupValue(Asm, Fixup, Target, Value, IsResolved, Ctx, + IsLittleEndian); if (!Value) return; // Doesn't change encoding. 
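The Match_MnemonicFail hunk above turns a bare "invalid instruction" into "invalid instruction, did you mean ...?" by consulting the TableGen-generated ARMMnemonicSpellCheck. That generated function is, in essence, a nearest-neighbor search over the mnemonic table by edit distance; a small standalone version (the word list and distance threshold are hypothetical):

```cpp
#include <algorithm>
#include <cstdio>
#include <string>
#include <vector>

// Classic single-row dynamic-programming Levenshtein distance.
static unsigned editDistance(const std::string &A, const std::string &B) {
  std::vector<unsigned> Row(B.size() + 1);
  for (unsigned j = 0; j <= B.size(); ++j) Row[j] = j;
  for (unsigned i = 1; i <= A.size(); ++i) {
    unsigned Prev = Row[0]; Row[0] = i;
    for (unsigned j = 1; j <= B.size(); ++j) {
      unsigned Cur = Row[j];
      Row[j] = std::min({Row[j] + 1, Row[j - 1] + 1,
                         Prev + (A[i - 1] != B[j - 1])});
      Prev = Cur;
    }
  }
  return Row[B.size()];
}

// Suggest the closest known mnemonic, or "" if nothing is close enough.
static std::string spellCheck(const std::string &S,
                              const std::vector<std::string> &Known) {
  std::string Best;
  unsigned BestDist = 2; // allow at most one edit
  for (const auto &K : Known)
    if (unsigned D = editDistance(S, K); D < BestDist) {
      BestDist = D;
      Best = K;
    }
  return Best;
}

int main() {
  std::vector<std::string> Mnemonics = {"add", "adds", "adc", "sub"};
  std::printf("did you mean '%s'?\n", spellCheck("ad", Mnemonics).c_str());
}
```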
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h index 84b54bbb9a49b..02374966dafe7 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h @@ -42,13 +42,13 @@ public: const MCValue &Target) override; unsigned adjustFixupValue(const MCAssembler &Asm, const MCFixup &Fixup, - const MCValue &Target, uint64_t Value, bool IsPCRel, - MCContext &Ctx, bool IsLittleEndian, - bool IsResolved) const; + const MCValue &Target, uint64_t Value, + bool IsResolved, MCContext &Ctx, + bool IsLittleEndian) const; void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, const MCValue &Target, MutableArrayRef<char> Data, - uint64_t Value, bool IsPCRel) const override; + uint64_t Value, bool IsResolved) const override; unsigned getRelaxedOpcode(unsigned Op) const; diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp index 0b6574c37de12..5709b4e617987 100644 --- a/lib/Target/ARM/Thumb1FrameLowering.cpp +++ b/lib/Target/ARM/Thumb1FrameLowering.cpp @@ -236,7 +236,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF, case ARM::R12: if (STI.splitFramePushPop(MF)) break; - // fallthough + LLVM_FALLTHROUGH; case ARM::R0: case ARM::R1: case ARM::R2: diff --git a/lib/Target/AVR/AVRAsmPrinter.cpp b/lib/Target/AVR/AVRAsmPrinter.cpp index f0c7b11895b4a..c058c9e1f5348 100644 --- a/lib/Target/AVR/AVRAsmPrinter.cpp +++ b/lib/Target/AVR/AVRAsmPrinter.cpp @@ -149,7 +149,10 @@ bool AVRAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, (void)MO; assert(MO.isReg() && "Unexpected inline asm memory operand"); - // TODO: We can look up the alternative name for the register if it's given. + // TODO: We should be able to look up the alternative name for + // the register if it's given. + // TableGen doesn't expose a way of retrieving names + // for registers. if (MI->getOperand(OpNum).getReg() == AVR::R31R30) { O << "Z"; } else { diff --git a/lib/Target/AVR/AVRDevices.td b/lib/Target/AVR/AVRDevices.td index 9224af613d148..62def45744372 100644 --- a/lib/Target/AVR/AVRDevices.td +++ b/lib/Target/AVR/AVRDevices.td @@ -6,7 +6,6 @@ // :TODO: We define all devices with SRAM to have all variants of LD/ST/LDD/STD. // In reality, avr1 (no SRAM) has one variant each of `LD` and `ST`. // avr2 (with SRAM) adds the rest of the variants. -// :TODO: s/AVRTiny/Tiny // A feature set aggregates features, grouping them.
We don't want to create a @@ -136,7 +135,7 @@ def ELFArchAVR4 : ELFArch<"EF_AVR_ARCH_AVR4">; def ELFArchAVR5 : ELFArch<"EF_AVR_ARCH_AVR5">; def ELFArchAVR51 : ELFArch<"EF_AVR_ARCH_AVR51">; def ELFArchAVR6 : ELFArch<"EF_AVR_ARCH_AVR6">; -def ELFArchAVRTiny : ELFArch<"EF_AVR_ARCH_AVRTINY">; +def ELFArchTiny : ELFArch<"EF_AVR_ARCH_AVRTINY">; def ELFArchXMEGA1 : ELFArch<"EF_AVR_ARCH_XMEGA1">; def ELFArchXMEGA2 : ELFArch<"EF_AVR_ARCH_XMEGA2">; def ELFArchXMEGA3 : ELFArch<"EF_AVR_ARCH_XMEGA3">; @@ -189,7 +188,7 @@ def FamilyAVR51 : Family<"avr51", def FamilyAVR6 : Family<"avr6", [FamilyAVR51]>; -def FamilyAVRTiny : Family<"avrtiny", +def FamilyTiny : Family<"avrtiny", [FamilyAVR0, FeatureBREAK, FeatureSRAM, FeatureTinyEncoding]>; @@ -240,7 +239,7 @@ def : Device<"avrxmega4", FamilyXMEGA, ELFArchXMEGA4>; def : Device<"avrxmega5", FamilyXMEGA, ELFArchXMEGA5>; def : Device<"avrxmega6", FamilyXMEGA, ELFArchXMEGA6>; def : Device<"avrxmega7", FamilyXMEGA, ELFArchXMEGA7>; -def : Device<"avrtiny", FamilyAVRTiny, ELFArchAVRTiny>; +def : Device<"avrtiny", FamilyTiny, ELFArchTiny>; // Specific MCUs def : Device<"at90s1200", FamilyAVR0, ELFArchAVR1>; @@ -480,12 +479,12 @@ def : Device<"atxmega384d3", FamilyXMEGA, ELFArchXMEGA6>; def : Device<"atxmega128a1", FamilyXMEGA, ELFArchXMEGA7>; def : Device<"atxmega128a1u", FamilyXMEGAU, ELFArchXMEGA7>; def : Device<"atxmega128a4u", FamilyXMEGAU, ELFArchXMEGA7>; -def : Device<"attiny4", FamilyAVRTiny, ELFArchAVRTiny>; -def : Device<"attiny5", FamilyAVRTiny, ELFArchAVRTiny>; -def : Device<"attiny9", FamilyAVRTiny, ELFArchAVRTiny>; -def : Device<"attiny10", FamilyAVRTiny, ELFArchAVRTiny>; -def : Device<"attiny20", FamilyAVRTiny, ELFArchAVRTiny>; -def : Device<"attiny40", FamilyAVRTiny, ELFArchAVRTiny>; -def : Device<"attiny102", FamilyAVRTiny, ELFArchAVRTiny>; -def : Device<"attiny104", FamilyAVRTiny, ELFArchAVRTiny>; +def : Device<"attiny4", FamilyTiny, ELFArchTiny>; +def : Device<"attiny5", FamilyTiny, ELFArchTiny>; +def : Device<"attiny9", FamilyTiny, ELFArchTiny>; +def : Device<"attiny10", FamilyTiny, ELFArchTiny>; +def : Device<"attiny20", FamilyTiny, ELFArchTiny>; +def : Device<"attiny40", FamilyTiny, ELFArchTiny>; +def : Device<"attiny102", FamilyTiny, ELFArchTiny>; +def : Device<"attiny104", FamilyTiny, ELFArchTiny>; diff --git a/lib/Target/AVR/AVRInstrInfo.cpp b/lib/Target/AVR/AVRInstrInfo.cpp index afba66b2e69bb..744aa723c416c 100644 --- a/lib/Target/AVR/AVRInstrInfo.cpp +++ b/lib/Target/AVR/AVRInstrInfo.cpp @@ -402,7 +402,7 @@ unsigned AVRInstrInfo::insertBranch(MachineBasicBlock &MBB, ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const { - assert(!BytesAdded && "code size not handled"); + if (BytesAdded) *BytesAdded = 0; // Shouldn't be a fall through. assert(TBB && "insertBranch must not be told to insert a fallthrough"); @@ -411,19 +411,24 @@ unsigned AVRInstrInfo::insertBranch(MachineBasicBlock &MBB, if (Cond.empty()) { assert(!FBB && "Unconditional branch with multiple successors!"); - BuildMI(&MBB, DL, get(AVR::RJMPk)).addMBB(TBB); + auto &MI = *BuildMI(&MBB, DL, get(AVR::RJMPk)).addMBB(TBB); + if (BytesAdded) + *BytesAdded += getInstSizeInBytes(MI); return 1; } // Conditional branch. unsigned Count = 0; AVRCC::CondCodes CC = (AVRCC::CondCodes)Cond[0].getImm(); - BuildMI(&MBB, DL, getBrCond(CC)).addMBB(TBB); + auto &CondMI = *BuildMI(&MBB, DL, getBrCond(CC)).addMBB(TBB); + + if (BytesAdded) *BytesAdded += getInstSizeInBytes(CondMI); ++Count; if (FBB) { // Two-way Conditional branch. Insert the second branch. 
- BuildMI(&MBB, DL, get(AVR::RJMPk)).addMBB(FBB); + auto &MI = *BuildMI(&MBB, DL, get(AVR::RJMPk)).addMBB(FBB); + if (BytesAdded) *BytesAdded += getInstSizeInBytes(MI); ++Count; } @@ -432,7 +437,7 @@ unsigned AVRInstrInfo::insertBranch(MachineBasicBlock &MBB, unsigned AVRInstrInfo::removeBranch(MachineBasicBlock &MBB, int *BytesRemoved) const { - assert(!BytesRemoved && "code size not handled"); + if (BytesRemoved) *BytesRemoved = 0; MachineBasicBlock::iterator I = MBB.end(); unsigned Count = 0; @@ -450,6 +455,7 @@ unsigned AVRInstrInfo::removeBranch(MachineBasicBlock &MBB, } // Remove the branch. + if (BytesRemoved) *BytesRemoved += getInstSizeInBytes(*I); I->eraseFromParent(); I = MBB.end(); ++Count; @@ -494,5 +500,61 @@ unsigned AVRInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const { } } +MachineBasicBlock * +AVRInstrInfo::getBranchDestBlock(const MachineInstr &MI) const { + switch (MI.getOpcode()) { + default: + llvm_unreachable("unexpected opcode!"); + case AVR::JMPk: + case AVR::CALLk: + case AVR::RCALLk: + case AVR::RJMPk: + case AVR::BREQk: + case AVR::BRNEk: + case AVR::BRSHk: + case AVR::BRLOk: + case AVR::BRMIk: + case AVR::BRPLk: + case AVR::BRGEk: + case AVR::BRLTk: + return MI.getOperand(0).getMBB(); + case AVR::BRBSsk: + case AVR::BRBCsk: + return MI.getOperand(1).getMBB(); + case AVR::SBRCRrB: + case AVR::SBRSRrB: + case AVR::SBICAb: + case AVR::SBISAb: + llvm_unreachable("unimplemented branch instructions"); + } +} + +bool AVRInstrInfo::isBranchOffsetInRange(unsigned BranchOp, + int64_t BrOffset) const { + + switch (BranchOp) { + default: + llvm_unreachable("unexpected opcode!"); + case AVR::JMPk: + case AVR::CALLk: + assert(BrOffset >= 0 && "offset must be absolute address"); + return isUIntN(16, BrOffset); + case AVR::RCALLk: + case AVR::RJMPk: + return isIntN(13, BrOffset); + case AVR::BRBSsk: + case AVR::BRBCsk: + case AVR::BREQk: + case AVR::BRNEk: + case AVR::BRSHk: + case AVR::BRLOk: + case AVR::BRMIk: + case AVR::BRPLk: + case AVR::BRGEk: + case AVR::BRLTk: + return isIntN(7, BrOffset); + } +} + } // end of namespace llvm diff --git a/lib/Target/AVR/AVRInstrInfo.h b/lib/Target/AVR/AVRInstrInfo.h index c5105dafe5eb5..f42d34fb28480 100644 --- a/lib/Target/AVR/AVRInstrInfo.h +++ b/lib/Target/AVR/AVRInstrInfo.h @@ -103,6 +103,10 @@ public: bool reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override; + MachineBasicBlock *getBranchDestBlock(const MachineInstr &MI) const override; + + bool isBranchOffsetInRange(unsigned BranchOpc, + int64_t BrOffset) const override; private: const AVRRegisterInfo RI; }; diff --git a/lib/Target/AVR/AVRInstrInfo.td b/lib/Target/AVR/AVRInstrInfo.td index 5dd8b2c27b212..184e4d53f7c8f 100644 --- a/lib/Target/AVR/AVRInstrInfo.td +++ b/lib/Target/AVR/AVRInstrInfo.td @@ -1411,17 +1411,11 @@ hasSideEffects = 0 in def LPMRdZ : FLPMX<0, 0, (outs GPR8:$dst), - (ins ZREGS:$z), + (ins ZREG:$z), "lpm\t$dst, $z", []>, Requires<[HasLPMX]>; - def LPMWRdZ : Pseudo<(outs DREGS:$dst), - (ins ZREGS:$z), - "lpmw\t$dst, $z", - []>, - Requires<[HasLPMX]>; - // Load program memory, while postincrementing the Z register. 
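isBranchOffsetInRange above is the contract the new branch-relaxation support relies on: JMP/CALL take a 16-bit absolute target, RJMP/RCALL a 13-bit signed offset, and the conditional branches a 7-bit signed offset (in whatever units the caller passes). The isIntN/isUIntN helpers reduce to a simple range test; a standalone re-implementation of the signed case (N < 64 assumed):

```cpp
#include <cstdint>
#include <cstdio>

// Equivalent of llvm::isIntN for N < 64: does Value fit in an N-bit
// signed field?
static bool isIntN(unsigned N, int64_t Value) {
  return Value >= -(INT64_C(1) << (N - 1)) && Value < (INT64_C(1) << (N - 1));
}

int main() {
  std::printf("BRcc  +63:  %d\n", isIntN(7, 63));      // in range
  std::printf("BRcc  +64:  %d\n", isIntN(7, 64));      // out of range -> relax
  std::printf("RJMP -4096: %d\n", isIntN(13, -4096));  // in range
}
```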
let mayLoad = 1, Defs = [R31R30] in @@ -1429,13 +1423,19 @@ hasSideEffects = 0 in def LPMRdZPi : FLPMX<0, 1, (outs GPR8:$dst), - (ins ZREGS:$z), + (ins ZREG:$z), "lpm\t$dst, $z+", []>, Requires<[HasLPMX]>; + def LPMWRdZ : Pseudo<(outs DREGS:$dst), + (ins ZREG:$z), + "lpmw\t$dst, $z", + []>, + Requires<[HasLPMX]>; + def LPMWRdZPi : Pseudo<(outs DREGS:$dst), - (ins ZREGS:$z), + (ins ZREG:$z), "lpmw\t$dst, $z+", []>, Requires<[HasLPMX]>; @@ -1458,7 +1458,7 @@ hasSideEffects = 0 in def ELPMRdZ : FLPMX<1, 0, (outs GPR8:$dst), - (ins ZREGS:$z), + (ins ZREG:$z), "elpm\t$dst, $z", []>, Requires<[HasELPMX]>; @@ -1467,7 +1467,7 @@ hasSideEffects = 0 in def ELPMRdZPi : FLPMX<1, 1, (outs GPR8:$dst), - (ins ZREGS: $z), + (ins ZREG: $z), "elpm\t$dst, $z+", []>, Requires<[HasELPMX]>; @@ -1487,7 +1487,7 @@ let Uses = [R1, R0] in let Defs = [R31R30] in def SPMZPi : F16<0b1001010111111000, (outs), - (ins ZREGS:$z), + (ins ZREG:$z), "spm $z+", []>, Requires<[HasSPMX]>; @@ -1564,28 +1564,28 @@ hasSideEffects = 0 in // Read-Write-Modify (RMW) instructions. def XCHZRd : FZRd<0b100, (outs GPR8:$rd), - (ins ZREGS:$z), + (ins ZREG:$z), "xch\t$z, $rd", []>, Requires<[SupportsRMW]>; def LASZRd : FZRd<0b101, (outs GPR8:$rd), - (ins ZREGS:$z), + (ins ZREG:$z), "las\t$z, $rd", []>, Requires<[SupportsRMW]>; def LACZRd : FZRd<0b110, (outs GPR8:$rd), - (ins ZREGS:$z), + (ins ZREG:$z), "lac\t$z, $rd", []>, Requires<[SupportsRMW]>; def LATZRd : FZRd<0b111, (outs GPR8:$rd), - (ins ZREGS:$z), + (ins ZREG:$z), "lat\t$z, $rd", []>, Requires<[SupportsRMW]>; diff --git a/lib/Target/AVR/AVRMCInstLower.cpp b/lib/Target/AVR/AVRMCInstLower.cpp index 475dda420e892..dfefd09bc4b86 100644 --- a/lib/Target/AVR/AVRMCInstLower.cpp +++ b/lib/Target/AVR/AVRMCInstLower.cpp @@ -37,10 +37,22 @@ MCOperand AVRMCInstLower::lowerSymbolOperand(const MachineOperand &MO, Expr, MCConstantExpr::create(MO.getOffset(), Ctx), Ctx); } + bool IsFunction = MO.isGlobal() && isa<Function>(MO.getGlobal()); + if (TF & AVRII::MO_LO) { - Expr = AVRMCExpr::create(AVRMCExpr::VK_AVR_LO8, Expr, IsNegated, Ctx); + if (IsFunction) { + // N.B. Should we use _GS fixups here to cope with >128k progmem? + Expr = AVRMCExpr::create(AVRMCExpr::VK_AVR_PM_LO8, Expr, IsNegated, Ctx); + } else { + Expr = AVRMCExpr::create(AVRMCExpr::VK_AVR_LO8, Expr, IsNegated, Ctx); + } } else if (TF & AVRII::MO_HI) { - Expr = AVRMCExpr::create(AVRMCExpr::VK_AVR_HI8, Expr, IsNegated, Ctx); + if (IsFunction) { + // N.B. Should we use _GS fixups here to cope with >128k progmem? + Expr = AVRMCExpr::create(AVRMCExpr::VK_AVR_PM_HI8, Expr, IsNegated, Ctx); + } else { + Expr = AVRMCExpr::create(AVRMCExpr::VK_AVR_HI8, Expr, IsNegated, Ctx); + } } else if (TF != 0) { llvm_unreachable("Unknown target flag on symbol operand"); } diff --git a/lib/Target/AVR/AVRRegisterInfo.cpp b/lib/Target/AVR/AVRRegisterInfo.cpp index 55f3f5cf428ac..249dc5512c289 100644 --- a/lib/Target/AVR/AVRRegisterInfo.cpp +++ b/lib/Target/AVR/AVRRegisterInfo.cpp @@ -95,7 +95,8 @@ AVRRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC, } /// Fold a frame offset shared between two add instructions into a single one. -static void foldFrameOffset(MachineInstr &MI, int &Offset, unsigned DstReg) { +static void foldFrameOffset(MachineBasicBlock::iterator &II, int &Offset, unsigned DstReg) { + MachineInstr &MI = *II; int Opcode = MI.getOpcode(); // Don't bother trying if the next instruction is not an add or a sub. 
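The AVRMCInstLower hunk above switches function symbols from the lo8/hi8 operators to their pm_ (program-memory) variants. On AVR the program counter and IJMP/ICALL work in 16-bit words, so the pm_ forms describe the byte address shifted right by one; that is my reading of the corresponding relocations, so treat this as a sketch:

```cpp
#include <cstdint>
#include <cstdio>

// Data-space operators: plain low/high byte of a byte address.
static uint8_t lo8(uint32_t addr) { return addr & 0xFF; }
static uint8_t hi8(uint32_t addr) { return (addr >> 8) & 0xFF; }

// Program-memory operators: the address is first converted from bytes
// to 16-bit words, because the PC and IJMP/ICALL operate on words.
static uint8_t pm_lo8(uint32_t addr) { return lo8(addr >> 1); }
static uint8_t pm_hi8(uint32_t addr) { return hi8(addr >> 1); }

int main() {
  uint32_t fn = 0x0456; // hypothetical byte address of a function
  std::printf("lo8=%02x hi8=%02x pm_lo8=%02x pm_hi8=%02x\n",
              lo8(fn), hi8(fn), pm_lo8(fn), pm_hi8(fn));
}
```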
@@ -120,6 +121,7 @@ static void foldFrameOffset(MachineInstr &MI, int &Offset, unsigned DstReg) { } // Finally remove the instruction. + II++; MI.eraseFromParent(); } @@ -158,6 +160,8 @@ void AVRRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, unsigned DstReg = MI.getOperand(0).getReg(); assert(DstReg != AVR::R29R28 && "Dest reg cannot be the frame pointer"); + II++; // Skip over the FRMIDX (and now MOVW) instruction. + // Generally, to load a frame address two add instructions are emitted that // could get folded into a single one: // movw r31:r30, r29:r28 @@ -166,7 +170,8 @@ void AVRRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // to: // movw r31:r30, r29:r28 // adiw r31:r30, 45 - foldFrameOffset(*std::next(II), Offset, DstReg); + if (II != MBB.end()) + foldFrameOffset(II, Offset, DstReg); // Select the best opcode based on DstReg and the offset size. switch (DstReg) { @@ -187,7 +192,7 @@ void AVRRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, } } - MachineInstr *New = BuildMI(MBB, std::next(II), dl, TII.get(Opcode), DstReg) + MachineInstr *New = BuildMI(MBB, II, dl, TII.get(Opcode), DstReg) .addReg(DstReg, RegState::Kill) .addImm(Offset); New->getOperand(3).setIsDead(); diff --git a/lib/Target/AVR/AVRRegisterInfo.td b/lib/Target/AVR/AVRRegisterInfo.td index 32650fc66751e..8162f12052be5 100644 --- a/lib/Target/AVR/AVRRegisterInfo.td +++ b/lib/Target/AVR/AVRRegisterInfo.td @@ -110,8 +110,6 @@ CoveredBySubRegs = 1 in // Register Classes //===----------------------------------------------------------------------===// -//:TODO: use proper set instructions instead of using always "add" - // Main 8-bit register class. def GPR8 : RegisterClass<"AVR", [i8], 8, ( @@ -199,14 +197,11 @@ def PTRDISPREGS : RegisterClass<"AVR", [i16], 8, // We have a bunch of instructions with an explicit Z register argument. We // model this using a register class containing only the Z register. -// :TODO: Rename to 'ZREG'. -def ZREGS : RegisterClass<"AVR", [i16], 8, (add R31R30)>; +def ZREG : RegisterClass<"AVR", [i16], 8, (add R31R30)>; // Register class used for the stack read pseudo instruction. def GPRSP: RegisterClass<"AVR", [i16], 8, (add SP)>; -//:TODO: if we remove this we get an error in tablegen -//:TODO: this is just a hack, remove it once add16 works! // Status register. def SREG : AVRReg<14, "FLAGS">, DwarfRegNum<[88]>; def CCR : RegisterClass<"AVR", [i8], 8, (add SREG)> diff --git a/lib/Target/AVR/AVRTargetMachine.cpp b/lib/Target/AVR/AVRTargetMachine.cpp index 91d2a8737b870..a9d61ffc952c3 100644 --- a/lib/Target/AVR/AVRTargetMachine.cpp +++ b/lib/Target/AVR/AVRTargetMachine.cpp @@ -66,6 +66,7 @@ public: bool addInstSelector() override; void addPreSched2() override; + void addPreEmitPass() override; void addPreRegAlloc() override; }; } // namespace @@ -115,4 +116,9 @@ void AVRPassConfig::addPreSched2() { addPass(createAVRExpandPseudoPass()); } +void AVRPassConfig::addPreEmitPass() { + // Must run branch selection immediately preceding the asm printer. 
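Registering BranchRelaxation as the pre-emit pass is what consumes the BytesAdded/BytesRemoved and isBranchOffsetInRange plumbing added earlier in this commit: after final layout, a conditional branch that cannot reach its target is rewritten as an inverted branch over a longer-range RJMP. Schematically (a sketch; the 7-bit reach is the conditional-branch range checked above):

```cpp
#include <cstdint>
#include <cstdio>

// Sketch of the relaxation rewrite. If "breq L" cannot reach its
// target, emit the inverted condition over a longer-range jump:
//     breq L     ->    brne 1f
//                      rjmp L
//                 1f:
static void emitConditionalBranch(int64_t Offset) {
  bool InRange = Offset >= -64 && Offset <= 63; // 7-bit signed reach
  if (InRange)
    std::puts("breq L");
  else
    std::puts("brne 1f\nrjmp L\n1f:");
}

int main() {
  emitConditionalBranch(40);  // short enough: single branch
  emitConditionalBranch(400); // too far: relaxed sequence
}
```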
+ addPass(&BranchRelaxationPassID); +} + } // end of namespace llvm diff --git a/lib/Target/AVR/AsmParser/AVRAsmParser.cpp b/lib/Target/AVR/AsmParser/AVRAsmParser.cpp index cf52e552978f1..5004736365c7b 100644 --- a/lib/Target/AVR/AsmParser/AVRAsmParser.cpp +++ b/lib/Target/AVR/AsmParser/AVRAsmParser.cpp @@ -466,6 +466,7 @@ bool AVRAsmParser::parseOperand(OperandVector &Operands) { if (!tryParseRegisterOperand(Operands)) { return false; } + LLVM_FALLTHROUGH; case AsmToken::LParen: case AsmToken::Integer: case AsmToken::Dot: diff --git a/lib/Target/AVR/InstPrinter/AVRInstPrinter.cpp b/lib/Target/AVR/InstPrinter/AVRInstPrinter.cpp index 316b7836df0d7..0f34b8e18ff96 100644 --- a/lib/Target/AVR/InstPrinter/AVRInstPrinter.cpp +++ b/lib/Target/AVR/InstPrinter/AVRInstPrinter.cpp @@ -106,7 +106,7 @@ void AVRInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, if (Op.isReg()) { bool isPtrReg = (MOI.RegClass == AVR::PTRREGSRegClassID) || (MOI.RegClass == AVR::PTRDISPREGSRegClassID) || - (MOI.RegClass == AVR::ZREGSRegClassID); + (MOI.RegClass == AVR::ZREGRegClassID); if (isPtrReg) { O << getRegisterName(Op.getReg(), AVR::ptr); diff --git a/lib/Target/AVR/MCTargetDesc/AVRELFStreamer.cpp b/lib/Target/AVR/MCTargetDesc/AVRELFStreamer.cpp index 1e61eccf775f5..6d126ed622aa1 100644 --- a/lib/Target/AVR/MCTargetDesc/AVRELFStreamer.cpp +++ b/lib/Target/AVR/MCTargetDesc/AVRELFStreamer.cpp @@ -33,7 +33,7 @@ static unsigned getEFlagsForFeatureSet(const FeatureBitset &Features) { EFlags |= ELF::EF_AVR_ARCH_AVR51; else if (Features[AVR::ELFArchAVR6]) EFlags |= ELF::EF_AVR_ARCH_AVR6; - else if (Features[AVR::ELFArchAVRTiny]) + else if (Features[AVR::ELFArchTiny]) EFlags |= ELF::EF_AVR_ARCH_AVRTINY; else if (Features[AVR::ELFArchXMEGA1]) EFlags |= ELF::EF_AVR_ARCH_XMEGA1; diff --git a/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp b/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp index 15e89fb2a2611..9fc812cdef14f 100644 --- a/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp +++ b/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp @@ -29,7 +29,7 @@ public: void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, const MCValue &Target, MutableArrayRef<char> Data, - uint64_t Value, bool IsPCRel) const override; + uint64_t Value, bool IsResolved) const override; MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override; @@ -65,7 +65,7 @@ bool BPFAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { void BPFAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, const MCValue &Target, MutableArrayRef<char> Data, uint64_t Value, - bool IsPCRel) const { + bool IsResolved) const { if (Fixup.getKind() == FK_SecRel_4 || Fixup.getKind() == FK_SecRel_8) { assert(Value == 0); } else if (Fixup.getKind() == FK_Data_4 || Fixup.getKind() == FK_Data_8) { diff --git a/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp b/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp index c19e636d79ca2..d901abbd16925 100644 --- a/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp +++ b/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp @@ -1413,6 +1413,7 @@ int HexagonAsmParser::processInstruction(MCInst &Inst, // Translate a "$Rx = CONST32(#imm)" to "$Rx = memw(gp+#LABEL) " case Hexagon::CONST32: is32bit = true; + LLVM_FALLTHROUGH; // Translate a "$Rx:y = CONST64(#imm)" to "$Rx:y = memd(gp+#LABEL) " case Hexagon::CONST64: // FIXME: need better way to detect AsmStreamer (upstream removed getKind()) diff --git a/lib/Target/Hexagon/HexagonBitSimplify.cpp b/lib/Target/Hexagon/HexagonBitSimplify.cpp index 
14c682c6df4bc..b064778c4bbd3 100644 --- a/lib/Target/Hexagon/HexagonBitSimplify.cpp +++ b/lib/Target/Hexagon/HexagonBitSimplify.cpp @@ -1947,8 +1947,10 @@ bool BitSimplification::genStoreImmediate(MachineInstr *MI) { switch (Opc) { case Hexagon::S2_storeri_io: Align++; + LLVM_FALLTHROUGH; case Hexagon::S2_storerh_io: Align++; + LLVM_FALLTHROUGH; case Hexagon::S2_storerb_io: break; default: diff --git a/lib/Target/Hexagon/HexagonBitTracker.cpp b/lib/Target/Hexagon/HexagonBitTracker.cpp index 730026121d3be..3de5310882409 100644 --- a/lib/Target/Hexagon/HexagonBitTracker.cpp +++ b/lib/Target/Hexagon/HexagonBitTracker.cpp @@ -937,6 +937,7 @@ bool HexagonEvaluator::evaluate(const MachineInstr &BI, case Hexagon::J2_jumpfnew: case Hexagon::J2_jumpfnewpt: Negated = true; + LLVM_FALLTHROUGH; case Hexagon::J2_jumpt: case Hexagon::J2_jumptpt: case Hexagon::J2_jumptnew: diff --git a/lib/Target/Hexagon/HexagonConstPropagation.cpp b/lib/Target/Hexagon/HexagonConstPropagation.cpp index aa68f6cfdfc11..49ddd6961f8a9 100644 --- a/lib/Target/Hexagon/HexagonConstPropagation.cpp +++ b/lib/Target/Hexagon/HexagonConstPropagation.cpp @@ -2244,6 +2244,7 @@ bool HexagonConstEvaluator::evaluate(const MachineInstr &BrI, case Hexagon::J2_jumpfnew: case Hexagon::J2_jumpfnewpt: Negated = true; + LLVM_FALLTHROUGH; case Hexagon::J2_jumpt: case Hexagon::J2_jumptnew: case Hexagon::J2_jumptnewpt: diff --git a/lib/Target/Hexagon/HexagonFrameLowering.cpp b/lib/Target/Hexagon/HexagonFrameLowering.cpp index 97a53dcbaed72..c790579ccebc0 100644 --- a/lib/Target/Hexagon/HexagonFrameLowering.cpp +++ b/lib/Target/Hexagon/HexagonFrameLowering.cpp @@ -979,18 +979,6 @@ bool HexagonFrameLowering::hasFP(const MachineFunction &MF) const { if (MFI.hasCalls() || HMFI.hasClobberLR()) return true; - // Frame pointer elimination is a possiblility at this point, but - // to know if FP is necessary we need to know if spill/restore - // functions will be used (they require FP to be valid). - // This means that hasFP shouldn't really be called before CSI is - // calculated, and some measures are taken to make sure of that - // (e.g. default implementations of virtual functions that call it - // are overridden apropriately). - assert(MFI.isCalleeSavedInfoValid() && "Need to know CSI"); - const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); - if (useSpillFunction(MF, CSI) || useRestoreFunction(MF, CSI)) - return true; - return false; } @@ -2437,6 +2425,8 @@ bool HexagonFrameLowering::shouldInlineCSR(const MachineFunction &MF, const CSIVect &CSI) const { if (MF.getInfo<HexagonMachineFunctionInfo>()->hasEHReturn()) return true; + if (!hasFP(MF)) + return true; if (!isOptSize(MF) && !isMinSize(MF)) if (MF.getTarget().getOptLevel() > CodeGenOpt::Default) return true; diff --git a/lib/Target/Hexagon/HexagonGenPredicate.cpp b/lib/Target/Hexagon/HexagonGenPredicate.cpp index f14c733dcf511..3470480d607dc 100644 --- a/lib/Target/Hexagon/HexagonGenPredicate.cpp +++ b/lib/Target/Hexagon/HexagonGenPredicate.cpp @@ -334,6 +334,7 @@ bool HexagonGenPredicate::isScalarPred(Register PredReg) { if (MRI->getRegClass(PR.R) != PredRC) return false; // If it is a copy between two predicate registers, fall through. 
+ LLVM_FALLTHROUGH; } case Hexagon::C2_and: case Hexagon::C2_andn: diff --git a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp index e5f49ca77a912..0163b2e2bdc46 100644 --- a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp +++ b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp @@ -241,22 +241,31 @@ void HexagonDAGToDAGISel::SelectIndexedLoad(LoadSDNode *LD, const SDLoc &dl) { case MVT::v32i16: case MVT::v16i32: case MVT::v8i64: - if (isAlignedMemNode(LD)) - Opcode = IsValidInc ? Hexagon::V6_vL32b_pi : Hexagon::V6_vL32b_ai; - else + if (isAlignedMemNode(LD)) { + if (LD->isNonTemporal()) + Opcode = IsValidInc ? Hexagon::V6_vL32b_nt_pi : Hexagon::V6_vL32b_nt_ai; + else + Opcode = IsValidInc ? Hexagon::V6_vL32b_pi : Hexagon::V6_vL32b_ai; + } else { Opcode = IsValidInc ? Hexagon::V6_vL32Ub_pi : Hexagon::V6_vL32Ub_ai; + } break; // 128B case MVT::v128i8: case MVT::v64i16: case MVT::v32i32: case MVT::v16i64: - if (isAlignedMemNode(LD)) - Opcode = IsValidInc ? Hexagon::V6_vL32b_pi_128B - : Hexagon::V6_vL32b_ai_128B; - else + if (isAlignedMemNode(LD)) { + if (LD->isNonTemporal()) + Opcode = IsValidInc ? Hexagon::V6_vL32b_nt_pi_128B + : Hexagon::V6_vL32b_nt_ai_128B; + else + Opcode = IsValidInc ? Hexagon::V6_vL32b_pi_128B + : Hexagon::V6_vL32b_ai_128B; + } else { Opcode = IsValidInc ? Hexagon::V6_vL32Ub_pi_128B : Hexagon::V6_vL32Ub_ai_128B; + } break; default: llvm_unreachable("Unexpected memory type in indexed load"); @@ -529,22 +538,31 @@ void HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, const SDLoc &dl) { case MVT::v32i16: case MVT::v16i32: case MVT::v8i64: - if (isAlignedMemNode(ST)) - Opcode = IsValidInc ? Hexagon::V6_vS32b_pi : Hexagon::V6_vS32b_ai; - else + if (isAlignedMemNode(ST)) { + if (ST->isNonTemporal()) + Opcode = IsValidInc ? Hexagon::V6_vS32b_nt_pi : Hexagon::V6_vS32b_nt_ai; + else + Opcode = IsValidInc ? Hexagon::V6_vS32b_pi : Hexagon::V6_vS32b_ai; + } else { Opcode = IsValidInc ? Hexagon::V6_vS32Ub_pi : Hexagon::V6_vS32Ub_ai; + } break; // 128B case MVT::v128i8: case MVT::v64i16: case MVT::v32i32: case MVT::v16i64: - if (isAlignedMemNode(ST)) - Opcode = IsValidInc ? Hexagon::V6_vS32b_pi_128B - : Hexagon::V6_vS32b_ai_128B; - else + if (isAlignedMemNode(ST)) { + if (ST->isNonTemporal()) + Opcode = IsValidInc ? Hexagon::V6_vS32b_nt_pi_128B + : Hexagon::V6_vS32b_nt_ai_128B; + else + Opcode = IsValidInc ? Hexagon::V6_vS32b_pi_128B + : Hexagon::V6_vS32b_ai_128B; + } else { Opcode = IsValidInc ? Hexagon::V6_vS32Ub_pi_128B : Hexagon::V6_vS32Ub_ai_128B; + } break; default: llvm_unreachable("Unexpected memory type in indexed store"); diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp index 2daacf7955559..67242764d4531 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -716,6 +716,7 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, bool IsStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet(); MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo &MFI = MF.getFrameInfo(); auto PtrVT = getPointerTy(MF.getDataLayout()); // Check for varargs. 
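(The HexagonISelDAGToDAG hunks above layer a non-temporal check on top of the existing alignment check when picking vector load/store opcodes. A minimal standalone sketch of that selection order, not part of the patch; the enumerator names below are hypothetical stand-ins for the real Hexagon opcodes:

#include <cstdio>

// Hypothetical stand-ins for the Hexagon V6 vector load opcodes.
enum Opcode { L32b_nt_pi, L32b_nt_ai, L32b_pi, L32b_ai, L32Ub_pi, L32Ub_ai };

// Mirrors the order of tests in SelectIndexedLoad: alignment first, then the
// non-temporal hint, with IsValidInc choosing post-increment (pi) over
// absolute (ai) addressing.
Opcode selectLoadOpcode(bool Aligned, bool NonTemporal, bool IsValidInc) {
  if (!Aligned)
    return IsValidInc ? L32Ub_pi : L32Ub_ai; // no unaligned nt form exists
  if (NonTemporal)
    return IsValidInc ? L32b_nt_pi : L32b_nt_ai;
  return IsValidInc ? L32b_pi : L32b_ai;
}

int main() {
  std::printf("%d\n", selectLoadOpcode(true, true, false)); // L32b_nt_ai
  std::printf("%d\n", selectLoadOpcode(false, true, true)); // L32Ub_pi
}

Keeping the alignment test outermost matters: there is no unaligned non-temporal form, so an unaligned access must take the Ub opcode even when the non-temporal hint is set.)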
@@ -832,7 +833,6 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, if (NeedsArgAlign && Subtarget.hasV60TOps()) { DEBUG(dbgs() << "Function needs byte stack align due to call args\n"); - MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); // V6 vectors passed by value have 64 or 128 byte alignment depending // on whether we are 64 byte vector mode or 128 byte. bool UseHVXDbl = Subtarget.useHVXDblOps(); @@ -916,10 +916,15 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, Ops.push_back(Glue); if (IsTailCall) { - MF.getFrameInfo().setHasTailCall(); + MFI.setHasTailCall(); return DAG.getNode(HexagonISD::TC_RETURN, dl, NodeTys, Ops); } + // Set this here because we need to know this for "hasFP" in frame lowering. + // The target-independent code calls getFrameRegister before setting it, and + // getFrameRegister uses hasFP to determine whether the function has FP. + MFI.setHasCalls(true); + unsigned OpCode = DoesNotReturn ? HexagonISD::CALLnr : HexagonISD::CALL; Chain = DAG.getNode(OpCode, dl, NodeTys, Ops); Glue = Chain.getValue(1); @@ -1284,11 +1289,9 @@ HexagonTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { // Creates a SPLAT instruction for a constant value VAL. static SDValue createSplat(SelectionDAG &DAG, const SDLoc &dl, EVT VT, SDValue Val) { - if (VT.getSimpleVT() == MVT::v4i8) - return DAG.getNode(HexagonISD::VSPLATB, dl, VT, Val); - - if (VT.getSimpleVT() == MVT::v4i16) - return DAG.getNode(HexagonISD::VSPLATH, dl, VT, Val); + EVT T = VT.getVectorElementType(); + if (T == MVT::i8 || T == MVT::i16) + return DAG.getNode(HexagonISD::VSPLAT, dl, VT, Val); return SDValue(); } @@ -2296,32 +2299,13 @@ const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const { case HexagonISD::JT: return "HexagonISD::JT"; case HexagonISD::PACKHL: return "HexagonISD::PACKHL"; case HexagonISD::RET_FLAG: return "HexagonISD::RET_FLAG"; - case HexagonISD::SHUFFEB: return "HexagonISD::SHUFFEB"; - case HexagonISD::SHUFFEH: return "HexagonISD::SHUFFEH"; - case HexagonISD::SHUFFOB: return "HexagonISD::SHUFFOB"; - case HexagonISD::SHUFFOH: return "HexagonISD::SHUFFOH"; case HexagonISD::TC_RETURN: return "HexagonISD::TC_RETURN"; - case HexagonISD::VCMPBEQ: return "HexagonISD::VCMPBEQ"; - case HexagonISD::VCMPBGT: return "HexagonISD::VCMPBGT"; - case HexagonISD::VCMPBGTU: return "HexagonISD::VCMPBGTU"; - case HexagonISD::VCMPHEQ: return "HexagonISD::VCMPHEQ"; - case HexagonISD::VCMPHGT: return "HexagonISD::VCMPHGT"; - case HexagonISD::VCMPHGTU: return "HexagonISD::VCMPHGTU"; - case HexagonISD::VCMPWEQ: return "HexagonISD::VCMPWEQ"; - case HexagonISD::VCMPWGT: return "HexagonISD::VCMPWGT"; - case HexagonISD::VCMPWGTU: return "HexagonISD::VCMPWGTU"; case HexagonISD::VCOMBINE: return "HexagonISD::VCOMBINE"; case HexagonISD::VPACK: return "HexagonISD::VPACK"; - case HexagonISD::VSHLH: return "HexagonISD::VSHLH"; - case HexagonISD::VSHLW: return "HexagonISD::VSHLW"; - case HexagonISD::VSPLATB: return "HexagonISD::VSPLTB"; - case HexagonISD::VSPLATH: return "HexagonISD::VSPLATH"; - case HexagonISD::VSRAH: return "HexagonISD::VSRAH"; - case HexagonISD::VSRAW: return "HexagonISD::VSRAW"; - case HexagonISD::VSRLH: return "HexagonISD::VSRLH"; - case HexagonISD::VSRLW: return "HexagonISD::VSRLW"; - case HexagonISD::VSXTBH: return "HexagonISD::VSXTBH"; - case HexagonISD::VSXTBW: return "HexagonISD::VSXTBW"; + case HexagonISD::VASL: return "HexagonISD::VASL"; + case HexagonISD::VASR: return "HexagonISD::VASR"; + case 
HexagonISD::VLSR: return "HexagonISD::VLSR"; + case HexagonISD::VSPLAT: return "HexagonISD::VSPLAT"; case HexagonISD::READCYCLE: return "HexagonISD::READCYCLE"; case HexagonISD::OP_END: break; } @@ -2503,13 +2487,13 @@ HexagonTargetLowering::LowerVECTOR_SHIFT(SDValue Op, SelectionDAG &DAG) const { if (VT.getSimpleVT() == MVT::v4i16) { switch (Op.getOpcode()) { case ISD::SRA: - Result = DAG.getNode(HexagonISD::VSRAH, dl, VT, V3, CommonSplat); + Result = DAG.getNode(HexagonISD::VASR, dl, VT, V3, CommonSplat); break; case ISD::SHL: - Result = DAG.getNode(HexagonISD::VSHLH, dl, VT, V3, CommonSplat); + Result = DAG.getNode(HexagonISD::VASL, dl, VT, V3, CommonSplat); break; case ISD::SRL: - Result = DAG.getNode(HexagonISD::VSRLH, dl, VT, V3, CommonSplat); + Result = DAG.getNode(HexagonISD::VLSR, dl, VT, V3, CommonSplat); break; default: return SDValue(); @@ -2517,13 +2501,13 @@ HexagonTargetLowering::LowerVECTOR_SHIFT(SDValue Op, SelectionDAG &DAG) const { } else if (VT.getSimpleVT() == MVT::v2i32) { switch (Op.getOpcode()) { case ISD::SRA: - Result = DAG.getNode(HexagonISD::VSRAW, dl, VT, V3, CommonSplat); + Result = DAG.getNode(HexagonISD::VASR, dl, VT, V3, CommonSplat); break; case ISD::SHL: - Result = DAG.getNode(HexagonISD::VSHLW, dl, VT, V3, CommonSplat); + Result = DAG.getNode(HexagonISD::VASL, dl, VT, V3, CommonSplat); break; case ISD::SRL: - Result = DAG.getNode(HexagonISD::VSRLW, dl, VT, V3, CommonSplat); + Result = DAG.getNode(HexagonISD::VLSR, dl, VT, V3, CommonSplat); break; default: return SDValue(); diff --git a/lib/Target/Hexagon/HexagonISelLowering.h b/lib/Target/Hexagon/HexagonISelLowering.h index 1415156487c07..bfd2c94eeabaa 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.h +++ b/lib/Target/Hexagon/HexagonISelLowering.h @@ -52,29 +52,10 @@ namespace HexagonISD { COMBINE, PACKHL, - VSPLATB, - VSPLATH, - SHUFFEB, - SHUFFEH, - SHUFFOB, - SHUFFOH, - VSXTBH, - VSXTBW, - VSRAW, - VSRAH, - VSRLW, - VSRLH, - VSHLW, - VSHLH, - VCMPBEQ, - VCMPBGT, - VCMPBGTU, - VCMPHEQ, - VCMPHGT, - VCMPHGTU, - VCMPWEQ, - VCMPWGT, - VCMPWGTU, + VSPLAT, + VASL, + VASR, + VLSR, INSERT, INSERTRP, diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp index 1eac2d3dd8e22..c77c669f4ca75 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -250,15 +250,19 @@ unsigned HexagonInstrInfo::isLoadFromStackSlot(const MachineInstr &MI, case Hexagon::L2_loadri_io: case Hexagon::L2_loadrd_io: case Hexagon::V6_vL32b_ai: + case Hexagon::V6_vL32b_nt_ai: case Hexagon::V6_vL32b_ai_128B: + case Hexagon::V6_vL32b_nt_ai_128B: case Hexagon::V6_vL32Ub_ai: case Hexagon::V6_vL32Ub_ai_128B: case Hexagon::LDriw_pred: case Hexagon::LDriw_mod: case Hexagon::PS_vloadrq_ai: case Hexagon::PS_vloadrw_ai: + case Hexagon::PS_vloadrw_nt_ai: case Hexagon::PS_vloadrq_ai_128B: - case Hexagon::PS_vloadrw_ai_128B: { + case Hexagon::PS_vloadrw_ai_128B: + case Hexagon::PS_vloadrw_nt_ai_128B: { const MachineOperand OpFI = MI.getOperand(1); if (!OpFI.isFI()) return 0; @@ -1726,6 +1730,39 @@ bool HexagonInstrInfo::getIncrementValue(const MachineInstr &MI, return false; } +std::pair<unsigned, unsigned> +HexagonInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const { + return std::make_pair(TF & ~HexagonII::MO_Bitmasks, + TF & HexagonII::MO_Bitmasks); +} + +ArrayRef<std::pair<unsigned, const char*>> +HexagonInstrInfo::getSerializableDirectMachineOperandTargetFlags() const { + using namespace HexagonII; + static const std::pair<unsigned, const char*> 
Flags[] = { + {MO_PCREL, "hexagon-pcrel"}, + {MO_GOT, "hexagon-got"}, + {MO_LO16, "hexagon-lo16"}, + {MO_HI16, "hexagon-hi16"}, + {MO_GPREL, "hexagon-gprel"}, + {MO_GDGOT, "hexagon-gdgot"}, + {MO_GDPLT, "hexagon-gdplt"}, + {MO_IE, "hexagon-ie"}, + {MO_IEGOT, "hexagon-iegot"}, + {MO_TPREL, "hexagon-tprel"} + }; + return makeArrayRef(Flags); +} + +ArrayRef<std::pair<unsigned, const char*>> +HexagonInstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const { + using namespace HexagonII; + static const std::pair<unsigned, const char*> Flags[] = { + {HMOTF_ConstExtended, "hexagon-ext"} + }; + return makeArrayRef(Flags); +} + unsigned HexagonInstrInfo::createVR(MachineFunction *MF, MVT VT) const { MachineRegisterInfo &MRI = MF->getRegInfo(); const TargetRegisterClass *TRC; @@ -1797,7 +1834,7 @@ bool HexagonInstrInfo::isConstExtended(const MachineInstr &MI) const { const MachineOperand &MO = MI.getOperand(ExtOpNum); // Use MO operand flags to determine if MO // has the HMOTF_ConstExtended flag set. - if (MO.getTargetFlags() && HexagonII::HMOTF_ConstExtended) + if (MO.getTargetFlags() & HexagonII::HMOTF_ConstExtended) return true; // If this is a Machine BB address we are talking about, and it is // not marked as extended, say so. @@ -1807,9 +1844,6 @@ bool HexagonInstrInfo::isConstExtended(const MachineInstr &MI) const { // We could be using an instruction with an extendable immediate and shoehorn // a global address into it. If it is a global address it will be constant // extended. We do this for COMBINE. - // We currently only handle isGlobal() because it is the only kind of - // object we are going to end up with here for now. - // In the future we probably should add isSymbol(), etc. if (MO.isGlobal() || MO.isSymbol() || MO.isBlockAddress() || MO.isJTI() || MO.isCPI() || MO.isFPImm()) return true; @@ -1961,11 +1995,9 @@ bool HexagonInstrInfo::isExtended(const MachineInstr &MI) const { return true; // Use MO operand flags to determine if one of MI's operands // has HMOTF_ConstExtended flag set. 
- for (MachineInstr::const_mop_iterator I = MI.operands_begin(), - E = MI.operands_end(); I != E; ++I) { - if (I->getTargetFlags() && HexagonII::HMOTF_ConstExtended) + for (const MachineOperand &MO : MI.operands()) + if (MO.getTargetFlags() & HexagonII::HMOTF_ConstExtended) return true; - } return false; } @@ -2445,20 +2477,28 @@ bool HexagonInstrInfo::isValidOffset(unsigned Opcode, int Offset, switch (Opcode) { case Hexagon::PS_vstorerq_ai: case Hexagon::PS_vstorerw_ai: + case Hexagon::PS_vstorerw_nt_ai: case Hexagon::PS_vloadrq_ai: case Hexagon::PS_vloadrw_ai: + case Hexagon::PS_vloadrw_nt_ai: case Hexagon::V6_vL32b_ai: case Hexagon::V6_vS32b_ai: + case Hexagon::V6_vL32b_nt_ai: + case Hexagon::V6_vS32b_nt_ai: case Hexagon::V6_vL32Ub_ai: case Hexagon::V6_vS32Ub_ai: return isShiftedInt<4,6>(Offset); case Hexagon::PS_vstorerq_ai_128B: case Hexagon::PS_vstorerw_ai_128B: + case Hexagon::PS_vstorerw_nt_ai_128B: case Hexagon::PS_vloadrq_ai_128B: case Hexagon::PS_vloadrw_ai_128B: + case Hexagon::PS_vloadrw_nt_ai_128B: case Hexagon::V6_vL32b_ai_128B: case Hexagon::V6_vS32b_ai_128B: + case Hexagon::V6_vL32b_nt_ai_128B: + case Hexagon::V6_vS32b_nt_ai_128B: case Hexagon::V6_vL32Ub_ai_128B: case Hexagon::V6_vS32Ub_ai_128B: return isShiftedInt<4,7>(Offset); @@ -3170,11 +3210,19 @@ int HexagonInstrInfo::getDotCurOp(const MachineInstr &MI) const { return Hexagon::V6_vL32b_cur_pi; case Hexagon::V6_vL32b_ai: return Hexagon::V6_vL32b_cur_ai; + case Hexagon::V6_vL32b_nt_pi: + return Hexagon::V6_vL32b_nt_cur_pi; + case Hexagon::V6_vL32b_nt_ai: + return Hexagon::V6_vL32b_nt_cur_ai; //128B case Hexagon::V6_vL32b_pi_128B: return Hexagon::V6_vL32b_cur_pi_128B; case Hexagon::V6_vL32b_ai_128B: return Hexagon::V6_vL32b_cur_ai_128B; + case Hexagon::V6_vL32b_nt_pi_128B: + return Hexagon::V6_vL32b_nt_cur_pi_128B; + case Hexagon::V6_vL32b_nt_ai_128B: + return Hexagon::V6_vL32b_nt_cur_ai_128B; } return 0; } @@ -3187,11 +3235,19 @@ int HexagonInstrInfo::getNonDotCurOp(const MachineInstr &MI) const { return Hexagon::V6_vL32b_pi; case Hexagon::V6_vL32b_cur_ai: return Hexagon::V6_vL32b_ai; + case Hexagon::V6_vL32b_nt_cur_pi: + return Hexagon::V6_vL32b_nt_pi; + case Hexagon::V6_vL32b_nt_cur_ai: + return Hexagon::V6_vL32b_nt_ai; //128B case Hexagon::V6_vL32b_cur_pi_128B: return Hexagon::V6_vL32b_pi_128B; case Hexagon::V6_vL32b_cur_ai_128B: return Hexagon::V6_vL32b_ai_128B; + case Hexagon::V6_vL32b_nt_cur_pi_128B: + return Hexagon::V6_vL32b_nt_pi_128B; + case Hexagon::V6_vL32b_nt_cur_ai_128B: + return Hexagon::V6_vL32b_nt_ai_128B; } return 0; } diff --git a/lib/Target/Hexagon/HexagonInstrInfo.h b/lib/Target/Hexagon/HexagonInstrInfo.h index 944d0161a7c8e..0436ce3ac475b 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.h +++ b/lib/Target/Hexagon/HexagonInstrInfo.h @@ -301,6 +301,27 @@ public: const MachineInstr &UseMI, unsigned UseIdx) const override; + /// Decompose the machine operand's target flags into two values - the direct + /// target flag value and any of bit flags that are applied. + std::pair<unsigned, unsigned> + decomposeMachineOperandsTargetFlags(unsigned TF) const override; + + /// Return an array that contains the direct target flag values and their + /// names. + /// + /// MIR Serialization is able to serialize only the target flags that are + /// defined by this method. + ArrayRef<std::pair<unsigned, const char *>> + getSerializableDirectMachineOperandTargetFlags() const override; + + /// Return an array that contains the bitmask target flag values and their + /// names. 
+ /// + /// MIR Serialization is able to serialize only the target flags that are + /// defined by this method. + ArrayRef<std::pair<unsigned, const char *>> + getSerializableBitmaskMachineOperandTargetFlags() const override; + bool isTailCall(const MachineInstr &MI) const override; /// HexagonInstrInfo specifics. diff --git a/lib/Target/Hexagon/HexagonMachineScheduler.cpp b/lib/Target/Hexagon/HexagonMachineScheduler.cpp index 4602de979024a..1a26805d190d0 100644 --- a/lib/Target/Hexagon/HexagonMachineScheduler.cpp +++ b/lib/Target/Hexagon/HexagonMachineScheduler.cpp @@ -49,7 +49,7 @@ static cl::opt<bool> CheckEarlyAvail("check-early-avail", cl::Hidden, using namespace llvm; -#define DEBUG_TYPE "misched" +#define DEBUG_TYPE "machine-scheduler" namespace { class HexagonCallMutation : public ScheduleDAGMutation { diff --git a/lib/Target/Hexagon/HexagonPatterns.td b/lib/Target/Hexagon/HexagonPatterns.td index 689419638f546..ba98b8994937f 100644 --- a/lib/Target/Hexagon/HexagonPatterns.td +++ b/lib/Target/Hexagon/HexagonPatterns.td @@ -2770,6 +2770,9 @@ def unalignedstore : PatFrag<(ops node:$val, node:$addr), (store $val, $addr), [ multiclass vS32b_ai_pats <ValueType VTSgl, ValueType VTDbl> { // Aligned stores + def : Pat<(alignednontemporalstore (VTSgl VectorRegs:$src1), IntRegs:$addr), + (V6_vS32b_nt_ai IntRegs:$addr, 0, (VTSgl VectorRegs:$src1))>, + Requires<[UseHVXSgl]>; def : Pat<(alignedstore (VTSgl VectorRegs:$src1), IntRegs:$addr), (V6_vS32b_ai IntRegs:$addr, 0, (VTSgl VectorRegs:$src1))>, Requires<[UseHVXSgl]>; @@ -2778,6 +2781,9 @@ multiclass vS32b_ai_pats <ValueType VTSgl, ValueType VTDbl> { Requires<[UseHVXSgl]>; // 128B Aligned stores + def : Pat<(alignednontemporalstore (VTDbl VectorRegs128B:$src1), IntRegs:$addr), + (V6_vS32b_nt_ai_128B IntRegs:$addr, 0, (VTDbl VectorRegs128B:$src1))>, + Requires<[UseHVXDbl]>; def : Pat<(alignedstore (VTDbl VectorRegs128B:$src1), IntRegs:$addr), (V6_vS32b_ai_128B IntRegs:$addr, 0, (VTDbl VectorRegs128B:$src1))>, Requires<[UseHVXDbl]>; @@ -2787,6 +2793,11 @@ multiclass vS32b_ai_pats <ValueType VTSgl, ValueType VTDbl> { // Fold Add R+OFF into vector store. let AddedComplexity = 10 in { + def : Pat<(alignednontemporalstore (VTSgl VectorRegs:$src1), + (add IntRegs:$src2, Iss4_6:$offset)), + (V6_vS32b_nt_ai IntRegs:$src2, Iss4_6:$offset, + (VTSgl VectorRegs:$src1))>, + Requires<[UseHVXSgl]>; def : Pat<(alignedstore (VTSgl VectorRegs:$src1), (add IntRegs:$src2, Iss4_6:$offset)), (V6_vS32b_ai IntRegs:$src2, Iss4_6:$offset, @@ -2799,6 +2810,11 @@ multiclass vS32b_ai_pats <ValueType VTSgl, ValueType VTDbl> { Requires<[UseHVXSgl]>; // Fold Add R+OFF into vector store 128B. 
+ def : Pat<(alignednontemporalstore (VTDbl VectorRegs128B:$src1), + (add IntRegs:$src2, Iss4_7:$offset)), + (V6_vS32b_nt_ai_128B IntRegs:$src2, Iss4_7:$offset, + (VTDbl VectorRegs128B:$src1))>, + Requires<[UseHVXDbl]>; def : Pat<(alignedstore (VTDbl VectorRegs128B:$src1), (add IntRegs:$src2, Iss4_7:$offset)), (V6_vS32b_ai_128B IntRegs:$src2, Iss4_7:$offset, @@ -2820,6 +2836,9 @@ defm : vS32b_ai_pats <v8i64, v16i64>; multiclass vL32b_ai_pats <ValueType VTSgl, ValueType VTDbl> { // Aligned loads + def : Pat < (VTSgl (alignednontemporalload IntRegs:$addr)), + (V6_vL32b_nt_ai IntRegs:$addr, 0) >, + Requires<[UseHVXSgl]>; def : Pat < (VTSgl (alignedload IntRegs:$addr)), (V6_vL32b_ai IntRegs:$addr, 0) >, Requires<[UseHVXSgl]>; @@ -2828,6 +2847,9 @@ multiclass vL32b_ai_pats <ValueType VTSgl, ValueType VTDbl> { Requires<[UseHVXSgl]>; // 128B Load + def : Pat < (VTDbl (alignednontemporalload IntRegs:$addr)), + (V6_vL32b_nt_ai_128B IntRegs:$addr, 0) >, + Requires<[UseHVXDbl]>; def : Pat < (VTDbl (alignedload IntRegs:$addr)), (V6_vL32b_ai_128B IntRegs:$addr, 0) >, Requires<[UseHVXDbl]>; @@ -2837,6 +2859,9 @@ multiclass vL32b_ai_pats <ValueType VTSgl, ValueType VTDbl> { // Fold Add R+OFF into vector load. let AddedComplexity = 10 in { + def : Pat<(VTDbl (alignednontemporalload (add IntRegs:$src2, Iss4_7:$offset))), + (V6_vL32b_nt_ai_128B IntRegs:$src2, Iss4_7:$offset)>, + Requires<[UseHVXDbl]>; def : Pat<(VTDbl (alignedload (add IntRegs:$src2, Iss4_7:$offset))), (V6_vL32b_ai_128B IntRegs:$src2, Iss4_7:$offset)>, Requires<[UseHVXDbl]>; @@ -2844,6 +2869,9 @@ multiclass vL32b_ai_pats <ValueType VTSgl, ValueType VTDbl> { (V6_vL32Ub_ai_128B IntRegs:$src2, Iss4_7:$offset)>, Requires<[UseHVXDbl]>; + def : Pat<(VTSgl (alignednontemporalload (add IntRegs:$src2, Iss4_6:$offset))), + (V6_vL32b_nt_ai IntRegs:$src2, Iss4_6:$offset)>, + Requires<[UseHVXSgl]>; def : Pat<(VTSgl (alignedload (add IntRegs:$src2, Iss4_6:$offset))), (V6_vL32b_ai IntRegs:$src2, Iss4_6:$offset)>, Requires<[UseHVXSgl]>; @@ -2859,6 +2887,9 @@ defm : vL32b_ai_pats <v16i32, v32i32>; defm : vL32b_ai_pats <v8i64, v16i64>; multiclass STrivv_pats <ValueType VTSgl, ValueType VTDbl> { + def : Pat<(alignednontemporalstore (VTSgl VecDblRegs:$src1), IntRegs:$addr), + (PS_vstorerw_nt_ai IntRegs:$addr, 0, (VTSgl VecDblRegs:$src1))>, + Requires<[UseHVXSgl]>; def : Pat<(alignedstore (VTSgl VecDblRegs:$src1), IntRegs:$addr), (PS_vstorerw_ai IntRegs:$addr, 0, (VTSgl VecDblRegs:$src1))>, Requires<[UseHVXSgl]>; @@ -2866,6 +2897,10 @@ multiclass STrivv_pats <ValueType VTSgl, ValueType VTDbl> { (PS_vstorerwu_ai IntRegs:$addr, 0, (VTSgl VecDblRegs:$src1))>, Requires<[UseHVXSgl]>; + def : Pat<(alignednontemporalstore (VTDbl VecDblRegs128B:$src1), IntRegs:$addr), + (PS_vstorerw_nt_ai_128B IntRegs:$addr, 0, + (VTDbl VecDblRegs128B:$src1))>, + Requires<[UseHVXDbl]>; def : Pat<(alignedstore (VTDbl VecDblRegs128B:$src1), IntRegs:$addr), (PS_vstorerw_ai_128B IntRegs:$addr, 0, (VTDbl VecDblRegs128B:$src1))>, @@ -2882,6 +2917,9 @@ defm : STrivv_pats <v32i32, v64i32>; defm : STrivv_pats <v16i64, v32i64>; multiclass LDrivv_pats <ValueType VTSgl, ValueType VTDbl> { + def : Pat<(VTSgl (alignednontemporalload I32:$addr)), + (PS_vloadrw_nt_ai I32:$addr, 0)>, + Requires<[UseHVXSgl]>; def : Pat<(VTSgl (alignedload I32:$addr)), (PS_vloadrw_ai I32:$addr, 0)>, Requires<[UseHVXSgl]>; @@ -2889,6 +2927,9 @@ multiclass LDrivv_pats <ValueType VTSgl, ValueType VTDbl> { (PS_vloadrwu_ai I32:$addr, 0)>, Requires<[UseHVXSgl]>; + def : Pat<(VTDbl (alignednontemporalload I32:$addr)), + 
(PS_vloadrw_nt_ai_128B I32:$addr, 0)>, + Requires<[UseHVXDbl]>; def : Pat<(VTDbl (alignedload I32:$addr)), (PS_vloadrw_ai_128B I32:$addr, 0)>, Requires<[UseHVXDbl]>; @@ -3021,16 +3062,16 @@ def : Pat<(v2i16 (add (v2i16 IntRegs:$src1), (v2i16 IntRegs:$src2))), def : Pat<(v2i16 (sub (v2i16 IntRegs:$src1), (v2i16 IntRegs:$src2))), (A2_svsubh IntRegs:$src1, IntRegs:$src2)>; -def HexagonVSPLATB: SDNode<"HexagonISD::VSPLATB", SDTUnaryOp>; -def HexagonVSPLATH: SDNode<"HexagonISD::VSPLATH", SDTUnaryOp>; +def SDTHexagonVSPLAT: SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>; +def HexagonVSPLAT: SDNode<"HexagonISD::VSPLAT", SDTHexagonVSPLAT>; // Replicate the low 8-bits from 32-bits input register into each of the // four bytes of 32-bits destination register. -def: Pat<(v4i8 (HexagonVSPLATB I32:$Rs)), (S2_vsplatrb I32:$Rs)>; +def: Pat<(v4i8 (HexagonVSPLAT I32:$Rs)), (S2_vsplatrb I32:$Rs)>; // Replicate the low 16-bits from 32-bits input register into each of the // four halfwords of 64-bits destination register. -def: Pat<(v4i16 (HexagonVSPLATH I32:$Rs)), (S2_vsplatrh I32:$Rs)>; +def: Pat<(v4i16 (HexagonVSPLAT I32:$Rs)), (S2_vsplatrh I32:$Rs)>; class VArith_pat <InstHexagon MI, SDNode Op, PatFrag Type> @@ -3068,84 +3109,44 @@ def: Pat<(v2i32 (shl V2I32:$b, (i64 (HexagonCOMBINE (i32 u5_0ImmPred:$c), (i32 u5_0ImmPred:$c))))), (S2_asl_i_vw V2I32:$b, imm:$c)>; -def: Pat<(v4i16 (sra V4I16:$b, (v4i16 (HexagonVSPLATH (i32 (u4_0ImmPred:$c)))))), +def: Pat<(v4i16 (sra V4I16:$b, (v4i16 (HexagonVSPLAT u4_0ImmPred:$c)))), (S2_asr_i_vh V4I16:$b, imm:$c)>; -def: Pat<(v4i16 (srl V4I16:$b, (v4i16 (HexagonVSPLATH (i32 (u4_0ImmPred:$c)))))), +def: Pat<(v4i16 (srl V4I16:$b, (v4i16 (HexagonVSPLAT u4_0ImmPred:$c)))), (S2_lsr_i_vh V4I16:$b, imm:$c)>; -def: Pat<(v4i16 (shl V4I16:$b, (v4i16 (HexagonVSPLATH (i32 (u4_0ImmPred:$c)))))), +def: Pat<(v4i16 (shl V4I16:$b, (v4i16 (HexagonVSPLAT u4_0ImmPred:$c)))), (S2_asl_i_vh V4I16:$b, imm:$c)>; -def SDTHexagon_v2i32_v2i32_i32 : SDTypeProfile<1, 2, - [SDTCisSameAs<0, 1>, SDTCisVT<0, v2i32>, SDTCisInt<2>]>; -def SDTHexagon_v4i16_v4i16_i32 : SDTypeProfile<1, 2, - [SDTCisSameAs<0, 1>, SDTCisVT<0, v4i16>, SDTCisInt<2>]>; +def SDTHexagonVShift + : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, SDTCisVec<0>, SDTCisVT<2, i32>]>; -def HexagonVSRAW: SDNode<"HexagonISD::VSRAW", SDTHexagon_v2i32_v2i32_i32>; -def HexagonVSRAH: SDNode<"HexagonISD::VSRAH", SDTHexagon_v4i16_v4i16_i32>; -def HexagonVSRLW: SDNode<"HexagonISD::VSRLW", SDTHexagon_v2i32_v2i32_i32>; -def HexagonVSRLH: SDNode<"HexagonISD::VSRLH", SDTHexagon_v4i16_v4i16_i32>; -def HexagonVSHLW: SDNode<"HexagonISD::VSHLW", SDTHexagon_v2i32_v2i32_i32>; -def HexagonVSHLH: SDNode<"HexagonISD::VSHLH", SDTHexagon_v4i16_v4i16_i32>; +def HexagonVASL: SDNode<"HexagonISD::VASL", SDTHexagonVShift>; +def HexagonVASR: SDNode<"HexagonISD::VASR", SDTHexagonVShift>; +def HexagonVLSR: SDNode<"HexagonISD::VLSR", SDTHexagonVShift>; -def: Pat<(v2i32 (HexagonVSRAW V2I32:$Rs, u5_0ImmPred:$u5)), +def: Pat<(v2i32 (HexagonVASL V2I32:$Rs, u5_0ImmPred:$u5)), + (S2_asl_i_vw V2I32:$Rs, imm:$u5)>; +def: Pat<(v4i16 (HexagonVASL V4I16:$Rs, u4_0ImmPred:$u4)), + (S2_asl_i_vh V4I16:$Rs, imm:$u4)>; +def: Pat<(v2i32 (HexagonVASR V2I32:$Rs, u5_0ImmPred:$u5)), (S2_asr_i_vw V2I32:$Rs, imm:$u5)>; -def: Pat<(v4i16 (HexagonVSRAH V4I16:$Rs, u4_0ImmPred:$u4)), +def: Pat<(v4i16 (HexagonVASR V4I16:$Rs, u4_0ImmPred:$u4)), (S2_asr_i_vh V4I16:$Rs, imm:$u4)>; -def: Pat<(v2i32 (HexagonVSRLW V2I32:$Rs, u5_0ImmPred:$u5)), +def: Pat<(v2i32 (HexagonVLSR V2I32:$Rs, u5_0ImmPred:$u5)), 
(S2_lsr_i_vw V2I32:$Rs, imm:$u5)>; -def: Pat<(v4i16 (HexagonVSRLH V4I16:$Rs, u4_0ImmPred:$u4)), +def: Pat<(v4i16 (HexagonVLSR V4I16:$Rs, u4_0ImmPred:$u4)), (S2_lsr_i_vh V4I16:$Rs, imm:$u4)>; -def: Pat<(v2i32 (HexagonVSHLW V2I32:$Rs, u5_0ImmPred:$u5)), - (S2_asl_i_vw V2I32:$Rs, imm:$u5)>; -def: Pat<(v4i16 (HexagonVSHLH V4I16:$Rs, u4_0ImmPred:$u4)), - (S2_asl_i_vh V4I16:$Rs, imm:$u4)>; class vshift_rr_pat<InstHexagon MI, SDNode Op, PatFrag Value> : Pat <(Op Value:$Rs, I32:$Rt), (MI Value:$Rs, I32:$Rt)>; -def: vshift_rr_pat <S2_asr_r_vw, HexagonVSRAW, V2I32>; -def: vshift_rr_pat <S2_asr_r_vh, HexagonVSRAH, V4I16>; -def: vshift_rr_pat <S2_lsr_r_vw, HexagonVSRLW, V2I32>; -def: vshift_rr_pat <S2_lsr_r_vh, HexagonVSRLH, V4I16>; -def: vshift_rr_pat <S2_asl_r_vw, HexagonVSHLW, V2I32>; -def: vshift_rr_pat <S2_asl_r_vh, HexagonVSHLH, V4I16>; - - -def SDTHexagonVecCompare_v8i8 : SDTypeProfile<1, 2, - [SDTCisSameAs<1, 2>, SDTCisVT<0, i1>, SDTCisVT<1, v8i8>]>; -def SDTHexagonVecCompare_v4i16 : SDTypeProfile<1, 2, - [SDTCisSameAs<1, 2>, SDTCisVT<0, i1>, SDTCisVT<1, v4i16>]>; -def SDTHexagonVecCompare_v2i32 : SDTypeProfile<1, 2, - [SDTCisSameAs<1, 2>, SDTCisVT<0, i1>, SDTCisVT<1, v2i32>]>; - -def HexagonVCMPBEQ: SDNode<"HexagonISD::VCMPBEQ", SDTHexagonVecCompare_v8i8>; -def HexagonVCMPBGT: SDNode<"HexagonISD::VCMPBGT", SDTHexagonVecCompare_v8i8>; -def HexagonVCMPBGTU: SDNode<"HexagonISD::VCMPBGTU", SDTHexagonVecCompare_v8i8>; -def HexagonVCMPHEQ: SDNode<"HexagonISD::VCMPHEQ", SDTHexagonVecCompare_v4i16>; -def HexagonVCMPHGT: SDNode<"HexagonISD::VCMPHGT", SDTHexagonVecCompare_v4i16>; -def HexagonVCMPHGTU: SDNode<"HexagonISD::VCMPHGTU", SDTHexagonVecCompare_v4i16>; -def HexagonVCMPWEQ: SDNode<"HexagonISD::VCMPWEQ", SDTHexagonVecCompare_v2i32>; -def HexagonVCMPWGT: SDNode<"HexagonISD::VCMPWGT", SDTHexagonVecCompare_v2i32>; -def HexagonVCMPWGTU: SDNode<"HexagonISD::VCMPWGTU", SDTHexagonVecCompare_v2i32>; - - -class vcmp_i1_pat<InstHexagon MI, SDNode Op, PatFrag Value> - : Pat <(i1 (Op Value:$Rs, Value:$Rt)), - (MI Value:$Rs, Value:$Rt)>; - -def: vcmp_i1_pat<A2_vcmpbeq, HexagonVCMPBEQ, V8I8>; -def: vcmp_i1_pat<A4_vcmpbgt, HexagonVCMPBGT, V8I8>; -def: vcmp_i1_pat<A2_vcmpbgtu, HexagonVCMPBGTU, V8I8>; - -def: vcmp_i1_pat<A2_vcmpheq, HexagonVCMPHEQ, V4I16>; -def: vcmp_i1_pat<A2_vcmphgt, HexagonVCMPHGT, V4I16>; -def: vcmp_i1_pat<A2_vcmphgtu, HexagonVCMPHGTU, V4I16>; - -def: vcmp_i1_pat<A2_vcmpweq, HexagonVCMPWEQ, V2I32>; -def: vcmp_i1_pat<A2_vcmpwgt, HexagonVCMPWGT, V2I32>; -def: vcmp_i1_pat<A2_vcmpwgtu, HexagonVCMPWGTU, V2I32>; +def: vshift_rr_pat <S2_asl_r_vw, HexagonVASL, V2I32>; +def: vshift_rr_pat <S2_asl_r_vh, HexagonVASL, V4I16>; +def: vshift_rr_pat <S2_asr_r_vw, HexagonVASR, V2I32>; +def: vshift_rr_pat <S2_asr_r_vh, HexagonVASR, V4I16>; +def: vshift_rr_pat <S2_lsr_r_vw, HexagonVLSR, V2I32>; +def: vshift_rr_pat <S2_lsr_r_vh, HexagonVLSR, V4I16>; class vcmp_vi1_pat<InstHexagon MI, PatFrag Op, PatFrag InVal, ValueType OutTy> @@ -3255,13 +3256,6 @@ def: Pat<(v4i8 (trunc V4I16:$Rs)), def: Pat<(v2i16 (trunc V2I32:$Rs)), (LoReg (S2_packhl (HiReg $Rs), (LoReg $Rs)))>; - -def HexagonVSXTBH : SDNode<"HexagonISD::VSXTBH", SDTUnaryOp>; -def HexagonVSXTBW : SDNode<"HexagonISD::VSXTBW", SDTUnaryOp>; - -def: Pat<(i64 (HexagonVSXTBH I32:$Rs)), (S2_vsxtbh I32:$Rs)>; -def: Pat<(i64 (HexagonVSXTBW I32:$Rs)), (S2_vsxthw I32:$Rs)>; - def: Pat<(v4i16 (zext V4I8:$Rs)), (S2_vzxtbh V4I8:$Rs)>; def: Pat<(v2i32 (zext V2I16:$Rs)), (S2_vzxthw V2I16:$Rs)>; def: Pat<(v4i16 (anyext V4I8:$Rs)), (S2_vzxtbh V4I8:$Rs)>; @@ -3322,31 +3316,6 @@ 
def: Pat<(v8i8 (mul V8I8:$Rs, V8I8:$Rt)), (A2_combinew (S2_vtrunehb (VMPYB_no_V5 (HiReg $Rs), (HiReg $Rt))), (S2_vtrunehb (VMPYB_no_V5 (LoReg $Rs), (LoReg $Rt))))>; -def SDTHexagonBinOp64 : SDTypeProfile<1, 2, - [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisVT<0, i64>]>; - -def HexagonSHUFFEB: SDNode<"HexagonISD::SHUFFEB", SDTHexagonBinOp64>; -def HexagonSHUFFEH: SDNode<"HexagonISD::SHUFFEH", SDTHexagonBinOp64>; -def HexagonSHUFFOB: SDNode<"HexagonISD::SHUFFOB", SDTHexagonBinOp64>; -def HexagonSHUFFOH: SDNode<"HexagonISD::SHUFFOH", SDTHexagonBinOp64>; - -class ShufflePat<InstHexagon MI, SDNode Op> - : Pat<(i64 (Op DoubleRegs:$src1, DoubleRegs:$src2)), - (i64 (MI DoubleRegs:$src1, DoubleRegs:$src2))>; - -// Shuffles even bytes for i=0..3: A[2*i].b = C[2*i].b; A[2*i+1].b = B[2*i].b -def: ShufflePat<S2_shuffeb, HexagonSHUFFEB>; - -// Shuffles odd bytes for i=0..3: A[2*i].b = C[2*i+1].b; A[2*i+1].b = B[2*i+1].b -def: ShufflePat<S2_shuffob, HexagonSHUFFOB>; - -// Shuffles even half for i=0,1: A[2*i].h = C[2*i].h; A[2*i+1].h = B[2*i].h -def: ShufflePat<S2_shuffeh, HexagonSHUFFEH>; - -// Shuffles odd half for i=0,1: A[2*i].h = C[2*i+1].h; A[2*i+1].h = B[2*i+1].h -def: ShufflePat<S2_shuffoh, HexagonSHUFFOH>; - - // Truncated store from v4i16 to v4i8. def truncstorev4i8: PatFrag<(ops node:$val, node:$ptr), (truncstore node:$val, node:$ptr), diff --git a/lib/Target/Hexagon/HexagonPseudo.td b/lib/Target/Hexagon/HexagonPseudo.td index 93fb688fc1c0a..b42c1ab975a80 100644 --- a/lib/Target/Hexagon/HexagonPseudo.td +++ b/lib/Target/Hexagon/HexagonPseudo.td @@ -407,6 +407,11 @@ def PS_vstorerw_ai: STrivv_template<VecDblRegs, V6_vS32b_ai>, def PS_vstorerw_ai_128B: STrivv_template<VecDblRegs128B, V6_vS32b_ai_128B>, Requires<[HasV60T,UseHVXDbl]>; +def PS_vstorerw_nt_ai: STrivv_template<VecDblRegs, V6_vS32b_nt_ai>, + Requires<[HasV60T,UseHVXSgl]>; +def PS_vstorerw_nt_ai_128B: STrivv_template<VecDblRegs128B, V6_vS32b_nt_ai_128B>, + Requires<[HasV60T,UseHVXDbl]>; + def PS_vstorerwu_ai: STrivv_template<VecDblRegs, V6_vS32Ub_ai>, Requires<[HasV60T,UseHVXSgl]>; def PS_vstorerwu_ai_128B: STrivv_template<VecDblRegs128B, V6_vS32Ub_ai_128B>, @@ -433,6 +438,11 @@ def PS_vloadrw_ai: LDrivv_template<VecDblRegs, V6_vL32b_ai>, def PS_vloadrw_ai_128B: LDrivv_template<VecDblRegs128B, V6_vL32b_ai_128B>, Requires<[HasV60T,UseHVXDbl]>; +def PS_vloadrw_nt_ai: LDrivv_template<VecDblRegs, V6_vL32b_nt_ai>, + Requires<[HasV60T,UseHVXSgl]>; +def PS_vloadrw_nt_ai_128B: LDrivv_template<VecDblRegs128B, V6_vL32b_nt_ai_128B>, + Requires<[HasV60T,UseHVXDbl]>; + def PS_vloadrwu_ai: LDrivv_template<VecDblRegs, V6_vL32Ub_ai>, Requires<[HasV60T,UseHVXSgl]>; def PS_vloadrwu_ai_128B: LDrivv_template<VecDblRegs128B, V6_vL32Ub_ai_128B>, diff --git a/lib/Target/Hexagon/HexagonSplitDouble.cpp b/lib/Target/Hexagon/HexagonSplitDouble.cpp index db268b78cd73f..4fa929a20810a 100644 --- a/lib/Target/Hexagon/HexagonSplitDouble.cpp +++ b/lib/Target/Hexagon/HexagonSplitDouble.cpp @@ -350,6 +350,8 @@ int32_t HexagonSplitDoubleRegs::profit(const MachineInstr *MI) const { MI->getOperand(2).getImm()); case Hexagon::A4_combineri: ImmX++; + // Fall through into A4_combineir. 
+ LLVM_FALLTHROUGH; case Hexagon::A4_combineir: { ImmX++; int64_t V = MI->getOperand(ImmX).getImm(); diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp index 76d9b31b005ff..7d88b51f32dd3 100644 --- a/lib/Target/Hexagon/HexagonTargetMachine.cpp +++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp @@ -110,10 +110,11 @@ SchedCustomRegistry("hexagon", "Run Hexagon's custom scheduler", namespace llvm { extern char &HexagonExpandCondsetsID; void initializeHexagonExpandCondsetsPass(PassRegistry&); - void initializeHexagonLoopIdiomRecognizePass(PassRegistry&); void initializeHexagonGenMuxPass(PassRegistry&); - void initializeHexagonOptAddrModePass(PassRegistry&); + void initializeHexagonLoopIdiomRecognizePass(PassRegistry&); void initializeHexagonNewValueJumpPass(PassRegistry&); + void initializeHexagonOptAddrModePass(PassRegistry&); + void initializeHexagonPacketizerPass(PassRegistry&); Pass *createHexagonLoopIdiomPass(); FunctionPass *createHexagonBitSimplify(); @@ -156,10 +157,11 @@ extern "C" void LLVMInitializeHexagonTarget() { RegisterTargetMachine<HexagonTargetMachine> X(getTheHexagonTarget()); PassRegistry &PR = *PassRegistry::getPassRegistry(); - initializeHexagonLoopIdiomRecognizePass(PR); initializeHexagonGenMuxPass(PR); - initializeHexagonOptAddrModePass(PR); + initializeHexagonLoopIdiomRecognizePass(PR); initializeHexagonNewValueJumpPass(PR); + initializeHexagonOptAddrModePass(PR); + initializeHexagonPacketizerPass(PR); } HexagonTargetMachine::HexagonTargetMachine(const Target &T, const Triple &TT, diff --git a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp index 7667bfb7a0eb4..a3021e3dfe432 100644 --- a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp +++ b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp @@ -60,9 +60,7 @@ namespace { class HexagonPacketizer : public MachineFunctionPass { public: static char ID; - HexagonPacketizer() : MachineFunctionPass(ID) { - initializeHexagonPacketizerPass(*PassRegistry::getPassRegistry()); - } + HexagonPacketizer() : MachineFunctionPass(ID) {} void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); @@ -89,14 +87,14 @@ namespace { char HexagonPacketizer::ID = 0; } -INITIALIZE_PASS_BEGIN(HexagonPacketizer, "packets", "Hexagon Packetizer", - false, false) +INITIALIZE_PASS_BEGIN(HexagonPacketizer, "hexagon-packetizer", + "Hexagon Packetizer", false, false) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) -INITIALIZE_PASS_END(HexagonPacketizer, "packets", "Hexagon Packetizer", - false, false) +INITIALIZE_PASS_END(HexagonPacketizer, "hexagon-packetizer", + "Hexagon Packetizer", false, false) HexagonPacketizerList::HexagonPacketizerList(MachineFunction &MF, MachineLoopInfo &MLI, AliasAnalysis *AA, diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp index 34d0b55aa22ae..2a0edda8dcee8 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp @@ -412,7 +412,7 @@ public: /// fixup kind as appropriate. 
void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, const MCValue &Target, MutableArrayRef<char> Data, - uint64_t FixupValue, bool IsPCRel) const override { + uint64_t FixupValue, bool IsResolved) const override { // When FixupValue is 0 the relocation is external and there // is nothing for us to do. @@ -442,6 +442,7 @@ public: case fixup_Hexagon_B7_PCREL: if (!(isIntN(7, sValue))) HandleFixupError(7, 2, (int64_t)FixupValue, "B7_PCREL"); + LLVM_FALLTHROUGH; case fixup_Hexagon_B7_PCREL_X: InstMask = 0x00001f18; // Word32_B7 Reloc = (((Value >> 2) & 0x1f) << 8) | // Value 6-2 = Target 12-8 @@ -451,6 +452,7 @@ public: case fixup_Hexagon_B9_PCREL: if (!(isIntN(9, sValue))) HandleFixupError(9, 2, (int64_t)FixupValue, "B9_PCREL"); + LLVM_FALLTHROUGH; case fixup_Hexagon_B9_PCREL_X: InstMask = 0x003000fe; // Word32_B9 Reloc = (((Value >> 7) & 0x3) << 20) | // Value 8-7 = Target 21-20 @@ -462,6 +464,7 @@ public: case fixup_Hexagon_B13_PCREL: if (!(isIntN(13, sValue))) HandleFixupError(13, 2, (int64_t)FixupValue, "B13_PCREL"); + LLVM_FALLTHROUGH; case fixup_Hexagon_B13_PCREL_X: InstMask = 0x00202ffe; // Word32_B13 Reloc = (((Value >> 12) & 0x1) << 21) | // Value 12 = Target 21 @@ -472,6 +475,7 @@ public: case fixup_Hexagon_B15_PCREL: if (!(isIntN(15, sValue))) HandleFixupError(15, 2, (int64_t)FixupValue, "B15_PCREL"); + LLVM_FALLTHROUGH; case fixup_Hexagon_B15_PCREL_X: InstMask = 0x00df20fe; // Word32_B15 Reloc = (((Value >> 13) & 0x3) << 22) | // Value 14-13 = Target 23-22 @@ -483,6 +487,7 @@ public: case fixup_Hexagon_B22_PCREL: if (!(isIntN(22, sValue))) HandleFixupError(22, 2, (int64_t)FixupValue, "B22_PCREL"); + LLVM_FALLTHROUGH; case fixup_Hexagon_B22_PCREL_X: InstMask = 0x01ff3ffe; // Word32_B22 Reloc = (((Value >> 13) & 0x1ff) << 16) | // Value 21-13 = Target 24-16 diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h b/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h index d8009c5da08ee..7f90e83fc8e9e 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h @@ -169,8 +169,11 @@ namespace HexagonII { // Hexagon specific MO operand flag mask. enum HexagonMOTargetFlagVal { - //===------------------------------------------------------------------===// - // Hexagon Specific MachineOperand flags. + // Hexagon-specific MachineOperand target flags. + // + // When changing these, make sure to update + // getSerializableDirectMachineOperandTargetFlags and + // getSerializableBitmaskMachineOperandTargetFlags if needed. MO_NO_FLAG, /// MO_PCREL - On a symbol operand, indicates a PC-relative relocation @@ -207,10 +210,12 @@ namespace HexagonII { MO_TPREL, // HMOTF_ConstExtended - // Addendum to abovem, indicates a const extended op + // Addendum to above, indicates a const extended op // Can be used as a mask. - HMOTF_ConstExtended = 0x80 + HMOTF_ConstExtended = 0x80, + // Union of all bitmasks (currently only HMOTF_ConstExtended). + MO_Bitmasks = HMOTF_ConstExtended }; // Hexagon Sub-instruction classes.
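(The MO_Bitmasks addition above is what lets HexagonInstrInfo::decomposeMachineOperandsTargetFlags split a combined operand flag word into its direct part and its bitmask part for MIR serialization. A minimal sketch of that split, not part of the patch; the MO_PCREL numeric value is an assumed placeholder, the real enumerator values live in HexagonBaseInfo.h:

#include <cassert>
#include <utility>

// Constants from the hunk above; MO_PCREL's numeric value is assumed here
// purely for illustration.
constexpr unsigned HMOTF_ConstExtended = 0x80;
constexpr unsigned MO_Bitmasks = HMOTF_ConstExtended;
constexpr unsigned MO_PCREL = 1; // assumed value, for illustration only

// Same shape as HexagonInstrInfo::decomposeMachineOperandsTargetFlags:
// everything outside MO_Bitmasks is the direct flag, the rest are bit flags.
std::pair<unsigned, unsigned> decomposeFlags(unsigned TF) {
  return {TF & ~MO_Bitmasks, TF & MO_Bitmasks};
}

int main() {
  unsigned TF = MO_PCREL | HMOTF_ConstExtended;
  auto [Direct, Masks] = decomposeFlags(TF);
  assert(Direct == MO_PCREL && Masks == HMOTF_ConstExtended);
  assert((Direct | Masks) == TF); // the split is lossless
  return 0;
}

This is also why the new comment asks that the getSerializable* overrides stay in sync: the MIR printer can only name flags that appear in those tables.)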
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp index 564d43b45cb87..1604e7c8dc549 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp @@ -259,6 +259,7 @@ bool HexagonShuffler::check() { break; case HexagonII::TypeCVI_VM_VP_LDU: ++onlyNo1; + LLVM_FALLTHROUGH; case HexagonII::TypeCVI_VM_LD: case HexagonII::TypeCVI_VM_TMP_LD: case HexagonII::TypeLD: @@ -274,6 +275,7 @@ bool HexagonShuffler::check() { break; case HexagonII::TypeCVI_VM_STU: ++onlyNo1; + LLVM_FALLTHROUGH; case HexagonII::TypeCVI_VM_ST: case HexagonII::TypeCVI_VM_NEW_ST: case HexagonII::TypeST: diff --git a/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp b/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp index 72e471f5766e5..1394ac7210f2f 100644 --- a/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp +++ b/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp @@ -787,6 +787,7 @@ std::unique_ptr<LanaiOperand> LanaiAsmParser::parseImmediate() { case AsmToken::Dot: if (!Parser.parseExpression(ExprVal)) return LanaiOperand::createImm(ExprVal, Start, End); + LLVM_FALLTHROUGH; default: return nullptr; } diff --git a/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp b/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp index c212726113ab7..bbce5f670c99e 100644 --- a/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp +++ b/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp @@ -51,7 +51,7 @@ public: void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, const MCValue &Target, MutableArrayRef<char> Data, - uint64_t Value, bool IsPCRel) const override; + uint64_t Value, bool IsResolved) const override; MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override; @@ -92,7 +92,7 @@ bool LanaiAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { void LanaiAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, const MCValue &Target, MutableArrayRef<char> Data, uint64_t Value, - bool /*IsPCRel*/) const { + bool /*IsResolved*/) const { MCFixupKind Kind = Fixup.getKind(); Value = adjustFixupValue(static_cast<unsigned>(Kind), Value); diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index 69b1ba1528d04..b72c9d5344787 100644 --- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -304,6 +304,9 @@ class MipsAsmParser : public MCTargetAsmParser { bool expandSeqI(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, const MCSubtargetInfo *STI); + bool expandMXTRAlias(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, + const MCSubtargetInfo *STI); + bool reportParseError(Twine ErrorMsg); bool reportParseError(SMLoc Loc, Twine ErrorMsg); @@ -343,6 +346,8 @@ class MipsAsmParser : public MCTargetAsmParser { bool parseSetPushDirective(); bool parseSetSoftFloatDirective(); bool parseSetHardFloatDirective(); + bool parseSetMtDirective(); + bool parseSetNoMtDirective(); bool parseSetAssignment(); @@ -628,6 +633,9 @@ public: bool useSoftFloat() const { return getSTI().getFeatureBits()[Mips::FeatureSoftFloat]; } + bool hasMT() const { + return getSTI().getFeatureBits()[Mips::FeatureMT]; + } /// Warn if RegIndex is the same as the current AT. 
void warnIfRegIndexIsAT(unsigned RegIndex, SMLoc Loc); @@ -1966,6 +1974,7 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, case Mips::SDIV_MM: FirstOp = 0; SecondOp = 1; + LLVM_FALLTHROUGH; case Mips::SDivMacro: case Mips::DSDivMacro: case Mips::UDivMacro: @@ -2505,6 +2514,16 @@ MipsAsmParser::tryExpandInstruction(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, return expandSeq(Inst, IDLoc, Out, STI) ? MER_Fail : MER_Success; case Mips::SEQIMacro: return expandSeqI(Inst, IDLoc, Out, STI) ? MER_Fail : MER_Success; + case Mips::MFTC0: case Mips::MTTC0: + case Mips::MFTGPR: case Mips::MTTGPR: + case Mips::MFTLO: case Mips::MTTLO: + case Mips::MFTHI: case Mips::MTTHI: + case Mips::MFTACX: case Mips::MTTACX: + case Mips::MFTDSP: case Mips::MTTDSP: + case Mips::MFTC1: case Mips::MTTC1: + case Mips::MFTHC1: case Mips::MTTHC1: + case Mips::CFTC1: case Mips::CTTC1: + return expandMXTRAlias(Inst, IDLoc, Out, STI) ? MER_Fail : MER_Success; } } @@ -4876,6 +4895,212 @@ bool MipsAsmParser::expandSeqI(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, return false; } +// Map the DSP accumulator and control register to the corresponding gpr +// operand. Unlike the other aliases, the m(f|t)t(lo|hi|acx) instructions +// do not map the DSP registers contiguously to gpr registers. +static unsigned getRegisterForMxtrDSP(MCInst &Inst, bool IsMFDSP) { + switch (Inst.getOpcode()) { + case Mips::MFTLO: + case Mips::MTTLO: + switch (Inst.getOperand(IsMFDSP ? 1 : 0).getReg()) { + case Mips::AC0: + return Mips::ZERO; + case Mips::AC1: + return Mips::A0; + case Mips::AC2: + return Mips::T0; + case Mips::AC3: + return Mips::T4; + default: + llvm_unreachable("Unknown register for 'mttr' alias!"); + } + case Mips::MFTHI: + case Mips::MTTHI: + switch (Inst.getOperand(IsMFDSP ? 1 : 0).getReg()) { + case Mips::AC0: + return Mips::AT; + case Mips::AC1: + return Mips::A1; + case Mips::AC2: + return Mips::T1; + case Mips::AC3: + return Mips::T5; + default: + llvm_unreachable("Unknown register for 'mttr' alias!"); + } + case Mips::MFTACX: + case Mips::MTTACX: + switch (Inst.getOperand(IsMFDSP ? 1 : 0).getReg()) { + case Mips::AC0: + return Mips::V0; + case Mips::AC1: + return Mips::A2; + case Mips::AC2: + return Mips::T2; + case Mips::AC3: + return Mips::T6; + default: + llvm_unreachable("Unknown register for 'mttr' alias!"); + } + case Mips::MFTDSP: + case Mips::MTTDSP: + return Mips::S0; + default: + llvm_unreachable("Unknown instruction for 'mttr' dsp alias!"); + } +} + +// Map the floating point register operand to the corresponding register +// operand. +static unsigned getRegisterForMxtrFP(MCInst &Inst, bool IsMFTC1) { + switch (Inst.getOperand(IsMFTC1 ?
1 : 0).getReg()) { + case Mips::F0: return Mips::ZERO; + case Mips::F1: return Mips::AT; + case Mips::F2: return Mips::V0; + case Mips::F3: return Mips::V1; + case Mips::F4: return Mips::A0; + case Mips::F5: return Mips::A1; + case Mips::F6: return Mips::A2; + case Mips::F7: return Mips::A3; + case Mips::F8: return Mips::T0; + case Mips::F9: return Mips::T1; + case Mips::F10: return Mips::T2; + case Mips::F11: return Mips::T3; + case Mips::F12: return Mips::T4; + case Mips::F13: return Mips::T5; + case Mips::F14: return Mips::T6; + case Mips::F15: return Mips::T7; + case Mips::F16: return Mips::S0; + case Mips::F17: return Mips::S1; + case Mips::F18: return Mips::S2; + case Mips::F19: return Mips::S3; + case Mips::F20: return Mips::S4; + case Mips::F21: return Mips::S5; + case Mips::F22: return Mips::S6; + case Mips::F23: return Mips::S7; + case Mips::F24: return Mips::T8; + case Mips::F25: return Mips::T9; + case Mips::F26: return Mips::K0; + case Mips::F27: return Mips::K1; + case Mips::F28: return Mips::GP; + case Mips::F29: return Mips::SP; + case Mips::F30: return Mips::FP; + case Mips::F31: return Mips::RA; + default: llvm_unreachable("Unknown register for mttc1 alias!"); + } +} + +// Map the coprocessor operand to the corresponding gpr register operand. +static unsigned getRegisterForMxtrC0(MCInst &Inst, bool IsMFTC0) { + switch (Inst.getOperand(IsMFTC0 ? 1 : 0).getReg()) { + case Mips::COP00: return Mips::ZERO; + case Mips::COP01: return Mips::AT; + case Mips::COP02: return Mips::V0; + case Mips::COP03: return Mips::V1; + case Mips::COP04: return Mips::A0; + case Mips::COP05: return Mips::A1; + case Mips::COP06: return Mips::A2; + case Mips::COP07: return Mips::A3; + case Mips::COP08: return Mips::T0; + case Mips::COP09: return Mips::T1; + case Mips::COP010: return Mips::T2; + case Mips::COP011: return Mips::T3; + case Mips::COP012: return Mips::T4; + case Mips::COP013: return Mips::T5; + case Mips::COP014: return Mips::T6; + case Mips::COP015: return Mips::T7; + case Mips::COP016: return Mips::S0; + case Mips::COP017: return Mips::S1; + case Mips::COP018: return Mips::S2; + case Mips::COP019: return Mips::S3; + case Mips::COP020: return Mips::S4; + case Mips::COP021: return Mips::S5; + case Mips::COP022: return Mips::S6; + case Mips::COP023: return Mips::S7; + case Mips::COP024: return Mips::T8; + case Mips::COP025: return Mips::T9; + case Mips::COP026: return Mips::K0; + case Mips::COP027: return Mips::K1; + case Mips::COP028: return Mips::GP; + case Mips::COP029: return Mips::SP; + case Mips::COP030: return Mips::FP; + case Mips::COP031: return Mips::RA; + default: llvm_unreachable("Unknown register for mttc0 alias!"); + } +} + +/// Expand an alias of 'mftr' or 'mttr' into the full instruction, by producing +/// an mftr or mttr with the correctly mapped gpr register, u, sel and h bits. +bool MipsAsmParser::expandMXTRAlias(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, + const MCSubtargetInfo *STI) { + MipsTargetStreamer &TOut = getTargetStreamer(); + unsigned rd = 0; + unsigned u = 1; + unsigned sel = 0; + unsigned h = 0; + bool IsMFTR = false; + switch (Inst.getOpcode()) { + case Mips::MFTC0: + IsMFTR = true; + LLVM_FALLTHROUGH; + case Mips::MTTC0: + u = 0; + rd = getRegisterForMxtrC0(Inst, IsMFTR); + sel = Inst.getOperand(2).getImm(); + break; + case Mips::MFTGPR: + IsMFTR = true; + LLVM_FALLTHROUGH; + case Mips::MTTGPR: + rd = Inst.getOperand(IsMFTR ?
1 : 0).getReg(); + break; + case Mips::MFTLO: + case Mips::MFTHI: + case Mips::MFTACX: + case Mips::MFTDSP: + IsMFTR = true; + LLVM_FALLTHROUGH; + case Mips::MTTLO: + case Mips::MTTHI: + case Mips::MTTACX: + case Mips::MTTDSP: + rd = getRegisterForMxtrDSP(Inst, IsMFTR); + sel = 1; + break; + case Mips::MFTHC1: + h = 1; + LLVM_FALLTHROUGH; + case Mips::MFTC1: + IsMFTR = true; + rd = getRegisterForMxtrFP(Inst, IsMFTR); + sel = 2; + break; + case Mips::MTTHC1: + h = 1; + LLVM_FALLTHROUGH; + case Mips::MTTC1: + rd = getRegisterForMxtrFP(Inst, IsMFTR); + sel = 2; + break; + case Mips::CFTC1: + IsMFTR = true; + LLVM_FALLTHROUGH; + case Mips::CTTC1: + rd = getRegisterForMxtrFP(Inst, IsMFTR); + sel = 3; + break; + } + unsigned Op0 = IsMFTR ? Inst.getOperand(0).getReg() : rd; + unsigned Op1 = + IsMFTR ? rd + : (Inst.getOpcode() != Mips::MTTDSP ? Inst.getOperand(1).getReg() + : Inst.getOperand(0).getReg()); + + TOut.emitRRIII(IsMFTR ? Mips::MFTR : Mips::MTTR, Op0, Op1, u, sel, h, IDLoc, + STI); + return false; +} + unsigned MipsAsmParser::checkEarlyTargetMatchPredicate(MCInst &Inst, const OperandVector &Operands) { @@ -6329,6 +6554,39 @@ bool MipsAsmParser::parseSetNoOddSPRegDirective() { return false; } +bool MipsAsmParser::parseSetMtDirective() { + MCAsmParser &Parser = getParser(); + Parser.Lex(); // Eat "mt". + + // If this is not the end of the statement, report an error. + if (getLexer().isNot(AsmToken::EndOfStatement)) { + reportParseError("unexpected token, expected end of statement"); + return false; + } + + setFeatureBits(Mips::FeatureMT, "mt"); + getTargetStreamer().emitDirectiveSetMt(); + Parser.Lex(); // Consume the EndOfStatement. + return false; +} + +bool MipsAsmParser::parseSetNoMtDirective() { + MCAsmParser &Parser = getParser(); + Parser.Lex(); // Eat "nomt". + + // If this is not the end of the statement, report an error. + if (getLexer().isNot(AsmToken::EndOfStatement)) { + reportParseError("unexpected token, expected end of statement"); + return false; + } + + clearFeatureBits(Mips::FeatureMT, "mt"); + + getTargetStreamer().emitDirectiveSetNoMt(); + Parser.Lex(); // Consume the EndOfStatement. + return false; +} + bool MipsAsmParser::parseSetPopDirective() { MCAsmParser &Parser = getParser(); SMLoc Loc = getLexer().getLoc(); @@ -6829,6 +7087,10 @@ bool MipsAsmParser::parseDirectiveSet() { return parseSetMsaDirective(); } else if (Tok.getString() == "nomsa") { return parseSetNoMsaDirective(); + } else if (Tok.getString() == "mt") { + return parseSetMtDirective(); + } else if (Tok.getString() == "nomt") { + return parseSetNoMtDirective(); } else if (Tok.getString() == "softfloat") { return parseSetSoftFloatDirective(); } else if (Tok.getString() == "hardfloat") { @@ -7078,6 +7340,7 @@ bool MipsAsmParser::parseSSectionDirective(StringRef Section, unsigned Type) { /// ::= .module fp=value /// ::= .module softfloat /// ::= .module hardfloat +/// ::= .module mt bool MipsAsmParser::parseDirectiveModule() { MCAsmParser &Parser = getParser(); MCAsmLexer &Lexer = getLexer(); @@ -7177,6 +7440,25 @@ bool MipsAsmParser::parseDirectiveModule() { } return false; // parseDirectiveModule has finished successfully. + } else if (Option == "mt") { + setModuleFeatureBits(Mips::FeatureMT, "mt"); + + // Synchronize the ABI Flags information with the FeatureBits information we + // updated above. + getTargetStreamer().updateABIInfo(*this); + + // If printing assembly, use the recently updated ABI Flags information. 
+ // If generating ELF, don't do anything (the .MIPS.abiflags section gets + // emitted later). + getTargetStreamer().emitDirectiveModuleMT(); + + // If this is not the end of the statement, report an error. + if (getLexer().isNot(AsmToken::EndOfStatement)) { + reportParseError("unexpected token, expected end of statement"); + return false; + } + + return false; // parseDirectiveModule has finished successfully. } else { return Error(L, "'" + Twine(Option) + "' is not a valid .module option."); } diff --git a/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.h b/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.h index f385410270231..9abd4f1d6b08c 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.h +++ b/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.h @@ -159,6 +159,8 @@ public: ASESet |= Mips::AFL_ASE_MICROMIPS; if (P.inMips16Mode()) ASESet |= Mips::AFL_ASE_MIPS16; + if (P.hasMT()) + ASESet |= Mips::AFL_ASE_MT; } template <class PredicateLibrary> diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp index ae48d6e38fa0f..a1ed0ea4d7f36 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp @@ -238,7 +238,7 @@ static unsigned calculateMMLEIndex(unsigned i) { void MipsAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, const MCValue &Target, MutableArrayRef<char> Data, uint64_t Value, - bool IsPCRel) const { + bool IsResolved) const { MCFixupKind Kind = Fixup.getKind(); MCContext &Ctx = Asm.getContext(); Value = adjustFixupValue(Fixup, Value, Ctx); diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h index bf3b290b7ed53..8ebde3b9b7a4e 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h +++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h @@ -40,7 +40,7 @@ public: void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, const MCValue &Target, MutableArrayRef<char> Data, - uint64_t Value, bool IsPCRel) const override; + uint64_t Value, bool IsResolved) const override; Optional<MCFixupKind> getFixupKind(StringRef Name) const override; const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override; diff --git a/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp index 0cd4aebe4d164..7caeb08589af6 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp @@ -50,6 +50,8 @@ void MipsTargetStreamer::emitDirectiveSetMacro() { forbidModuleDirective(); } void MipsTargetStreamer::emitDirectiveSetNoMacro() { forbidModuleDirective(); } void MipsTargetStreamer::emitDirectiveSetMsa() { forbidModuleDirective(); } void MipsTargetStreamer::emitDirectiveSetNoMsa() { forbidModuleDirective(); } +void MipsTargetStreamer::emitDirectiveSetMt() {} +void MipsTargetStreamer::emitDirectiveSetNoMt() { forbidModuleDirective(); } void MipsTargetStreamer::emitDirectiveSetAt() { forbidModuleDirective(); } void MipsTargetStreamer::emitDirectiveSetAtWithArg(unsigned RegNo) { forbidModuleDirective(); @@ -118,6 +120,7 @@ void MipsTargetStreamer::emitDirectiveModuleOddSPReg() { } void MipsTargetStreamer::emitDirectiveModuleSoftFloat() {} void MipsTargetStreamer::emitDirectiveModuleHardFloat() {} +void MipsTargetStreamer::emitDirectiveModuleMT() {} void MipsTargetStreamer::emitDirectiveSetFp( MipsABIFlagsSection::FpABIKind Value) { forbidModuleDirective(); @@ -190,6 +193,21 @@ void 
MipsTargetStreamer::emitRRI(unsigned Opcode, unsigned Reg0, unsigned Reg1, emitRRX(Opcode, Reg0, Reg1, MCOperand::createImm(Imm), IDLoc, STI); } +void MipsTargetStreamer::emitRRIII(unsigned Opcode, unsigned Reg0, + unsigned Reg1, int16_t Imm0, int16_t Imm1, + int16_t Imm2, SMLoc IDLoc, + const MCSubtargetInfo *STI) { + MCInst TmpInst; + TmpInst.setOpcode(Opcode); + TmpInst.addOperand(MCOperand::createReg(Reg0)); + TmpInst.addOperand(MCOperand::createReg(Reg1)); + TmpInst.addOperand(MCOperand::createImm(Imm0)); + TmpInst.addOperand(MCOperand::createImm(Imm1)); + TmpInst.addOperand(MCOperand::createImm(Imm2)); + TmpInst.setLoc(IDLoc); + getStreamer().EmitInstruction(TmpInst, *STI); +} + void MipsTargetStreamer::emitAddu(unsigned DstReg, unsigned SrcReg, unsigned TrgReg, bool Is64Bit, const MCSubtargetInfo *STI) { @@ -392,6 +410,16 @@ void MipsTargetAsmStreamer::emitDirectiveSetNoMsa() { MipsTargetStreamer::emitDirectiveSetNoMsa(); } +void MipsTargetAsmStreamer::emitDirectiveSetMt() { + OS << "\t.set\tmt\n"; + MipsTargetStreamer::emitDirectiveSetMt(); +} + +void MipsTargetAsmStreamer::emitDirectiveSetNoMt() { + OS << "\t.set\tnomt\n"; + MipsTargetStreamer::emitDirectiveSetNoMt(); +} + void MipsTargetAsmStreamer::emitDirectiveSetAt() { OS << "\t.set\tat\n"; MipsTargetStreamer::emitDirectiveSetAt(); @@ -656,6 +684,10 @@ void MipsTargetAsmStreamer::emitDirectiveModuleHardFloat() { OS << "\t.module\thardfloat\n"; } +void MipsTargetAsmStreamer::emitDirectiveModuleMT() { + OS << "\t.module\tmt\n"; +} + // This part is for ELF object output. MipsTargetELFStreamer::MipsTargetELFStreamer(MCStreamer &S, const MCSubtargetInfo &STI) diff --git a/lib/Target/Mips/Mips.td b/lib/Target/Mips/Mips.td index f24761d7d1013..d2f0fdcc6cc11 100644 --- a/lib/Target/Mips/Mips.td +++ b/lib/Target/Mips/Mips.td @@ -188,6 +188,8 @@ def FeatureUseTCCInDIV : SubtargetFeature< def FeatureMadd4 : SubtargetFeature<"nomadd4", "DisableMadd4", "true", "Disable 4-operand madd.fmt and related instructions">; +def FeatureMT : SubtargetFeature<"mt", "HasMT", "true", "Mips MT ASE">; + //===----------------------------------------------------------------------===// // Mips processors supported. //===----------------------------------------------------------------------===// diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index 40078fb771442..89a5854bede0f 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -240,7 +240,8 @@ def HasMSA : Predicate<"Subtarget->hasMSA()">, AssemblerPredicate<"FeatureMSA">; def HasMadd4 : Predicate<"!Subtarget->disableMadd4()">, AssemblerPredicate<"!FeatureMadd4">; - +def HasMT : Predicate<"Subtarget->hasMT()">, + AssemblerPredicate<"FeatureMT">; //===----------------------------------------------------------------------===// // Mips GPR size adjectives. @@ -382,6 +383,10 @@ class ASE_MSA64 { list<Predicate> InsnPredicates = [HasMSA, HasMips64]; } +class ASE_MT { + list<Predicate> InsnPredicates = [HasMT]; +} + // Class used for separating microMIPSr6 and microMIPS (r3) instructions. // It can be used only on instructions that don't inherit PredicateControl. 
class ISA_MICROMIPS_NOT_32R6_64R6 : PredicateControl { @@ -2919,6 +2924,10 @@ include "MipsMSAInstrInfo.td" include "MipsEVAInstrFormats.td" include "MipsEVAInstrInfo.td" +// MT +include "MipsMTInstrFormats.td" +include "MipsMTInstrInfo.td" + // Micromips include "MicroMipsInstrFormats.td" include "MicroMipsInstrInfo.td" diff --git a/lib/Target/Mips/MipsMTInstrFormats.td b/lib/Target/Mips/MipsMTInstrFormats.td new file mode 100644 index 0000000000000..edc0981e6278a --- /dev/null +++ b/lib/Target/Mips/MipsMTInstrFormats.td @@ -0,0 +1,99 @@ +//===-- MipsMTInstrFormats.td - Mips Instruction Formats ---*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Describe the MIPS MT instruction formats +// +// opcode - operation code. +// rt - destination register +// +//===----------------------------------------------------------------------===// + +class MipsMTInst : MipsInst<(outs), (ins), "", [], NoItinerary, FrmOther>, + PredicateControl { + let DecoderNamespace = "Mips"; + let EncodingPredicates = [HasStdEnc]; +} + +class OPCODE1<bits<1> Val> { + bits<1> Value = Val; +} + +def OPCODE_SC_D : OPCODE1<0b0>; +def OPCODE_SC_E : OPCODE1<0b1>; + +class FIELD5<bits<5> Val> { + bits<5> Value = Val; +} + +def FIELD5_1_DMT_EMT : FIELD5<0b00001>; +def FIELD5_2_DMT_EMT : FIELD5<0b01111>; +def FIELD5_1_2_DVPE_EVPE : FIELD5<0b00000>; +def FIELD5_MFTR : FIELD5<0b01000>; +def FIELD5_MTTR : FIELD5<0b01100>; + +class COP0_MFMC0_MT<FIELD5 Op1, FIELD5 Op2, OPCODE1 sc> : MipsMTInst { + bits<32> Inst; + + bits<5> rt; + let Inst{31-26} = 0b010000; // COP0 + let Inst{25-21} = 0b01011; // MFMC0 + let Inst{20-16} = rt; + let Inst{15-11} = Op1.Value; + let Inst{10-6} = Op2.Value; + let Inst{5} = sc.Value; + let Inst{4-3} = 0b00; + let Inst{2-0} = 0b001; +} + +class COP0_MFTTR_MT<FIELD5 Op> : MipsMTInst { + bits<32> Inst; + + bits<5> rt; + bits<5> rd; + bits<1> u; + bits<1> h; + bits<3> sel; + let Inst{31-26} = 0b010000; // COP0 + let Inst{25-21} = Op.Value; // MFTR/MTTR + let Inst{20-16} = rt; + let Inst{15-11} = rd; + let Inst{10-6} = 0b00000; // rx - currently unsupported. + let Inst{5} = u; + let Inst{4} = h; + let Inst{3} = 0b0; + let Inst{2-0} = sel; +} + +class SPECIAL3_MT_FORK : MipsMTInst { + bits<32> Inst; + + bits<5> rs; + bits<5> rt; + bits<5> rd; + let Inst{31-26} = 0b011111; // SPECIAL3 + let Inst{25-21} = rs; + let Inst{20-16} = rt; + let Inst{15-11} = rd; + let Inst{10-6} = 0b00000; + let Inst{5-0} = 0b001000; // FORK +} + +class SPECIAL3_MT_YIELD : MipsMTInst { + bits<32> Inst; + + bits<5> rs; + bits<5> rd; + let Inst{31-26} = 0b011111; // SPECIAL3 + let Inst{25-21} = rs; + let Inst{20-16} = 0b00000; + let Inst{15-11} = rd; + let Inst{10-6} = 0b00000; + let Inst{5-0} = 0b001001; // YIELD +} diff --git a/lib/Target/Mips/MipsMTInstrInfo.td b/lib/Target/Mips/MipsMTInstrInfo.td new file mode 100644 index 0000000000000..72e626cbec40a --- /dev/null +++ b/lib/Target/Mips/MipsMTInstrInfo.td @@ -0,0 +1,208 @@ +//===-- MipsMTInstrInfo.td - Mips MT Instruction Infos -----*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file describes the MIPS MT ASE as defined by MD00378 1.12. +// +// TODO: Add support for the microMIPS encodings for the MT ASE and add the +// instruction mappings. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// MIPS MT Instruction Encodings +//===----------------------------------------------------------------------===// + +class DMT_ENC : COP0_MFMC0_MT<FIELD5_1_DMT_EMT, FIELD5_2_DMT_EMT, + OPCODE_SC_D>; + +class EMT_ENC : COP0_MFMC0_MT<FIELD5_1_DMT_EMT, FIELD5_2_DMT_EMT, + OPCODE_SC_E>; + +class DVPE_ENC : COP0_MFMC0_MT<FIELD5_1_2_DVPE_EVPE, FIELD5_1_2_DVPE_EVPE, + OPCODE_SC_D>; + +class EVPE_ENC : COP0_MFMC0_MT<FIELD5_1_2_DVPE_EVPE, FIELD5_1_2_DVPE_EVPE, + OPCODE_SC_E>; + +class FORK_ENC : SPECIAL3_MT_FORK; + +class YIELD_ENC : SPECIAL3_MT_YIELD; + +class MFTR_ENC : COP0_MFTTR_MT<FIELD5_MFTR>; + +class MTTR_ENC : COP0_MFTTR_MT<FIELD5_MTTR>; + +//===----------------------------------------------------------------------===// +// MIPS MT Instruction Descriptions +//===----------------------------------------------------------------------===// + +class MT_1R_DESC_BASE<string instr_asm, InstrItinClass Itin = NoItinerary> { + dag OutOperandList = (outs GPR32Opnd:$rt); + dag InOperandList = (ins); + string AsmString = !strconcat(instr_asm, "\t$rt"); + list<dag> Pattern = []; + InstrItinClass Itinerary = Itin; +} + +class MFTR_DESC { + dag OutOperandList = (outs GPR32Opnd:$rd); + dag InOperandList = (ins GPR32Opnd:$rt, uimm1:$u, uimm3:$sel, uimm1:$h); + string AsmString = "mftr\t$rd, $rt, $u, $sel, $h"; + list<dag> Pattern = []; + InstrItinClass Itinerary = II_MFTR; +} + +class MTTR_DESC { + dag OutOperandList = (outs GPR32Opnd:$rd); + dag InOperandList = (ins GPR32Opnd:$rt, uimm1:$u, uimm3:$sel, uimm1:$h); + string AsmString = "mttr\t$rt, $rd, $u, $sel, $h"; + list<dag> Pattern = []; + InstrItinClass Itinerary = II_MTTR; +} + +class FORK_DESC { + dag OutOperandList = (outs GPR32Opnd:$rs, GPR32Opnd:$rd); + dag InOperandList = (ins GPR32Opnd:$rt); + string AsmString = "fork\t$rd, $rs, $rt"; + list<dag> Pattern = []; + InstrItinClass Itinerary = II_FORK; +} + +class YIELD_DESC { + dag OutOperandList = (outs GPR32Opnd:$rd); + dag InOperandList = (ins GPR32Opnd:$rs); + string AsmString = "yield\t$rd, $rs"; + list<dag> Pattern = []; + InstrItinClass Itinerary = II_YIELD; +} + +class DMT_DESC : MT_1R_DESC_BASE<"dmt", II_DMT>; + +class EMT_DESC : MT_1R_DESC_BASE<"emt", II_EMT>; + +class DVPE_DESC : MT_1R_DESC_BASE<"dvpe", II_DVPE>; + +class EVPE_DESC : MT_1R_DESC_BASE<"evpe", II_EVPE>; + +//===----------------------------------------------------------------------===// +// MIPS MT Instruction Definitions +//===----------------------------------------------------------------------===// +let hasSideEffects = 1, isNotDuplicable = 1, + AdditionalPredicates = [NotInMicroMips] in { + def DMT : DMT_ENC, DMT_DESC, ASE_MT; + + def EMT : EMT_ENC, EMT_DESC, ASE_MT; + + def DVPE : DVPE_ENC, DVPE_DESC, ASE_MT; + + def EVPE : EVPE_ENC, EVPE_DESC, ASE_MT; + + def FORK : FORK_ENC, FORK_DESC, ASE_MT; + + def YIELD : YIELD_ENC, YIELD_DESC, ASE_MT; + + def MFTR : MFTR_ENC, MFTR_DESC, ASE_MT; + + def MTTR : MTTR_ENC, MTTR_DESC, ASE_MT; +} + +//===----------------------------------------------------------------------===// +// MIPS MT Pseudo Instructions - used to support mftr & mttr aliases. 
+//===----------------------------------------------------------------------===// +def MFTC0 : MipsAsmPseudoInst<(outs GPR32Opnd:$rd), (ins COP0Opnd:$rt, + uimm3:$sel), + "mftc0 $rd, $rt, $sel">, ASE_MT; + +def MFTGPR : MipsAsmPseudoInst<(outs GPR32Opnd:$rd), (ins GPR32Opnd:$rt, + uimm3:$sel), + "mftgpr $rd, $rt">, ASE_MT; + +def MFTLO : MipsAsmPseudoInst<(outs GPR32Opnd:$rt), (ins ACC64DSPOpnd:$ac), + "mftlo $rt, $ac">, ASE_MT; + +def MFTHI : MipsAsmPseudoInst<(outs GPR32Opnd:$rt), (ins ACC64DSPOpnd:$ac), + "mfthi $rt, $ac">, ASE_MT; + +def MFTACX : MipsAsmPseudoInst<(outs GPR32Opnd:$rt), (ins ACC64DSPOpnd:$ac), + "mftacx $rt, $ac">, ASE_MT; + +def MFTDSP : MipsAsmPseudoInst<(outs GPR32Opnd:$rt), (ins), + "mftdsp $rt">, ASE_MT; + +def MFTC1 : MipsAsmPseudoInst<(outs GPR32Opnd:$rt), (ins FGR32Opnd:$ft), + "mftc1 $rt, $ft">, ASE_MT; + +def MFTHC1 : MipsAsmPseudoInst<(outs GPR32Opnd:$rt), (ins FGR32Opnd:$ft), + "mfthc1 $rt, $ft">, ASE_MT; + +def CFTC1 : MipsAsmPseudoInst<(outs GPR32Opnd:$rt), (ins FGRCCOpnd:$ft), + "cftc1 $rt, $ft">, ASE_MT; + + +def MTTC0 : MipsAsmPseudoInst<(outs COP0Opnd:$rd), (ins GPR32Opnd:$rt, + uimm3:$sel), + "mttc0 $rt, $rd, $sel">, ASE_MT; + +def MTTGPR : MipsAsmPseudoInst<(outs GPR32Opnd:$rt), (ins GPR32Opnd:$rd), + "mttgpr $rd, $rt">, ASE_MT; + +def MTTLO : MipsAsmPseudoInst<(outs ACC64DSPOpnd:$ac), (ins GPR32Opnd:$rt), + "mttlo $rt, $ac">, ASE_MT; + +def MTTHI : MipsAsmPseudoInst<(outs ACC64DSPOpnd:$ac), (ins GPR32Opnd:$rt), + "mtthi $rt, $ac">, ASE_MT; + +def MTTACX : MipsAsmPseudoInst<(outs ACC64DSPOpnd:$ac), (ins GPR32Opnd:$rt), + "mttacx $rt, $ac">, ASE_MT; + +def MTTDSP : MipsAsmPseudoInst<(outs), (ins GPR32Opnd:$rt), + "mttdsp $rt">, ASE_MT; + +def MTTC1 : MipsAsmPseudoInst<(outs FGR32Opnd:$ft), (ins GPR32Opnd:$rt), + "mttc1 $rt, $ft">, ASE_MT; + +def MTTHC1 : MipsAsmPseudoInst<(outs FGR32Opnd:$ft), (ins GPR32Opnd:$rt), + "mtthc1 $rt, $ft">, ASE_MT; + +def CTTC1 : MipsAsmPseudoInst<(outs FGRCCOpnd:$ft), (ins GPR32Opnd:$rt), + "cttc1 $rt, $ft">, ASE_MT; + +//===----------------------------------------------------------------------===// +// MIPS MT Instruction Aliases +//===----------------------------------------------------------------------===// + +let AdditionalPredicates = [NotInMicroMips] in { + def : MipsInstAlias<"dmt", (DMT ZERO), 1>, ASE_MT; + + def : MipsInstAlias<"emt", (EMT ZERO), 1>, ASE_MT; + + def : MipsInstAlias<"dvpe", (DVPE ZERO), 1>, ASE_MT; + + def : MipsInstAlias<"evpe", (EVPE ZERO), 1>, ASE_MT; + + def : MipsInstAlias<"yield $rs", (YIELD ZERO, GPR32Opnd:$rs), 1>, ASE_MT; + + def : MipsInstAlias<"mftc0 $rd, $rt", (MFTC0 GPR32Opnd:$rd, COP0Opnd:$rt, 0), + 1>, ASE_MT; + + def : MipsInstAlias<"mftlo $rt", (MFTLO GPR32Opnd:$rt, AC0), 1>, ASE_MT; + + def : MipsInstAlias<"mfthi $rt", (MFTHI GPR32Opnd:$rt, AC0), 1>, ASE_MT; + + def : MipsInstAlias<"mftacx $rt", (MFTACX GPR32Opnd:$rt, AC0), 1>, ASE_MT; + + def : MipsInstAlias<"mttc0 $rd, $rt", (MTTC0 COP0Opnd:$rt, GPR32Opnd:$rd, 0), + 1>, ASE_MT; + + def : MipsInstAlias<"mttlo $rt", (MTTLO AC0, GPR32Opnd:$rt), 1>, ASE_MT; + + def : MipsInstAlias<"mtthi $rt", (MTTHI AC0, GPR32Opnd:$rt), 1>, ASE_MT; + + def : MipsInstAlias<"mttacx $rt", (MTTACX AC0, GPR32Opnd:$rt), 1>, ASE_MT; +} diff --git a/lib/Target/Mips/MipsSchedule.td b/lib/Target/Mips/MipsSchedule.td index c0de59ba15f55..8ec55ab6284da 100644 --- a/lib/Target/Mips/MipsSchedule.td +++ b/lib/Target/Mips/MipsSchedule.td @@ -84,6 +84,7 @@ def II_DIVU : InstrItinClass; def II_DIV_D : InstrItinClass; def II_DIV_S : InstrItinClass; def II_DMFC0 
: InstrItinClass; +def II_DMT : InstrItinClass; def II_DMTC0 : InstrItinClass; def II_DMFC1 : InstrItinClass; def II_DMTC1 : InstrItinClass; @@ -113,8 +114,12 @@ def II_DSBH : InstrItinClass; def II_DSHD : InstrItinClass; def II_DSUBU : InstrItinClass; def II_DSUB : InstrItinClass; +def II_DVPE : InstrItinClass; +def II_EMT : InstrItinClass; +def II_EVPE : InstrItinClass; def II_EXT : InstrItinClass; // Any EXT instruction def II_FLOOR : InstrItinClass; +def II_FORK : InstrItinClass; def II_INS : InstrItinClass; // Any INS instruction def II_IndirectBranchPseudo : InstrItinClass; // Indirect branch pseudo. def II_J : InstrItinClass; @@ -221,6 +226,7 @@ def II_MFC1 : InstrItinClass; def II_MFHC1 : InstrItinClass; def II_MFC2 : InstrItinClass; def II_MFHI_MFLO : InstrItinClass; // mfhi and mflo +def II_MFTR : InstrItinClass; def II_MOD : InstrItinClass; def II_MODU : InstrItinClass; def II_MOVE : InstrItinClass; @@ -250,6 +256,7 @@ def II_MTC1 : InstrItinClass; def II_MTHC1 : InstrItinClass; def II_MTC2 : InstrItinClass; def II_MTHI_MTLO : InstrItinClass; // mthi and mtlo +def II_MTTR : InstrItinClass; def II_MUL : InstrItinClass; def II_MUH : InstrItinClass; def II_MUHU : InstrItinClass; @@ -345,6 +352,7 @@ def II_WRPGPR : InstrItinClass; def II_RDPGPR : InstrItinClass; def II_DVP : InstrItinClass; def II_EVP : InstrItinClass; +def II_YIELD : InstrItinClass; //===----------------------------------------------------------------------===// // Mips Generic instruction itineraries. @@ -386,6 +394,7 @@ def MipsGenericItineraries : ProcessorItineraries<[ALU, IMULDIV], [], [ InstrItinData<II_DCLZ , [InstrStage<1, [ALU]>]>, InstrItinData<II_DMOD , [InstrStage<17, [IMULDIV]>]>, InstrItinData<II_DMODU , [InstrStage<17, [IMULDIV]>]>, + InstrItinData<II_DMT , [InstrStage<2, [ALU]>]>, InstrItinData<II_DSLL , [InstrStage<1, [ALU]>]>, InstrItinData<II_DSLL32 , [InstrStage<1, [ALU]>]>, InstrItinData<II_DSRL , [InstrStage<1, [ALU]>]>, @@ -404,7 +413,11 @@ def MipsGenericItineraries : ProcessorItineraries<[ALU, IMULDIV], [], [ InstrItinData<II_DSHD , [InstrStage<1, [ALU]>]>, InstrItinData<II_DCLO , [InstrStage<1, [ALU]>]>, InstrItinData<II_DCLZ , [InstrStage<1, [ALU]>]>, + InstrItinData<II_DVPE , [InstrStage<2, [ALU]>]>, + InstrItinData<II_EMT , [InstrStage<2, [ALU]>]>, + InstrItinData<II_EVPE , [InstrStage<2, [ALU]>]>, InstrItinData<II_EXT , [InstrStage<1, [ALU]>]>, + InstrItinData<II_FORK , [InstrStage<1, [ALU]>]>, InstrItinData<II_INS , [InstrStage<1, [ALU]>]>, InstrItinData<II_LUI , [InstrStage<1, [ALU]>]>, InstrItinData<II_MOVE , [InstrStage<1, [ALU]>]>, @@ -653,12 +666,14 @@ def MipsGenericItineraries : ProcessorItineraries<[ALU, IMULDIV], [], [ InstrItinData<II_MFHC0 , [InstrStage<2, [ALU]>]>, InstrItinData<II_MFC1 , [InstrStage<2, [ALU]>]>, InstrItinData<II_MFC2 , [InstrStage<2, [ALU]>]>, + InstrItinData<II_MFTR , [InstrStage<2, [ALU]>]>, InstrItinData<II_MTC0 , [InstrStage<2, [ALU]>]>, InstrItinData<II_MTHC0 , [InstrStage<2, [ALU]>]>, InstrItinData<II_MTC1 , [InstrStage<2, [ALU]>]>, InstrItinData<II_MTC2 , [InstrStage<2, [ALU]>]>, InstrItinData<II_MFHC1 , [InstrStage<2, [ALU]>]>, InstrItinData<II_MTHC1 , [InstrStage<2, [ALU]>]>, + InstrItinData<II_MTTR , [InstrStage<2, [ALU]>]>, InstrItinData<II_CACHE , [InstrStage<1, [ALU]>]>, InstrItinData<II_PREF , [InstrStage<1, [ALU]>]>, InstrItinData<II_CACHEE , [InstrStage<1, [ALU]>]>, @@ -670,5 +685,6 @@ def MipsGenericItineraries : ProcessorItineraries<[ALU, IMULDIV], [], [ InstrItinData<II_WRPGPR , [InstrStage<1, [ALU]>]>, InstrItinData<II_RDPGPR , 
[InstrStage<1, [ALU]>]>, InstrItinData<II_DVP , [InstrStage<1, [ALU]>]>, - InstrItinData<II_EVP , [InstrStage<1, [ALU]>]> + InstrItinData<II_EVP , [InstrStage<1, [ALU]>]>, + InstrItinData<II_YIELD , [InstrStage<5, [ALU]>]> ]>; diff --git a/lib/Target/Mips/MipsScheduleGeneric.td b/lib/Target/Mips/MipsScheduleGeneric.td index 15a0401b781e5..89cda676441e7 100644 --- a/lib/Target/Mips/MipsScheduleGeneric.td +++ b/lib/Target/Mips/MipsScheduleGeneric.td @@ -187,7 +187,11 @@ def GenericIssueCOP0 : ProcResource<1> { let Super = GenericCOP0; } def GenericWriteCOP0TLB : SchedWriteRes<[GenericIssueCOP0]> { let Latency = 4; } def GenericWriteCOP0 : SchedWriteRes<[GenericIssueCOP0]> { let Latency = 3; } def GenericReadCOP0 : SchedWriteRes<[GenericIssueCOP0]> { let Latency = 2; } -def GnereicReadWritePGPR : SchedWriteRes<[GenericIssueCOP0]>; +def GenericReadWritePGPR : SchedWriteRes<[GenericIssueCOP0]>; +def GenericReadWriteCOP0Long : SchedWriteRes<[GenericIssueCOP0]> { + let Latency = 5; +} +def GenericWriteCOP0Short : SchedWriteRes<[GenericIssueCOP0]>; def : ItinRW<[GenericWriteCOP0TLB], [II_TLBP, II_TLBR, II_TLBWI, II_TLBWR]>; def : ItinRW<[GenericWriteCOP0TLB], [II_TLBINV, II_TLBINVF]>; @@ -261,6 +265,14 @@ def : ItinRW<[GenericWriteLoad], [II_LBE, II_LBUE, II_LHE, II_LHUE, II_LWE, def : ItinRW<[GenericWriteLoad], [II_LWLE, II_LWRE]>; +// MIPS MT instructions +// ==================== + +def : ItinRW<[GenericWriteMove], [II_DMT, II_DVPE, II_EMT, II_EVPE]>; + +def : ItinRW<[GenericReadWriteCOP0Long], [II_YIELD]>; +def : ItinRW<[GenericWriteCOP0Short], [II_FORK]>; + // MIPS32R6 and MIPS16e // ==================== diff --git a/lib/Target/Mips/MipsScheduleP5600.td b/lib/Target/Mips/MipsScheduleP5600.td index 882a241d1426a..fedfac24e4e74 100644 --- a/lib/Target/Mips/MipsScheduleP5600.td +++ b/lib/Target/Mips/MipsScheduleP5600.td @@ -19,7 +19,7 @@ def MipsP5600Model : SchedMachineModel { HasMips64, HasMips64r2, HasCnMips, InMicroMips, InMips16Mode, HasMicroMips32r6, HasMicroMips64r6, - HasDSP, HasDSPR2]; + HasDSP, HasDSPR2, HasMT]; } diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp index 154d5825427b3..eba21e0a1c672 100644 --- a/lib/Target/Mips/MipsSubtarget.cpp +++ b/lib/Target/Mips/MipsSubtarget.cpp @@ -70,7 +70,8 @@ MipsSubtarget::MipsSubtarget(const Triple &TT, StringRef CPU, StringRef FS, InMips16HardFloat(Mips16HardFloat), InMicroMipsMode(false), HasDSP(false), HasDSPR2(false), HasDSPR3(false), AllowMixed16_32(Mixed16_32 | Mips_Os16), Os16(Mips_Os16), HasMSA(false), UseTCCInDIV(false), HasSym32(false), - HasEVA(false), DisableMadd4(false), TM(TM), TargetTriple(TT), TSInfo(), + HasEVA(false), DisableMadd4(false), HasMT(false), TM(TM), + TargetTriple(TT), TSInfo(), InstrInfo( MipsInstrInfo::create(initializeSubtargetDependencies(CPU, FS, TM))), FrameLowering(MipsFrameLowering::create(*this)), diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h index ccd47f00c0d3b..7619e7b08612b 100644 --- a/lib/Target/Mips/MipsSubtarget.h +++ b/lib/Target/Mips/MipsSubtarget.h @@ -149,6 +149,9 @@ class MipsSubtarget : public MipsGenSubtargetInfo { // related instructions. bool DisableMadd4; + // HasMT -- support MT ASE. 
+ bool HasMT; + InstrItineraryData InstrItins; // We can override the determination of whether we are in mips16 mode @@ -259,6 +262,7 @@ public: bool hasMSA() const { return HasMSA; } bool disableMadd4() const { return DisableMadd4; } bool hasEVA() const { return HasEVA; } + bool hasMT() const { return HasMT; } bool useSmallSection() const { return UseSmallSection; } bool hasStandardEncoding() const { return !inMips16Mode(); } diff --git a/lib/Target/Mips/MipsTargetStreamer.h b/lib/Target/Mips/MipsTargetStreamer.h index 41ebe411b98d9..af24838665e1f 100644 --- a/lib/Target/Mips/MipsTargetStreamer.h +++ b/lib/Target/Mips/MipsTargetStreamer.h @@ -40,6 +40,8 @@ public: virtual void emitDirectiveSetNoMacro(); virtual void emitDirectiveSetMsa(); virtual void emitDirectiveSetNoMsa(); + virtual void emitDirectiveSetMt(); + virtual void emitDirectiveSetNoMt(); virtual void emitDirectiveSetAt(); virtual void emitDirectiveSetAtWithArg(unsigned RegNo); virtual void emitDirectiveSetNoAt(); @@ -96,6 +98,7 @@ public: virtual void emitDirectiveModuleOddSPReg(); virtual void emitDirectiveModuleSoftFloat(); virtual void emitDirectiveModuleHardFloat(); + virtual void emitDirectiveModuleMT(); virtual void emitDirectiveSetFp(MipsABIFlagsSection::FpABIKind Value); virtual void emitDirectiveSetOddSPReg(); virtual void emitDirectiveSetNoOddSPReg(); @@ -116,6 +119,9 @@ public: SMLoc IDLoc, const MCSubtargetInfo *STI); void emitRRI(unsigned Opcode, unsigned Reg0, unsigned Reg1, int16_t Imm, SMLoc IDLoc, const MCSubtargetInfo *STI); + void emitRRIII(unsigned Opcode, unsigned Reg0, unsigned Reg1, int16_t Imm0, + int16_t Imm1, int16_t Imm2, SMLoc IDLoc, + const MCSubtargetInfo *STI); void emitAddu(unsigned DstReg, unsigned SrcReg, unsigned TrgReg, bool Is64Bit, const MCSubtargetInfo *STI); void emitDSLL(unsigned DstReg, unsigned SrcReg, int16_t ShiftAmount, @@ -204,6 +210,8 @@ public: void emitDirectiveSetNoMacro() override; void emitDirectiveSetMsa() override; void emitDirectiveSetNoMsa() override; + void emitDirectiveSetMt() override; + void emitDirectiveSetNoMt() override; void emitDirectiveSetAt() override; void emitDirectiveSetAtWithArg(unsigned RegNo) override; void emitDirectiveSetNoAt() override; @@ -267,6 +275,7 @@ public: void emitDirectiveModuleOddSPReg() override; void emitDirectiveModuleSoftFloat() override; void emitDirectiveModuleHardFloat() override; + void emitDirectiveModuleMT() override; void emitDirectiveSetFp(MipsABIFlagsSection::FpABIKind Value) override; void emitDirectiveSetOddSPReg() override; void emitDirectiveSetNoOddSPReg() override; diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp index f26b9a7cb8dd3..f800d91f40933 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -62,7 +62,6 @@ #include <utility>
#include <vector>
-#undef DEBUG_TYPE
#define DEBUG_TYPE "nvptx-lower"
using namespace llvm;
@@ -2456,7 +2455,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( // v2f16 was loaded as an i32. Now we must bitcast it back.
else if (EltVT == MVT::v2f16)
Elt = DAG.getNode(ISD::BITCAST, dl, MVT::v2f16, Elt);
- // Extend the element if necesary (e.g. an i8 is loaded
+ // Extend the element if necessary (e.g. an i8 is loaded
// into an i16 register)
if (Ins[InsIdx].VT.isInteger() &&
Ins[InsIdx].VT.getSizeInBits() > LoadVT.getSizeInBits()) {
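The hunk above is cut off inside the re-extension step: an element that was loaded in a widened register (e.g. an i8 loaded into an i16) must be extended back out to the declared argument type. A minimal sketch of that step follows; the choice of extension opcode from the argument's ext flags is an assumption for illustration, not a quote of the truncated function body:

// Sketch only: widen Elt from LoadVT to the declared argument type.
// Ins, InsIdx, Elt, LoadVT, DAG and dl stand for the surrounding
// SelectionDAG state visible in the hunk above.
if (Ins[InsIdx].VT.isInteger() &&
    Ins[InsIdx].VT.getSizeInBits() > LoadVT.getSizeInBits()) {
  unsigned Extend = Ins[InsIdx].Flags.isSExt() ? ISD::SIGN_EXTEND
                                               : ISD::ZERO_EXTEND;
  Elt = DAG.getNode(Extend, dl, Ins[InsIdx].VT, Elt);
}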
diff --git a/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp b/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp index 3be291b48b8f2..989f0a3aba2f6 100644 --- a/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp +++ b/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp @@ -14,6 +14,7 @@ //===----------------------------------------------------------------------===// #include "NVPTXLowerAggrCopies.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/StackProtector.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" @@ -42,6 +43,7 @@ struct NVPTXLowerAggrCopies : public FunctionPass { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addPreserved<StackProtector>(); + AU.addRequired<TargetTransformInfoWrapperPass>(); } bool runOnFunction(Function &F) override; @@ -61,6 +63,8 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) { const DataLayout &DL = F.getParent()->getDataLayout(); LLVMContext &Context = F.getParent()->getContext(); + const TargetTransformInfo &TTI = + getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); // Collect all aggregate loads and mem* calls. for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) { @@ -104,15 +108,26 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) { Value *SrcAddr = LI->getOperand(0); Value *DstAddr = SI->getOperand(1); unsigned NumLoads = DL.getTypeStoreSize(LI->getType()); - Value *CopyLen = ConstantInt::get(Type::getInt32Ty(Context), NumLoads); - - createMemCpyLoop(/* ConvertedInst */ SI, - /* SrcAddr */ SrcAddr, /* DstAddr */ DstAddr, - /* CopyLen */ CopyLen, - /* SrcAlign */ LI->getAlignment(), - /* DestAlign */ SI->getAlignment(), - /* SrcIsVolatile */ LI->isVolatile(), - /* DstIsVolatile */ SI->isVolatile()); + ConstantInt *CopyLen = + ConstantInt::get(Type::getInt32Ty(Context), NumLoads); + + if (!TTI.useWideIRMemcpyLoopLowering()) { + createMemCpyLoop(/* ConvertedInst */ SI, + /* SrcAddr */ SrcAddr, /* DstAddr */ DstAddr, + /* CopyLen */ CopyLen, + /* SrcAlign */ LI->getAlignment(), + /* DestAlign */ SI->getAlignment(), + /* SrcIsVolatile */ LI->isVolatile(), + /* DstIsVolatile */ SI->isVolatile()); + } else { + createMemCpyLoopKnownSize(/* ConvertedInst */ SI, + /* SrcAddr */ SrcAddr, /* DstAddr */ DstAddr, + /* CopyLen */ CopyLen, + /* SrcAlign */ LI->getAlignment(), + /* DestAlign */ SI->getAlignment(), + /* SrcIsVolatile */ LI->isVolatile(), + /* DstIsVolatile */ SI->isVolatile(), TTI); + } SI->eraseFromParent(); LI->eraseFromParent(); @@ -121,7 +136,7 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) { // Transform mem* intrinsic calls. for (MemIntrinsic *MemCall : MemCalls) { if (MemCpyInst *Memcpy = dyn_cast<MemCpyInst>(MemCall)) { - expandMemCpyAsLoop(Memcpy); + expandMemCpyAsLoop(Memcpy, TTI); } else if (MemMoveInst *Memmove = dyn_cast<MemMoveInst>(MemCall)) { expandMemMoveAsLoop(Memmove); } else if (MemSetInst *Memset = dyn_cast<MemSetInst>(MemCall)) { diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp index 7393f3d7a08a9..bdad2fe8714fd 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp @@ -115,7 +115,7 @@ public: void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, const MCValue &Target, MutableArrayRef<char> Data, - uint64_t Value, bool IsPCRel) const override { + uint64_t Value, bool IsResolved) const override { Value = adjustFixupValue(Fixup.getKind(), Value); if (!Value) return; // Doesn't change encoding. 
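For intuition, the two NVPTXLowerAggrCopies paths above differ only in copy granularity: createMemCpyLoop copies one byte per iteration, while createMemCpyLoopKnownSize copies a wide, target-chosen chunk and then the residual bytes. The C++ below is an illustrative equivalent of the loops that get emitted, not the actual IR-building code; the fixed 8-byte chunk width is an assumption (the real width comes from the target's TTI):

#include <cstring>

// Equivalent of the narrow lowering: one byte per iteration.
void copyNarrow(char *Dst, const char *Src, unsigned Len) {
  for (unsigned I = 0; I < Len; ++I)
    Dst[I] = Src[I];
}

// Equivalent of the wide lowering: 8-byte chunks, then the residue.
void copyWide(char *Dst, const char *Src, unsigned Len) {
  unsigned I = 0;
  for (; I + 8 <= Len; I += 8)
    std::memcpy(Dst + I, Src + I, 8); // stands in for one wide load/store
  for (; I < Len; ++I)                // residual loop for the tail bytes
    Dst[I] = Src[I];
}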
diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp index 094d3e6a61b5a..53f33ac1fc0ed 100644 --- a/lib/Target/PowerPC/PPCCTRLoops.cpp +++ b/lib/Target/PowerPC/PPCCTRLoops.cpp @@ -607,7 +607,10 @@ bool PPCCTRLoops::convertToCTRLoop(Loop *L) { // The old condition may be dead now, and may have even created a dead PHI // (the original induction variable). RecursivelyDeleteTriviallyDeadInstructions(OldCond); - DeleteDeadPHIs(CountedExitBlock); + // Run through the basic blocks of the loop and see if any of them have dead + // PHIs that can be removed. + for (auto I : L->blocks()) + DeleteDeadPHIs(I); ++NumCTRLoops; return MadeChange; diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp index c2c115cb6dafa..b49c3345a17dd 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -435,22 +435,19 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF, const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); - // If we are a leaf function, and use up to 224 bytes of stack space, - // don't have a frame pointer, calls, or dynamic alloca then we do not need - // to adjust the stack pointer (we fit in the Red Zone). - // The 32-bit SVR4 ABI has no Red Zone. However, it can still generate - // stackless code if all local vars are reg-allocated. - bool DisableRedZone = MF.getFunction()->hasFnAttribute(Attribute::NoRedZone); unsigned LR = RegInfo->getRARegister(); - if (!DisableRedZone && - (Subtarget.isPPC64() || // 32-bit SVR4, no stack- - !Subtarget.isSVR4ABI() || // allocated locals. - FrameSize == 0) && - FrameSize <= 224 && // Fits in red zone. - !MFI.hasVarSizedObjects() && // No dynamic alloca. - !MFI.adjustsStack() && // No calls. - !MustSaveLR(MF, LR) && - !RegInfo->hasBasePointer(MF)) { // No special alignment. + bool DisableRedZone = MF.getFunction()->hasFnAttribute(Attribute::NoRedZone); + bool CanUseRedZone = !MFI.hasVarSizedObjects() && // No dynamic alloca. + !MFI.adjustsStack() && // No calls. + !MustSaveLR(MF, LR) && // No need to save LR. + !RegInfo->hasBasePointer(MF); // No special alignment. + + // Note: for PPC32 SVR4ABI (Non-DarwinABI), we can still generate stackless + // code if all local vars are reg-allocated. + bool FitsInRedZone = FrameSize <= Subtarget.getRedZoneSize(); + + // Check whether we can skip adjusting the stack pointer (by using red zone). + if (!DisableRedZone && CanUseRedZone && FitsInRedZone) { // No need for frame if (UpdateMF) MFI.setStackSize(0); @@ -1869,8 +1866,13 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF, } if (HasVRSaveArea) { - // Insert alignment padding, we need 16-byte alignment. - LowerBound = (LowerBound - 15) & ~(15); + // Insert alignment padding, we need 16-byte alignment. Note: for a positive + // number the alignment formula is y = (x + (n-1)) & ~(n-1), but since we are + // using a negative number here (the stack grows downward), we should use the + // formula y = x & ~(n-1), where x is the size before aligning, n is the + // alignment size (n = 16 here) and y is the size after aligning. 
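+ // For example: LowerBound = -100 gives -100 & ~(15) = -112, which is + // 16-byte aligned and further from the stack base; the extra 12 bytes are + // the alignment padding.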
+ assert(LowerBound <= 0 && "Expect LowerBound to have a non-positive value!"); + LowerBound &= ~(15); for (unsigned i = 0, e = VRegs.size(); i != e; ++i) { int FI = VRegs[i].getFrameIdx(); diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 535b9deaefac3..3aaf7ef2c2a02 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -419,25 +419,6 @@ SDNode *PPCDAGToDAGISel::getGlobalBaseReg() { .getNode(); } -/// isIntS16Immediate - This method tests to see if the node is either a 32-bit -/// or 64-bit immediate, and if the value can be accurately represented as a -/// sign extension from a 16-bit value. If so, this returns true and the -/// immediate. -static bool isIntS16Immediate(SDNode *N, short &Imm) { - if (N->getOpcode() != ISD::Constant) - return false; - - Imm = (short)cast<ConstantSDNode>(N)->getZExtValue(); - if (N->getValueType(0) == MVT::i32) - return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue(); - else - return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue(); -} - -static bool isIntS16Immediate(SDValue Op, short &Imm) { - return isIntS16Immediate(Op.getNode(), Imm); -} - /// isInt32Immediate - This method tests to see if the node is a 32-bit constant /// operand. If so Imm will receive the 32-bit value. static bool isInt32Immediate(SDNode *N, unsigned &Imm) { @@ -728,7 +709,10 @@ static uint64_t Rot64(uint64_t Imm, unsigned R) { static unsigned getInt64Count(int64_t Imm) { unsigned Count = getInt64CountDirect(Imm); - if (Count == 1) + + // If the instruction count is 1 or 2, we do not need further analysis + // since rotate + load constant requires at least 2 instructions. + if (Count <= 2) return Count; for (unsigned r = 1; r < 63; ++r) { @@ -838,7 +822,10 @@ static SDNode *getInt64Direct(SelectionDAG *CurDAG, const SDLoc &dl, static SDNode *getInt64(SelectionDAG *CurDAG, const SDLoc &dl, int64_t Imm) { unsigned Count = getInt64CountDirect(Imm); - if (Count == 1) + + // If the instruction count is 1 or 2, we do not need further analysis + // since rotate + load constant requires at least 2 instructions. 
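+ // (A rotate-based sequence needs at least a materialized constant plus the + // rotate itself, so a direct sequence of 2 instructions cannot be beaten.)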
+ if (Count <= 2) return getInt64Direct(CurDAG, dl, Imm); unsigned RMin = 0; @@ -2126,7 +2113,7 @@ SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, getI32Imm(Imm & 0xFFFF, dl)), 0); Opc = PPC::CMPLW; } else { - short SImm; + int16_t SImm; if (isIntS16Immediate(RHS, SImm)) return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS, getI32Imm((int)SImm & 0xFFFF, @@ -2173,7 +2160,7 @@ SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, getI64Imm(Imm & 0xFFFF, dl)), 0); Opc = PPC::CMPLD; } else { - short SImm; + int16_t SImm; if (isIntS16Immediate(RHS, SImm)) return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS, getI64Imm(SImm & 0xFFFF, dl)), @@ -3323,7 +3310,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) { if (tryLogicOpOfCompares(N)) return; - short Imm; + int16_t Imm; if (N->getOperand(0)->getOpcode() == ISD::FrameIndex && isIntS16Immediate(N->getOperand(1), Imm)) { KnownBits LHSKnown; @@ -3346,7 +3333,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) { break; } case ISD::ADD: { - short Imm; + int16_t Imm; if (N->getOperand(0)->getOpcode() == ISD::FrameIndex && isIntS16Immediate(N->getOperand(1), Imm)) { selectFrameIndex(N, N->getOperand(0).getNode(), (int)Imm); @@ -4034,11 +4021,13 @@ void PPCDAGToDAGISel::foldBoolExts(SDValue &Res, SDNode *&N) { O0.getNode(), O1.getNode()); }; + // FIXME: When the semantics of the interaction between select and undef + // are clearly defined, it may turn out to be unnecessary to break here. SDValue TrueRes = TryFold(ConstTrue); - if (!TrueRes) + if (!TrueRes || TrueRes.isUndef()) break; SDValue FalseRes = TryFold(ConstFalse); - if (!FalseRes) + if (!FalseRes || FalseRes.isUndef()) break; // For us to materialize these using one instruction, we must be able to diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 72f14e9691382..0e069ec1665f7 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -136,6 +136,10 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, addRegisterClass(MVT::f64, &PPC::F8RCRegClass); } + // Match BITREVERSE to customized fast code sequence in the td file. + setOperationAction(ISD::BITREVERSE, MVT::i32, Legal); + setOperationAction(ISD::BITREVERSE, MVT::i64, Legal); + // PowerPC has an i16 but no i8 (or i1) SEXTLOAD. for (MVT VT : MVT::integer_valuetypes()) { setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); @@ -1168,6 +1172,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::LXSIZX: return "PPCISD::LXSIZX"; case PPCISD::STXSIX: return "PPCISD::STXSIX"; case PPCISD::VEXTS: return "PPCISD::VEXTS"; + case PPCISD::SExtVElems: return "PPCISD::SExtVElems"; case PPCISD::LXVD2X: return "PPCISD::LXVD2X"; case PPCISD::STXVD2X: return "PPCISD::STXVD2X"; case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH"; @@ -2028,17 +2033,17 @@ int PPC::isQVALIGNIShuffleMask(SDNode *N) { /// or 64-bit immediate, and if the value can be accurately represented as a /// sign extension from a 16-bit value. If so, this returns true and the /// immediate. 
-static bool isIntS16Immediate(SDNode *N, short &Imm) { +bool llvm::isIntS16Immediate(SDNode *N, int16_t &Imm) { if (!isa<ConstantSDNode>(N)) return false; - Imm = (short)cast<ConstantSDNode>(N)->getZExtValue(); + Imm = (int16_t)cast<ConstantSDNode>(N)->getZExtValue(); if (N->getValueType(0) == MVT::i32) return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue(); else return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue(); } -static bool isIntS16Immediate(SDValue Op, short &Imm) { +bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) { return isIntS16Immediate(Op.getNode(), Imm); } @@ -2048,7 +2053,7 @@ static bool isIntS16Immediate(SDValue Op, short &Imm) { bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const { - short imm = 0; + int16_t imm = 0; if (N.getOpcode() == ISD::ADD) { if (isIntS16Immediate(N.getOperand(1), imm)) return false; // r+i @@ -2138,7 +2143,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp, return false; if (N.getOpcode() == ISD::ADD) { - short imm = 0; + int16_t imm = 0; if (isIntS16Immediate(N.getOperand(1), imm) && (!Aligned || (imm & 3) == 0)) { Disp = DAG.getTargetConstant(imm, dl, N.getValueType()); @@ -2162,7 +2167,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp, return true; // [&g+r] } } else if (N.getOpcode() == ISD::OR) { - short imm = 0; + int16_t imm = 0; if (isIntS16Immediate(N.getOperand(1), imm) && (!Aligned || (imm & 3) == 0)) { // If this is an or of disjoint bitfields, we can codegen this as an add @@ -2190,7 +2195,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp, // If this address fits entirely in a 16-bit sext immediate field, codegen // this as "d, 0" - short Imm; + int16_t Imm; if (isIntS16Immediate(CN, Imm) && (!Aligned || (Imm & 3) == 0)) { Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0)); Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO, @@ -2235,10 +2240,15 @@ bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base, if (SelectAddressRegReg(N, Base, Index, DAG)) return true; - // If the operand is an addition, always emit this as [r+r], since this is - // better (for code size, and execution, as the memop does the add for free) - // than emitting an explicit add. - if (N.getOpcode() == ISD::ADD) { + // If the address is the result of an add, we will utilize the fact that the + // address calculation includes an implicit add. However, we can reduce + // register pressure if we do not materialize a constant just for use as the + // index register. We only get rid of the add if it is not an add of a + // value and a 16-bit signed constant and both have a single use. + int16_t imm = 0; + if (N.getOpcode() == ISD::ADD && + (!isIntS16Immediate(N.getOperand(1), imm) || + !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) { Base = N.getOperand(0); Index = N.getOperand(1); return true; @@ -6422,7 +6432,7 @@ PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); - // Get the corect type for integers. + // Get the correct type for integers. EVT IntVT = Op.getValueType(); // Get the inputs. @@ -6439,7 +6449,7 @@ SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, // When we pop the dynamic allocation we need to restore the SP link. SDLoc dl(Op); - // Get the corect type for pointers. + // Get the correct type for pointers. 
EVT PtrVT = getPointerTy(DAG.getDataLayout()); // Construct the stack pointer operand. @@ -6514,7 +6524,7 @@ SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SDValue Size = Op.getOperand(1); SDLoc dl(Op); - // Get the corect type for pointers. + // Get the correct type for pointers. EVT PtrVT = getPointerTy(DAG.getDataLayout()); // Negate the size. SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT, @@ -6645,6 +6655,7 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { default: break; // SETUO etc aren't handled by fsel. case ISD::SETNE: std::swap(TV, FV); + LLVM_FALLTHROUGH; case ISD::SETEQ: if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS); @@ -6656,6 +6667,7 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { case ISD::SETULT: case ISD::SETLT: std::swap(TV, FV); // fsel is natively setge, swap operands for setlt + LLVM_FALLTHROUGH; case ISD::SETOGE: case ISD::SETGE: if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits @@ -6664,6 +6676,7 @@ case ISD::SETUGT: case ISD::SETGT: std::swap(TV, FV); // fsel is natively setge, swap operands for setlt + LLVM_FALLTHROUGH; case ISD::SETOLE: case ISD::SETLE: if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits @@ -6677,6 +6690,7 @@ default: break; // SETUO etc aren't handled by fsel. case ISD::SETNE: std::swap(TV, FV); + LLVM_FALLTHROUGH; case ISD::SETEQ: Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags); if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits @@ -11311,6 +11325,132 @@ static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) { return SDValue(); } +// This function adds the required vector_shuffle needed to get +// the elements of the vector extract in the correct position +// as specified by the CorrectElems encoding. +static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG, + SDValue Input, uint64_t Elems, + uint64_t CorrectElems) { + SDLoc dl(N); + + unsigned NumElems = Input.getValueType().getVectorNumElements(); + SmallVector<int, 16> ShuffleMask(NumElems, -1); + + // Knowing the element indices being extracted from the original + // vector and the order in which they're being inserted, just put + // them at element indices required for the instruction. + for (unsigned i = 0; i < N->getNumOperands(); i++) { + if (DAG.getDataLayout().isLittleEndian()) + ShuffleMask[CorrectElems & 0xF] = Elems & 0xF; + else + ShuffleMask[(CorrectElems & 0xF0) >> 4] = (Elems & 0xF0) >> 4; + CorrectElems = CorrectElems >> 8; + Elems = Elems >> 8; + } + + SDValue Shuffle = + DAG.getVectorShuffle(Input.getValueType(), dl, Input, + DAG.getUNDEF(Input.getValueType()), ShuffleMask); + + EVT Ty = N->getValueType(0); + SDValue BV = DAG.getNode(PPCISD::SExtVElems, dl, Ty, Shuffle); + return BV; +} + +// Look for build vector patterns where input operands come from sign +// extended vector_extract elements of specific indices. If the correct indices +// aren't used, add a vector shuffle to fix up the indices and create a new +// PPCISD::SExtVElems node which selects the vector sign extend instructions +// during instruction selection. 
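+// For example (LE): a build_vector of (sext (extractelt v16i8:$v, 0)) and +// (sext (extractelt v16i8:$v, 8)) already uses the byte-to-doubleword indices +// 0x0 and 0x8, so it can be selected directly as a vector sign-extend of $v.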
+static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG) { + // This array encodes the indices that the vector sign extend instructions + // extract from when extending from one type to another for both BE and LE. + // The right nibble of each byte corresponds to the LE indices, + // and the left nibble of each byte corresponds to the BE indices. + // For example: 0x3074B8FC byte->word + // For LE: the allowed indices are: 0x0,0x4,0x8,0xC + // For BE: the allowed indices are: 0x3,0x7,0xB,0xF + // For example: 0x000070F8 byte->double word + // For LE: the allowed indices are: 0x0,0x8 + // For BE: the allowed indices are: 0x7,0xF + uint64_t TargetElems[] = { + 0x3074B8FC, // b->w + 0x000070F8, // b->d + 0x10325476, // h->w + 0x00003074, // h->d + 0x00001032, // w->d + }; + + uint64_t Elems = 0; + int Index; + SDValue Input; + + auto isSExtOfVecExtract = [&](SDValue Op) -> bool { + if (!Op) + return false; + if (Op.getOpcode() != ISD::SIGN_EXTEND) + return false; + + SDValue Extract = Op.getOperand(0); + if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT) + return false; + + ConstantSDNode *ExtOp = dyn_cast<ConstantSDNode>(Extract.getOperand(1)); + if (!ExtOp) + return false; + + Index = ExtOp->getZExtValue(); + if (Input && Input != Extract.getOperand(0)) + return false; + + if (!Input) + Input = Extract.getOperand(0); + + Elems = Elems << 8; + Index = DAG.getDataLayout().isLittleEndian() ? Index : Index << 4; + Elems |= Index; + + return true; + }; + + // If the build vector operands aren't sign extended vector extracts + // of the same input vector, then return. + for (unsigned i = 0; i < N->getNumOperands(); i++) { + if (!isSExtOfVecExtract(N->getOperand(i))) { + return SDValue(); + } + } + + // If the vector extract indices are not correct, add the appropriate + // vector_shuffle. + int TgtElemArrayIdx; + int InputSize = Input.getValueType().getScalarSizeInBits(); + int OutputSize = N->getValueType(0).getScalarSizeInBits(); + if (InputSize + OutputSize == 40) + TgtElemArrayIdx = 0; + else if (InputSize + OutputSize == 72) + TgtElemArrayIdx = 1; + else if (InputSize + OutputSize == 48) + TgtElemArrayIdx = 2; + else if (InputSize + OutputSize == 80) + TgtElemArrayIdx = 3; + else if (InputSize + OutputSize == 96) + TgtElemArrayIdx = 4; + else + return SDValue(); + + uint64_t CorrectElems = TargetElems[TgtElemArrayIdx]; + CorrectElems = DAG.getDataLayout().isLittleEndian() + ? CorrectElems & 0x0F0F0F0F0F0F0F0F + : CorrectElems & 0xF0F0F0F0F0F0F0F0; + if (Elems != CorrectElems) { + return addShuffleForVecExtend(N, DAG, Input, Elems, CorrectElems); + } + + // Regular lowering will catch cases where a shuffle is not needed. + return SDValue(); +} + SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N, DAGCombinerInfo &DCI) const { assert(N->getOpcode() == ISD::BUILD_VECTOR && @@ -11338,6 +11478,15 @@ SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N, if (Reduced) return Reduced; + // If we're building a vector out of extended elements from another vector, + // we have P9 vector integer extend instructions. 
+ if (Subtarget.hasP9Altivec()) { + Reduced = combineBVOfVecSExt(N, DAG); + if (Reduced) + return Reduced; + } + + if (N->getValueType(0) != MVT::v2f64) return SDValue(); diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index a5108727bb4b1..821927d3b1576 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -67,6 +67,10 @@ namespace llvm { /// VSFRC that is sign-extended from ByteWidth to a 64-bit integer. VEXTS, + /// SExtVElems takes an input vector of a smaller type and sign + /// extends to an output vector of a larger type. + SExtVElems, + /// Reciprocal estimate instructions (unary FP ops). FRE, FRSQRTE, @@ -1092,6 +1096,9 @@ namespace llvm { ISD::ArgFlagsTy &ArgFlags, CCState &State); + bool isIntS16Immediate(SDNode *N, int16_t &Imm); + bool isIntS16Immediate(SDValue Op, int16_t &Imm); + } // end namespace llvm #endif // LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index 47d59c25392a2..6d9f55206b6af 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -32,6 +32,9 @@ def SDT_PPCstxsix : SDTypeProfile<0, 3, [ def SDT_PPCVexts : SDTypeProfile<1, 2, [ SDTCisVT<0, f64>, SDTCisVT<1, f64>, SDTCisPtrTy<2> ]>; +def SDT_PPCSExtVElems : SDTypeProfile<1, 1, [ + SDTCisVec<0>, SDTCisVec<1> +]>; def SDT_PPCCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32>, SDTCisVT<1, i32> ]>; @@ -131,6 +134,7 @@ def PPClxsizx : SDNode<"PPCISD::LXSIZX", SDT_PPCLxsizx, def PPCstxsix : SDNode<"PPCISD::STXSIX", SDT_PPCstxsix, [SDNPHasChain, SDNPMayStore]>; def PPCVexts : SDNode<"PPCISD::VEXTS", SDT_PPCVexts, []>; +def PPCSExtVElems : SDNode<"PPCISD::SExtVElems", SDT_PPCSExtVElems, []>; // Extract FPSCR (not modeled at the DAG level). 
def PPCmffs : SDNode<"PPCISD::MFFS", @@ -4450,3 +4454,190 @@ def MSGSYNC : XForm_0<31, 886, (outs), (ins), "msgsync", IIC_SprMSGSYNC, []>; def STOP : XForm_0<19, 370, (outs), (ins), "stop", IIC_SprSTOP, []>; } // IsISA3_0 + +// Fast 32-bit reverse bits algorithm: +// Step 1: 1-bit swap (swap odd 1-bit and even 1-bit): +// n = ((n >> 1) & 0x55555555) | ((n << 1) & 0xAAAAAAAA); +// Step 2: 2-bit swap (swap odd 2-bit and even 2-bit): +// n = ((n >> 2) & 0x33333333) | ((n << 2) & 0xCCCCCCCC); +// Step 3: 4-bit swap (swap odd 4-bit and even 4-bit): +// n = ((n >> 4) & 0x0F0F0F0F) | ((n << 4) & 0xF0F0F0F0); +// Step 4: byte reverse (Suppose n = [B1,B2,B3,B4]): +// Step 4.1: Put B4,B2 in the right position (rotate left 3 bytes): +// n' = (n rotl 24); After which n' = [B4, B1, B2, B3] +// Step 4.2: Insert B3 to the right position: +// n' = rlwimi n', n, 8, 8, 15; After which n' = [B4, B3, B2, B3] +// Step 4.3: Insert B1 to the right position: +// n' = rlwimi n', n, 8, 24, 31; After which n' = [B4, B3, B2, B1] +def MaskValues { + dag Lo1 = (ORI (LIS 0x5555), 0x5555); + dag Hi1 = (ORI (LIS 0xAAAA), 0xAAAA); + dag Lo2 = (ORI (LIS 0x3333), 0x3333); + dag Hi2 = (ORI (LIS 0xCCCC), 0xCCCC); + dag Lo4 = (ORI (LIS 0x0F0F), 0x0F0F); + dag Hi4 = (ORI (LIS 0xF0F0), 0xF0F0); +} + +def Shift1 { + dag Right = (RLWINM $A, 31, 1, 31); + dag Left = (RLWINM $A, 1, 0, 30); +} + +def Swap1 { + dag Bit = (OR (AND Shift1.Right, MaskValues.Lo1), + (AND Shift1.Left, MaskValues.Hi1)); +} + +def Shift2 { + dag Right = (RLWINM Swap1.Bit, 30, 2, 31); + dag Left = (RLWINM Swap1.Bit, 2, 0, 29); +} + +def Swap2 { + dag Bits = (OR (AND Shift2.Right, MaskValues.Lo2), + (AND Shift2.Left, MaskValues.Hi2)); +} + +def Shift4 { + dag Right = (RLWINM Swap2.Bits, 28, 4, 31); + dag Left = (RLWINM Swap2.Bits, 4, 0, 27); +} + +def Swap4 { + dag Bits = (OR (AND Shift4.Right, MaskValues.Lo4), + (AND Shift4.Left, MaskValues.Hi4)); +} + +def Rotate { + dag Left3Bytes = (RLWINM Swap4.Bits, 24, 0, 31); +} + +def RotateInsertByte3 { + dag Left = (RLWIMI Rotate.Left3Bytes, Swap4.Bits, 8, 8, 15); +} + +def RotateInsertByte1 { + dag Left = (RLWIMI RotateInsertByte3.Left, Swap4.Bits, 8, 24, 31); +} + +def : Pat<(i32 (bitreverse i32:$A)), + (RLDICL_32 RotateInsertByte1.Left, 0, 32)>; + +// Fast 64-bit reverse bits algorithm: +// Step 1: 1-bit swap (swap odd 1-bit and even 1-bit): +// n = ((n >> 1) & 0x5555555555555555) | ((n << 1) & 0xAAAAAAAAAAAAAAAA); +// Step 2: 2-bit swap (swap odd 2-bit and even 2-bit): +// n = ((n >> 2) & 0x3333333333333333) | ((n << 2) & 0xCCCCCCCCCCCCCCCC); +// Step 3: 4-bit swap (swap odd 4-bit and even 4-bit): +// n = ((n >> 4) & 0x0F0F0F0F0F0F0F0F) | ((n << 4) & 0xF0F0F0F0F0F0F0F0); +// Step 4: byte reverse (Suppose n = [B1,B2,B3,B4,B5,B6,B7,B8]): +// Apply the same byte reverse algorithm mentioned above for the fast 32-bit +// reverse to both the high 32 bit and low 32 bit of the 64 bit value. And +// then OR them together to get the final result. 
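+// For example: n = 0x0000000000000001 (only the least significant bit set) +// reverses to 0x8000000000000000 (only the most significant bit set).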
+def MaskValues64 { + dag Lo1 = (i64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), MaskValues.Lo1, sub_32)); + dag Hi1 = (i64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), MaskValues.Hi1, sub_32)); + dag Lo2 = (i64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), MaskValues.Lo2, sub_32)); + dag Hi2 = (i64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), MaskValues.Hi2, sub_32)); + dag Lo4 = (i64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), MaskValues.Lo4, sub_32)); + dag Hi4 = (i64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), MaskValues.Hi4, sub_32)); +} + +def DWMaskValues { + dag Lo1 = (ORI8 (ORIS8 (RLDICR MaskValues64.Lo1, 32, 31), 0x5555), 0x5555); + dag Hi1 = (ORI8 (ORIS8 (RLDICR MaskValues64.Hi1, 32, 31), 0xAAAA), 0xAAAA); + dag Lo2 = (ORI8 (ORIS8 (RLDICR MaskValues64.Lo2, 32, 31), 0x3333), 0x3333); + dag Hi2 = (ORI8 (ORIS8 (RLDICR MaskValues64.Hi2, 32, 31), 0xCCCC), 0xCCCC); + dag Lo4 = (ORI8 (ORIS8 (RLDICR MaskValues64.Lo4, 32, 31), 0x0F0F), 0x0F0F); + dag Hi4 = (ORI8 (ORIS8 (RLDICR MaskValues64.Hi4, 32, 31), 0xF0F0), 0xF0F0); +} + +def DWShift1 { + dag Right = (RLDICL $A, 63, 1); + dag Left = (RLDICR $A, 1, 62); +} + +def DWSwap1 { + dag Bit = (OR8 (AND8 DWShift1.Right, DWMaskValues.Lo1), + (AND8 DWShift1.Left, DWMaskValues.Hi1)); +} + +def DWShift2 { + dag Right = (RLDICL DWSwap1.Bit, 62, 2); + dag Left = (RLDICR DWSwap1.Bit, 2, 61); +} + +def DWSwap2 { + dag Bits = (OR8 (AND8 DWShift2.Right, DWMaskValues.Lo2), + (AND8 DWShift2.Left, DWMaskValues.Hi2)); +} + +def DWShift4 { + dag Right = (RLDICL DWSwap2.Bits, 60, 4); + dag Left = (RLDICR DWSwap2.Bits, 4, 59); +} + +def DWSwap4 { + dag Bits = (OR8 (AND8 DWShift4.Right, DWMaskValues.Lo4), + (AND8 DWShift4.Left, DWMaskValues.Hi4)); +} + +// Bit swap is done, now start byte swap. +def DWExtractLo32 { + dag SubReg = (i32 (EXTRACT_SUBREG DWSwap4.Bits, sub_32)); +} + +def DWRotateLo32 { + dag Left24 = (RLWINM DWExtractLo32.SubReg, 24, 0, 31); +} + +def DWLo32RotateInsertByte3 { + dag Left = (RLWIMI DWRotateLo32.Left24, DWExtractLo32.SubReg, 8, 8, 15); +} + +// Lower 32 bits in the right order +def DWLo32RotateInsertByte1 { + dag Left = + (RLWIMI DWLo32RotateInsertByte3.Left, DWExtractLo32.SubReg, 8, 24, 31); +} + +def ExtendLo32 { + dag To64Bit = + (i64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), + DWLo32RotateInsertByte1.Left, sub_32)); +} + +def DWShiftHi32 { // SRDI DWSwap4.Bits, 32 + dag ToLo32 = (RLDICL DWSwap4.Bits, 32, 32); +} + +def DWExtractHi32 { + dag SubReg = (i32 (EXTRACT_SUBREG DWShiftHi32.ToLo32, sub_32)); +} + +def DWRotateHi32 { + dag Left24 = (RLWINM DWExtractHi32.SubReg, 24, 0, 31); +} + +def DWHi32RotateInsertByte3 { + dag Left = (RLWIMI DWRotateHi32.Left24, DWExtractHi32.SubReg, 8, 8, 15); +} + +// High 32 bits in the right order, but in the low 32-bit position +def DWHi32RotateInsertByte1 { + dag Left = + (RLWIMI DWHi32RotateInsertByte3.Left, DWExtractHi32.SubReg, 8, 24, 31); +} + +def ExtendHi32 { + dag To64Bit = + (i64 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), + DWHi32RotateInsertByte1.Left, sub_32)); +} + +def DWShiftLo32 { // SLDI ExtendHi32.To64Bit, 32 + dag ToHi32 = (RLDICR ExtendHi32.To64Bit, 32, 31); +} + +def : Pat<(i64 (bitreverse i64:$A)), + (OR8 DWShiftLo32.ToHi32, ExtendLo32.To64Bit)>; diff --git a/lib/Target/PowerPC/PPCInstrVSX.td b/lib/Target/PowerPC/PPCInstrVSX.td index 9cfc897cdb3f2..43635a8919e2b 100644 --- a/lib/Target/PowerPC/PPCInstrVSX.td +++ b/lib/Target/PowerPC/PPCInstrVSX.td @@ -1901,6 +1901,98 @@ let Predicates = [IsLittleEndian, HasVSX] in def : Pat<(v4i32 (int_ppc_vsx_lxvw4x_be xoaddr:$src)), (LXVW4X xoaddr:$src)>; def : Pat<(v2f64 (int_ppc_vsx_lxvd2x_be 
xoaddr:$src)), (LXVD2X xoaddr:$src)>; +// Variable index unsigned vector_extract on Power9 +let Predicates = [HasP9Altivec, IsLittleEndian] in { + def : Pat<(i64 (anyext (i32 (vector_extract v16i8:$S, i64:$Idx)))), + (VEXTUBRX $Idx, $S)>; + + def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, i64:$Idx)))), + (VEXTUHRX (RLWINM8 $Idx, 1, 28, 30), $S)>; + def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 0)))), + (VEXTUHRX (LI8 0), $S)>; + def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 1)))), + (VEXTUHRX (LI8 2), $S)>; + def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 2)))), + (VEXTUHRX (LI8 4), $S)>; + def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 3)))), + (VEXTUHRX (LI8 6), $S)>; + def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 4)))), + (VEXTUHRX (LI8 8), $S)>; + def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 5)))), + (VEXTUHRX (LI8 10), $S)>; + def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 6)))), + (VEXTUHRX (LI8 12), $S)>; + def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 7)))), + (VEXTUHRX (LI8 14), $S)>; + + def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, i64:$Idx)))), + (VEXTUWRX (RLWINM8 $Idx, 2, 28, 29), $S)>; + def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 0)))), + (VEXTUWRX (LI8 0), $S)>; + def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 1)))), + (VEXTUWRX (LI8 4), $S)>; + def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 2)))), + (VEXTUWRX (LI8 8), $S)>; + def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 3)))), + (VEXTUWRX (LI8 12), $S)>; + + def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, i64:$Idx)))), + (EXTSW (VEXTUWRX (RLWINM8 $Idx, 2, 28, 29), $S))>; + def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 0)))), + (EXTSW (VEXTUWRX (LI8 0), $S))>; + def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 1)))), + (EXTSW (VEXTUWRX (LI8 4), $S))>; + def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 2)))), + (EXTSW (VEXTUWRX (LI8 8), $S))>; + def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 3)))), + (EXTSW (VEXTUWRX (LI8 12), $S))>; +} +let Predicates = [HasP9Altivec, IsBigEndian] in { + def : Pat<(i64 (anyext (i32 (vector_extract v16i8:$S, i64:$Idx)))), + (VEXTUBLX $Idx, $S)>; + + def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, i64:$Idx)))), + (VEXTUHLX (RLWINM8 $Idx, 1, 28, 30), $S)>; + def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 0)))), + (VEXTUHLX (LI8 0), $S)>; + def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 1)))), + (VEXTUHLX (LI8 2), $S)>; + def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 2)))), + (VEXTUHLX (LI8 4), $S)>; + def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 3)))), + (VEXTUHLX (LI8 6), $S)>; + def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 4)))), + (VEXTUHLX (LI8 8), $S)>; + def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 5)))), + (VEXTUHLX (LI8 10), $S)>; + def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 6)))), + (VEXTUHLX (LI8 12), $S)>; + def : Pat<(i64 (anyext (i32 (vector_extract v8i16:$S, 7)))), + (VEXTUHLX (LI8 14), $S)>; + + def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, i64:$Idx)))), + (VEXTUWLX (RLWINM8 $Idx, 2, 28, 29), $S)>; + def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 0)))), + (VEXTUWLX (LI8 0), $S)>; + def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 1)))), + (VEXTUWLX (LI8 4), $S)>; + def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 2)))), + (VEXTUWLX (LI8 8), $S)>; + def : Pat<(i64 (zext (i32 (vector_extract v4i32:$S, 3)))), + (VEXTUWLX (LI8 12), $S)>; + + def : Pat<(i64 (sext (i32 
(vector_extract v4i32:$S, i64:$Idx)))), + (EXTSW (VEXTUWLX (RLWINM8 $Idx, 2, 28, 29), $S))>; + def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 0)))), + (EXTSW (VEXTUWLX (LI8 0), $S))>; + def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 1)))), + (EXTSW (VEXTUWLX (LI8 4), $S))>; + def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 2)))), + (EXTSW (VEXTUWLX (LI8 8), $S))>; + def : Pat<(i64 (sext (i32 (vector_extract v4i32:$S, 3)))), + (EXTSW (VEXTUWLX (LI8 12), $S))>; +} + let Predicates = [IsLittleEndian, HasDirectMove] in { // v16i8 scalar <-> vector conversions (LE) def : Pat<(v16i8 (scalar_to_vector i32:$A)), @@ -2729,36 +2821,54 @@ def DblToFlt { } def ByteToWord { - dag A0 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 0)), i8)); - dag A1 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 4)), i8)); - dag A2 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 8)), i8)); - dag A3 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 12)), i8)); + dag LE_A0 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 0)), i8)); + dag LE_A1 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 4)), i8)); + dag LE_A2 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 8)), i8)); + dag LE_A3 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 12)), i8)); + dag BE_A0 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 3)), i8)); + dag BE_A1 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 7)), i8)); + dag BE_A2 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 11)), i8)); + dag BE_A3 = (i32 (sext_inreg (i32 (vector_extract v16i8:$A, 15)), i8)); } def ByteToDWord { - dag A0 = (i64 (sext_inreg - (i64 (anyext (i32 (vector_extract v16i8:$A, 0)))), i8)); - dag A1 = (i64 (sext_inreg - (i64 (anyext (i32 (vector_extract v16i8:$A, 8)))), i8)); + dag LE_A0 = (i64 (sext_inreg + (i64 (anyext (i32 (vector_extract v16i8:$A, 0)))), i8)); + dag LE_A1 = (i64 (sext_inreg + (i64 (anyext (i32 (vector_extract v16i8:$A, 8)))), i8)); + dag BE_A0 = (i64 (sext_inreg + (i64 (anyext (i32 (vector_extract v16i8:$A, 7)))), i8)); + dag BE_A1 = (i64 (sext_inreg + (i64 (anyext (i32 (vector_extract v16i8:$A, 15)))), i8)); } def HWordToWord { - dag A0 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 0)), i16)); - dag A1 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 2)), i16)); - dag A2 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 4)), i16)); - dag A3 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 6)), i16)); + dag LE_A0 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 0)), i16)); + dag LE_A1 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 2)), i16)); + dag LE_A2 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 4)), i16)); + dag LE_A3 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 6)), i16)); + dag BE_A0 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 1)), i16)); + dag BE_A1 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 3)), i16)); + dag BE_A2 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 5)), i16)); + dag BE_A3 = (i32 (sext_inreg (i32 (vector_extract v8i16:$A, 7)), i16)); } def HWordToDWord { - dag A0 = (i64 (sext_inreg - (i64 (anyext (i32 (vector_extract v8i16:$A, 0)))), i16)); - dag A1 = (i64 (sext_inreg - (i64 (anyext (i32 (vector_extract v8i16:$A, 4)))), i16)); + dag LE_A0 = (i64 (sext_inreg + (i64 (anyext (i32 (vector_extract v8i16:$A, 0)))), i16)); + dag LE_A1 = (i64 (sext_inreg + (i64 (anyext (i32 (vector_extract v8i16:$A, 4)))), i16)); + dag BE_A0 = (i64 (sext_inreg + (i64 (anyext (i32 (vector_extract v8i16:$A, 3)))), i16)); + dag BE_A1 = (i64 (sext_inreg + (i64 (anyext (i32 (vector_extract v8i16:$A, 
7)))), i16)); } def WordToDWord { - dag A0 = (i64 (sext (i32 (vector_extract v4i32:$A, 0)))); - dag A1 = (i64 (sext (i32 (vector_extract v4i32:$A, 2)))); + dag LE_A0 = (i64 (sext (i32 (vector_extract v4i32:$A, 0)))); + dag LE_A1 = (i64 (sext (i32 (vector_extract v4i32:$A, 2)))); + dag BE_A0 = (i64 (sext (i32 (vector_extract v4i32:$A, 1)))); + dag BE_A1 = (i64 (sext (i32 (vector_extract v4i32:$A, 3)))); } def FltToIntLoad { @@ -3016,18 +3126,46 @@ let AddedComplexity = 400 in { // P9 Altivec instructions that can be used to build vectors. // Adding them to PPCInstrVSX.td rather than PPCAltivecVSX.td to compete // with complexities of existing build vector patterns in this file. - let Predicates = [HasP9Altivec] in { - def : Pat<(v2i64 (build_vector WordToDWord.A0, WordToDWord.A1)), + let Predicates = [HasP9Altivec, IsLittleEndian] in { + def : Pat<(v2i64 (build_vector WordToDWord.LE_A0, WordToDWord.LE_A1)), (v2i64 (VEXTSW2D $A))>; - def : Pat<(v2i64 (build_vector HWordToDWord.A0, HWordToDWord.A1)), + def : Pat<(v2i64 (build_vector HWordToDWord.LE_A0, HWordToDWord.LE_A1)), (v2i64 (VEXTSH2D $A))>; - def : Pat<(v4i32 (build_vector HWordToWord.A0, HWordToWord.A1, - HWordToWord.A2, HWordToWord.A3)), + def : Pat<(v4i32 (build_vector HWordToWord.LE_A0, HWordToWord.LE_A1, + HWordToWord.LE_A2, HWordToWord.LE_A3)), (v4i32 (VEXTSH2W $A))>; - def : Pat<(v4i32 (build_vector ByteToWord.A0, ByteToWord.A1, - ByteToWord.A2, ByteToWord.A3)), + def : Pat<(v4i32 (build_vector ByteToWord.LE_A0, ByteToWord.LE_A1, + ByteToWord.LE_A2, ByteToWord.LE_A3)), (v4i32 (VEXTSB2W $A))>; - def : Pat<(v2i64 (build_vector ByteToDWord.A0, ByteToDWord.A1)), + def : Pat<(v2i64 (build_vector ByteToDWord.LE_A0, ByteToDWord.LE_A1)), (v2i64 (VEXTSB2D $A))>; } + + let Predicates = [HasP9Altivec, IsBigEndian] in { + def : Pat<(v2i64 (build_vector WordToDWord.BE_A0, WordToDWord.BE_A1)), + (v2i64 (VEXTSW2D $A))>; + def : Pat<(v2i64 (build_vector HWordToDWord.BE_A0, HWordToDWord.BE_A1)), + (v2i64 (VEXTSH2D $A))>; + def : Pat<(v4i32 (build_vector HWordToWord.BE_A0, HWordToWord.BE_A1, + HWordToWord.BE_A2, HWordToWord.BE_A3)), + (v4i32 (VEXTSH2W $A))>; + def : Pat<(v4i32 (build_vector ByteToWord.BE_A0, ByteToWord.BE_A1, + ByteToWord.BE_A2, ByteToWord.BE_A3)), + (v4i32 (VEXTSB2W $A))>; + def : Pat<(v2i64 (build_vector ByteToDWord.BE_A0, ByteToDWord.BE_A1)), + (v2i64 (VEXTSB2D $A))>; + } + + let Predicates = [HasP9Altivec] in { + def: Pat<(v2i64 (PPCSExtVElems v16i8:$A)), + (v2i64 (VEXTSB2D $A))>; + def: Pat<(v2i64 (PPCSExtVElems v8i16:$A)), + (v2i64 (VEXTSH2D $A))>; + def: Pat<(v2i64 (PPCSExtVElems v4i32:$A)), + (v2i64 (VEXTSW2D $A))>; + def: Pat<(v4i32 (PPCSExtVElems v16i8:$A)), + (v4i32 (VEXTSB2W $A))>; + def: Pat<(v4i32 (PPCSExtVElems v8i16:$A)), + (v4i32 (VEXTSH2W $A))>; + } } diff --git a/lib/Target/PowerPC/PPCScheduleP9.td b/lib/Target/PowerPC/PPCScheduleP9.td index a9c1bd78b05e0..a01995a629c29 100644 --- a/lib/Target/PowerPC/PPCScheduleP9.td +++ b/lib/Target/PowerPC/PPCScheduleP9.td @@ -260,8 +260,8 @@ let SchedModel = P9Model in { // ***************** Defining Itinerary Class Resources ***************** - def : ItinRW<[P9_DFU_76C, IP_EXEC_1C, DISP_1C, DISP_1C], [IIC_IntSimple, - IIC_IntGeneral]>; + def : ItinRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C, DISP_1C], + [IIC_IntSimple, IIC_IntGeneral]>; def : ItinRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C], [IIC_IntISEL, IIC_IntRotate, IIC_IntShift]>; diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h index 5a97f595ad8cf..90d11f46a384d 100644 --- 
a/lib/Target/PowerPC/PPCSubtarget.h +++ b/lib/Target/PowerPC/PPCSubtarget.h @@ -272,6 +272,13 @@ public: return 16; } + + // DarwinABI has a 224-byte red zone. PPC32 SVR4ABI (Non-DarwinABI) has no + // red zone and PPC64 SVR4ABI has a 288-byte red zone. + unsigned getRedZoneSize() const { + return isDarwinABI() ? 224 : (isPPC64() ? 288 : 0); + } + bool hasHTM() const { return HasHTM; } bool hasFusion() const { return HasFusion; } bool hasFloat128() const { return HasFloat128; } diff --git a/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp b/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp index 491eaf326a508..7d34efd4af3e0 100644 --- a/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp +++ b/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp @@ -195,8 +195,10 @@ public: return false; // If we don't have VSX on the subtarget, don't do anything. + // Also, on Power9 the load and store ops preserve element order and so + // the swaps are not required. const PPCSubtarget &STI = MF.getSubtarget<PPCSubtarget>(); - if (!STI.hasVSX()) + if (!STI.hasVSX() || !STI.needsSwapsForVSXMemOps()) return false; bool Changed = false; diff --git a/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp b/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp index f85c0cf111c43..be83efc02d278 100644 --- a/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp +++ b/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp @@ -34,7 +34,7 @@ public: void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, const MCValue &Target, MutableArrayRef<char> Data, - uint64_t Value, bool IsPCRel) const override; + uint64_t Value, bool IsResolved) const override; MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override; @@ -73,7 +73,7 @@ bool RISCVAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { void RISCVAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, const MCValue &Target, MutableArrayRef<char> Data, uint64_t Value, - bool IsPCRel) const { + bool IsResolved) const { return; } diff --git a/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp b/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp index d4454c271f5ac..0d021d67033e5 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp +++ b/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp @@ -211,6 +211,7 @@ namespace { case Sparc::fixup_sparc_wplt30: if (Target.getSymA()->getSymbol().isTemporary()) return false; + LLVM_FALLTHROUGH; case Sparc::fixup_sparc_tls_gd_hi22: case Sparc::fixup_sparc_tls_gd_lo10: case Sparc::fixup_sparc_tls_gd_add: @@ -275,7 +276,7 @@ namespace { void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, const MCValue &Target, MutableArrayRef<char> Data, - uint64_t Value, bool IsPCRel) const override { + uint64_t Value, bool IsResolved) const override { Value = adjustFixupValue(Fixup.getKind(), Value); if (!Value) return; // Doesn't change encoding. 
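The IsPCRel to IsResolved rename above is the same mechanical signature change applied to every MC backend this patch touches (RISCV and Sparc above, SystemZ below). A minimal sketch of the new override shape, assuming a hypothetical FooAsmBackend; the remaining pure-virtual MCAsmBackend members and any real fixup-kind handling are elided:

#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCFixup.h"
using namespace llvm;

// Hypothetical backend, for illustration only; the other pure-virtual
// MCAsmBackend members are elided.
class FooAsmBackend : public MCAsmBackend {
public:
  void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
                  const MCValue &Target, MutableArrayRef<char> Data,
                  uint64_t Value, bool IsResolved) const override {
    // IsResolved reports whether the fixup value was fully resolved at
    // layout time; the old IsPCRel flag only said it was PC-relative.
    if (!IsResolved)
      return; // Leave the bytes alone; a relocation will patch them.
    unsigned Offset = Fixup.getOffset();
    for (unsigned I = 0; I != 4; ++I) // Assume a 4-byte little-endian field.
      Data[Offset + I] |= uint8_t(Value >> (I * 8));
  }
};

Under the renamed flag, an early-returning applyFixup (as in the RISCV backend above) stays correct: an unresolved fixup is simply left for the object writer to cover with a relocation.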
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp index 6b32a7926437a..51ac410a9c819 100644 --- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp +++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp @@ -52,7 +52,7 @@ public: const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override; void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, const MCValue &Target, MutableArrayRef<char> Data, - uint64_t Value, bool IsPCRel) const override; + uint64_t Value, bool IsResolved) const override; bool mayNeedRelaxation(const MCInst &Inst) const override { return false; } @@ -94,7 +94,7 @@ void SystemZMCAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, const MCValue &Target, MutableArrayRef<char> Data, uint64_t Value, - bool IsPCRel) const { + bool IsResolved) const { MCFixupKind Kind = Fixup.getKind(); unsigned Offset = Fixup.getOffset(); unsigned BitSize = getFixupKindInfo(Kind).TargetSize; diff --git a/lib/Target/SystemZ/SystemZHazardRecognizer.cpp b/lib/Target/SystemZ/SystemZHazardRecognizer.cpp index fe4b52b515e0c..73a1036f88e0c 100644 --- a/lib/Target/SystemZ/SystemZHazardRecognizer.cpp +++ b/lib/Target/SystemZ/SystemZHazardRecognizer.cpp @@ -26,7 +26,7 @@ using namespace llvm; -#define DEBUG_TYPE "misched" +#define DEBUG_TYPE "machine-scheduler" // This is the limit of processor resource usage at which the // scheduler should try to look for other instructions (not using the diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index fef4a8c92a362..2801141cd951f 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -2224,15 +2224,12 @@ static void lowerMUL_LOHI32(SelectionDAG &DAG, const SDLoc &DL, unsigned Extend, // Lower a binary operation that produces two VT results, one in each // half of a GR128 pair. Op0 and Op1 are the VT operands to the operation, -// Extend extends Op0 to a GR128, and Opcode performs the GR128 operation -// on the extended Op0 and (unextended) Op1. Store the even register result +// and Opcode performs the GR128 operation. Store the even register result // in Even and the odd register result in Odd. static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT, - unsigned Extend, unsigned Opcode, SDValue Op0, - SDValue Op1, SDValue &Even, SDValue &Odd) { - SDNode *In128 = DAG.getMachineNode(Extend, DL, MVT::Untyped, Op0); - SDValue Result = DAG.getNode(Opcode, DL, MVT::Untyped, - SDValue(In128, 0), Op1); + unsigned Opcode, SDValue Op0, SDValue Op1, + SDValue &Even, SDValue &Odd) { + SDValue Result = DAG.getNode(Opcode, DL, MVT::Untyped, Op0, Op1); bool Is32Bit = is32Bit(VT); Even = DAG.getTargetExtractSubreg(SystemZ::even128(Is32Bit), DL, VT, Result); Odd = DAG.getTargetExtractSubreg(SystemZ::odd128(Is32Bit), DL, VT, Result); @@ -2347,6 +2344,7 @@ static SDValue lowerVectorSETCC(SelectionDAG &DAG, const SDLoc &DL, EVT VT, // Handle tests for order using (or (ogt y x) (oge x y)). case ISD::SETUO: Invert = true; + LLVM_FALLTHROUGH; case ISD::SETO: { assert(IsFP && "Unexpected integer comparison"); SDValue LT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp1, CmpOp0); @@ -2358,6 +2356,7 @@ static SDValue lowerVectorSETCC(SelectionDAG &DAG, const SDLoc &DL, EVT VT, // Handle <> tests using (or (ogt y x) (ogt x y)). 
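 // SETUEQ (unordered-or-equal) is the logical complement of SETONE
 // (ordered-and-not-equal), so it reuses the SETONE lowering below with the
 // resulting mask inverted.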
case ISD::SETUEQ: Invert = true; + LLVM_FALLTHROUGH; case ISD::SETONE: { assert(IsFP && "Unexpected integer comparison"); SDValue LT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp1, CmpOp0); @@ -2962,7 +2961,7 @@ SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op, lowerMUL_LOHI32(DAG, DL, ISD::SIGN_EXTEND, Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]); else { - // Do a full 128-bit multiplication based on UMUL_LOHI64: + // Do a full 128-bit multiplication based on SystemZISD::UMUL_LOHI: // // (ll * rl) + ((lh * rl) << 64) + ((ll * rh) << 64) // @@ -2980,10 +2979,10 @@ SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op, SDValue RL = Op.getOperand(1); SDValue LH = DAG.getNode(ISD::SRA, DL, VT, LL, C63); SDValue RH = DAG.getNode(ISD::SRA, DL, VT, RL, C63); - // UMUL_LOHI64 returns the low result in the odd register and the high - // result in the even register. SMUL_LOHI is defined to return the - // low half first, so the results are in reverse order. - lowerGR128Binary(DAG, DL, VT, SystemZ::AEXT128_64, SystemZISD::UMUL_LOHI64, + // SystemZISD::UMUL_LOHI returns the low result in the odd register and + // the high result in the even register. ISD::SMUL_LOHI is defined to + // return the low half first, so the results are in reverse order. + lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI, LL, RL, Ops[1], Ops[0]); SDValue NegLLTimesRH = DAG.getNode(ISD::AND, DL, VT, LL, RH); SDValue NegLHTimesRL = DAG.getNode(ISD::AND, DL, VT, LH, RL); @@ -3004,10 +3003,10 @@ SDValue SystemZTargetLowering::lowerUMUL_LOHI(SDValue Op, lowerMUL_LOHI32(DAG, DL, ISD::ZERO_EXTEND, Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]); else - // UMUL_LOHI64 returns the low result in the odd register and the high - // result in the even register. UMUL_LOHI is defined to return the - // low half first, so the results are in reverse order. - lowerGR128Binary(DAG, DL, VT, SystemZ::AEXT128_64, SystemZISD::UMUL_LOHI64, + // SystemZISD::UMUL_LOHI returns the low result in the odd register and + // the high result in the even register. ISD::UMUL_LOHI is defined to + // return the low half first, so the results are in reverse order. + lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI, Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]); return DAG.getMergeValues(Ops, DL); } @@ -3018,24 +3017,19 @@ SDValue SystemZTargetLowering::lowerSDIVREM(SDValue Op, SDValue Op1 = Op.getOperand(1); EVT VT = Op.getValueType(); SDLoc DL(Op); - unsigned Opcode; - // We use DSGF for 32-bit division. - if (is32Bit(VT)) { + // We use DSGF for 32-bit division. This means the first operand must + // always be 64-bit, and the second operand should be 32-bit whenever + // that is possible, to improve performance. + if (is32Bit(VT)) Op0 = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op0); - Opcode = SystemZISD::SDIVREM32; - } else if (DAG.ComputeNumSignBits(Op1) > 32) { + else if (DAG.ComputeNumSignBits(Op1) > 32) Op1 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op1); - Opcode = SystemZISD::SDIVREM32; - } else - Opcode = SystemZISD::SDIVREM64; - // DSG(F) takes a 64-bit dividend, so the even register in the GR128 - // input is "don't care". The instruction returns the remainder in - // the even register and the quotient in the odd register. + // DSG(F) returns the remainder in the even register and the + // quotient in the odd register. 
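+  // ISD::SDIVREM defines result 0 as the quotient and result 1 as the
+  // remainder, so the odd (quotient) register is stored to Ops[0] and the
+  // even (remainder) register to Ops[1] below.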
SDValue Ops[2]; - lowerGR128Binary(DAG, DL, VT, SystemZ::AEXT128_64, Opcode, - Op0, Op1, Ops[1], Ops[0]); + lowerGR128Binary(DAG, DL, VT, SystemZISD::SDIVREM, Op0, Op1, Ops[1], Ops[0]); return DAG.getMergeValues(Ops, DL); } @@ -3044,16 +3038,11 @@ SDValue SystemZTargetLowering::lowerUDIVREM(SDValue Op, EVT VT = Op.getValueType(); SDLoc DL(Op); - // DL(G) uses a double-width dividend, so we need to clear the even - // register in the GR128 input. The instruction returns the remainder - // in the even register and the quotient in the odd register. + // DL(G) returns the remainder in the even register and the + // quotient in the odd register. SDValue Ops[2]; - if (is32Bit(VT)) - lowerGR128Binary(DAG, DL, VT, SystemZ::ZEXT128_32, SystemZISD::UDIVREM32, - Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]); - else - lowerGR128Binary(DAG, DL, VT, SystemZ::ZEXT128_64, SystemZISD::UDIVREM64, - Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]); + lowerGR128Binary(DAG, DL, VT, SystemZISD::UDIVREM, + Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]); return DAG.getMergeValues(Ops, DL); } @@ -3193,13 +3182,13 @@ SDValue SystemZTargetLowering::lowerATOMIC_FENCE(SDValue Op, SDLoc DL(Op); AtomicOrdering FenceOrdering = static_cast<AtomicOrdering>( cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()); - SynchronizationScope FenceScope = static_cast<SynchronizationScope>( + SyncScope::ID FenceSSID = static_cast<SyncScope::ID>( cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue()); // The only fence that needs an instruction is a sequentially-consistent // cross-thread fence. if (FenceOrdering == AtomicOrdering::SequentiallyConsistent && - FenceScope == CrossThread) { + FenceSSID == SyncScope::System) { return SDValue(DAG.getMachineNode(SystemZ::Serialize, DL, MVT::Other, Op.getOperand(0)), 0); @@ -4669,11 +4658,9 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const { OPCODE(SELECT_CCMASK); OPCODE(ADJDYNALLOC); OPCODE(POPCNT); - OPCODE(UMUL_LOHI64); - OPCODE(SDIVREM32); - OPCODE(SDIVREM64); - OPCODE(UDIVREM32); - OPCODE(UDIVREM64); + OPCODE(UMUL_LOHI); + OPCODE(SDIVREM); + OPCODE(UDIVREM); OPCODE(MVC); OPCODE(MVC_LOOP); OPCODE(NC); @@ -5778,14 +5765,12 @@ SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr &MI, return DoneMBB; } -// Emit an extension from a GR32 or GR64 to a GR128. ClearEven is true +// Emit an extension from a GR64 to a GR128. ClearEven is true // if the high register of the GR128 value must be cleared or false if -// it's "don't care". SubReg is subreg_l32 when extending a GR32 -// and subreg_l64 when extending a GR64. +// it's "don't care". 
MachineBasicBlock *SystemZTargetLowering::emitExt128(MachineInstr &MI, MachineBasicBlock *MBB, - bool ClearEven, - unsigned SubReg) const { + bool ClearEven) const { MachineFunction &MF = *MBB->getParent(); const SystemZInstrInfo *TII = static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo()); @@ -5808,7 +5793,7 @@ MachineBasicBlock *SystemZTargetLowering::emitExt128(MachineInstr &MI, In128 = NewIn128; } BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Dest) - .addReg(In128).addReg(Src).addImm(SubReg); + .addReg(In128).addReg(Src).addImm(SystemZ::subreg_l64); MI.eraseFromParent(); return MBB; @@ -6172,12 +6157,10 @@ MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter( case SystemZ::CondStoreF64Inv: return emitCondStore(MI, MBB, SystemZ::STD, 0, true); - case SystemZ::AEXT128_64: - return emitExt128(MI, MBB, false, SystemZ::subreg_l64); - case SystemZ::ZEXT128_32: - return emitExt128(MI, MBB, true, SystemZ::subreg_l32); - case SystemZ::ZEXT128_64: - return emitExt128(MI, MBB, true, SystemZ::subreg_l64); + case SystemZ::AEXT128: + return emitExt128(MI, MBB, false); + case SystemZ::ZEXT128: + return emitExt128(MI, MBB, true); case SystemZ::ATOMIC_SWAPW: return emitAtomicLoadBinary(MI, MBB, 0, 0); diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h index 5dcb19c0a35db..6c9c404816f09 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.h +++ b/lib/Target/SystemZ/SystemZISelLowering.h @@ -86,14 +86,11 @@ enum NodeType : unsigned { // Count number of bits set in operand 0 per byte. POPCNT, - // Wrappers around the ISD opcodes of the same name. The output and - // first input operands are GR128s. The trailing numbers are the - // widths of the second operand in bits. - UMUL_LOHI64, - SDIVREM32, - SDIVREM64, - UDIVREM32, - UDIVREM64, + // Wrappers around the ISD opcodes of the same name. The output is GR128. + // Input operands may be GR64 or GR32, depending on the instruction. + UMUL_LOHI, + SDIVREM, + UDIVREM, // Use a series of MVCs to copy bytes from one memory location to another. // The operands are: @@ -562,7 +559,7 @@ private: unsigned StoreOpcode, unsigned STOCOpcode, bool Invert) const; MachineBasicBlock *emitExt128(MachineInstr &MI, MachineBasicBlock *MBB, - bool ClearEven, unsigned SubReg) const; + bool ClearEven) const; MachineBasicBlock *emitAtomicLoadBinary(MachineInstr &MI, MachineBasicBlock *BB, unsigned BinOpcode, unsigned BitSize, diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td index 98f66c29ae647..4569be7602e45 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/lib/Target/SystemZ/SystemZInstrInfo.td @@ -677,6 +677,22 @@ let Predicates = [FeatureLoadAndTrap] in { def LLGTAT : UnaryRXY<"llgtat", 0xE39C, null_frag, GR64, 4>; } +// Extend GR64s to GR128s. +let usesCustomInserter = 1 in + def ZEXT128 : Pseudo<(outs GR128:$dst), (ins GR64:$src), []>; + +//===----------------------------------------------------------------------===// +// "Any" extensions +//===----------------------------------------------------------------------===// + +// Use subregs to populate the "don't care" bits in a 32-bit to 64-bit anyext. +def : Pat<(i64 (anyext GR32:$src)), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, subreg_l32)>; + +// Extend GR64s to GR128s. 
+let usesCustomInserter = 1 in + def AEXT128 : Pseudo<(outs GR128:$dst), (ins GR64:$src), []>; + //===----------------------------------------------------------------------===// // Truncations //===----------------------------------------------------------------------===// @@ -1216,13 +1232,17 @@ def MSG : BinaryRXY<"msg", 0xE30C, mul, GR64, load, 8>; // Multiplication of a register, producing two results. def MR : BinaryRR <"mr", 0x1C, null_frag, GR128, GR32>; def MLR : BinaryRRE<"mlr", 0xB996, null_frag, GR128, GR32>; -def MLGR : BinaryRRE<"mlgr", 0xB986, z_umul_lohi64, GR128, GR64>; +def MLGR : BinaryRRE<"mlgr", 0xB986, null_frag, GR128, GR64>; +def : Pat<(z_umul_lohi GR64:$src1, GR64:$src2), + (MLGR (AEXT128 GR64:$src1), GR64:$src2)>; // Multiplication of memory, producing two results. def M : BinaryRX <"m", 0x5C, null_frag, GR128, load, 4>; def MFY : BinaryRXY<"mfy", 0xE35C, null_frag, GR128, load, 4>; def ML : BinaryRXY<"ml", 0xE396, null_frag, GR128, load, 4>; -def MLG : BinaryRXY<"mlg", 0xE386, z_umul_lohi64, GR128, load, 8>; +def MLG : BinaryRXY<"mlg", 0xE386, null_frag, GR128, load, 8>; +def : Pat<(z_umul_lohi GR64:$src1, (i64 (load bdxaddr20only:$src2))), + (MLG (AEXT128 GR64:$src1), bdxaddr20only:$src2)>; //===----------------------------------------------------------------------===// // Division and remainder @@ -1230,19 +1250,38 @@ def MLG : BinaryRXY<"mlg", 0xE386, z_umul_lohi64, GR128, load, 8>; let hasSideEffects = 1 in { // Do not speculatively execute. // Division and remainder, from registers. - def DR : BinaryRR <"dr", 0x1D, null_frag, GR128, GR32>; - def DSGFR : BinaryRRE<"dsgfr", 0xB91D, z_sdivrem32, GR128, GR32>; - def DSGR : BinaryRRE<"dsgr", 0xB90D, z_sdivrem64, GR128, GR64>; - def DLR : BinaryRRE<"dlr", 0xB997, z_udivrem32, GR128, GR32>; - def DLGR : BinaryRRE<"dlgr", 0xB987, z_udivrem64, GR128, GR64>; + def DR : BinaryRR <"dr", 0x1D, null_frag, GR128, GR32>; + def DSGFR : BinaryRRE<"dsgfr", 0xB91D, null_frag, GR128, GR32>; + def DSGR : BinaryRRE<"dsgr", 0xB90D, null_frag, GR128, GR64>; + def DLR : BinaryRRE<"dlr", 0xB997, null_frag, GR128, GR32>; + def DLGR : BinaryRRE<"dlgr", 0xB987, null_frag, GR128, GR64>; // Division and remainder, from memory. 
- def D : BinaryRX <"d", 0x5D, null_frag, GR128, load, 4>; - def DSGF : BinaryRXY<"dsgf", 0xE31D, z_sdivrem32, GR128, load, 4>; - def DSG : BinaryRXY<"dsg", 0xE30D, z_sdivrem64, GR128, load, 8>; - def DL : BinaryRXY<"dl", 0xE397, z_udivrem32, GR128, load, 4>; - def DLG : BinaryRXY<"dlg", 0xE387, z_udivrem64, GR128, load, 8>; -} + def D : BinaryRX <"d", 0x5D, null_frag, GR128, load, 4>; + def DSGF : BinaryRXY<"dsgf", 0xE31D, null_frag, GR128, load, 4>; + def DSG : BinaryRXY<"dsg", 0xE30D, null_frag, GR128, load, 8>; + def DL : BinaryRXY<"dl", 0xE397, null_frag, GR128, load, 4>; + def DLG : BinaryRXY<"dlg", 0xE387, null_frag, GR128, load, 8>; +} +def : Pat<(z_sdivrem GR64:$src1, GR32:$src2), + (DSGFR (AEXT128 GR64:$src1), GR32:$src2)>; +def : Pat<(z_sdivrem GR64:$src1, (i32 (load bdxaddr20only:$src2))), + (DSGF (AEXT128 GR64:$src1), bdxaddr20only:$src2)>; +def : Pat<(z_sdivrem GR64:$src1, GR64:$src2), + (DSGR (AEXT128 GR64:$src1), GR64:$src2)>; +def : Pat<(z_sdivrem GR64:$src1, (i64 (load bdxaddr20only:$src2))), + (DSG (AEXT128 GR64:$src1), bdxaddr20only:$src2)>; + +def : Pat<(z_udivrem GR32:$src1, GR32:$src2), + (DLR (ZEXT128 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src1, + subreg_l32)), GR32:$src2)>; +def : Pat<(z_udivrem GR32:$src1, (i32 (load bdxaddr20only:$src2))), + (DL (ZEXT128 (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src1, + subreg_l32)), bdxaddr20only:$src2)>; +def : Pat<(z_udivrem GR64:$src1, GR64:$src2), + (DLGR (ZEXT128 GR64:$src1), GR64:$src2)>; +def : Pat<(z_udivrem GR64:$src1, (i64 (load bdxaddr20only:$src2))), + (DLG (ZEXT128 GR64:$src1), bdxaddr20only:$src2)>; //===----------------------------------------------------------------------===// // Shifts @@ -1894,17 +1933,6 @@ def : Pat<(ctlz GR64:$src), let Predicates = [FeaturePopulationCount], Defs = [CC] in def POPCNT : UnaryRRE<"popcnt", 0xB9E1, z_popcnt, GR64, GR64>; -// Use subregs to populate the "don't care" bits in a 32-bit to 64-bit anyext. -def : Pat<(i64 (anyext GR32:$src)), - (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, subreg_l32)>; - -// Extend GR32s and GR64s to GR128s. -let usesCustomInserter = 1 in { - def AEXT128_64 : Pseudo<(outs GR128:$dst), (ins GR64:$src), []>; - def ZEXT128_32 : Pseudo<(outs GR128:$dst), (ins GR32:$src), []>; - def ZEXT128_64 : Pseudo<(outs GR128:$dst), (ins GR64:$src), []>; -} - // Search a block of memory for a character. let mayLoad = 1, Defs = [CC] in defm SRST : StringRRE<"srst", 0xB25E, z_search_string>; diff --git a/lib/Target/SystemZ/SystemZLDCleanup.cpp b/lib/Target/SystemZ/SystemZLDCleanup.cpp index 3a0e01da42f03..d4cd89ce590fc 100644 --- a/lib/Target/SystemZ/SystemZLDCleanup.cpp +++ b/lib/Target/SystemZ/SystemZLDCleanup.cpp @@ -127,7 +127,7 @@ MachineInstr *SystemZLDCleanup::ReplaceTLSCall(MachineInstr *I, return Copy; } -// Create a virtal register in *TLSBaseAddrReg, and populate it by +// Create a virtual register in *TLSBaseAddrReg, and populate it by // inserting a copy instruction after I. Returns the new instruction. 
MachineInstr *SystemZLDCleanup::SetRegister(MachineInstr *I, unsigned *TLSBaseAddrReg) { diff --git a/lib/Target/SystemZ/SystemZMachineScheduler.cpp b/lib/Target/SystemZ/SystemZMachineScheduler.cpp index b6feaa49d8585..8342463c1086f 100644 --- a/lib/Target/SystemZ/SystemZMachineScheduler.cpp +++ b/lib/Target/SystemZ/SystemZMachineScheduler.cpp @@ -18,7 +18,7 @@ using namespace llvm; -#define DEBUG_TYPE "misched" +#define DEBUG_TYPE "machine-scheduler" #ifndef NDEBUG // Print the set of SUs diff --git a/lib/Target/SystemZ/SystemZOperators.td b/lib/Target/SystemZ/SystemZOperators.td index ab2392809f3be..9c6d5819f8a7e 100644 --- a/lib/Target/SystemZ/SystemZOperators.td +++ b/lib/Target/SystemZ/SystemZOperators.td @@ -36,14 +36,10 @@ def SDT_ZWrapOffset : SDTypeProfile<1, 2, SDTCisSameAs<0, 2>, SDTCisPtrTy<0>]>; def SDT_ZAdjDynAlloc : SDTypeProfile<1, 0, [SDTCisVT<0, i64>]>; -def SDT_ZGR128Binary32 : SDTypeProfile<1, 2, +def SDT_ZGR128Binary : SDTypeProfile<1, 2, [SDTCisVT<0, untyped>, - SDTCisVT<1, untyped>, - SDTCisVT<2, i32>]>; -def SDT_ZGR128Binary64 : SDTypeProfile<1, 2, - [SDTCisVT<0, untyped>, - SDTCisVT<1, untyped>, - SDTCisVT<2, i64>]>; + SDTCisInt<1>, + SDTCisInt<2>]>; def SDT_ZAtomicLoadBinaryW : SDTypeProfile<1, 5, [SDTCisVT<0, i32>, SDTCisPtrTy<1>, @@ -185,11 +181,9 @@ def z_select_ccmask : SDNode<"SystemZISD::SELECT_CCMASK", SDT_ZSelectCCMask, [SDNPInGlue]>; def z_adjdynalloc : SDNode<"SystemZISD::ADJDYNALLOC", SDT_ZAdjDynAlloc>; def z_popcnt : SDNode<"SystemZISD::POPCNT", SDTIntUnaryOp>; -def z_umul_lohi64 : SDNode<"SystemZISD::UMUL_LOHI64", SDT_ZGR128Binary64>; -def z_sdivrem32 : SDNode<"SystemZISD::SDIVREM32", SDT_ZGR128Binary32>; -def z_sdivrem64 : SDNode<"SystemZISD::SDIVREM64", SDT_ZGR128Binary64>; -def z_udivrem32 : SDNode<"SystemZISD::UDIVREM32", SDT_ZGR128Binary32>; -def z_udivrem64 : SDNode<"SystemZISD::UDIVREM64", SDT_ZGR128Binary64>; +def z_umul_lohi : SDNode<"SystemZISD::UMUL_LOHI", SDT_ZGR128Binary>; +def z_sdivrem : SDNode<"SystemZISD::SDIVREM", SDT_ZGR128Binary>; +def z_udivrem : SDNode<"SystemZISD::UDIVREM", SDT_ZGR128Binary>; def z_membarrier : SDNode<"SystemZISD::MEMBARRIER", SDTNone, [SDNPHasChain, SDNPSideEffect]>; diff --git a/lib/Target/SystemZ/SystemZScheduleZ13.td b/lib/Target/SystemZ/SystemZScheduleZ13.td index adc9f2976f871..72543c1eaee2e 100644 --- a/lib/Target/SystemZ/SystemZScheduleZ13.td +++ b/lib/Target/SystemZ/SystemZScheduleZ13.td @@ -15,7 +15,7 @@ def Z13Model : SchedMachineModel { let UnsupportedFeatures = Arch11UnsupportedFeatures.List; - + let IssueWidth = 8; let MicroOpBufferSize = 60; // Issue queues let LoadLatency = 1; // Optimistic load latency. 
@@ -159,7 +159,7 @@ def : InstRW<[FXb], (instregex "CondReturn$")>; // Select instructions //===----------------------------------------------------------------------===// -// Select pseudo +// Select pseudo def : InstRW<[FXa], (instregex "Select(32|64|32Mux)$")>; // CondStore pseudos @@ -226,7 +226,7 @@ def : InstRW<[LSU, Lat30, GroupAlone], (instregex "MVST$")>; def : InstRW<[FXa, Lat2], (instregex "LOCRMux$")>; def : InstRW<[FXa, Lat2], (instregex "LOC(G|FH)?R(Asm.*)?$")>; -def : InstRW<[FXa, Lat2], (instregex "LOC(G|H)?HI(Asm.*)?$")>; +def : InstRW<[FXa, Lat2], (instregex "LOC(G|H)?HI(Mux|(Asm.*))?$")>; def : InstRW<[FXa, LSU, Lat6], (instregex "LOC(G|FH|Mux)?(Asm.*)?$")>; def : InstRW<[FXb, LSU, Lat5], (instregex "STOC(G|FH|Mux)?(Asm.*)?$")>; @@ -282,7 +282,7 @@ def : InstRW<[LSU, LSU, LSU, LSU, LSU, Lat10, GroupAlone], (instregex "LM(H|Y|G)?$")>; // Load multiple disjoint -def : InstRW<[FXb, Lat30, GroupAlone], (instregex "LMD$")>; +def : InstRW<[LSU, Lat30, GroupAlone], (instregex "LMD$")>; // Store multiple (estimated average of ceil(5/2) FXb ops) def : InstRW<[LSU, LSU, FXb, FXb, FXb, Lat10, @@ -446,13 +446,13 @@ def : InstRW<[FXa, Lat6], (instregex "MS(R|FI)$")>; def : InstRW<[FXa, LSU, Lat12], (instregex "MSG$")>; def : InstRW<[FXa, Lat8], (instregex "MSGR$")>; def : InstRW<[FXa, Lat6], (instregex "MSGF(I|R)$")>; -def : InstRW<[FXa, LSU, Lat15, GroupAlone], (instregex "MLG$")>; -def : InstRW<[FXa, Lat9, GroupAlone], (instregex "MLGR$")>; +def : InstRW<[FXa2, LSU, Lat15, GroupAlone], (instregex "MLG$")>; +def : InstRW<[FXa2, Lat9, GroupAlone], (instregex "MLGR$")>; def : InstRW<[FXa, Lat5], (instregex "MGHI$")>; def : InstRW<[FXa, Lat5], (instregex "MHI$")>; def : InstRW<[FXa, LSU, Lat9], (instregex "MH(Y)?$")>; -def : InstRW<[FXa, Lat7, GroupAlone], (instregex "M(L)?R$")>; -def : InstRW<[FXa, LSU, Lat7, GroupAlone], (instregex "M(FY|L)?$")>; +def : InstRW<[FXa2, Lat7, GroupAlone], (instregex "M(L)?R$")>; +def : InstRW<[FXa2, LSU, Lat7, GroupAlone], (instregex "M(FY|L)?$")>; //===----------------------------------------------------------------------===// // Division and remainder @@ -460,8 +460,8 @@ def : InstRW<[FXa, LSU, Lat7, GroupAlone], (instregex "M(FY|L)?$")>; def : InstRW<[FXa2, FXa2, Lat20, GroupAlone], (instregex "DR$")>; def : InstRW<[FXa2, FXa2, LSU, Lat30, GroupAlone], (instregex "D$")>; -def : InstRW<[FXa, Lat30, GroupAlone], (instregex "DSG(F)?R$")>; -def : InstRW<[LSU, FXa, Lat30, GroupAlone], (instregex "DSG(F)?$")>; +def : InstRW<[FXa2, Lat30, GroupAlone], (instregex "DSG(F)?R$")>; +def : InstRW<[LSU, FXa2, Lat30, GroupAlone], (instregex "DSG(F)?$")>; def : InstRW<[FXa2, FXa2, Lat20, GroupAlone], (instregex "DLR$")>; def : InstRW<[FXa2, FXa2, Lat30, GroupAlone], (instregex "DLGR$")>; def : InstRW<[FXa2, FXa2, LSU, Lat30, GroupAlone], (instregex "DL(G)?$")>; @@ -474,7 +474,8 @@ def : InstRW<[FXa], (instregex "SLL(G|K)?$")>; def : InstRW<[FXa], (instregex "SRL(G|K)?$")>; def : InstRW<[FXa], (instregex "SRA(G|K)?$")>; def : InstRW<[FXa], (instregex "SLA(G|K)?$")>; -def : InstRW<[FXa, FXa, FXa, FXa, Lat8], (instregex "S(L|R)D(A|L)$")>; +def : InstRW<[FXa, FXa, FXa, FXa, LSU, Lat8, GroupAlone], + (instregex "S(L|R)D(A|L)$")>; // Rotate def : InstRW<[FXa, LSU, Lat6], (instregex "RLL(G)?$")>; @@ -537,7 +538,7 @@ def : InstRW<[FXb], (instregex "TMLH(64)?$")>; def : InstRW<[FXb], (instregex "TMLL(64)?$")>; // Compare logical characters under mask -def : InstRW<[FXb, LSU, Lat5], (instregex "CLM(H|Y)?$")>; +def : InstRW<[FXb, LSU, Lat6], (instregex "CLM(H|Y)?$")>; 
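These Z13 InstRW changes (here and in the remaining hunks of this file) only refine the declared machine model: which issue resources an instruction occupies, its latency class, and its decoder-grouping constraints; none of them change instruction selection. A minimal sketch, not part of this patch, of how a consumer reads the model back, assuming TSM is a TargetSchedModel already initialized for the z13 subtarget:

#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/TargetSchedule.h"
using namespace llvm;

// Resolves MI to its scheduling class (the class the instregex entries
// above remap) and returns the model's expected latency for it.
static unsigned modeledLatency(const TargetSchedModel &TSM,
                               const MachineInstr &MI) {
  return TSM.computeInstrLatency(&MI);
}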
//===----------------------------------------------------------------------===// // Prefetch and execution hint @@ -573,7 +574,7 @@ def : InstRW<[FXa, FXa, FXb, FXb, LSU, FXb, FXb, LSU, LSU, Lat20, GroupAlone], (instregex "CDSG$")>; // Compare and swap and store -def : InstRW<[FXa, Lat30, GroupAlone], (instregex "CSST$")>; +def : InstRW<[FXa, LSU, Lat30], (instregex "CSST$")>; // Perform locked operation def : InstRW<[LSU, Lat30, GroupAlone], (instregex "PLO$")>; @@ -589,36 +590,45 @@ def : InstRW<[LSU, LSU, Lat5, GroupAlone], (instregex "LPD(G)?$")>; // Translate and convert //===----------------------------------------------------------------------===// -def : InstRW<[FXa, Lat30, GroupAlone], (instregex "TR(T|TR)?(E|EOpt)?$")>; -def : InstRW<[FXa, Lat30, GroupAlone], (instregex "TR(T|O)(T|O)(Opt)?$")>; -def : InstRW<[FXa, Lat30, GroupAlone], (instregex "CU(12|14|21|24|41|42)(Opt)?$")>; -def : InstRW<[FXa, Lat30, GroupAlone], (instregex "(CUUTF|CUTFU)(Opt)?$")>; +def : InstRW<[LSU, Lat30, GroupAlone], (instregex "TR$")>; +def : InstRW<[FXa, FXa, FXa, LSU, LSU, Lat30, GroupAlone], (instregex "TRT$")>; +def : InstRW<[FXa, LSU, Lat30], (instregex "TRTR$")>; +def : InstRW<[FXa, Lat30], (instregex "TR(TR)?(T)?(E|EOpt)?$")>; +def : InstRW<[LSU, Lat30], (instregex "TR(T|O)(T|O)(Opt)?$")>; +def : InstRW<[FXa, Lat30], (instregex "CU(12|14|21|24|41|42)(Opt)?$")>; +def : InstRW<[FXa, Lat30], (instregex "(CUUTF|CUTFU)(Opt)?$")>; //===----------------------------------------------------------------------===// // Message-security assist //===----------------------------------------------------------------------===// -def : InstRW<[FXa, Lat30, GroupAlone], (instregex "KM(C|F|O|CTR)?$")>; -def : InstRW<[FXa, Lat30, GroupAlone], (instregex "(KIMD|KLMD|KMAC|PCC|PPNO)$")>; +def : InstRW<[FXa, Lat30], (instregex "KM(C|F|O|CTR)?$")>; +def : InstRW<[FXa, Lat30], (instregex "(KIMD|KLMD|KMAC|PCC|PPNO)$")>; //===----------------------------------------------------------------------===// // Decimal arithmetic //===----------------------------------------------------------------------===// -def : InstRW<[FXb, VecDF, LSU, Lat30, GroupAlone], (instregex "CVB(Y|G)?$")>; -def : InstRW<[FXb, VecDF, FXb, Lat30, GroupAlone], (instregex "CVD(Y|G)?$")>; -def : InstRW<[LSU, Lat30, GroupAlone], (instregex "MV(N|Z|O)$")>; +def : InstRW<[FXb, VecDF, VecDF, LSU, LSU, Lat30, GroupAlone], + (instregex "CVBG$")>; +def : InstRW<[FXb, VecDF, LSU, Lat30, GroupAlone], (instregex "CVB(Y)?$")>; +def : InstRW<[FXb, FXb, FXb, VecDF2, VecDF2, LSU, Lat30, GroupAlone], + (instregex "CVDG$")>; +def : InstRW<[FXb, VecDF, FXb, LSU, Lat30, GroupAlone], (instregex "CVD(Y)?$")>; +def : InstRW<[LSU, Lat10, GroupAlone], (instregex "MVO$")>; +def : InstRW<[LSU, Lat30, GroupAlone], (instregex "MV(N|Z)$")>; def : InstRW<[LSU, Lat30, GroupAlone], (instregex "(PACK|PKA|PKU)$")>; -def : InstRW<[LSU, Lat30, GroupAlone], (instregex "UNPK(A|U)?$")>; +def : InstRW<[LSU, Lat12, GroupAlone], (instregex "UNPK(A|U)$")>; +def : InstRW<[FXb, LSU, LSU, Lat9, BeginGroup], (instregex "UNPK$")>; -def : InstRW<[FXb, VecDFX, LSU, LSU, Lat9, GroupAlone], +def : InstRW<[FXb, VecDFX, LSU, LSU, LSU, Lat9, GroupAlone], (instregex "(A|S|ZA)P$")>; -def : InstRW<[FXb, VecDFX2, LSU, LSU, Lat30, GroupAlone], +def : InstRW<[FXb, VecDFX2, VecDFX2, LSU, LSU, LSU, Lat30, GroupAlone], (instregex "(M|D)P$")>; -def : InstRW<[FXb, FXb, VecDFX2, LSU, LSU, LSU, Lat15, GroupAlone], +def : InstRW<[FXb, VecDFX, VecDFX, LSU, LSU, Lat15, GroupAlone], (instregex "SRP$")>; def : InstRW<[VecDFX, 
LSU, LSU, Lat5, GroupAlone], (instregex "CP$")>; -def : InstRW<[VecDFX, LSU, Lat4, GroupAlone], (instregex "TP$")>; +def : InstRW<[VecDFX, LSU, Lat4, BeginGroup], (instregex "TP$")>; def : InstRW<[LSU, Lat30, GroupAlone], (instregex "ED(MK)?$")>; //===----------------------------------------------------------------------===// @@ -688,25 +698,25 @@ def : InstRW<[FXb], (instregex "PPA$")>; //===----------------------------------------------------------------------===// // Find leftmost one -def : InstRW<[FXa, Lat6, GroupAlone], (instregex "FLOGR$")>; +def : InstRW<[FXa, FXa, Lat6, GroupAlone], (instregex "FLOGR$")>; // Population count def : InstRW<[FXa, Lat3], (instregex "POPCNT$")>; // Extend -def : InstRW<[FXa], (instregex "AEXT128_64$")>; -def : InstRW<[FXa], (instregex "ZEXT128_(32|64)$")>; +def : InstRW<[FXa], (instregex "AEXT128$")>; +def : InstRW<[FXa], (instregex "ZEXT128$")>; // String instructions def : InstRW<[FXa, LSU, Lat30], (instregex "SRST$")>; -def : InstRW<[LSU, Lat30], (instregex "SRSTU$")>; +def : InstRW<[FXa, Lat30], (instregex "SRSTU$")>; def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CUSE$")>; // Various complex instructions -def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CFC$")>; -def : InstRW<[LSU, Lat30, GroupAlone], (instregex "UPT$")>; -def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CKSM$")>; -def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CMPSC$")>; +def : InstRW<[LSU, Lat30], (instregex "CFC$")>; +def : InstRW<[FXb, LSU, Lat30], (instregex "UPT$")>; +def : InstRW<[LSU, Lat30], (instregex "CKSM$")>; +def : InstRW<[FXa, Lat30], (instregex "CMPSC$")>; // Execute def : InstRW<[FXb, GroupAlone], (instregex "EX(RL)?$")>; @@ -833,7 +843,7 @@ def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "FIXBR(A)?$")>; // Addition def : InstRW<[VecBF, LSU, Lat12], (instregex "A(E|D)B$")>; def : InstRW<[VecBF], (instregex "A(E|D)BR$")>; -def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "AXBR$")>; +def : InstRW<[VecDF2, VecDF2, Lat10, GroupAlone], (instregex "AXBR$")>; // Subtraction def : InstRW<[VecBF, LSU, Lat12], (instregex "S(E|D)B$")>; @@ -848,9 +858,9 @@ def : InstRW<[VecBF2, VecBF2, GroupAlone], (instregex "MXDBR$")>; def : InstRW<[VecDF2, VecDF2, Lat20, GroupAlone], (instregex "MXBR$")>; // Multiply and add / subtract -def : InstRW<[VecBF, LSU, Lat12, GroupAlone], (instregex "M(A|S)EB$")>; +def : InstRW<[VecBF2, LSU, Lat12, GroupAlone], (instregex "M(A|S)EB$")>; def : InstRW<[VecBF, GroupAlone], (instregex "M(A|S)EBR$")>; -def : InstRW<[VecBF, LSU, Lat12, GroupAlone], (instregex "M(A|S)DB$")>; +def : InstRW<[VecBF2, LSU, Lat12, GroupAlone], (instregex "M(A|S)DB$")>; def : InstRW<[VecBF], (instregex "M(A|S)DBR$")>; // Division @@ -859,7 +869,7 @@ def : InstRW<[VecFPd], (instregex "D(E|D)BR$")>; def : InstRW<[VecFPd, VecFPd, GroupAlone], (instregex "DXBR$")>; // Divide to integer -def : InstRW<[VecFPd, Lat30, GroupAlone], (instregex "DI(E|D)BR$")>; +def : InstRW<[VecFPd, Lat30], (instregex "DI(E|D)BR$")>; //===----------------------------------------------------------------------===// // FP: Comparisons @@ -882,8 +892,8 @@ def : InstRW<[FXa, LSU, Lat4, GroupAlone], (instregex "EFPC$")>; def : InstRW<[FXb, LSU, Lat5, GroupAlone], (instregex "STFPC$")>; def : InstRW<[LSU, Lat3, GroupAlone], (instregex "SFPC$")>; def : InstRW<[LSU, LSU, Lat6, GroupAlone], (instregex "LFPC$")>; -def : InstRW<[FXa, Lat30, GroupAlone], (instregex "SFASR$")>; -def : InstRW<[FXa, LSU, Lat30, GroupAlone], (instregex "LFAS$")>; +def : InstRW<[FXa, Lat30], 
(instregex "SFASR$")>; +def : InstRW<[FXa, LSU, Lat30], (instregex "LFAS$")>; def : InstRW<[FXb, Lat3, GroupAlone], (instregex "SRNM(B|T)?$")>; @@ -904,7 +914,7 @@ def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "LTXR$")>; // Load rounded def : InstRW<[VecBF], (instregex "(LEDR|LRER)$")>; def : InstRW<[VecBF], (instregex "LEXR$")>; -def : InstRW<[VecDF2, VecDF2], (instregex "(LDXR|LRDR)$")>; +def : InstRW<[VecDF2], (instregex "(LDXR|LRDR)$")>; // Load lengthened def : InstRW<[LSU], (instregex "LDE$")>; @@ -955,7 +965,7 @@ def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "FIXR$")>; // Addition def : InstRW<[VecBF, LSU, Lat12], (instregex "A(E|D|U|W)$")>; def : InstRW<[VecBF], (instregex "A(E|D|U|W)R$")>; -def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "AXR$")>; +def : InstRW<[VecDF2, VecDF2, Lat10, GroupAlone], (instregex "AXR$")>; // Subtraction def : InstRW<[VecBF, LSU, Lat12], (instregex "S(E|D|U|W)$")>; @@ -968,16 +978,20 @@ def : InstRW<[VecBF], (instregex "M(D|DE|E|EE)R$")>; def : InstRW<[VecBF2, VecBF2, LSU, Lat12, GroupAlone], (instregex "MXD$")>; def : InstRW<[VecBF2, VecBF2, GroupAlone], (instregex "MXDR$")>; def : InstRW<[VecDF2, VecDF2, Lat20, GroupAlone], (instregex "MXR$")>; -def : InstRW<[VecBF2, VecBF2, LSU, Lat12, GroupAlone], (instregex "MY(H|L)?$")>; -def : InstRW<[VecBF2, VecBF2, GroupAlone], (instregex "MY(H|L)?R$")>; +def : InstRW<[VecBF2, VecBF2, LSU, Lat12, GroupAlone], (instregex "MY$")>; +def : InstRW<[VecBF2, LSU, Lat12, GroupAlone], (instregex "MY(H|L)$")>; +def : InstRW<[VecBF2, VecBF2, GroupAlone], (instregex "MYR$")>; +def : InstRW<[VecBF, GroupAlone], (instregex "MY(H|L)R$")>; // Multiply and add / subtract -def : InstRW<[VecBF, LSU, Lat12, GroupAlone], (instregex "M(A|S)E$")>; +def : InstRW<[VecBF2, LSU, Lat12, GroupAlone], (instregex "M(A|S)E$")>; def : InstRW<[VecBF, GroupAlone], (instregex "M(A|S)ER$")>; -def : InstRW<[VecBF, LSU, Lat12, GroupAlone], (instregex "M(A|S)D$")>; -def : InstRW<[VecBF], (instregex "M(A|S)DR$")>; -def : InstRW<[VecBF2, VecBF2, LSU, Lat12, GroupAlone], (instregex "MAY(H|L)?$")>; -def : InstRW<[VecBF2, VecBF2, GroupAlone], (instregex "MAY(H|L)?R$")>; +def : InstRW<[VecBF2, LSU, Lat12, GroupAlone], (instregex "M(A|S)D$")>; +def : InstRW<[VecBF, GroupAlone], (instregex "M(A|S)DR$")>; +def : InstRW<[VecBF2, LSU, Lat12, GroupAlone], (instregex "MAY(H|L)$")>; +def : InstRW<[VecBF2, VecBF2, LSU, Lat12, GroupAlone], (instregex "MAY$")>; +def : InstRW<[VecBF, GroupAlone], (instregex "MAY(H|L)R$")>; +def : InstRW<[VecBF2, VecBF2, GroupAlone], (instregex "MAYR$")>; // Division def : InstRW<[VecFPd, LSU], (instregex "D(E|D)$")>; @@ -989,8 +1003,8 @@ def : InstRW<[VecFPd, VecFPd, GroupAlone], (instregex "DXR$")>; //===----------------------------------------------------------------------===// // Compare -def : InstRW<[VecXsPm, LSU, Lat8], (instregex "C(E|D)$")>; -def : InstRW<[VecXsPm, Lat4], (instregex "C(E|D)R$")>; +def : InstRW<[VecBF, LSU, Lat12], (instregex "C(E|D)$")>; +def : InstRW<[VecBF], (instregex "C(E|D)R$")>; def : InstRW<[VecDF, VecDF, Lat20, GroupAlone], (instregex "CXR$")>; @@ -1032,7 +1046,7 @@ def : InstRW<[FXb, VecDF, VecDF, Lat30, BeginGroup], (instregex "CL(F|G)XTR$")>; def : InstRW<[FXb, VecDF, Lat9, BeginGroup], (instregex "CD(S|U)TR$")>; def : InstRW<[FXb, FXb, VecDF2, VecDF2, Lat15, GroupAlone], (instregex "CX(S|U)TR$")>; def : InstRW<[FXb, VecDF, Lat12, BeginGroup], (instregex "C(S|U)DTR$")>; -def : InstRW<[FXb, FXb, VecDF2, VecDF2, Lat15, BeginGroup], (instregex "C(S|U)XTR$")>; 
+def : InstRW<[FXb, FXb, VecDF2, VecDF2, Lat15, GroupAlone], (instregex "C(S|U)XTR$")>; // Convert from / to zoned def : InstRW<[LSU, VecDF, Lat11, BeginGroup], (instregex "CDZT$")>; @@ -1047,7 +1061,7 @@ def : InstRW<[FXb, LSU, VecDF, Lat11, BeginGroup], (instregex "CPDT$")>; def : InstRW<[FXb, LSU, VecDF, VecDF, Lat15, GroupAlone], (instregex "CPXT$")>; // Perform floating-point operation -def : InstRW<[LSU, Lat30, GroupAlone], (instregex "PFPO$")>; +def : InstRW<[FXb, Lat30], (instregex "PFPO$")>; //===----------------------------------------------------------------------===// // DFP: Unary arithmetic @@ -1071,7 +1085,7 @@ def : InstRW<[FXb, VecDF, VecDF, Lat15, BeginGroup], (instregex "ESXTR$")>; // Addition def : InstRW<[VecDF], (instregex "ADTR(A)?$")>; -def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "AXTR(A)?$")>; +def : InstRW<[VecDF2, VecDF2, Lat10, GroupAlone], (instregex "AXTR(A)?$")>; // Subtraction def : InstRW<[VecDF], (instregex "SDTR(A)?$")>; @@ -1090,15 +1104,15 @@ def : InstRW<[VecDF], (instregex "QADTR$")>; def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "QAXTR$")>; // Reround -def : InstRW<[FXb, VecDF, Lat11], (instregex "RRDTR$")>; +def : InstRW<[FXb, VecDF, Lat11, BeginGroup], (instregex "RRDTR$")>; def : InstRW<[FXb, VecDF2, VecDF2, Lat15, GroupAlone], (instregex "RRXTR$")>; // Shift significand left/right -def : InstRW<[LSU, VecDF, Lat11], (instregex "S(L|R)DT$")>; +def : InstRW<[LSU, VecDF, Lat11, GroupAlone], (instregex "S(L|R)DT$")>; def : InstRW<[LSU, VecDF2, VecDF2, Lat15, GroupAlone], (instregex "S(L|R)XT$")>; // Insert biased exponent -def : InstRW<[FXb, VecDF, Lat11], (instregex "IEDTR$")>; +def : InstRW<[FXb, VecDF, Lat11, BeginGroup], (instregex "IEDTR$")>; def : InstRW<[FXb, VecDF2, VecDF2, Lat15, GroupAlone], (instregex "IEXTR$")>; //===----------------------------------------------------------------------===// @@ -1115,7 +1129,7 @@ def : InstRW<[VecDF], (instregex "CEXTR$")>; // Test Data Class/Group def : InstRW<[LSU, VecDF, Lat11], (instregex "TD(C|G)(E|D)T$")>; -def : InstRW<[LSU, VecDF2, VecDF2, Lat15, GroupAlone], (instregex "TD(C|G)XT$")>; +def : InstRW<[LSU, VecDF, VecDF, Lat15, GroupAlone], (instregex "TD(C|G)XT$")>; // --------------------------------- Vector --------------------------------- // @@ -1271,32 +1285,43 @@ def : InstRW<[VecStr, Lat5], (instregex "VTM$")>; // Vector: Floating-point arithmetic //===----------------------------------------------------------------------===// -def : InstRW<[VecBF2], (instregex "VCD(G|GB|LG|LGB)$")>; -def : InstRW<[VecBF], (instregex "WCD(GB|LGB)$")>; +// Conversion and rounding +def : InstRW<[VecBF2], (instregex "VCD(L)?G$")>; +def : InstRW<[VecBF2], (instregex "VCD(L)?GB$")>; +def : InstRW<[VecBF], (instregex "WCD(L)?GB$")>; def : InstRW<[VecBF2], (instregex "VC(L)?GD$")>; -def : InstRW<[VecBF2], (instregex "VFADB$")>; -def : InstRW<[VecBF], (instregex "WFADB$")>; -def : InstRW<[VecBF2], (instregex "VCGDB$")>; -def : InstRW<[VecBF], (instregex "WCGDB$")>; -def : InstRW<[VecBF2], (instregex "VF(I|M|A|S)$")>; -def : InstRW<[VecBF2], (instregex "VF(I|M|S)DB$")>; -def : InstRW<[VecBF], (instregex "WF(I|M|S)DB$")>; -def : InstRW<[VecBF2], (instregex "VCLGDB$")>; -def : InstRW<[VecBF], (instregex "WCLGDB$")>; -def : InstRW<[VecXsPm], (instregex "VFL(C|N|P)DB$")>; -def : InstRW<[VecXsPm], (instregex "WFL(C|N|P)DB$")>; -def : InstRW<[VecBF2], (instregex "VFM(A|S)$")>; -def : InstRW<[VecBF2], (instregex "VFM(A|S)DB$")>; -def : InstRW<[VecBF], (instregex "WFM(A|S)DB$")>; -def : 
InstRW<[VecXsPm], (instregex "VFPSO$")>; -def : InstRW<[VecXsPm], (instregex "(V|W)FPSODB$")>; -def : InstRW<[VecXsPm, Lat4], (instregex "VFTCI(DB)?$")>; -def : InstRW<[VecXsPm, Lat4], (instregex "WFTCIDB$")>; +def : InstRW<[VecBF2], (instregex "VC(L)?GDB$")>; +def : InstRW<[VecBF], (instregex "WC(L)?GDB$")>; def : InstRW<[VecBF2], (instregex "VL(DE|ED)$")>; def : InstRW<[VecBF2], (instregex "VL(DE|ED)B$")>; def : InstRW<[VecBF], (instregex "WL(DE|ED)B$")>; +def : InstRW<[VecBF2], (instregex "VFI$")>; +def : InstRW<[VecBF2], (instregex "VFIDB$")>; +def : InstRW<[VecBF], (instregex "WFIDB$")>; + +// Sign operations +def : InstRW<[VecXsPm], (instregex "VFPSO$")>; +def : InstRW<[VecXsPm], (instregex "(V|W)FPSODB$")>; +def : InstRW<[VecXsPm], (instregex "(V|W)FL(C|N|P)DB$")>; -// divide / square root +// Test data class +def : InstRW<[VecXsPm, Lat4], (instregex "VFTCI$")>; +def : InstRW<[VecXsPm, Lat4], (instregex "(V|W)FTCIDB$")>; + +// Add / subtract +def : InstRW<[VecBF2], (instregex "VF(A|S)$")>; +def : InstRW<[VecBF2], (instregex "VF(A|S)DB$")>; +def : InstRW<[VecBF], (instregex "WF(A|S)DB$")>; + +// Multiply / multiply-and-add/subtract +def : InstRW<[VecBF2], (instregex "VFM$")>; +def : InstRW<[VecBF2], (instregex "VFMDB$")>; +def : InstRW<[VecBF], (instregex "WFMDB$")>; +def : InstRW<[VecBF2], (instregex "VFM(A|S)$")>; +def : InstRW<[VecBF2], (instregex "VFM(A|S)DB$")>; +def : InstRW<[VecBF], (instregex "WFM(A|S)DB$")>; + +// Divide / square root def : InstRW<[VecFPd], (instregex "VFD$")>; def : InstRW<[VecFPd], (instregex "(V|W)FDDB$")>; def : InstRW<[VecFPd], (instregex "VFSQ$")>; @@ -1308,10 +1333,10 @@ def : InstRW<[VecFPd], (instregex "(V|W)FSQDB$")>; def : InstRW<[VecXsPm], (instregex "VFC(E|H|HE)$")>; def : InstRW<[VecXsPm], (instregex "VFC(E|H|HE)DB$")>; -def : InstRW<[VecXsPm, Lat4], (instregex "WF(C|K)$")>; def : InstRW<[VecXsPm], (instregex "WFC(E|H|HE)DB$")>; def : InstRW<[VecXsPm, Lat4], (instregex "VFC(E|H|HE)DBS$")>; def : InstRW<[VecXsPm, Lat4], (instregex "WFC(E|H|HE)DBS$")>; +def : InstRW<[VecXsPm, Lat4], (instregex "WF(C|K)$")>; def : InstRW<[VecXsPm, Lat4], (instregex "WF(C|K)DB$")>; //===----------------------------------------------------------------------===// @@ -1351,12 +1376,12 @@ def : InstRW<[VecStr, Lat5], (instregex "VSTRCZ(B|F|H)S$")>; def : InstRW<[FXb, Lat30], (instregex "EPSW$")>; def : InstRW<[FXb, LSU, Lat30], (instregex "LPSW(E)?$")>; -def : InstRW<[FXa, Lat3], (instregex "IPK$")>; -def : InstRW<[LSU], (instregex "SPKA$")>; -def : InstRW<[LSU], (instregex "SSM$")>; -def : InstRW<[FXb], (instregex "ST(N|O)SM$")>; +def : InstRW<[FXa, Lat3, GroupAlone], (instregex "IPK$")>; +def : InstRW<[LSU, EndGroup], (instregex "SPKA$")>; +def : InstRW<[LSU, EndGroup], (instregex "SSM$")>; +def : InstRW<[FXb, LSU, GroupAlone], (instregex "ST(N|O)SM$")>; def : InstRW<[FXa, Lat3], (instregex "IAC$")>; -def : InstRW<[LSU], (instregex "SAC(F)?$")>; +def : InstRW<[LSU, EndGroup], (instregex "SAC(F)?$")>; //===----------------------------------------------------------------------===// // System: Control Register Instructions @@ -1411,14 +1436,14 @@ def : InstRW<[FXb, LSU, Lat30], (instregex "TPROT$")>; def : InstRW<[FXa, FXa, FXb, LSU, Lat8, GroupAlone], (instregex "MVC(K|P|S)$")>; def : InstRW<[FXa, LSU, Lat6, GroupAlone], (instregex "MVC(S|D)K$")>; def : InstRW<[FXb, LSU, Lat30], (instregex "MVCOS$")>; -def : InstRW<[FXb, LSU, Lat30], (instregex "MVPG$")>; +def : InstRW<[LSU, Lat30, GroupAlone], (instregex "MVPG$")>; 
//===----------------------------------------------------------------------===// // System: Address-Space Instructions //===----------------------------------------------------------------------===// def : InstRW<[FXb, LSU, Lat30], (instregex "LASP$")>; -def : InstRW<[LSU], (instregex "PALB$")>; +def : InstRW<[LSU, GroupAlone], (instregex "PALB$")>; def : InstRW<[FXb, LSU, Lat30], (instregex "PC$")>; def : InstRW<[FXb, Lat30], (instregex "PR$")>; def : InstRW<[FXb, Lat30], (instregex "PT(I)?$")>; @@ -1430,7 +1455,7 @@ def : InstRW<[FXb, Lat20], (instregex "TAR$")>; // System: Linkage-Stack Instructions //===----------------------------------------------------------------------===// -def : InstRW<[FXb, Lat30], (instregex "BAKR$")>; +def : InstRW<[FXb, Lat30, EndGroup], (instregex "BAKR$")>; def : InstRW<[FXb, Lat30], (instregex "EREG(G)?$")>; def : InstRW<[FXb, Lat30], (instregex "(E|M)STA$")>; @@ -1442,13 +1467,13 @@ def : InstRW<[FXb, Lat30], (instregex "PTFF$")>; def : InstRW<[FXb, LSU, Lat20], (instregex "SCK$")>; def : InstRW<[FXb, Lat30], (instregex "SCKPF$")>; def : InstRW<[FXb, LSU, Lat20], (instregex "SCKC$")>; -def : InstRW<[LSU, GroupAlone], (instregex "SPT$")>; +def : InstRW<[LSU, LSU, GroupAlone], (instregex "SPT$")>; def : InstRW<[LSU, LSU, LSU, FXa, FXa, FXb, Lat9, GroupAlone], (instregex "STCK(F)?$")>; def : InstRW<[LSU, LSU, LSU, LSU, FXa, FXa, FXb, FXb, Lat11, GroupAlone], (instregex "STCKE$")>; def : InstRW<[FXb, LSU, Lat9], (instregex "STCKC$")>; -def : InstRW<[LSU, LSU, FXb, Lat3], (instregex "STPT$")>; +def : InstRW<[LSU, LSU, FXb, Lat5, BeginGroup], (instregex "STPT$")>; //===----------------------------------------------------------------------===// // System: CPU-Related Instructions @@ -1459,7 +1484,7 @@ def : InstRW<[FXb, LSU, Lat30], (instregex "STIDP$")>; def : InstRW<[FXb, LSU, Lat30], (instregex "STSI$")>; def : InstRW<[FXb, LSU, Lat30], (instregex "STFL(E)?$")>; def : InstRW<[FXb, LSU, Lat30], (instregex "ECAG$")>; -def : InstRW<[FXb, LSU, Lat30], (instregex "ECTG$")>; +def : InstRW<[FXa, LSU, Lat30], (instregex "ECTG$")>; def : InstRW<[FXb, Lat30], (instregex "PTF$")>; def : InstRW<[FXb, Lat30], (instregex "PCKMO$")>; @@ -1468,7 +1493,7 @@ def : InstRW<[FXb, Lat30], (instregex "PCKMO$")>; //===----------------------------------------------------------------------===// def : InstRW<[FXb, Lat30], (instregex "SVC$")>; -def : InstRW<[FXb], (instregex "MC$")>; +def : InstRW<[FXb, GroupAlone], (instregex "MC$")>; def : InstRW<[FXb, Lat30], (instregex "DIAG$")>; def : InstRW<[FXb], (instregex "TRAC(E|G)$")>; def : InstRW<[FXb, Lat30], (instregex "TRAP(2|4)$")>; @@ -1483,7 +1508,8 @@ def : InstRW<[FXb, LSU, Lat30], (instregex "SIE$")>; def : InstRW<[FXb], (instregex "LPP$")>; def : InstRW<[FXb, Lat30], (instregex "ECPGA$")>; def : InstRW<[FXb, Lat30], (instregex "E(C|P)CTR$")>; -def : InstRW<[FXb, LSU, Lat30], (instregex "L(C|P|S)CTL$")>; +def : InstRW<[FXb, Lat30], (instregex "LCCTL$")>; +def : InstRW<[FXb, LSU, Lat30], (instregex "L(P|S)CTL$")>; def : InstRW<[FXb, LSU, Lat30], (instregex "Q(S|CTR)I$")>; def : InstRW<[FXb, Lat30], (instregex "S(C|P)CTR$")>; diff --git a/lib/Target/SystemZ/SystemZScheduleZ196.td b/lib/Target/SystemZ/SystemZScheduleZ196.td index 128049a090863..e3e1999d8ad8d 100644 --- a/lib/Target/SystemZ/SystemZScheduleZ196.td +++ b/lib/Target/SystemZ/SystemZScheduleZ196.td @@ -627,8 +627,8 @@ def : InstRW<[FXU, Lat7, GroupAlone], (instregex "FLOGR$")>; def : InstRW<[FXU, Lat3], (instregex "POPCNT$")>; // Extend -def : InstRW<[FXU], (instregex 
"AEXT128_64$")>; -def : InstRW<[FXU], (instregex "ZEXT128_(32|64)$")>; +def : InstRW<[FXU], (instregex "AEXT128$")>; +def : InstRW<[FXU], (instregex "ZEXT128$")>; // String instructions def : InstRW<[FXU, LSU, Lat30], (instregex "SRST$")>; diff --git a/lib/Target/SystemZ/SystemZScheduleZEC12.td b/lib/Target/SystemZ/SystemZScheduleZEC12.td index 76b378454631e..59f37205f4127 100644 --- a/lib/Target/SystemZ/SystemZScheduleZEC12.td +++ b/lib/Target/SystemZ/SystemZScheduleZEC12.td @@ -665,8 +665,8 @@ def : InstRW<[FXU, Lat7, GroupAlone], (instregex "FLOGR$")>; def : InstRW<[FXU, Lat3], (instregex "POPCNT$")>; // Extend -def : InstRW<[FXU], (instregex "AEXT128_64$")>; -def : InstRW<[FXU], (instregex "ZEXT128_(32|64)$")>; +def : InstRW<[FXU], (instregex "AEXT128$")>; +def : InstRW<[FXU], (instregex "ZEXT128$")>; // String instructions def : InstRW<[FXU, LSU, Lat30], (instregex "SRST$")>; diff --git a/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp index ce5c57e0f519b..9ac768b2189d7 100644 --- a/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp +++ b/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp @@ -779,15 +779,14 @@ int SystemZTTIImpl:: getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) { // vlvgp will insert two grs into a vector register, so only count half the // number of instructions. - if (Opcode == Instruction::InsertElement && - Val->getScalarType()->isIntegerTy(64)) + if (Opcode == Instruction::InsertElement && Val->isIntOrIntVectorTy(64)) return ((Index % 2 == 0) ? 1 : 0); if (Opcode == Instruction::ExtractElement) { int Cost = ((Val->getScalarSizeInBits() == 1) ? 2 /*+test-under-mask*/ : 1); // Give a slight penalty for moving out of vector pipeline to FXU unit. - if (Index == 0 && Val->getScalarType()->isIntegerTy()) + if (Index == 0 && Val->isIntOrIntVectorTy()) Cost += 1; return Cost; diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp index ad59f2f405879..00bf02469bdd9 100644 --- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp +++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp @@ -115,8 +115,8 @@ void WebAssemblyTargetAsmStreamer::emitStackPointer(uint32_t Index) { void WebAssemblyTargetAsmStreamer::emitEndFunc() { OS << "\t.endfunc\n"; } void WebAssemblyTargetAsmStreamer::emitIndirectFunctionType( - StringRef name, SmallVectorImpl<MVT> &Params, SmallVectorImpl<MVT> &Results) { - OS << "\t.functype\t" << name; + MCSymbol *Symbol, SmallVectorImpl<MVT> &Params, SmallVectorImpl<MVT> &Results) { + OS << "\t.functype\t" << Symbol->getName(); if (Results.empty()) OS << ", void"; else { @@ -171,7 +171,7 @@ void WebAssemblyTargetELFStreamer::emitIndIdx(const MCExpr *Value) { } void WebAssemblyTargetELFStreamer::emitIndirectFunctionType( - StringRef name, SmallVectorImpl<MVT> &Params, SmallVectorImpl<MVT> &Results) { + MCSymbol *Symbol, SmallVectorImpl<MVT> &Params, SmallVectorImpl<MVT> &Results) { // Nothing to emit here. TODO: Re-design how linking works and re-evaluate // whether it's necessary for .o files to declare indirect function types. } @@ -255,9 +255,25 @@ void WebAssemblyTargetWasmStreamer::emitIndIdx(const MCExpr *Value) { } void WebAssemblyTargetWasmStreamer::emitIndirectFunctionType( - StringRef name, SmallVectorImpl<MVT> &Params, SmallVectorImpl<MVT> &Results) { - // Nothing to emit here. 
TODO: Re-design how linking works and re-evaluate - // whether it's necessary for .o files to declare indirect function types. + MCSymbol *Symbol, SmallVectorImpl<MVT> &Params, + SmallVectorImpl<MVT> &Results) { + MCSymbolWasm *WasmSym = cast<MCSymbolWasm>(Symbol); + if (WasmSym->isFunction()) { + // Symbol already has its arguments and result set. + return; + } + + SmallVector<wasm::ValType, 4> ValParams; + for (MVT Ty : Params) + ValParams.push_back(WebAssembly::toValType(Ty)); + + SmallVector<wasm::ValType, 1> ValResults; + for (MVT Ty : Results) + ValResults.push_back(WebAssembly::toValType(Ty)); + + WasmSym->setParams(std::move(ValParams)); + WasmSym->setReturns(std::move(ValResults)); + WasmSym->setIsFunction(true); } void WebAssemblyTargetWasmStreamer::emitGlobalImport(StringRef name) { diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h index 5ad147e5e5960..102d7219a1e74 100644 --- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h +++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h @@ -44,7 +44,7 @@ public: /// .endfunc virtual void emitEndFunc() = 0; /// .functype - virtual void emitIndirectFunctionType(StringRef name, + virtual void emitIndirectFunctionType(MCSymbol *Symbol, SmallVectorImpl<MVT> &Params, SmallVectorImpl<MVT> &Results) = 0; /// .indidx @@ -69,7 +69,7 @@ public: void emitGlobal(ArrayRef<wasm::Global> Globals) override; void emitStackPointer(uint32_t Index) override; void emitEndFunc() override; - void emitIndirectFunctionType(StringRef name, + void emitIndirectFunctionType(MCSymbol *Symbol, SmallVectorImpl<MVT> &Params, SmallVectorImpl<MVT> &Results) override; void emitIndIdx(const MCExpr *Value) override; @@ -87,7 +87,7 @@ public: void emitGlobal(ArrayRef<wasm::Global> Globals) override; void emitStackPointer(uint32_t Index) override; void emitEndFunc() override; - void emitIndirectFunctionType(StringRef name, + void emitIndirectFunctionType(MCSymbol *Symbol, SmallVectorImpl<MVT> &Params, SmallVectorImpl<MVT> &Results) override; void emitIndIdx(const MCExpr *Value) override; @@ -105,7 +105,7 @@ public: void emitGlobal(ArrayRef<wasm::Global> Globals) override; void emitStackPointer(uint32_t Index) override; void emitEndFunc() override; - void emitIndirectFunctionType(StringRef name, + void emitIndirectFunctionType(MCSymbol *Symbol, SmallVectorImpl<MVT> &Params, SmallVectorImpl<MVT> &Results) override; void emitIndIdx(const MCExpr *Value) override; diff --git a/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp b/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp index f51585a10ca12..211358ad66cd5 100644 --- a/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp +++ b/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp @@ -84,7 +84,7 @@ void WebAssemblyAsmPrinter::EmitEndOfAsmFile(Module &M) { SmallVector<MVT, 4> Results; SmallVector<MVT, 4> Params; ComputeSignatureVTs(F, TM, Params, Results); - getTargetStreamer()->emitIndirectFunctionType(F.getName(), Params, + getTargetStreamer()->emitIndirectFunctionType(getSymbol(&F), Params, Results); } } @@ -214,11 +214,8 @@ void WebAssemblyAsmPrinter::EmitInstruction(const MachineInstr *MI) { const MCExpr *WebAssemblyAsmPrinter::lowerConstant(const Constant *CV) { if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) if (GV->getValueType()->isFunctionTy()) { - MCSymbol* Sym = getSymbol(GV); - if (!isa<MCSymbolELF>(Sym)) - cast<MCSymbolWasm>(Sym)->setIsFunction(true); return MCSymbolRefExpr::create( - Sym, 
MCSymbolRefExpr::VK_WebAssembly_FUNCTION, OutContext); + getSymbol(GV), MCSymbolRefExpr::VK_WebAssembly_FUNCTION, OutContext); } return AsmPrinter::lowerConstant(CV); } diff --git a/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp b/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp index 1691808d05a0f..700111743ee8e 100644 --- a/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp +++ b/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp @@ -132,7 +132,7 @@ static void SortBlocks(MachineFunction &MF, const MachineLoopInfo &MLI, // no blocks not dominated by the loop header. // - It's desirable to preserve the original block order when possible. // We use two ready lists; Preferred and Ready. Preferred has recently - // processed sucessors, to help preserve block sequences from the original + // processed successors, to help preserve block sequences from the original // order. Ready has the remaining ready blocks. PriorityQueue<MachineBasicBlock *, std::vector<MachineBasicBlock *>, CompareBlockNumbers> diff --git a/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp b/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp index ff186eb915039..8880539804cae 100644 --- a/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp +++ b/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp @@ -112,8 +112,6 @@ MCOperand WebAssemblyMCInstLower::LowerSymbolOperand(MCSymbol *Sym, MCSymbolRefExpr::VariantKind VK = IsFunc ? MCSymbolRefExpr::VK_WebAssembly_FUNCTION : MCSymbolRefExpr::VK_None; - if (!isa<MCSymbolELF>(Sym)) - cast<MCSymbolWasm>(Sym)->setIsFunction(IsFunc); const MCExpr *Expr = MCSymbolRefExpr::create(Sym, VK, Ctx); diff --git a/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp b/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp index c02ef4a1c399b..2599064334ee8 100644 --- a/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp +++ b/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp @@ -394,11 +394,22 @@ RuntimeLibcallSignatures[RTLIB::UNKNOWN_LIBCALL] = { /* MEMMOVE */ iPTR_func_iPTR_iPTR_iPTR, // ELEMENT-WISE ATOMIC MEMORY -/* MEMCPY_ELEMENT_ATOMIC_1 */ iPTR_func_iPTR_iPTR_iPTR, -/* MEMCPY_ELEMENT_ATOMIC_2 */ iPTR_func_iPTR_iPTR_iPTR, -/* MEMCPY_ELEMENT_ATOMIC_4 */ iPTR_func_iPTR_iPTR_iPTR, -/* MEMCPY_ELEMENT_ATOMIC_8 */ iPTR_func_iPTR_iPTR_iPTR, -/* MEMCPY_ELEMENT_ATOMIC_16 */ iPTR_func_iPTR_iPTR_iPTR, +/* MEMCPY_ELEMENT_UNORDERED_ATOMIC_1 */ unsupported, +/* MEMCPY_ELEMENT_UNORDERED_ATOMIC_2 */ unsupported, +/* MEMCPY_ELEMENT_UNORDERED_ATOMIC_4 */ unsupported, +/* MEMCPY_ELEMENT_UNORDERED_ATOMIC_8 */ unsupported, +/* MEMCPY_ELEMENT_UNORDERED_ATOMIC_16 */ unsupported, +/* MEMMOVE_ELEMENT_UNORDERED_ATOMIC_1 */ unsupported, +/* MEMMOVE_ELEMENT_UNORDERED_ATOMIC_2 */ unsupported, +/* MEMMOVE_ELEMENT_UNORDERED_ATOMIC_4 */ unsupported, +/* MEMMOVE_ELEMENT_UNORDERED_ATOMIC_8 */ unsupported, +/* MEMMOVE_ELEMENT_UNORDERED_ATOMIC_16 */ unsupported, + +/* MEMSET_ELEMENT_UNORDERED_ATOMIC_1 */ unsupported, +/* MEMSET_ELEMENT_UNORDERED_ATOMIC_2 */ unsupported, +/* MEMSET_ELEMENT_UNORDERED_ATOMIC_4 */ unsupported, +/* MEMSET_ELEMENT_UNORDERED_ATOMIC_8 */ unsupported, +/* MEMSET_ELEMENT_UNORDERED_ATOMIC_16 */ unsupported, // EXCEPTION HANDLING /* UNWIND_RESUME */ unsupported, @@ -839,11 +850,21 @@ RuntimeLibcallNames[RTLIB::UNKNOWN_LIBCALL] = { /* MEMCPY */ "memcpy", /* MEMMOVE */ "memset", /* MEMSET */ "memmove", -/* MEMCPY_ELEMENT_ATOMIC_1 */ "MEMCPY_ELEMENT_ATOMIC_1", -/* MEMCPY_ELEMENT_ATOMIC_2 */ "MEMCPY_ELEMENT_ATOMIC_2", -/* MEMCPY_ELEMENT_ATOMIC_4 */ "MEMCPY_ELEMENT_ATOMIC_4", -/* 
MEMCPY_ELEMENT_ATOMIC_8 */ "MEMCPY_ELEMENT_ATOMIC_8", -/* MEMCPY_ELEMENT_ATOMIC_16 */ "MEMCPY_ELEMENT_ATOMIC_16", +/* MEMCPY_ELEMENT_UNORDERED_ATOMIC_1 */ nullptr, +/* MEMCPY_ELEMENT_UNORDERED_ATOMIC_2 */ nullptr, +/* MEMCPY_ELEMENT_UNORDERED_ATOMIC_4 */ nullptr, +/* MEMCPY_ELEMENT_UNORDERED_ATOMIC_8 */ nullptr, +/* MEMCPY_ELEMENT_UNORDERED_ATOMIC_16 */ nullptr, +/* MEMMOVE_ELEMENT_UNORDERED_ATOMIC_1 */ nullptr, +/* MEMMOVE_ELEMENT_UNORDERED_ATOMIC_2 */ nullptr, +/* MEMMOVE_ELEMENT_UNORDERED_ATOMIC_4 */ nullptr, +/* MEMMOVE_ELEMENT_UNORDERED_ATOMIC_8 */ nullptr, +/* MEMMOVE_ELEMENT_UNORDERED_ATOMIC_16 */ nullptr, +/* MEMSET_ELEMENT_UNORDERED_ATOMIC_1 */ nullptr, +/* MEMSET_ELEMENT_UNORDERED_ATOMIC_2 */ nullptr, +/* MEMSET_ELEMENT_UNORDERED_ATOMIC_4 */ nullptr, +/* MEMSET_ELEMENT_UNORDERED_ATOMIC_8 */ nullptr, +/* MEMSET_ELEMENT_UNORDERED_ATOMIC_16 */ nullptr, /* UNWIND_RESUME */ "_Unwind_Resume", /* SYNC_VAL_COMPARE_AND_SWAP_1 */ "__sync_val_compare_and_swap_1", /* SYNC_VAL_COMPARE_AND_SWAP_2 */ "__sync_val_compare_and_swap_2", diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index 825f23dc52d9b..c1d216c8b7af8 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -2453,8 +2453,8 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, break; } - // In MS inline asm curly braces mark the begining/end of a block, therefore - // they should be interepreted as end of statement + // In MS inline asm curly braces mark the beginning/end of a block, + // therefore they should be interpreted as the end of a statement CurlyAsEndOfStatement = isParsingIntelSyntax() && isParsingInlineAsm() && (getLexer().is(AsmToken::LCurly) || getLexer().is(AsmToken::RCurly)); diff --git a/lib/Target/X86/InstPrinter/X86InstComments.cpp b/lib/Target/X86/InstPrinter/X86InstComments.cpp index 5e809c34325ee..f5f3a4cc83dc9 100644 --- a/lib/Target/X86/InstPrinter/X86InstComments.cpp +++ b/lib/Target/X86/InstPrinter/X86InstComments.cpp @@ -1038,7 +1038,7 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, case X86::EXTRQI: if (MI->getOperand(2).isImm() && MI->getOperand(3).isImm()) - DecodeEXTRQIMask(MI->getOperand(2).getImm(), + DecodeEXTRQIMask(MVT::v16i8, MI->getOperand(2).getImm(), MI->getOperand(3).getImm(), ShuffleMask); @@ -1049,7 +1049,7 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, case X86::INSERTQI: if (MI->getOperand(3).isImm() && MI->getOperand(4).isImm()) - DecodeINSERTQIMask(MI->getOperand(3).getImm(), + DecodeINSERTQIMask(MVT::v16i8, MI->getOperand(3).getImm(), MI->getOperand(4).getImm(), ShuffleMask); diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp index 914fb36f91a7d..733eac7c03212 100644 --- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp +++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -110,7 +110,7 @@ public: void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, const MCValue &Target, MutableArrayRef<char> Data, - uint64_t Value, bool IsPCRel) const override { + uint64_t Value, bool IsResolved) const override { unsigned Size = 1 << getFixupKindLog2Size(Fixup.getKind()); assert(Fixup.getOffset() + Size <= Data.size() && "Invalid fixup offset!"); diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/lib/Target/X86/Utils/X86ShuffleDecode.cpp index 1be5aec849fc6..8a0fbfb45b22d 100644 --- a/lib/Target/X86/Utils/X86ShuffleDecode.cpp +++ 
b/lib/Target/X86/Utils/X86ShuffleDecode.cpp @@ -452,15 +452,20 @@ void DecodeScalarMoveMask(MVT VT, bool IsLoad, SmallVectorImpl<int> &Mask) { Mask.push_back(IsLoad ? static_cast<int>(SM_SentinelZero) : i); } -void DecodeEXTRQIMask(int Len, int Idx, +void DecodeEXTRQIMask(MVT VT, int Len, int Idx, SmallVectorImpl<int> &ShuffleMask) { + assert(VT.is128BitVector() && "Expected 128-bit vector"); + unsigned NumElts = VT.getVectorNumElements(); + unsigned EltSize = VT.getScalarSizeInBits(); + unsigned HalfElts = NumElts / 2; + // Only the bottom 6 bits are valid for each immediate. Len &= 0x3F; Idx &= 0x3F; // We can only decode this bit extraction instruction as a shuffle if both the - // length and index work with whole bytes. - if (0 != (Len % 8) || 0 != (Idx % 8)) + // length and index work with whole elements. + if (0 != (Len % EltSize) || 0 != (Idx % EltSize)) return; // A length of zero is equivalent to a bit length of 64. @@ -469,33 +474,38 @@ void DecodeEXTRQIMask(int Len, int Idx, // If the length + index exceeds the bottom 64 bits the result is undefined. if ((Len + Idx) > 64) { - ShuffleMask.append(16, SM_SentinelUndef); + ShuffleMask.append(NumElts, SM_SentinelUndef); return; } - // Convert index and index to work with bytes. - Len /= 8; - Idx /= 8; + // Convert length and index to work with elements. + Len /= EltSize; + Idx /= EltSize; - // EXTRQ: Extract Len bytes starting from Idx. Zero pad the remaining bytes - // of the lower 64-bits. The upper 64-bits are undefined. + // EXTRQ: Extract Len elements starting from Idx. Zero pad the remaining + // elements of the lower 64-bits. The upper 64-bits are undefined. for (int i = 0; i != Len; ++i) ShuffleMask.push_back(i + Idx); - for (int i = Len; i != 8; ++i) + for (int i = Len; i != (int)HalfElts; ++i) ShuffleMask.push_back(SM_SentinelZero); - for (int i = 8; i != 16; ++i) + for (int i = HalfElts; i != (int)NumElts; ++i) ShuffleMask.push_back(SM_SentinelUndef); } -void DecodeINSERTQIMask(int Len, int Idx, +void DecodeINSERTQIMask(MVT VT, int Len, int Idx, SmallVectorImpl<int> &ShuffleMask) { + assert(VT.is128BitVector() && "Expected 128-bit vector"); + unsigned NumElts = VT.getVectorNumElements(); + unsigned EltSize = VT.getScalarSizeInBits(); + unsigned HalfElts = NumElts / 2; + // Only the bottom 6 bits are valid for each immediate. Len &= 0x3F; Idx &= 0x3F; // We can only decode this bit insertion instruction as a shuffle if both the - // length and index work with whole bytes. - if (0 != (Len % 8) || 0 != (Idx % 8)) + // length and index work with whole elements. + if (0 != (Len % EltSize) || 0 != (Idx % EltSize)) return; // A length of zero is equivalent to a bit length of 64. @@ -504,24 +514,24 @@ void DecodeINSERTQIMask(int Len, int Idx, // If the length + index exceeds the bottom 64 bits the result is undefined. if ((Len + Idx) > 64) { - ShuffleMask.append(16, SM_SentinelUndef); + ShuffleMask.append(NumElts, SM_SentinelUndef); return; } - // Convert index and index to work with bytes. - Len /= 8; - Idx /= 8; + // Convert length and index to work with elements. + Len /= EltSize; + Idx /= EltSize; - // INSERTQ: Extract lowest Len bytes from lower half of second source and - // insert over first source starting at Idx byte. The upper 64-bits are + // INSERTQ: Extract lowest Len elements from lower half of second source and + // insert over first source starting at Idx element. The upper 64-bits are // undefined. 
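// For example (illustrative): for v8i16, a bit length of 32 and bit index of // 16 give Len == 2 and Idx == 1 in elements, decoding to the mask // { 0, 8, 9, 3, u, u, u, u }.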
for (int i = 0; i != Idx; ++i) ShuffleMask.push_back(i); for (int i = 0; i != Len; ++i) - ShuffleMask.push_back(i + 16); - for (int i = Idx + Len; i != 8; ++i) + ShuffleMask.push_back(i + NumElts); + for (int i = Idx + Len; i != (int)HalfElts; ++i) ShuffleMask.push_back(i); - for (int i = 8; i != 16; ++i) + for (int i = HalfElts; i != (int)NumElts; ++i) ShuffleMask.push_back(SM_SentinelUndef); } diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.h b/lib/Target/X86/Utils/X86ShuffleDecode.h index 17619d09d0594..251c9f7558ec7 100644 --- a/lib/Target/X86/Utils/X86ShuffleDecode.h +++ b/lib/Target/X86/Utils/X86ShuffleDecode.h @@ -134,12 +134,12 @@ void DecodeZeroMoveLowMask(MVT VT, SmallVectorImpl<int> &ShuffleMask); void DecodeScalarMoveMask(MVT VT, bool IsLoad, SmallVectorImpl<int> &ShuffleMask); -/// Decode a SSE4A EXTRQ instruction as a v16i8 shuffle mask. -void DecodeEXTRQIMask(int Len, int Idx, +/// Decode a SSE4A EXTRQ instruction as a shuffle mask. +void DecodeEXTRQIMask(MVT VT, int Len, int Idx, SmallVectorImpl<int> &ShuffleMask); -/// Decode a SSE4A INSERTQ instruction as a v16i8 shuffle mask. -void DecodeINSERTQIMask(int Len, int Idx, +/// Decode a SSE4A INSERTQ instruction as a shuffle mask. +void DecodeINSERTQIMask(MVT VT, int Len, int Idx, SmallVectorImpl<int> &ShuffleMask); /// Decode a VPERMILPD/VPERMILPS variable mask from a raw array of constants. diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index 7437ebacfac3a..4ca57fe9fb00f 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -451,6 +451,7 @@ class GoldmontProc<string Name> : ProcessorModel<Name, SLMModel, [ FeatureLAHFSAHF, FeatureMPX, FeatureSHA, + FeatureRDRAND, FeatureRDSEED, FeatureXSAVE, FeatureXSAVEOPT, diff --git a/lib/Target/X86/X86CallLowering.cpp b/lib/Target/X86/X86CallLowering.cpp index 161bfa7b54748..99aeec67c3266 100644 --- a/lib/Target/X86/X86CallLowering.cpp +++ b/lib/Target/X86/X86CallLowering.cpp @@ -19,6 +19,7 @@ #include "X86InstrInfo.h" #include "X86TargetMachine.h" +#include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MachineValueType.h" @@ -35,7 +36,7 @@ using namespace llvm; X86CallLowering::X86CallLowering(const X86TargetLowering &TLI) : CallLowering(&TLI) {} -void X86CallLowering::splitToValueTypes(const ArgInfo &OrigArg, +bool X86CallLowering::splitToValueTypes(const ArgInfo &OrigArg, SmallVectorImpl<ArgInfo> &SplitArgs, const DataLayout &DL, MachineRegisterInfo &MRI, @@ -43,14 +44,24 @@ void X86CallLowering::splitToValueTypes(const ArgInfo &OrigArg, const X86TargetLowering &TLI = *getTLI<X86TargetLowering>(); LLVMContext &Context = OrigArg.Ty->getContext(); - EVT VT = TLI.getValueType(DL, OrigArg.Ty); + + SmallVector<EVT, 4> SplitVTs; + SmallVector<uint64_t, 4> Offsets; + ComputeValueVTs(TLI, DL, OrigArg.Ty, SplitVTs, &Offsets, 0); + + if (SplitVTs.size() != 1) { + // TODO: support struct/array split + return false; + } + + EVT VT = SplitVTs[0]; unsigned NumParts = TLI.getNumRegisters(Context, VT); if (NumParts == 1) { // replace the original type ( pointer -> GPR ). 
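// (For illustration: on x86-64 an i32* argument takes this path and is // re-typed as a single i64 ArgInfo.)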
SplitArgs.emplace_back(OrigArg.Reg, VT.getTypeForEVT(Context), OrigArg.Flags, OrigArg.IsFixed); - return; + return true; } SmallVector<unsigned, 8> SplitRegs; @@ -67,6 +78,7 @@ void X86CallLowering::splitToValueTypes(const ArgInfo &OrigArg, } PerformArgSplit(SplitRegs); + return true; } namespace { @@ -113,9 +125,11 @@ bool X86CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder, setArgFlags(OrigArg, AttributeList::ReturnIndex, DL, F); SmallVector<ArgInfo, 8> SplitArgs; - splitToValueTypes( - OrigArg, SplitArgs, DL, MRI, - [&](ArrayRef<unsigned> Regs) { MIRBuilder.buildUnmerge(Regs, VReg); }); + if (!splitToValueTypes(OrigArg, SplitArgs, DL, MRI, + [&](ArrayRef<unsigned> Regs) { + MIRBuilder.buildUnmerge(Regs, VReg); + })) + return false; FuncReturnHandler Handler(MIRBuilder, MRI, MIB, RetCC_X86); if (!handleAssignments(MIRBuilder, SplitArgs, Handler)) @@ -181,12 +195,23 @@ bool X86CallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder, SmallVector<ArgInfo, 8> SplitArgs; unsigned Idx = 0; for (auto &Arg : F.args()) { + + // TODO: handle the non-simple cases. + if (Arg.hasAttribute(Attribute::ByVal) || + Arg.hasAttribute(Attribute::InReg) || + Arg.hasAttribute(Attribute::StructRet) || + Arg.hasAttribute(Attribute::SwiftSelf) || + Arg.hasAttribute(Attribute::SwiftError) || + Arg.hasAttribute(Attribute::Nest)) + return false; + ArgInfo OrigArg(VRegs[Idx], Arg.getType()); - setArgFlags(OrigArg, Idx + 1, DL, F); - splitToValueTypes(OrigArg, SplitArgs, DL, MRI, - [&](ArrayRef<unsigned> Regs) { - MIRBuilder.buildMerge(VRegs[Idx], Regs); - }); + setArgFlags(OrigArg, Idx + AttributeList::FirstArgIndex, DL, F); + if (!splitToValueTypes(OrigArg, SplitArgs, DL, MRI, + [&](ArrayRef<unsigned> Regs) { + MIRBuilder.buildMerge(VRegs[Idx], Regs); + })) + return false; Idx++; } diff --git a/lib/Target/X86/X86CallLowering.h b/lib/Target/X86/X86CallLowering.h index 8a8afb5682982..6a5dabf33a0a0 100644 --- a/lib/Target/X86/X86CallLowering.h +++ b/lib/Target/X86/X86CallLowering.h @@ -39,7 +39,7 @@ private: /// A function of this type is used to perform value split action.
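/// (For example, lowerReturn passes a callback that unmerges the original /// vreg into the split registers, while lowerFormalArguments merges the /// split registers back into the original vreg.)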
typedef std::function<void(ArrayRef<unsigned>)> SplitArgTy; - void splitToValueTypes(const ArgInfo &OrigArgInfo, + bool splitToValueTypes(const ArgInfo &OrigArgInfo, SmallVectorImpl<ArgInfo> &SplitArgs, const DataLayout &DL, MachineRegisterInfo &MRI, SplitArgTy SplitArg) const; diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td index 7d146d050a5c2..6decb550ad5f8 100644 --- a/lib/Target/X86/X86CallingConv.td +++ b/lib/Target/X86/X86CallingConv.td @@ -651,7 +651,15 @@ def CC_X86_64_GHC : CallingConv<[ // Pass in STG registers: F1, F2, F3, F4, D1, D2 CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCIfSubtarget<"hasSSE1()", - CCAssignToReg<[XMM1, XMM2, XMM3, XMM4, XMM5, XMM6]>>> + CCAssignToReg<[XMM1, XMM2, XMM3, XMM4, XMM5, XMM6]>>>, + // AVX + CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64], + CCIfSubtarget<"hasAVX()", + CCAssignToReg<[YMM1, YMM2, YMM3, YMM4, YMM5, YMM6]>>>, + // AVX-512 + CCIfType<[v64i8, v32i16, v16i32, v8i64, v16f32, v8f64], + CCIfSubtarget<"hasAVX512()", + CCAssignToReg<[ZMM1, ZMM2, ZMM3, ZMM4, ZMM5, ZMM6]>>> ]>; def CC_X86_64_HiPE : CallingConv<[ diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 621505aaded9e..ee9e78146305d 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -3039,6 +3039,9 @@ bool X86FastISel::fastLowerArguments() { if (!Subtarget->is64Bit()) return false; + if (Subtarget->useSoftFloat()) + return false; + // Only handle simple cases. i.e. Up to 6 i32/i64 scalar arguments. unsigned GPRCnt = 0; unsigned FPRCnt = 0; diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index e3aa227702bea..f294e819090bc 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -972,7 +972,6 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, X86FI->setCalleeSavedFrameSize( X86FI->getCalleeSavedFrameSize() - TailCallReturnAddrDelta); - bool UseRedZone = false; bool UseStackProbe = !STI.getTargetLowering()->getStackProbeSymbolName(MF).empty(); // The default stack probe size is 4096 if the function has no stackprobesize @@ -1011,7 +1010,6 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, X86FI->setUsesRedZone(MinSize > 0 || StackSize > 0); StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0); MFI.setStackSize(StackSize); - UseRedZone = true; } // Insert stack pointer adjustment for later moving of return addr. Only @@ -1189,7 +1187,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, if (IsWin64Prologue && !IsFunclet && TRI->needsStackRealignment(MF)) AlignedNumBytes = alignTo(AlignedNumBytes, MaxAlign); if (AlignedNumBytes >= StackProbeSize && UseStackProbe) { - assert(!UseRedZone && "The Red Zone is not accounted for in stack probes"); + assert(!X86FI->getUsesRedZone() && + "The Red Zone is not accounted for in stack probes"); // Check whether EAX is livein for this block. 
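// (EAX carries the allocation size to the stack probe, so a live-in EAX has // to be preserved around the probe sequence.)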
bool isEAXAlive = isEAXLiveIn(MBB); diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index b89914f8893e7..65486cf7f529e 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -4217,6 +4217,8 @@ static bool isTargetShuffle(unsigned Opcode) { case X86ISD::PSHUFLW: case X86ISD::SHUFP: case X86ISD::INSERTPS: + case X86ISD::EXTRQI: + case X86ISD::INSERTQI: case X86ISD::PALIGNR: case X86ISD::VSHLDQ: case X86ISD::VSRLDQ: @@ -5554,6 +5556,24 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero, DecodeINSERTPSMask(cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask); IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1); break; + case X86ISD::EXTRQI: + if (isa<ConstantSDNode>(N->getOperand(1)) && + isa<ConstantSDNode>(N->getOperand(2))) { + int BitLen = N->getConstantOperandVal(1); + int BitIdx = N->getConstantOperandVal(2); + DecodeEXTRQIMask(VT, BitLen, BitIdx, Mask); + IsUnary = true; + } + break; + case X86ISD::INSERTQI: + if (isa<ConstantSDNode>(N->getOperand(2)) && + isa<ConstantSDNode>(N->getOperand(3))) { + int BitLen = N->getConstantOperandVal(2); + int BitIdx = N->getConstantOperandVal(3); + DecodeINSERTQIMask(VT, BitLen, BitIdx, Mask); + IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1); + } + break; case X86ISD::UNPCKH: DecodeUNPCKHMask(VT, Mask); IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1); @@ -9317,11 +9337,11 @@ static SDValue lowerVectorShuffleAsShift(const SDLoc &DL, MVT VT, SDValue V1, return DAG.getBitcast(VT, V); } -/// \brief Try to lower a vector shuffle using SSE4a EXTRQ/INSERTQ. -static SDValue lowerVectorShuffleWithSSE4A(const SDLoc &DL, MVT VT, SDValue V1, - SDValue V2, ArrayRef<int> Mask, - const APInt &Zeroable, - SelectionDAG &DAG) { +// EXTRQ: Extract Len elements from lower half of source, starting at Idx. +// Remainder of lower half result is zero and upper half is all undef. +static bool matchVectorShuffleAsEXTRQ(MVT VT, SDValue &V1, SDValue &V2, + ArrayRef<int> Mask, uint64_t &BitLen, + uint64_t &BitIdx, const APInt &Zeroable) { int Size = Mask.size(); int HalfSize = Size / 2; assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size"); @@ -9329,120 +9349,133 @@ static SDValue lowerVectorShuffleWithSSE4A(const SDLoc &DL, MVT VT, SDValue V1, // Upper half must be undefined. if (!isUndefInRange(Mask, HalfSize, HalfSize)) - return SDValue(); + return false; - // EXTRQ: Extract Len elements from lower half of source, starting at Idx. - // Remainder of lower half result is zero and upper half is all undef. - auto LowerAsEXTRQ = [&]() { - // Determine the extraction length from the part of the - // lower half that isn't zeroable. - int Len = HalfSize; - for (; Len > 0; --Len) - if (!Zeroable[Len - 1]) - break; - assert(Len > 0 && "Zeroable shuffle mask"); + // Determine the extraction length from the part of the + // lower half that isn't zeroable. + int Len = HalfSize; + for (; Len > 0; --Len) + if (!Zeroable[Len - 1]) + break; + assert(Len > 0 && "Zeroable shuffle mask"); - // Attempt to match first Len sequential elements from the lower half. - SDValue Src; - int Idx = -1; - for (int i = 0; i != Len; ++i) { - int M = Mask[i]; - if (M < 0) - continue; - SDValue &V = (M < Size ? V1 : V2); - M = M % Size; + // Attempt to match first Len sequential elements from the lower half. + SDValue Src; + int Idx = -1; + for (int i = 0; i != Len; ++i) { + int M = Mask[i]; + if (M == SM_SentinelUndef) + continue; + SDValue &V = (M < Size ? 
V1 : V2); + M = M % Size; - // The extracted elements must start at a valid index and all mask - // elements must be in the lower half. - if (i > M || M >= HalfSize) - return SDValue(); + // The extracted elements must start at a valid index and all mask + // elements must be in the lower half. + if (i > M || M >= HalfSize) + return false; - if (Idx < 0 || (Src == V && Idx == (M - i))) { - Src = V; - Idx = M - i; - continue; - } - return SDValue(); + if (Idx < 0 || (Src == V && Idx == (M - i))) { + Src = V; + Idx = M - i; + continue; } + return false; + } - if (Idx < 0) - return SDValue(); + if (!Src || Idx < 0) + return false; - assert((Idx + Len) <= HalfSize && "Illegal extraction mask"); - int BitLen = (Len * VT.getScalarSizeInBits()) & 0x3f; - int BitIdx = (Idx * VT.getScalarSizeInBits()) & 0x3f; - return DAG.getNode(X86ISD::EXTRQI, DL, VT, Src, - DAG.getConstant(BitLen, DL, MVT::i8), - DAG.getConstant(BitIdx, DL, MVT::i8)); - }; + assert((Idx + Len) <= HalfSize && "Illegal extraction mask"); + BitLen = (Len * VT.getScalarSizeInBits()) & 0x3f; + BitIdx = (Idx * VT.getScalarSizeInBits()) & 0x3f; + V1 = Src; + return true; +} + +// INSERTQ: Extract lowest Len elements from lower half of second source and +// insert over first source, starting at Idx. +// { A[0], .., A[Idx-1], B[0], .., B[Len-1], A[Idx+Len], .., UNDEF, ... } +static bool matchVectorShuffleAsINSERTQ(MVT VT, SDValue &V1, SDValue &V2, + ArrayRef<int> Mask, uint64_t &BitLen, + uint64_t &BitIdx) { + int Size = Mask.size(); + int HalfSize = Size / 2; + assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size"); + + // Upper half must be undefined. + if (!isUndefInRange(Mask, HalfSize, HalfSize)) + return false; + + for (int Idx = 0; Idx != HalfSize; ++Idx) { + SDValue Base; + + // Attempt to match first source from mask before insertion point. + if (isUndefInRange(Mask, 0, Idx)) { + /* EMPTY */ + } else if (isSequentialOrUndefInRange(Mask, 0, Idx, 0)) { + Base = V1; + } else if (isSequentialOrUndefInRange(Mask, 0, Idx, Size)) { + Base = V2; + } else { + continue; + } - if (SDValue ExtrQ = LowerAsEXTRQ()) - return ExtrQ; + // Extend the extraction length looking to match both the insertion of + // the second source and the remaining elements of the first. + for (int Hi = Idx + 1; Hi <= HalfSize; ++Hi) { + SDValue Insert; + int Len = Hi - Idx; - // INSERTQ: Extract lowest Len elements from lower half of second source and - // insert over first source, starting at Idx. - // { A[0], .., A[Idx-1], B[0], .., B[Len-1], A[Idx+Len], .., UNDEF, ... } - auto LowerAsInsertQ = [&]() { - for (int Idx = 0; Idx != HalfSize; ++Idx) { - SDValue Base; + // Match insertion. + if (isSequentialOrUndefInRange(Mask, Idx, Len, 0)) { + Insert = V1; + } else if (isSequentialOrUndefInRange(Mask, Idx, Len, Size)) { + Insert = V2; + } else { + continue; + } - // Attempt to match first source from mask before insertion point. - if (isUndefInRange(Mask, 0, Idx)) { + // Match the remaining elements of the lower half. 
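+ // e.g. with Size == 8, the mask { 0, 8, 9, 3, u, u, u, u } matches here + // with Base == V1, Insert == V2, Idx == 1 and Len == 2.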
+ if (isUndefInRange(Mask, Hi, HalfSize - Hi)) { /* EMPTY */ - } else if (isSequentialOrUndefInRange(Mask, 0, Idx, 0)) { + } else if ((!Base || (Base == V1)) && + isSequentialOrUndefInRange(Mask, Hi, HalfSize - Hi, Hi)) { Base = V1; - } else if (isSequentialOrUndefInRange(Mask, 0, Idx, Size)) { + } else if ((!Base || (Base == V2)) && + isSequentialOrUndefInRange(Mask, Hi, HalfSize - Hi, + Size + Hi)) { Base = V2; } else { continue; } - // Extend the extraction length looking to match both the insertion of - // the second source and the remaining elements of the first. - for (int Hi = Idx + 1; Hi <= HalfSize; ++Hi) { - SDValue Insert; - int Len = Hi - Idx; - - // Match insertion. - if (isSequentialOrUndefInRange(Mask, Idx, Len, 0)) { - Insert = V1; - } else if (isSequentialOrUndefInRange(Mask, Idx, Len, Size)) { - Insert = V2; - } else { - continue; - } - - // Match the remaining elements of the lower half. - if (isUndefInRange(Mask, Hi, HalfSize - Hi)) { - /* EMPTY */ - } else if ((!Base || (Base == V1)) && - isSequentialOrUndefInRange(Mask, Hi, HalfSize - Hi, Hi)) { - Base = V1; - } else if ((!Base || (Base == V2)) && - isSequentialOrUndefInRange(Mask, Hi, HalfSize - Hi, - Size + Hi)) { - Base = V2; - } else { - continue; - } - - // We may not have a base (first source) - this can safely be undefined. - if (!Base) - Base = DAG.getUNDEF(VT); - - int BitLen = (Len * VT.getScalarSizeInBits()) & 0x3f; - int BitIdx = (Idx * VT.getScalarSizeInBits()) & 0x3f; - return DAG.getNode(X86ISD::INSERTQI, DL, VT, Base, Insert, - DAG.getConstant(BitLen, DL, MVT::i8), - DAG.getConstant(BitIdx, DL, MVT::i8)); - } + BitLen = (Len * VT.getScalarSizeInBits()) & 0x3f; + BitIdx = (Idx * VT.getScalarSizeInBits()) & 0x3f; + V1 = Base; + V2 = Insert; + return true; } + } - return SDValue(); - }; + return false; +} + +/// \brief Try to lower a vector shuffle using SSE4a EXTRQ/INSERTQ. +static SDValue lowerVectorShuffleWithSSE4A(const SDLoc &DL, MVT VT, SDValue V1, + SDValue V2, ArrayRef<int> Mask, + const APInt &Zeroable, + SelectionDAG &DAG) { + uint64_t BitLen, BitIdx; + if (matchVectorShuffleAsEXTRQ(VT, V1, V2, Mask, BitLen, BitIdx, Zeroable)) + return DAG.getNode(X86ISD::EXTRQI, DL, VT, V1, + DAG.getConstant(BitLen, DL, MVT::i8), + DAG.getConstant(BitIdx, DL, MVT::i8)); - if (SDValue InsertQ = LowerAsInsertQ()) - return InsertQ; + if (matchVectorShuffleAsINSERTQ(VT, V1, V2, Mask, BitLen, BitIdx)) + return DAG.getNode(X86ISD::INSERTQI, DL, VT, V1 ? V1 : DAG.getUNDEF(VT), + V2 ? V2 : DAG.getUNDEF(VT), + DAG.getConstant(BitLen, DL, MVT::i8), + DAG.getConstant(BitIdx, DL, MVT::i8)); return SDValue(); } @@ -22817,7 +22850,7 @@ X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const { auto Builder = IRBuilder<>(AI); Module *M = Builder.GetInsertBlock()->getParent()->getParent(); - auto SynchScope = AI->getSynchScope(); + auto SSID = AI->getSyncScopeID(); // We must restrict the ordering to avoid generating loads with Release or // ReleaseAcquire orderings. auto Order = AtomicCmpXchgInst::getStrongestFailureOrdering(AI->getOrdering()); @@ -22839,7 +22872,7 @@ X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const { // otherwise, we might be able to be more aggressive on relaxed idempotent // rmw. In practice, they do not look useful, so we don't try to be // especially clever. - if (SynchScope == SingleThread) + if (SSID == SyncScope::SingleThread) // FIXME: we could just insert an X86ISD::MEMBARRIER here, except we are at // the IR level, so we must wrap it in an intrinsic. 
return nullptr; @@ -22858,7 +22891,7 @@ X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const { // Finally we can emit the atomic load. LoadInst *Loaded = Builder.CreateAlignedLoad(Ptr, AI->getType()->getPrimitiveSizeInBits()); - Loaded->setAtomic(Order, SynchScope); + Loaded->setAtomic(Order, SSID); AI->replaceAllUsesWith(Loaded); AI->eraseFromParent(); return Loaded; @@ -22869,13 +22902,13 @@ static SDValue LowerATOMIC_FENCE(SDValue Op, const X86Subtarget &Subtarget, SDLoc dl(Op); AtomicOrdering FenceOrdering = static_cast<AtomicOrdering>( cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()); - SynchronizationScope FenceScope = static_cast<SynchronizationScope>( + SyncScope::ID FenceSSID = static_cast<SyncScope::ID>( cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue()); // The only fence that needs an instruction is a sequentially-consistent // cross-thread fence. if (FenceOrdering == AtomicOrdering::SequentiallyConsistent && - FenceScope == CrossThread) { + FenceSSID == SyncScope::System) { if (Subtarget.hasMFence()) return DAG.getNode(X86ISD::MFENCE, dl, MVT::Other, Op.getOperand(0)); @@ -23203,6 +23236,20 @@ static SDValue LowerVectorCTPOP(SDValue Op, const X86Subtarget &Subtarget, SDLoc DL(Op.getNode()); SDValue Op0 = Op.getOperand(0); + // TRUNC(CTPOP(ZEXT(X))) to make use of vXi32/vXi64 VPOPCNT instructions. + if (Subtarget.hasVPOPCNTDQ()) { + if (VT == MVT::v8i16) { + Op = DAG.getNode(X86ISD::VZEXT, DL, MVT::v8i64, Op0); + Op = DAG.getNode(ISD::CTPOP, DL, MVT::v8i64, Op); + return DAG.getNode(X86ISD::VTRUNC, DL, VT, Op); + } + if (VT == MVT::v16i8 || VT == MVT::v16i16) { + Op = DAG.getNode(X86ISD::VZEXT, DL, MVT::v16i32, Op0); + Op = DAG.getNode(ISD::CTPOP, DL, MVT::v16i32, Op); + return DAG.getNode(X86ISD::VTRUNC, DL, VT, Op); + } + } + if (!Subtarget.hasSSSE3()) { // We can't use the fast LUT approach, so fall back on vectorized bitmath. assert(VT.is128BitVector() && "Only 128-bit vectors supported in SSE!"); @@ -27101,6 +27148,7 @@ static bool matchUnaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask, // permute instructions. // TODO: Investigate sharing more of this with shuffle lowering. static bool matchUnaryPermuteVectorShuffle(MVT MaskVT, ArrayRef<int> Mask, + const APInt &Zeroable, bool AllowFloatDomain, bool AllowIntDomain, const X86Subtarget &Subtarget, @@ -27111,38 +27159,67 @@ static bool matchUnaryPermuteVectorShuffle(MVT MaskVT, ArrayRef<int> Mask, unsigned MaskScalarSizeInBits = InputSizeInBits / NumMaskElts; MVT MaskEltVT = MVT::getIntegerVT(MaskScalarSizeInBits); - bool ContainsZeros = false; - APInt Zeroable(NumMaskElts, false); - for (unsigned i = 0; i != NumMaskElts; ++i) { - int M = Mask[i]; - if (isUndefOrZero(M)) - Zeroable.setBit(i); - ContainsZeros |= (M == SM_SentinelZero); - } + bool ContainsZeros = + llvm::any_of(Mask, [](int M) { return M == SM_SentinelZero; }); - // Attempt to match against byte/bit shifts. - // FIXME: Add 512-bit support. - if (AllowIntDomain && ((MaskVT.is128BitVector() && Subtarget.hasSSE2()) || - (MaskVT.is256BitVector() && Subtarget.hasAVX2()))) { - int ShiftAmt = matchVectorShuffleAsShift(ShuffleVT, Shuffle, - MaskScalarSizeInBits, Mask, - 0, Zeroable, Subtarget); - if (0 < ShiftAmt) { - PermuteImm = (unsigned)ShiftAmt; + // Handle VPERMI/VPERMILPD vXi64/vXf64 patterns. + if (!ContainsZeros && MaskScalarSizeInBits == 64) { + // Check for lane crossing permutes. + if (is128BitLaneCrossingShuffleMask(MaskEltVT, Mask)) { + // PERMPD/PERMQ permutes within a 256-bit vector (AVX2+).
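+ // e.g. the v4i64/v4f64 mask <2,3,0,1> becomes VPERMQ/VPERMPD with + // immediate 0x4E.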
+ if (Subtarget.hasAVX2() && MaskVT.is256BitVector()) { + Shuffle = X86ISD::VPERMI; + ShuffleVT = (AllowFloatDomain ? MVT::v4f64 : MVT::v4i64); + PermuteImm = getV4X86ShuffleImm(Mask); + return true; + } + if (Subtarget.hasAVX512() && MaskVT.is512BitVector()) { + SmallVector<int, 4> RepeatedMask; + if (is256BitLaneRepeatedShuffleMask(MVT::v8f64, Mask, RepeatedMask)) { + Shuffle = X86ISD::VPERMI; + ShuffleVT = (AllowFloatDomain ? MVT::v8f64 : MVT::v8i64); + PermuteImm = getV4X86ShuffleImm(RepeatedMask); + return true; + } + } + } else if (AllowFloatDomain && Subtarget.hasAVX()) { + // VPERMILPD can permute with a non-repeating shuffle. + Shuffle = X86ISD::VPERMILPI; + ShuffleVT = MVT::getVectorVT(MVT::f64, Mask.size()); + PermuteImm = 0; + for (int i = 0, e = Mask.size(); i != e; ++i) { + int M = Mask[i]; + if (M == SM_SentinelUndef) + continue; + assert(((M / 2) == (i / 2)) && "Out of range shuffle mask index"); + PermuteImm |= (M & 1) << i; + } return true; } } - // Ensure we don't contain any zero elements. - if (ContainsZeros) - return false; - - assert(llvm::all_of(Mask, [&](int M) { - return SM_SentinelUndef <= M && M < (int)NumMaskElts; - }) && "Expected unary shuffle"); + // Handle PSHUFD/VPERMILPI vXi32/vXf32 repeated patterns. + // AVX introduced the VPERMILPD/VPERMILPS float permutes, before then we + // had to use 2-input SHUFPD/SHUFPS shuffles (not handled here). + if ((MaskScalarSizeInBits == 64 || MaskScalarSizeInBits == 32) && + !ContainsZeros && (AllowIntDomain || Subtarget.hasAVX())) { + SmallVector<int, 4> RepeatedMask; + if (is128BitLaneRepeatedShuffleMask(MaskEltVT, Mask, RepeatedMask)) { + // Narrow the repeated mask to create 32-bit element permutes. + SmallVector<int, 4> WordMask = RepeatedMask; + if (MaskScalarSizeInBits == 64) + scaleShuffleMask(2, RepeatedMask, WordMask); + + Shuffle = (AllowIntDomain ? X86ISD::PSHUFD : X86ISD::VPERMILPI); + ShuffleVT = (AllowIntDomain ? MVT::i32 : MVT::f32); + ShuffleVT = MVT::getVectorVT(ShuffleVT, InputSizeInBits / 32); + PermuteImm = getV4X86ShuffleImm(WordMask); + return true; + } + } - // Handle PSHUFLW/PSHUFHW repeated patterns. - if (MaskScalarSizeInBits == 16) { + // Handle PSHUFLW/PSHUFHW vXi16 repeated patterns. + if (!ContainsZeros && AllowIntDomain && MaskScalarSizeInBits == 16) { SmallVector<int, 4> RepeatedMask; if (is128BitLaneRepeatedShuffleMask(MaskEltVT, Mask, RepeatedMask)) { ArrayRef<int> LoMask(Mask.data() + 0, 4); @@ -27170,78 +27247,23 @@ static bool matchUnaryPermuteVectorShuffle(MVT MaskVT, ArrayRef<int> Mask, PermuteImm = getV4X86ShuffleImm(OffsetHiMask); return true; } - - return false; } - return false; - } - - // We only support permutation of 32/64 bit elements after this. - if (MaskScalarSizeInBits != 32 && MaskScalarSizeInBits != 64) - return false; - - // AVX introduced the VPERMILPD/VPERMILPS float permutes, before then we - // had to use 2-input SHUFPD/SHUFPS shuffles (not handled here). - if ((AllowFloatDomain && !AllowIntDomain) && !Subtarget.hasAVX()) - return false; - - // Pre-AVX2 we must use float shuffles on 256-bit vectors. - if (MaskVT.is256BitVector() && !Subtarget.hasAVX2()) { - AllowFloatDomain = true; - AllowIntDomain = false; } - // Check for lane crossing permutes. - if (is128BitLaneCrossingShuffleMask(MaskEltVT, Mask)) { - // PERMPD/PERMQ permutes within a 256-bit vector (AVX2+). - if (Subtarget.hasAVX2() && MaskVT.is256BitVector() && Mask.size() == 4) { - Shuffle = X86ISD::VPERMI; - ShuffleVT = (AllowFloatDomain ? 
MVT::v4f64 : MVT::v4i64); - PermuteImm = getV4X86ShuffleImm(Mask); + // Attempt to match against byte/bit shifts. + // FIXME: Add 512-bit support. + if (AllowIntDomain && ((MaskVT.is128BitVector() && Subtarget.hasSSE2()) || + (MaskVT.is256BitVector() && Subtarget.hasAVX2()))) { + int ShiftAmt = matchVectorShuffleAsShift(ShuffleVT, Shuffle, + MaskScalarSizeInBits, Mask, + 0, Zeroable, Subtarget); + if (0 < ShiftAmt) { + PermuteImm = (unsigned)ShiftAmt; return true; } - if (Subtarget.hasAVX512() && MaskVT.is512BitVector() && Mask.size() == 8) { - SmallVector<int, 4> RepeatedMask; - if (is256BitLaneRepeatedShuffleMask(MVT::v8f64, Mask, RepeatedMask)) { - Shuffle = X86ISD::VPERMI; - ShuffleVT = (AllowFloatDomain ? MVT::v8f64 : MVT::v8i64); - PermuteImm = getV4X86ShuffleImm(RepeatedMask); - return true; - } - } - return false; } - // VPERMILPD can permute with a non-repeating shuffle. - if (AllowFloatDomain && MaskScalarSizeInBits == 64) { - Shuffle = X86ISD::VPERMILPI; - ShuffleVT = MVT::getVectorVT(MVT::f64, Mask.size()); - PermuteImm = 0; - for (int i = 0, e = Mask.size(); i != e; ++i) { - int M = Mask[i]; - if (M == SM_SentinelUndef) - continue; - assert(((M / 2) == (i / 2)) && "Out of range shuffle mask index"); - PermuteImm |= (M & 1) << i; - } - return true; - } - - // We need a repeating shuffle mask for VPERMILPS/PSHUFD. - SmallVector<int, 4> RepeatedMask; - if (!is128BitLaneRepeatedShuffleMask(MaskEltVT, Mask, RepeatedMask)) - return false; - - // Narrow the repeated mask for 32-bit element permutes. - SmallVector<int, 4> WordMask = RepeatedMask; - if (MaskScalarSizeInBits == 64) - scaleShuffleMask(2, RepeatedMask, WordMask); - - Shuffle = (AllowFloatDomain ? X86ISD::VPERMILPI : X86ISD::PSHUFD); - ShuffleVT = (AllowFloatDomain ? MVT::f32 : MVT::i32); - ShuffleVT = MVT::getVectorVT(ShuffleVT, InputSizeInBits / 32); - PermuteImm = getV4X86ShuffleImm(WordMask); - return true; + return false; } // Attempt to match a combined unary shuffle mask against supported binary @@ -27303,6 +27325,7 @@ static bool matchBinaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask, } static bool matchBinaryPermuteVectorShuffle(MVT MaskVT, ArrayRef<int> Mask, + const APInt &Zeroable, bool AllowFloatDomain, bool AllowIntDomain, SDValue &V1, SDValue &V2, SDLoc &DL, @@ -27388,11 +27411,6 @@ static bool matchBinaryPermuteVectorShuffle(MVT MaskVT, ArrayRef<int> Mask, // Attempt to combine to INSERTPS. if (AllowFloatDomain && EltSizeInBits == 32 && Subtarget.hasSSE41() && MaskVT.is128BitVector()) { - APInt Zeroable(4, 0); - for (unsigned i = 0; i != NumMaskElts; ++i) - if (Mask[i] < 0) - Zeroable.setBit(i); - if (Zeroable.getBoolValue() && matchVectorShuffleAsInsertPS(V1, V2, PermuteImm, Zeroable, Mask, DAG)) { Shuffle = X86ISD::INSERTPS; @@ -27578,7 +27596,14 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root, // Which shuffle domains are permitted? // Permit domain crossing at higher combine depths. bool AllowFloatDomain = FloatDomain || (Depth > 3); - bool AllowIntDomain = !FloatDomain || (Depth > 3); + bool AllowIntDomain = (!FloatDomain || (Depth > 3)) && + (!MaskVT.is256BitVector() || Subtarget.hasAVX2()); + + // Determine zeroable mask elements. 
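+ // (Zeroable covers undef as well as forced-zero elements; the shift and + // SSE4A matchers below can fold either kind into their patterns.)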
+ APInt Zeroable(NumMaskElts, 0); + for (unsigned i = 0; i != NumMaskElts; ++i) + if (isUndefOrZero(Mask[i])) + Zeroable.setBit(i); if (UnaryShuffle) { // If we are shuffling a X86ISD::VZEXT_LOAD then we can use the load @@ -27612,7 +27637,7 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root, return true; } - if (matchUnaryPermuteVectorShuffle(MaskVT, Mask, AllowFloatDomain, + if (matchUnaryPermuteVectorShuffle(MaskVT, Mask, Zeroable, AllowFloatDomain, AllowIntDomain, Subtarget, Shuffle, ShuffleVT, PermuteImm)) { if (Depth == 1 && Root.getOpcode() == Shuffle) @@ -27648,7 +27673,7 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root, return true; } - if (matchBinaryPermuteVectorShuffle(MaskVT, Mask, AllowFloatDomain, + if (matchBinaryPermuteVectorShuffle(MaskVT, Mask, Zeroable, AllowFloatDomain, AllowIntDomain, V1, V2, DL, DAG, Subtarget, Shuffle, ShuffleVT, PermuteImm)) { @@ -27668,6 +27693,45 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root, return true; } + // Typically from here on, we need an integer version of MaskVT. + MVT IntMaskVT = MVT::getIntegerVT(MaskEltSizeInBits); + IntMaskVT = MVT::getVectorVT(IntMaskVT, NumMaskElts); + + // Annoyingly, SSE4A instructions don't map into the above match helpers. + if (Subtarget.hasSSE4A() && AllowIntDomain && RootSizeInBits == 128) { + uint64_t BitLen, BitIdx; + if (matchVectorShuffleAsEXTRQ(IntMaskVT, V1, V2, Mask, BitLen, BitIdx, + Zeroable)) { + if (Depth == 1 && Root.getOpcode() == X86ISD::EXTRQI) + return false; // Nothing to do! + V1 = DAG.getBitcast(IntMaskVT, V1); + DCI.AddToWorklist(V1.getNode()); + Res = DAG.getNode(X86ISD::EXTRQI, DL, IntMaskVT, V1, + DAG.getConstant(BitLen, DL, MVT::i8), + DAG.getConstant(BitIdx, DL, MVT::i8)); + DCI.AddToWorklist(Res.getNode()); + DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res), + /*AddTo*/ true); + return true; + } + + if (matchVectorShuffleAsINSERTQ(IntMaskVT, V1, V2, Mask, BitLen, BitIdx)) { + if (Depth == 1 && Root.getOpcode() == X86ISD::INSERTQI) + return false; // Nothing to do! + V1 = DAG.getBitcast(IntMaskVT, V1); + DCI.AddToWorklist(V1.getNode()); + V2 = DAG.getBitcast(IntMaskVT, V2); + DCI.AddToWorklist(V2.getNode()); + Res = DAG.getNode(X86ISD::INSERTQI, DL, IntMaskVT, V1, V2, + DAG.getConstant(BitLen, DL, MVT::i8), + DAG.getConstant(BitIdx, DL, MVT::i8)); + DCI.AddToWorklist(Res.getNode()); + DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res), + /*AddTo*/ true); + return true; + } + } + // Don't try to re-form single instruction chains under any circumstances now // that we've done encoding canonicalization for them. 
if (Depth < 2) @@ -27688,9 +27752,7 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root, (Subtarget.hasBWI() && Subtarget.hasVLX() && MaskVT == MVT::v16i16) || (Subtarget.hasVBMI() && MaskVT == MVT::v64i8) || (Subtarget.hasVBMI() && Subtarget.hasVLX() && MaskVT == MVT::v32i8))) { - MVT VPermMaskSVT = MVT::getIntegerVT(MaskEltSizeInBits); - MVT VPermMaskVT = MVT::getVectorVT(VPermMaskSVT, NumMaskElts); - SDValue VPermMask = getConstVector(Mask, VPermMaskVT, DAG, DL, true); + SDValue VPermMask = getConstVector(Mask, IntMaskVT, DAG, DL, true); DCI.AddToWorklist(VPermMask.getNode()); Res = DAG.getBitcast(MaskVT, V1); DCI.AddToWorklist(Res.getNode()); @@ -27719,9 +27781,7 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root, if (Mask[i] == SM_SentinelZero) Mask[i] = NumMaskElts + i; - MVT VPermMaskSVT = MVT::getIntegerVT(MaskEltSizeInBits); - MVT VPermMaskVT = MVT::getVectorVT(VPermMaskSVT, NumMaskElts); - SDValue VPermMask = getConstVector(Mask, VPermMaskVT, DAG, DL, true); + SDValue VPermMask = getConstVector(Mask, IntMaskVT, DAG, DL, true); DCI.AddToWorklist(VPermMask.getNode()); Res = DAG.getBitcast(MaskVT, V1); DCI.AddToWorklist(Res.getNode()); @@ -27746,9 +27806,7 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root, (Subtarget.hasBWI() && Subtarget.hasVLX() && MaskVT == MVT::v16i16) || (Subtarget.hasVBMI() && MaskVT == MVT::v64i8) || (Subtarget.hasVBMI() && Subtarget.hasVLX() && MaskVT == MVT::v32i8))) { - MVT VPermMaskSVT = MVT::getIntegerVT(MaskEltSizeInBits); - MVT VPermMaskVT = MVT::getVectorVT(VPermMaskSVT, NumMaskElts); - SDValue VPermMask = getConstVector(Mask, VPermMaskVT, DAG, DL, true); + SDValue VPermMask = getConstVector(Mask, IntMaskVT, DAG, DL, true); DCI.AddToWorklist(VPermMask.getNode()); V1 = DAG.getBitcast(MaskVT, V1); DCI.AddToWorklist(V1.getNode()); @@ -27807,8 +27865,7 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root, M < 0 ? DAG.getUNDEF(MVT::i32) : DAG.getConstant(M % 4, DL, MVT::i32); VPermIdx.push_back(Idx); } - MVT VPermMaskVT = MVT::getVectorVT(MVT::i32, NumMaskElts); - SDValue VPermMask = DAG.getBuildVector(VPermMaskVT, DL, VPermIdx); + SDValue VPermMask = DAG.getBuildVector(IntMaskVT, DL, VPermIdx); DCI.AddToWorklist(VPermMask.getNode()); Res = DAG.getBitcast(MaskVT, V1); DCI.AddToWorklist(Res.getNode()); @@ -27831,8 +27888,6 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root, unsigned NumLanes = MaskVT.getSizeInBits() / 128; unsigned NumEltsPerLane = NumMaskElts / NumLanes; SmallVector<int, 8> VPerm2Idx; - MVT MaskIdxSVT = MVT::getIntegerVT(MaskVT.getScalarSizeInBits()); - MVT MaskIdxVT = MVT::getVectorVT(MaskIdxSVT, NumMaskElts); unsigned M2ZImm = 0; for (int M : Mask) { if (M == SM_SentinelUndef) { @@ -27852,7 +27907,7 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root, DCI.AddToWorklist(V1.getNode()); V2 = DAG.getBitcast(MaskVT, V2); DCI.AddToWorklist(V2.getNode()); - SDValue VPerm2MaskOp = getConstVector(VPerm2Idx, MaskIdxVT, DAG, DL, true); + SDValue VPerm2MaskOp = getConstVector(VPerm2Idx, IntMaskVT, DAG, DL, true); DCI.AddToWorklist(VPerm2MaskOp.getNode()); Res = DAG.getNode(X86ISD::VPERMIL2, DL, MaskVT, V1, V2, VPerm2MaskOp, DAG.getConstant(M2ZImm, DL, MVT::i8)); @@ -29163,9 +29218,9 @@ static SDValue combineBitcastvxi1(SelectionDAG &DAG, SDValue BitCast, // v8i16 and v16i16. 
// For these two cases, we can shuffle the upper element bytes to a // consecutive sequence at the start of the vector and treat the results as - // v16i8 or v32i8, and for v61i8 this is the prefferable solution. However, + // v16i8 or v32i8, and for v16i8 this is the preferable solution. However, // for v16i16 this is not the case, because the shuffle is expensive, so we - // avoid sign-exteding to this type entirely. + // avoid sign-extending to this type entirely. // For example, t0 := (v8i16 sext(v8i1 x)) needs to be shuffled as: // (v16i8 shuffle <0,2,4,6,8,10,12,14,u,u,...,u> (v16i8 bitcast t0), undef) MVT SExtVT; @@ -29207,7 +29262,7 @@ static SDValue combineBitcastvxi1(SelectionDAG &DAG, SDValue BitCast, SExtVT = MVT::v16i8; // For the case (i16 bitcast (v16i1 setcc v16i16 v1, v2)), // it is not profitable to sign-extend to 256-bit because this will - // require an extra cross-lane shuffle which is more exprensive than + // require an extra cross-lane shuffle which is more expensive than // truncating the result of the compare to 128-bits. break; case MVT::v32i1: @@ -29580,8 +29635,8 @@ static SDValue combineBasicSADPattern(SDNode *Extract, SelectionDAG &DAG, // (extends the sign bit which is zero). // So it is correct to skip the sign/zero extend instruction. if (Root && (Root.getOpcode() == ISD::SIGN_EXTEND || - Root.getOpcode() == ISD::ZERO_EXTEND || - Root.getOpcode() == ISD::ANY_EXTEND)) + Root.getOpcode() == ISD::ZERO_EXTEND || + Root.getOpcode() == ISD::ANY_EXTEND)) Root = Root.getOperand(0); // If there was a match, we want Root to be a select that is the root of an @@ -34950,6 +35005,40 @@ static SDValue combineAddOrSubToADCOrSBB(SDNode *N, SelectionDAG &DAG) { EVT VT = N->getValueType(0); X86::CondCode CC = (X86::CondCode)Y.getConstantOperandVal(0); + // If X is -1 or 0, then we have an opportunity to avoid constants required in + // the general case below. + auto *ConstantX = dyn_cast<ConstantSDNode>(X); + if (ConstantX) { + if ((!IsSub && CC == X86::COND_AE && ConstantX->isAllOnesValue()) || + (IsSub && CC == X86::COND_B && ConstantX->isNullValue())) { + // This is a complicated way to get -1 or 0 from the carry flag: + // -1 + SETAE --> -1 + (!CF) --> CF ? -1 : 0 --> SBB %eax, %eax + // 0 - SETB --> 0 - (CF) --> CF ? -1 : 0 --> SBB %eax, %eax + return DAG.getNode(X86ISD::SETCC_CARRY, DL, VT, + DAG.getConstant(X86::COND_B, DL, MVT::i8), + Y.getOperand(1)); + } + + if ((!IsSub && CC == X86::COND_BE && ConstantX->isAllOnesValue()) || + (IsSub && CC == X86::COND_A && ConstantX->isNullValue())) { + SDValue EFLAGS = Y->getOperand(1); + if (EFLAGS.getOpcode() == X86ISD::SUB && EFLAGS.hasOneUse() && + EFLAGS.getValueType().isInteger() && + !isa<ConstantSDNode>(EFLAGS.getOperand(1))) { + // Swap the operands of a SUB, and we have the same pattern as above.
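+ // (Unsigned A <= B is equivalent to B >= A, so swapping the compare turns + // BE/A into AE/B, which the SBB patterns above already handle.)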
+ // -1 + SETBE (SUB A, B) --> -1 + SETAE (SUB B, A) --> SUB + SBB + // 0 - SETA (SUB A, B) --> 0 - SETB (SUB B, A) --> SUB + SBB + SDValue NewSub = DAG.getNode( + X86ISD::SUB, SDLoc(EFLAGS), EFLAGS.getNode()->getVTList(), + EFLAGS.getOperand(1), EFLAGS.getOperand(0)); + SDValue NewEFLAGS = SDValue(NewSub.getNode(), EFLAGS.getResNo()); + return DAG.getNode(X86ISD::SETCC_CARRY, DL, VT, + DAG.getConstant(X86::COND_B, DL, MVT::i8), + NewEFLAGS); + } + } + } + if (CC == X86::COND_B) { // X + SETB Z --> X + (mask SBB Z, Z) // X - SETB Z --> X - (mask SBB Z, Z) @@ -34996,7 +35085,7 @@ static SDValue combineAddOrSubToADCOrSBB(SDNode *N, SelectionDAG &DAG) { // If X is -1 or 0, then we have an opportunity to avoid constants required in // the general case below. - if (auto *ConstantX = dyn_cast<ConstantSDNode>(X)) { + if (ConstantX) { // 'neg' sets the carry flag when Z != 0, so create 0 or -1 using 'sbb' with // fake operands: // 0 - (Z != 0) --> sbb %eax, %eax, (neg Z) @@ -35549,6 +35638,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case X86ISD::PINSRW: return combineVectorInsert(N, DAG, DCI, Subtarget); case X86ISD::SHUFP: // Handle all target specific shuffles case X86ISD::INSERTPS: + case X86ISD::EXTRQI: + case X86ISD::INSERTQI: case X86ISD::PALIGNR: case X86ISD::VSHLDQ: case X86ISD::VSRLDQ: diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index e1ade92979dc0..dbbc2bbba6a4a 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -767,6 +767,19 @@ namespace llvm { SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; + // Return true if it is profitable to combine a BUILD_VECTOR to a TRUNCATE + // for given operand and result types. + // Example of such a combine: + // v4i32 build_vector((extract_elt V, 0), + // (extract_elt V, 2), + // (extract_elt V, 4), + // (extract_elt V, 6)) + // --> + // v4i32 truncate (bitcast V to v4i64) + bool isDesirableToCombineBuildVectorToTruncate() const override { + return true; + } + /// Return true if the target has native support for /// the specified value type and it is 'desirable' to use the type for the /// given node type. e.g. On x86 i16 is legal, but undesirable since i16 diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index f3094b781c494..34d4816a25183 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -10488,7 +10488,7 @@ namespace { return Copy; } - // Create a virtal register in *TLSBaseAddrReg, and populate it by + // Create a virtual register in *TLSBaseAddrReg, and populate it by // inserting a copy instruction after I. Returns the new instruction. MachineInstr *SetRegister(MachineInstr &I, unsigned *TLSBaseAddrReg) { MachineFunction *MF = I.getParent()->getParent(); diff --git a/lib/Target/X86/X86InstructionSelector.cpp b/lib/Target/X86/X86InstructionSelector.cpp index e34a90e975b84..859d3288db896 100644 --- a/lib/Target/X86/X86InstructionSelector.cpp +++ b/lib/Target/X86/X86InstructionSelector.cpp @@ -32,6 +32,8 @@ #define DEBUG_TYPE "X86-isel" +#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h" + using namespace llvm; #ifndef LLVM_BUILD_GLOBAL_ISEL @@ -56,7 +58,7 @@ private: /// the patterns that don't require complex C++. bool selectImpl(MachineInstr &I) const; - // TODO: remove after suported by Tablegen-erated instruction selection. + // TODO: remove after supported by Tablegen-erated instruction selection. 
unsigned getLoadStoreOp(LLT &Ty, const RegisterBank &RB, unsigned Opc, uint64_t Alignment) const; @@ -64,6 +66,8 @@ private: MachineFunction &MF) const; bool selectFrameIndexOrGep(MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF) const; + bool selectGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI, + MachineFunction &MF) const; bool selectConstant(MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF) const; bool selectTrunc(MachineInstr &I, MachineRegisterInfo &MRI, @@ -75,6 +79,8 @@ private: bool selectUadde(MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF) const; bool selectCopy(MachineInstr &I, MachineRegisterInfo &MRI) const; + bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI, + MachineFunction &MF) const; bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF) const; bool selectInsert(MachineInstr &I, MachineRegisterInfo &MRI, @@ -262,6 +268,8 @@ bool X86InstructionSelector::select(MachineInstr &I) const { return true; if (selectFrameIndexOrGep(I, MRI, MF)) return true; + if (selectGlobalValue(I, MRI, MF)) + return true; if (selectConstant(I, MRI, MF)) return true; if (selectTrunc(I, MRI, MF)) @@ -272,6 +280,8 @@ bool X86InstructionSelector::select(MachineInstr &I) const { return true; if (selectUadde(I, MRI, MF)) return true; + if (selectUnmergeValues(I, MRI, MF)) + return true; if (selectMergeValues(I, MRI, MF)) return true; if (selectExtract(I, MRI, MF)) @@ -423,6 +433,15 @@ bool X86InstructionSelector::selectLoadStoreOp(MachineInstr &I, return constrainSelectedInstRegOperands(I, TII, TRI, RBI); } +static unsigned getLeaOP(LLT Ty, const X86Subtarget &STI) { + if (Ty == LLT::pointer(0, 64)) + return X86::LEA64r; + else if (Ty == LLT::pointer(0, 32)) + return STI.isTarget64BitILP32() ? X86::LEA64_32r : X86::LEA32r; + else + llvm_unreachable("Can't get LEA opcode. Unsupported type."); +} + bool X86InstructionSelector::selectFrameIndexOrGep(MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF) const { @@ -435,14 +454,7 @@ bool X86InstructionSelector::selectFrameIndexOrGep(MachineInstr &I, LLT Ty = MRI.getType(DefReg); // Use LEA to calculate frame index and GEP - unsigned NewOpc; - if (Ty == LLT::pointer(0, 64)) - NewOpc = X86::LEA64r; - else if (Ty == LLT::pointer(0, 32)) - NewOpc = STI.isTarget64BitILP32() ? X86::LEA64_32r : X86::LEA32r; - else - llvm_unreachable("Can't select G_FRAME_INDEX/G_GEP, unsupported type."); - + unsigned NewOpc = getLeaOP(Ty, STI); I.setDesc(TII.get(NewOpc)); MachineInstrBuilder MIB(MF, I); @@ -458,6 +470,54 @@ bool X86InstructionSelector::selectFrameIndexOrGep(MachineInstr &I, return constrainSelectedInstRegOperands(I, TII, TRI, RBI); } +bool X86InstructionSelector::selectGlobalValue(MachineInstr &I, + MachineRegisterInfo &MRI, + MachineFunction &MF) const { + unsigned Opc = I.getOpcode(); + + if (Opc != TargetOpcode::G_GLOBAL_VALUE) + return false; + + auto GV = I.getOperand(1).getGlobal(); + if (GV->isThreadLocal()) { + return false; // TODO: we don't support TLS yet. + } + + // Can't handle alternate code models yet. + if (TM.getCodeModel() != CodeModel::Small) + return false; + + X86AddressMode AM; + AM.GV = GV; + AM.GVOpFlags = STI.classifyGlobalReference(GV); + + // TODO: The ABI requires an extra load; not supported yet. + if (isGlobalStubReference(AM.GVOpFlags)) + return false; + + // TODO: This reference is relative to the PIC base; not supported yet.
+ if (isGlobalRelativeToPICBase(AM.GVOpFlags)) + return false; + + if (STI.isPICStyleRIPRel()) { + // Use rip-relative addressing. + assert(AM.Base.Reg == 0 && AM.IndexReg == 0); + AM.Base.Reg = X86::RIP; + } + + const unsigned DefReg = I.getOperand(0).getReg(); + LLT Ty = MRI.getType(DefReg); + unsigned NewOpc = getLeaOP(Ty, STI); + + I.setDesc(TII.get(NewOpc)); + MachineInstrBuilder MIB(MF, I); + + I.RemoveOperand(1); + addFullAddress(MIB, AM); + + return constrainSelectedInstRegOperands(I, TII, TRI, RBI); +} + bool X86InstructionSelector::selectConstant(MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF) const { @@ -467,7 +527,8 @@ bool X86InstructionSelector::selectConstant(MachineInstr &I, const unsigned DefReg = I.getOperand(0).getReg(); LLT Ty = MRI.getType(DefReg); - assert(Ty.isScalar() && "invalid element type."); + if (RBI.getRegBank(DefReg, MRI, TRI)->getID() != X86::GPRRegBankID) + return false; uint64_t Val = 0; if (I.getOperand(1).isCImm()) { @@ -576,37 +637,40 @@ bool X86InstructionSelector::selectZext(MachineInstr &I, const LLT DstTy = MRI.getType(DstReg); const LLT SrcTy = MRI.getType(SrcReg); - if (SrcTy == LLT::scalar(1)) { - - unsigned AndOpc; - if (DstTy == LLT::scalar(32)) - AndOpc = X86::AND32ri8; - else if (DstTy == LLT::scalar(64)) - AndOpc = X86::AND64ri8; - else - return false; + if (SrcTy != LLT::scalar(1)) + return false; - unsigned DefReg = - MRI.createVirtualRegister(getRegClass(DstTy, DstReg, MRI)); + unsigned AndOpc; + if (DstTy == LLT::scalar(8)) + AndOpc = X86::AND8ri; + else if (DstTy == LLT::scalar(16)) + AndOpc = X86::AND16ri8; + else if (DstTy == LLT::scalar(32)) + AndOpc = X86::AND32ri8; + else if (DstTy == LLT::scalar(64)) + AndOpc = X86::AND64ri8; + else + return false; + unsigned DefReg = SrcReg; + if (DstTy != LLT::scalar(8)) { + DefReg = MRI.createVirtualRegister(getRegClass(DstTy, DstReg, MRI)); BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(TargetOpcode::SUBREG_TO_REG), DefReg) .addImm(0) .addReg(SrcReg) .addImm(X86::sub_8bit); + } - MachineInstr &AndInst = - *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AndOpc), DstReg) - .addReg(DefReg) - .addImm(1); - - constrainSelectedInstRegOperands(AndInst, TII, TRI, RBI); + MachineInstr &AndInst = + *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AndOpc), DstReg) + .addReg(DefReg) + .addImm(1); - I.eraseFromParent(); - return true; - } + constrainSelectedInstRegOperands(AndInst, TII, TRI, RBI); - return false; + I.eraseFromParent(); + return true; } bool X86InstructionSelector::selectCmp(MachineInstr &I, @@ -918,6 +982,33 @@ bool X86InstructionSelector::selectInsert(MachineInstr &I, return constrainSelectedInstRegOperands(I, TII, TRI, RBI); } +bool X86InstructionSelector::selectUnmergeValues(MachineInstr &I, + MachineRegisterInfo &MRI, + MachineFunction &MF) const { + if (I.getOpcode() != TargetOpcode::G_UNMERGE_VALUES) + return false; + + // Split to extracts. 
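  // Illustrative example (editorial, not from the patch): with two s32 defs,
  //   %lo:_(s32), %hi:_(s32) = G_UNMERGE_VALUES %src:_(s64)
  // becomes a pair of offset extracts,
  //   %lo:_(s32) = G_EXTRACT %src:_(s64), 0
  //   %hi:_(s32) = G_EXTRACT %src:_(s64), 32
  // each of which is immediately re-run through select().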
+ unsigned NumDefs = I.getNumOperands() - 1; + unsigned SrcReg = I.getOperand(NumDefs).getReg(); + unsigned DefSize = MRI.getType(I.getOperand(0).getReg()).getSizeInBits(); + + for (unsigned Idx = 0; Idx < NumDefs; ++Idx) { + + MachineInstr &ExtrInst = + *BuildMI(*I.getParent(), I, I.getDebugLoc(), + TII.get(TargetOpcode::G_EXTRACT), I.getOperand(Idx).getReg()) + .addReg(SrcReg) + .addImm(Idx * DefSize); + + if (!select(ExtrInst)) + return false; + } + + I.eraseFromParent(); + return true; +} + bool X86InstructionSelector::selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF) const { diff --git a/lib/Target/X86/X86LegalizerInfo.cpp b/lib/Target/X86/X86LegalizerInfo.cpp index a5fa3340c3f12..744ba21011af7 100644 --- a/lib/Target/X86/X86LegalizerInfo.cpp +++ b/lib/Target/X86/X86LegalizerInfo.cpp @@ -69,12 +69,14 @@ void X86LegalizerInfo::setLegalizerInfo32bit() { for (auto Ty : {s8, s16, s32, p0}) setAction({MemOp, Ty}, Legal); + setAction({MemOp, s1}, WidenScalar); // And everything's fine in addrspace 0. setAction({MemOp, 1, p0}, Legal); } // Pointer-handling setAction({G_FRAME_INDEX, p0}, Legal); + setAction({G_GLOBAL_VALUE, p0}, Legal); setAction({G_GEP, p0}, Legal); setAction({G_GEP, 1, s32}, Legal); @@ -90,8 +92,10 @@ void X86LegalizerInfo::setLegalizerInfo32bit() { setAction({TargetOpcode::G_CONSTANT, s64}, NarrowScalar); // Extensions - setAction({G_ZEXT, s32}, Legal); - setAction({G_SEXT, s32}, Legal); + for (auto Ty : {s8, s16, s32}) { + setAction({G_ZEXT, Ty}, Legal); + setAction({G_SEXT, Ty}, Legal); + } for (auto Ty : {s1, s8, s16}) { setAction({G_ZEXT, 1, Ty}, Legal); @@ -125,12 +129,14 @@ void X86LegalizerInfo::setLegalizerInfo64bit() { for (auto Ty : {s8, s16, s32, s64, p0}) setAction({MemOp, Ty}, Legal); + setAction({MemOp, s1}, WidenScalar); // And everything's fine in addrspace 0. setAction({MemOp, 1, p0}, Legal); } // Pointer-handling setAction({G_FRAME_INDEX, p0}, Legal); + setAction({G_GLOBAL_VALUE, p0}, Legal); setAction({G_GEP, p0}, Legal); setAction({G_GEP, 1, s32}, Legal); @@ -146,7 +152,7 @@ void X86LegalizerInfo::setLegalizerInfo64bit() { setAction({TargetOpcode::G_CONSTANT, s1}, WidenScalar); // Extensions - for (auto Ty : {s32, s64}) { + for (auto Ty : {s8, s16, s32, s64}) { setAction({G_ZEXT, Ty}, Legal); setAction({G_SEXT, Ty}, Legal); } diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp index 33bc8e11a5729..fd2837b79103e 100644 --- a/lib/Target/X86/X86MCInstLower.cpp +++ b/lib/Target/X86/X86MCInstLower.cpp @@ -1042,7 +1042,7 @@ void X86AsmPrinter::LowerPATCHPOINT(const MachineInstr &MI, void X86AsmPrinter::LowerPATCHABLE_EVENT_CALL(const MachineInstr &MI, X86MCInstLower &MCIL) { - assert(Subtarget->is64Bit() && "XRay custom events only suports X86-64"); + assert(Subtarget->is64Bit() && "XRay custom events only supports X86-64"); // We want to emit the following pattern, which follows the x86 calling // convention to prepare for the trampoline call to be patched in. 
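The selectZext rework above reduces zero-extension from s1 to a single AND with 1, after a SUBREG_TO_REG widening when the destination is wider than 8 bits. A hedged, runnable sketch of just the semantics, with the helper name invented for illustration:

    #include <cassert>
    #include <cstdint>

    // Only the low bit of the byte holding the s1 value is defined, so
    // zext(s1) is simply "& 1" on the widened register.
    static inline uint32_t zextFromS1(uint8_t SrcByte) {
      return static_cast<uint32_t>(SrcByte) & 1u;
    }

    int main() {
      assert(zextFromS1(0xFF) == 1); // stray high bits are masked off
      assert(zextFromS1(0x02) == 0);
      return 0;
    }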
@@ -1332,6 +1332,32 @@ static std::string getShuffleComment(const MachineInstr *MI, return Comment; } +static void printConstant(const Constant *COp, raw_ostream &CS) { + if (isa<UndefValue>(COp)) { + CS << "u"; + } else if (auto *CI = dyn_cast<ConstantInt>(COp)) { + if (CI->getBitWidth() <= 64) { + CS << CI->getZExtValue(); + } else { + // print multi-word constant as (w0,w1) + const auto &Val = CI->getValue(); + CS << "("; + for (int i = 0, N = Val.getNumWords(); i < N; ++i) { + if (i > 0) + CS << ","; + CS << Val.getRawData()[i]; + } + CS << ")"; + } + } else if (auto *CF = dyn_cast<ConstantFP>(COp)) { + SmallString<32> Str; + CF->getValueAPF().toString(Str); + CS << Str; + } else { + CS << "?"; + } +} + void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { X86MCInstLower MCInstLowering(*MF, *this); const X86RegisterInfo *RI = MF->getSubtarget<X86Subtarget>().getRegisterInfo(); @@ -1766,59 +1792,73 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { // For loads from a constant pool to a vector register, print the constant // loaded. CASE_ALL_MOV_RM() + case X86::VBROADCASTF128: + case X86::VBROADCASTI128: + case X86::VBROADCASTF32X4Z256rm: + case X86::VBROADCASTF32X4rm: + case X86::VBROADCASTF32X8rm: + case X86::VBROADCASTF64X2Z128rm: + case X86::VBROADCASTF64X2rm: + case X86::VBROADCASTF64X4rm: + case X86::VBROADCASTI32X4Z256rm: + case X86::VBROADCASTI32X4rm: + case X86::VBROADCASTI32X8rm: + case X86::VBROADCASTI64X2Z128rm: + case X86::VBROADCASTI64X2rm: + case X86::VBROADCASTI64X4rm: if (!OutStreamer->isVerboseAsm()) break; if (MI->getNumOperands() <= 4) break; if (auto *C = getConstantFromPool(*MI, MI->getOperand(4))) { + int NumLanes = 1; + // Override NumLanes for the broadcast instructions. + switch (MI->getOpcode()) { + case X86::VBROADCASTF128: NumLanes = 2; break; + case X86::VBROADCASTI128: NumLanes = 2; break; + case X86::VBROADCASTF32X4Z256rm: NumLanes = 2; break; + case X86::VBROADCASTF32X4rm: NumLanes = 4; break; + case X86::VBROADCASTF32X8rm: NumLanes = 2; break; + case X86::VBROADCASTF64X2Z128rm: NumLanes = 2; break; + case X86::VBROADCASTF64X2rm: NumLanes = 4; break; + case X86::VBROADCASTF64X4rm: NumLanes = 2; break; + case X86::VBROADCASTI32X4Z256rm: NumLanes = 2; break; + case X86::VBROADCASTI32X4rm: NumLanes = 4; break; + case X86::VBROADCASTI32X8rm: NumLanes = 2; break; + case X86::VBROADCASTI64X2Z128rm: NumLanes = 2; break; + case X86::VBROADCASTI64X2rm: NumLanes = 4; break; + case X86::VBROADCASTI64X4rm: NumLanes = 2; break; + } + std::string Comment; raw_string_ostream CS(Comment); const MachineOperand &DstOp = MI->getOperand(0); CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = "; if (auto *CDS = dyn_cast<ConstantDataSequential>(C)) { CS << "["; - for (int i = 0, NumElements = CDS->getNumElements(); i < NumElements; ++i) { - if (i != 0) - CS << ","; - if (CDS->getElementType()->isIntegerTy()) - CS << CDS->getElementAsInteger(i); - else if (CDS->getElementType()->isFloatTy()) - CS << CDS->getElementAsFloat(i); - else if (CDS->getElementType()->isDoubleTy()) - CS << CDS->getElementAsDouble(i); - else - CS << "?"; + for (int l = 0; l != NumLanes; ++l) { + for (int i = 0, NumElements = CDS->getNumElements(); i < NumElements; ++i) { + if (i != 0 || l != 0) + CS << ","; + if (CDS->getElementType()->isIntegerTy()) + CS << CDS->getElementAsInteger(i); + else if (CDS->getElementType()->isFloatTy()) + CS << CDS->getElementAsFloat(i); + else if (CDS->getElementType()->isDoubleTy()) + CS << CDS->getElementAsDouble(i); + else + CS << "?"; 
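          // Editorial note (values illustrative): with NumLanes == 2 a 128-bit
          // pool entry <1,2,3,4> is printed once per lane, giving
          // "ymm0 = [1,2,3,4,1,2,3,4]".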
+ } } CS << "]"; OutStreamer->AddComment(CS.str(), !EnablePrintSchedInfo); } else if (auto *CV = dyn_cast<ConstantVector>(C)) { CS << "<"; - for (int i = 0, NumOperands = CV->getNumOperands(); i < NumOperands; ++i) { - if (i != 0) - CS << ","; - Constant *COp = CV->getOperand(i); - if (isa<UndefValue>(COp)) { - CS << "u"; - } else if (auto *CI = dyn_cast<ConstantInt>(COp)) { - if (CI->getBitWidth() <= 64) { - CS << CI->getZExtValue(); - } else { - // print multi-word constant as (w0,w1) - const auto &Val = CI->getValue(); - CS << "("; - for (int i = 0, N = Val.getNumWords(); i < N; ++i) { - if (i > 0) - CS << ","; - CS << Val.getRawData()[i]; - } - CS << ")"; - } - } else if (auto *CF = dyn_cast<ConstantFP>(COp)) { - SmallString<32> Str; - CF->getValueAPF().toString(Str); - CS << Str; - } else { - CS << "?"; + for (int l = 0; l != NumLanes; ++l) { + for (int i = 0, NumOperands = CV->getNumOperands(); i < NumOperands; ++i) { + if (i != 0 || l != 0) + CS << ","; + printConstant(CV->getOperand(i), CS); } } CS << ">"; @@ -1826,6 +1866,85 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { } } break; + case X86::VBROADCASTSSrm: + case X86::VBROADCASTSSYrm: + case X86::VBROADCASTSSZ128m: + case X86::VBROADCASTSSZ256m: + case X86::VBROADCASTSSZm: + case X86::VBROADCASTSDYrm: + case X86::VBROADCASTSDZ256m: + case X86::VBROADCASTSDZm: + case X86::VPBROADCASTBrm: + case X86::VPBROADCASTBYrm: + case X86::VPBROADCASTBZ128m: + case X86::VPBROADCASTBZ256m: + case X86::VPBROADCASTBZm: + case X86::VPBROADCASTDrm: + case X86::VPBROADCASTDYrm: + case X86::VPBROADCASTDZ128m: + case X86::VPBROADCASTDZ256m: + case X86::VPBROADCASTDZm: + case X86::VPBROADCASTQrm: + case X86::VPBROADCASTQYrm: + case X86::VPBROADCASTQZ128m: + case X86::VPBROADCASTQZ256m: + case X86::VPBROADCASTQZm: + case X86::VPBROADCASTWrm: + case X86::VPBROADCASTWYrm: + case X86::VPBROADCASTWZ128m: + case X86::VPBROADCASTWZ256m: + case X86::VPBROADCASTWZm: + if (!OutStreamer->isVerboseAsm()) + break; + if (MI->getNumOperands() <= 4) + break; + if (auto *C = getConstantFromPool(*MI, MI->getOperand(4))) { + int NumElts; + switch (MI->getOpcode()) { + default: llvm_unreachable("Invalid opcode"); + case X86::VBROADCASTSSrm: NumElts = 4; break; + case X86::VBROADCASTSSYrm: NumElts = 8; break; + case X86::VBROADCASTSSZ128m: NumElts = 4; break; + case X86::VBROADCASTSSZ256m: NumElts = 8; break; + case X86::VBROADCASTSSZm: NumElts = 16; break; + case X86::VBROADCASTSDYrm: NumElts = 4; break; + case X86::VBROADCASTSDZ256m: NumElts = 4; break; + case X86::VBROADCASTSDZm: NumElts = 8; break; + case X86::VPBROADCASTBrm: NumElts = 16; break; + case X86::VPBROADCASTBYrm: NumElts = 32; break; + case X86::VPBROADCASTBZ128m: NumElts = 16; break; + case X86::VPBROADCASTBZ256m: NumElts = 32; break; + case X86::VPBROADCASTBZm: NumElts = 64; break; + case X86::VPBROADCASTDrm: NumElts = 4; break; + case X86::VPBROADCASTDYrm: NumElts = 8; break; + case X86::VPBROADCASTDZ128m: NumElts = 4; break; + case X86::VPBROADCASTDZ256m: NumElts = 8; break; + case X86::VPBROADCASTDZm: NumElts = 16; break; + case X86::VPBROADCASTQrm: NumElts = 2; break; + case X86::VPBROADCASTQYrm: NumElts = 4; break; + case X86::VPBROADCASTQZ128m: NumElts = 2; break; + case X86::VPBROADCASTQZ256m: NumElts = 4; break; + case X86::VPBROADCASTQZm: NumElts = 8; break; + case X86::VPBROADCASTWrm: NumElts = 8; break; + case X86::VPBROADCASTWYrm: NumElts = 16; break; + case X86::VPBROADCASTWZ128m: NumElts = 8; break; + case X86::VPBROADCASTWZ256m: NumElts = 16; break; + case 
X86::VPBROADCASTWZm: NumElts = 32; break; + } + + std::string Comment; + raw_string_ostream CS(Comment); + const MachineOperand &DstOp = MI->getOperand(0); + CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = "; + CS << "["; + for (int i = 0; i != NumElts; ++i) { + if (i != 0) + CS << ","; + printConstant(C, CS); + } + CS << "]"; + OutStreamer->AddComment(CS.str(), !EnablePrintSchedInfo); + } } MCInst TmpInst; diff --git a/lib/Target/X86/X86SchedSandyBridge.td b/lib/Target/X86/X86SchedSandyBridge.td index b8ec5883152c3..6d85ca6cad647 100644 --- a/lib/Target/X86/X86SchedSandyBridge.td +++ b/lib/Target/X86/X86SchedSandyBridge.td @@ -24,8 +24,8 @@ def SandyBridgeModel : SchedMachineModel { // Based on the LSD (loop-stream detector) queue size. let LoopMicroOpBufferSize = 28; - // FIXME: SSE4 and AVX are unimplemented. This flag is set to allow - // the scheduler to assign a default model to unrecognized opcodes. + // This flag is set to allow the scheduler to assign + // a default model to unrecognized opcodes. let CompleteModel = 0; } @@ -48,6 +48,7 @@ def SBPort23 : ProcResource<2>; def SBPort4 : ProcResource<1>; // Many micro-ops are capable of issuing on multiple ports. +def SBPort01 : ProcResGroup<[SBPort0, SBPort1]>; def SBPort05 : ProcResGroup<[SBPort0, SBPort5]>; def SBPort15 : ProcResGroup<[SBPort1, SBPort5]>; def SBPort015 : ProcResGroup<[SBPort0, SBPort1, SBPort5]>; @@ -115,10 +116,10 @@ def : WriteRes<WriteIDivLd, [SBPort23, SBPort0, SBDivider]> { // Scalar and vector floating point. defm : SBWriteResPair<WriteFAdd, SBPort1, 3>; defm : SBWriteResPair<WriteFMul, SBPort0, 5>; -defm : SBWriteResPair<WriteFDiv, SBPort0, 12>; // 10-14 cycles. +defm : SBWriteResPair<WriteFDiv, SBPort0, 24>; defm : SBWriteResPair<WriteFRcp, SBPort0, 5>; defm : SBWriteResPair<WriteFRsqrt, SBPort0, 5>; -defm : SBWriteResPair<WriteFSqrt, SBPort0, 15>; +defm : SBWriteResPair<WriteFSqrt, SBPort0, 14>; defm : SBWriteResPair<WriteCvtF2I, SBPort1, 3>; defm : SBWriteResPair<WriteCvtI2F, SBPort1, 4>; defm : SBWriteResPair<WriteCvtF2F, SBPort1, 3>; @@ -134,11 +135,11 @@ def : WriteRes<WriteFVarBlendLd, [SBPort0, SBPort5, SBPort23]> { } // Vector integer operations. 
-defm : SBWriteResPair<WriteVecShift, SBPort05, 1>; -defm : SBWriteResPair<WriteVecLogic, SBPort015, 1>; -defm : SBWriteResPair<WriteVecALU, SBPort15, 1>; +defm : SBWriteResPair<WriteVecShift, SBPort5, 1>; +defm : SBWriteResPair<WriteVecLogic, SBPort5, 1>; +defm : SBWriteResPair<WriteVecALU, SBPort1, 3>; defm : SBWriteResPair<WriteVecIMul, SBPort0, 5>; -defm : SBWriteResPair<WriteShuffle, SBPort15, 1>; +defm : SBWriteResPair<WriteShuffle, SBPort5, 1>; defm : SBWriteResPair<WriteBlend, SBPort15, 1>; def : WriteRes<WriteVarBlend, [SBPort1, SBPort5]> { let Latency = 2; @@ -148,13 +149,15 @@ def : WriteRes<WriteVarBlendLd, [SBPort1, SBPort5, SBPort23]> { let Latency = 6; let ResourceCycles = [1, 1, 1]; } -def : WriteRes<WriteMPSAD, [SBPort0, SBPort1, SBPort5]> { - let Latency = 6; - let ResourceCycles = [1, 1, 1]; +def : WriteRes<WriteMPSAD, [SBPort0,SBPort15]> { + let Latency = 5; + let NumMicroOps = 3; + let ResourceCycles = [1,2]; } -def : WriteRes<WriteMPSADLd, [SBPort0, SBPort1, SBPort5, SBPort23]> { - let Latency = 6; - let ResourceCycles = [1, 1, 1, 1]; +def : WriteRes<WriteMPSADLd, [SBPort0,SBPort23,SBPort15]> { + let Latency = 11; + let NumMicroOps = 4; + let ResourceCycles = [1,1,2]; } //////////////////////////////////////////////////////////////////////////////// @@ -204,13 +207,15 @@ def : WriteRes<WritePCmpEStrMLd, [SBPort015, SBPort23]> { } // Packed Compare Implicit Length Strings, Return Index -def : WriteRes<WritePCmpIStrI, [SBPort015]> { - let Latency = 3; +def : WriteRes<WritePCmpIStrI, [SBPort0]> { + let Latency = 11; + let NumMicroOps = 3; let ResourceCycles = [3]; } -def : WriteRes<WritePCmpIStrILd, [SBPort015, SBPort23]> { - let Latency = 3; - let ResourceCycles = [3, 1]; +def : WriteRes<WritePCmpIStrILd, [SBPort0,SBPort23]> { + let Latency = 17; + let NumMicroOps = 4; + let ResourceCycles = [3,1]; } // Packed Compare Explicit Length Strings, Return Index @@ -224,22 +229,26 @@ def : WriteRes<WritePCmpEStrILd, [SBPort015, SBPort23]> { } // AES Instructions. -def : WriteRes<WriteAESDecEnc, [SBPort015]> { - let Latency = 8; - let ResourceCycles = [2]; +def : WriteRes<WriteAESDecEnc, [SBPort5,SBPort015]> { + let Latency = 7; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; } -def : WriteRes<WriteAESDecEncLd, [SBPort015, SBPort23]> { - let Latency = 8; - let ResourceCycles = [2, 1]; +def : WriteRes<WriteAESDecEncLd, [SBPort5,SBPort23,SBPort015]> { + let Latency = 13; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; } -def : WriteRes<WriteAESIMC, [SBPort015]> { - let Latency = 8; +def : WriteRes<WriteAESIMC, [SBPort5]> { + let Latency = 12; + let NumMicroOps = 2; let ResourceCycles = [2]; } -def : WriteRes<WriteAESIMCLd, [SBPort015, SBPort23]> { - let Latency = 8; - let ResourceCycles = [2, 1]; +def : WriteRes<WriteAESIMCLd, [SBPort5,SBPort23]> { + let Latency = 18; + let NumMicroOps = 3; + let ResourceCycles = [2,1]; } def : WriteRes<WriteAESKeyGen, [SBPort015]> { @@ -272,4 +281,2407 @@ def : WriteRes<WriteNop, []>; defm : SBWriteResPair<WriteFShuffle256, SBPort0, 1>; defm : SBWriteResPair<WriteShuffle256, SBPort0, 1>; defm : SBWriteResPair<WriteVarVecShift, SBPort0, 1>; + +// Remaining SNB instrs. 
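// Note: each SBWriteResGroup below fixes a latency, a micro-op count, and a
// per-resource cycle vector; the InstRW lines then attach instructions to the
// group by opcode regex. For example, SBWriteResGroup0 (next) describes
// single-uop instructions with 1-cycle latency that issue only on port 0.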
+ +def SBWriteResGroup0 : SchedWriteRes<[SBPort0]> { + let Latency = 1; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[SBWriteResGroup0], (instregex "CVTSS2SDrr")>; +def: InstRW<[SBWriteResGroup0], (instregex "PSLLDri")>; +def: InstRW<[SBWriteResGroup0], (instregex "PSLLQri")>; +def: InstRW<[SBWriteResGroup0], (instregex "PSLLWri")>; +def: InstRW<[SBWriteResGroup0], (instregex "PSRADri")>; +def: InstRW<[SBWriteResGroup0], (instregex "PSRAWri")>; +def: InstRW<[SBWriteResGroup0], (instregex "PSRLDri")>; +def: InstRW<[SBWriteResGroup0], (instregex "PSRLQri")>; +def: InstRW<[SBWriteResGroup0], (instregex "PSRLWri")>; +def: InstRW<[SBWriteResGroup0], (instregex "VCVTSS2SDrr")>; +def: InstRW<[SBWriteResGroup0], (instregex "VPMOVMSKBrr")>; +def: InstRW<[SBWriteResGroup0], (instregex "VPSLLDri")>; +def: InstRW<[SBWriteResGroup0], (instregex "VPSLLQri")>; +def: InstRW<[SBWriteResGroup0], (instregex "VPSLLWri")>; +def: InstRW<[SBWriteResGroup0], (instregex "VPSRADri")>; +def: InstRW<[SBWriteResGroup0], (instregex "VPSRAWri")>; +def: InstRW<[SBWriteResGroup0], (instregex "VPSRLDri")>; +def: InstRW<[SBWriteResGroup0], (instregex "VPSRLQri")>; +def: InstRW<[SBWriteResGroup0], (instregex "VPSRLWri")>; +def: InstRW<[SBWriteResGroup0], (instregex "VTESTPDYrr")>; +def: InstRW<[SBWriteResGroup0], (instregex "VTESTPDrr")>; +def: InstRW<[SBWriteResGroup0], (instregex "VTESTPSYrr")>; +def: InstRW<[SBWriteResGroup0], (instregex "VTESTPSrr")>; + +def SBWriteResGroup1 : SchedWriteRes<[SBPort1]> { + let Latency = 1; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[SBWriteResGroup1], (instregex "COMP_FST0r")>; +def: InstRW<[SBWriteResGroup1], (instregex "COM_FST0r")>; +def: InstRW<[SBWriteResGroup1], (instregex "UCOM_FPr")>; +def: InstRW<[SBWriteResGroup1], (instregex "UCOM_Fr")>; + +def SBWriteResGroup2 : SchedWriteRes<[SBPort5]> { + let Latency = 1; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[SBWriteResGroup2], (instregex "ANDNPDrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "ANDNPSrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "ANDPDrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "ANDPSrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "FDECSTP")>; +def: InstRW<[SBWriteResGroup2], (instregex "FFREE")>; +def: InstRW<[SBWriteResGroup2], (instregex "FINCSTP")>; +def: InstRW<[SBWriteResGroup2], (instregex "FNOP")>; +def: InstRW<[SBWriteResGroup2], (instregex "INSERTPSrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "JMP64r")>; +def: InstRW<[SBWriteResGroup2], (instregex "LD_Frr")>; +def: InstRW<[SBWriteResGroup2], (instregex "MOV64toPQIrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "MOVAPDrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "MOVAPSrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "MOVDDUPrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "MOVDI2PDIrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "MOVHLPSrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "MOVLHPSrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "MOVSDrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "MOVSHDUPrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "MOVSLDUPrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "MOVSSrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "MOVUPDrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "MOVUPSrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "ORPDrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "ORPSrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "RETQ")>; +def: InstRW<[SBWriteResGroup2], (instregex 
"SHUFPDrri")>; +def: InstRW<[SBWriteResGroup2], (instregex "SHUFPSrri")>; +def: InstRW<[SBWriteResGroup2], (instregex "ST_FPrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "ST_Frr")>; +def: InstRW<[SBWriteResGroup2], (instregex "UNPCKHPDrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "UNPCKHPSrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "UNPCKLPDrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "UNPCKLPSrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "VANDNPDYrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "VANDNPDrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "VANDNPSYrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "VANDNPSrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "VANDPDrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "VANDPDrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "VANDPSrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "VEXTRACTF128rr")>; +def: InstRW<[SBWriteResGroup2], (instregex "VINSERTF128rr")>; +def: InstRW<[SBWriteResGroup2], (instregex "VINSERTPSrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "VMOV64toPQIrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "VMOV64toPQIrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "VMOVAPDYrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "VMOVAPDrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "VMOVAPSYrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "VMOVAPSrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "VMOVDDUPYrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "VMOVDDUPrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "VMOVHLPSrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "VMOVHLPSrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "VMOVSDrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "VMOVSHDUPYrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "VMOVSHDUPrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "VMOVSLDUPYrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "VMOVSLDUPrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "VMOVSSrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "VMOVUPDYrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "VMOVUPDrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "VMOVUPSYrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "VMOVUPSrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "VORPDYrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "VORPDrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "VORPSYrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "VORPSrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "VPERMILPDri")>; +def: InstRW<[SBWriteResGroup2], (instregex "VPERMILPDrm")>; +def: InstRW<[SBWriteResGroup2], (instregex "VPERMILPDrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "VPERMILPSri")>; +def: InstRW<[SBWriteResGroup2], (instregex "VPERMILPSrm")>; +def: InstRW<[SBWriteResGroup2], (instregex "VPERMILPSrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "VPERMILPSrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "VSHUFPDYrri")>; +def: InstRW<[SBWriteResGroup2], (instregex "VSHUFPDrri")>; +def: InstRW<[SBWriteResGroup2], (instregex "VSHUFPSYrri")>; +def: InstRW<[SBWriteResGroup2], (instregex "VSHUFPSrri")>; +def: InstRW<[SBWriteResGroup2], (instregex "VUNPCKHPDrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "VUNPCKHPSrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "VUNPCKLPDYrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "VUNPCKLPDrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "VUNPCKLPSYrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "VUNPCKLPSrr")>; +def: InstRW<[SBWriteResGroup2], (instregex 
"VXORPDrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "VXORPSrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "XORPDrr")>; +def: InstRW<[SBWriteResGroup2], (instregex "XORPSrr")>; + +def SBWriteResGroup3 : SchedWriteRes<[SBPort01]> { + let Latency = 1; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[SBWriteResGroup3], (instregex "LEA64_32r")>; + +def SBWriteResGroup4 : SchedWriteRes<[SBPort0]> { + let Latency = 1; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[SBWriteResGroup4], (instregex "BLENDPDrri")>; +def: InstRW<[SBWriteResGroup4], (instregex "BLENDPSrri")>; +def: InstRW<[SBWriteResGroup4], (instregex "BT32ri8")>; +def: InstRW<[SBWriteResGroup4], (instregex "BT32rr")>; +def: InstRW<[SBWriteResGroup4], (instregex "BTC32ri8")>; +def: InstRW<[SBWriteResGroup4], (instregex "BTC32rr")>; +def: InstRW<[SBWriteResGroup4], (instregex "BTR32ri8")>; +def: InstRW<[SBWriteResGroup4], (instregex "BTR32rr")>; +def: InstRW<[SBWriteResGroup4], (instregex "BTS32ri8")>; +def: InstRW<[SBWriteResGroup4], (instregex "BTS32rr")>; +def: InstRW<[SBWriteResGroup4], (instregex "CDQ")>; +def: InstRW<[SBWriteResGroup4], (instregex "CQO")>; +def: InstRW<[SBWriteResGroup4], (instregex "LAHF")>; +def: InstRW<[SBWriteResGroup4], (instregex "SAHF")>; +def: InstRW<[SBWriteResGroup4], (instregex "SAR32ri")>; +def: InstRW<[SBWriteResGroup4], (instregex "SAR8ri")>; +def: InstRW<[SBWriteResGroup4], (instregex "SETAEr")>; +def: InstRW<[SBWriteResGroup4], (instregex "SETBr")>; +def: InstRW<[SBWriteResGroup4], (instregex "SETEr")>; +def: InstRW<[SBWriteResGroup4], (instregex "SETGEr")>; +def: InstRW<[SBWriteResGroup4], (instregex "SETGr")>; +def: InstRW<[SBWriteResGroup4], (instregex "SETLEr")>; +def: InstRW<[SBWriteResGroup4], (instregex "SETLr")>; +def: InstRW<[SBWriteResGroup4], (instregex "SETNEr")>; +def: InstRW<[SBWriteResGroup4], (instregex "SETNOr")>; +def: InstRW<[SBWriteResGroup4], (instregex "SETNPr")>; +def: InstRW<[SBWriteResGroup4], (instregex "SETNSr")>; +def: InstRW<[SBWriteResGroup4], (instregex "SETOr")>; +def: InstRW<[SBWriteResGroup4], (instregex "SETPr")>; +def: InstRW<[SBWriteResGroup4], (instregex "SETSr")>; +def: InstRW<[SBWriteResGroup4], (instregex "SHL32ri")>; +def: InstRW<[SBWriteResGroup4], (instregex "SHL64r1")>; +def: InstRW<[SBWriteResGroup4], (instregex "SHL8r1")>; +def: InstRW<[SBWriteResGroup4], (instregex "SHL8ri")>; +def: InstRW<[SBWriteResGroup4], (instregex "SHR32ri")>; +def: InstRW<[SBWriteResGroup4], (instregex "SHR8ri")>; +def: InstRW<[SBWriteResGroup4], (instregex "VBLENDPDYrri")>; +def: InstRW<[SBWriteResGroup4], (instregex "VBLENDPDrri")>; +def: InstRW<[SBWriteResGroup4], (instregex "VBLENDPSYrri")>; +def: InstRW<[SBWriteResGroup4], (instregex "VBLENDPSrri")>; +def: InstRW<[SBWriteResGroup4], (instregex "VMOVDQAYrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "VMOVDQArr")>; +def: InstRW<[SBWriteResGroup4], (instregex "VMOVDQUYrr")>; +def: InstRW<[SBWriteResGroup4], (instregex "VMOVDQUrr")>; + +def SBWriteResGroup5 : SchedWriteRes<[SBPort15]> { + let Latency = 1; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[SBWriteResGroup5], (instregex "KORTESTBrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "MMX_PABSBrr64")>; +def: InstRW<[SBWriteResGroup5], (instregex "MMX_PABSDrr64")>; +def: InstRW<[SBWriteResGroup5], (instregex "MMX_PABSWrr64")>; +def: InstRW<[SBWriteResGroup5], (instregex "MMX_PADDQirr")>; +def: InstRW<[SBWriteResGroup5], (instregex "MMX_PALIGNR64irr")>; +def: InstRW<[SBWriteResGroup5], (instregex 
"MMX_PSHUFBrr64")>; +def: InstRW<[SBWriteResGroup5], (instregex "MMX_PSIGNBrr64")>; +def: InstRW<[SBWriteResGroup5], (instregex "MMX_PSIGNDrr64")>; +def: InstRW<[SBWriteResGroup5], (instregex "MMX_PSIGNWrr64")>; +def: InstRW<[SBWriteResGroup5], (instregex "PABSBrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PABSDrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PABSWrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PACKSSDWrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PACKSSWBrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PACKUSDWrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PACKUSWBrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PADDBrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PADDDrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PADDQrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PADDSBrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PADDSWrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PADDUSBrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PADDUSWrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PADDWrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PALIGNRrri")>; +def: InstRW<[SBWriteResGroup5], (instregex "PAVGBrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PAVGWrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PBLENDWrri")>; +def: InstRW<[SBWriteResGroup5], (instregex "PCMPEQBrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PCMPEQDrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PCMPEQQrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PCMPEQWrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PCMPGTBrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PCMPGTDrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PCMPGTWrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PMAXSBrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PMAXSDrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PMAXSWrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PMAXUBrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PMAXUDrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PMAXUWrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PMINSBrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PMINSDrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PMINSWrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PMINUBrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PMINUDrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PMINUWrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PMOVSXBDrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PMOVSXBQrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PMOVSXBWrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PMOVSXDQrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PMOVSXWDrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PMOVSXWQrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PMOVZXBDrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PMOVZXBQrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PMOVZXBWrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PMOVZXDQrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PMOVZXWDrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PMOVZXWQrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PSHUFBrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PSHUFDri")>; +def: InstRW<[SBWriteResGroup5], (instregex "PSHUFHWri")>; +def: InstRW<[SBWriteResGroup5], (instregex "PSHUFLWri")>; +def: InstRW<[SBWriteResGroup5], (instregex "PSIGNBrr128")>; +def: InstRW<[SBWriteResGroup5], (instregex "PSIGNDrr128")>; +def: InstRW<[SBWriteResGroup5], (instregex 
"PSIGNWrr128")>; +def: InstRW<[SBWriteResGroup5], (instregex "PSLLDQri")>; +def: InstRW<[SBWriteResGroup5], (instregex "PSRLDQri")>; +def: InstRW<[SBWriteResGroup5], (instregex "PSUBBrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PSUBDrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PSUBQrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PSUBSBrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PSUBSWrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PSUBUSBrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PSUBUSWrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PSUBWrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PUNPCKHBWrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PUNPCKHDQrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PUNPCKHQDQrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PUNPCKHWDrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PUNPCKLBWrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PUNPCKLDQrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PUNPCKLQDQrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "PUNPCKLWDrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VMASKMOVPSYrm")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPABSBrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPABSDrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPABSWrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPACKSSDWrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPACKSSWBrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPACKUSDWrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPACKUSWBrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPADDBrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPADDDrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPADDQrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPADDUSBrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPADDUSWrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPALIGNRrri")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPAVGBrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPAVGWrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPBLENDWrri")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPCMPEQBrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPCMPEQDrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPCMPEQWrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPCMPGTBrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPCMPGTDrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPCMPGTWrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPMAXSBrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPMAXSDrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPMAXSWrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPMAXUBrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPMAXUDrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPMAXUWrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPMINSBrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPMINSDrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPMINSWrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPMINUBrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPMINUDrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPMINUWrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPMOVSXBDrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPMOVSXBQrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPMOVSXBWrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPMOVSXDQrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPMOVSXWDrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPMOVSXWQrr")>; +def: 
InstRW<[SBWriteResGroup5], (instregex "VPMOVZXBDrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPMOVZXBQrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPMOVZXBWrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPMOVZXDQrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPMOVZXWDrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPMOVZXWQrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPSHUFBrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPSHUFDri")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPSHUFLWri")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPSIGNBrr128")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPSIGNDrr128")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPSIGNWrr128")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPSLLDQri")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPSRLDQri")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPSUBBrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPSUBDrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPSUBQrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPSUBSBrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPSUBSWrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPSUBUSBrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPSUBUSWrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPSUBWrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPUNPCKHBWrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPUNPCKHDQrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPUNPCKHWDrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPUNPCKLDQrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPUNPCKLQDQrr")>; +def: InstRW<[SBWriteResGroup5], (instregex "VPUNPCKLWDrr")>; + +def SBWriteResGroup6 : SchedWriteRes<[SBPort015]> { + let Latency = 1; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[SBWriteResGroup6], (instregex "ADD32ri8")>; +def: InstRW<[SBWriteResGroup6], (instregex "ADD32rr")>; +def: InstRW<[SBWriteResGroup6], (instregex "ADD8ri")>; +def: InstRW<[SBWriteResGroup6], (instregex "ADD8rr")>; +def: InstRW<[SBWriteResGroup6], (instregex "AND32ri")>; +def: InstRW<[SBWriteResGroup6], (instregex "AND64ri8")>; +def: InstRW<[SBWriteResGroup6], (instregex "AND64rr")>; +def: InstRW<[SBWriteResGroup6], (instregex "AND8ri")>; +def: InstRW<[SBWriteResGroup6], (instregex "AND8rr")>; +def: InstRW<[SBWriteResGroup6], (instregex "CBW")>; +def: InstRW<[SBWriteResGroup6], (instregex "CMC")>; +def: InstRW<[SBWriteResGroup6], (instregex "CMP16ri8")>; +def: InstRW<[SBWriteResGroup6], (instregex "CMP32i32")>; +def: InstRW<[SBWriteResGroup6], (instregex "CMP64rr")>; +def: InstRW<[SBWriteResGroup6], (instregex "CMP8ri")>; +def: InstRW<[SBWriteResGroup6], (instregex "CMP8rr")>; +def: InstRW<[SBWriteResGroup6], (instregex "CWDE")>; +def: InstRW<[SBWriteResGroup6], (instregex "DEC64r")>; +def: InstRW<[SBWriteResGroup6], (instregex "DEC8r")>; +def: InstRW<[SBWriteResGroup6], (instregex "INC64r")>; +def: InstRW<[SBWriteResGroup6], (instregex "INC8r")>; +def: InstRW<[SBWriteResGroup6], (instregex "MMX_MOVD64from64rr")>; +def: InstRW<[SBWriteResGroup6], (instregex "MMX_MOVQ2DQrr")>; +def: InstRW<[SBWriteResGroup6], (instregex "MOV32rr")>; +def: InstRW<[SBWriteResGroup6], (instregex "MOV8ri")>; +def: InstRW<[SBWriteResGroup6], (instregex "MOV8rr")>; +def: InstRW<[SBWriteResGroup6], (instregex "MOVDQArr")>; +def: InstRW<[SBWriteResGroup6], (instregex "MOVDQUrr")>; +def: InstRW<[SBWriteResGroup6], (instregex "MOVPQI2QIrr")>; +def: InstRW<[SBWriteResGroup6], (instregex "MOVSX32rr16")>; +def: 
InstRW<[SBWriteResGroup6], (instregex "MOVSX32rr8")>; +def: InstRW<[SBWriteResGroup6], (instregex "MOVZX32rr16")>; +def: InstRW<[SBWriteResGroup6], (instregex "MOVZX32rr8")>; +def: InstRW<[SBWriteResGroup6], (instregex "NEG64r")>; +def: InstRW<[SBWriteResGroup6], (instregex "NEG8r")>; +def: InstRW<[SBWriteResGroup6], (instregex "NOT64r")>; +def: InstRW<[SBWriteResGroup6], (instregex "NOT8r")>; +def: InstRW<[SBWriteResGroup6], (instregex "OR64ri8")>; +def: InstRW<[SBWriteResGroup6], (instregex "OR64rr")>; +def: InstRW<[SBWriteResGroup6], (instregex "OR8ri")>; +def: InstRW<[SBWriteResGroup6], (instregex "OR8rr")>; +def: InstRW<[SBWriteResGroup6], (instregex "PANDNrr")>; +def: InstRW<[SBWriteResGroup6], (instregex "PANDrr")>; +def: InstRW<[SBWriteResGroup6], (instregex "PORrr")>; +def: InstRW<[SBWriteResGroup6], (instregex "PXORrr")>; +def: InstRW<[SBWriteResGroup6], (instregex "STC")>; +def: InstRW<[SBWriteResGroup6], (instregex "SUB64ri8")>; +def: InstRW<[SBWriteResGroup6], (instregex "SUB64rr")>; +def: InstRW<[SBWriteResGroup6], (instregex "SUB8ri")>; +def: InstRW<[SBWriteResGroup6], (instregex "SUB8rr")>; +def: InstRW<[SBWriteResGroup6], (instregex "TEST64rr")>; +def: InstRW<[SBWriteResGroup6], (instregex "TEST8ri")>; +def: InstRW<[SBWriteResGroup6], (instregex "TEST8rr")>; +def: InstRW<[SBWriteResGroup6], (instregex "VMOVPQI2QIrr")>; +def: InstRW<[SBWriteResGroup6], (instregex "VMOVZPQILo2PQIrr")>; +def: InstRW<[SBWriteResGroup6], (instregex "VPANDNrr")>; +def: InstRW<[SBWriteResGroup6], (instregex "VPANDrr")>; +def: InstRW<[SBWriteResGroup6], (instregex "VPORrr")>; +def: InstRW<[SBWriteResGroup6], (instregex "VPXORrr")>; +def: InstRW<[SBWriteResGroup6], (instregex "XOR32rr")>; +def: InstRW<[SBWriteResGroup6], (instregex "XOR64ri8")>; +def: InstRW<[SBWriteResGroup6], (instregex "XOR8ri")>; +def: InstRW<[SBWriteResGroup6], (instregex "XOR8rr")>; + +def SBWriteResGroup7 : SchedWriteRes<[SBPort0]> { + let Latency = 2; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[SBWriteResGroup7], (instregex "MOVMSKPDrr")>; +def: InstRW<[SBWriteResGroup7], (instregex "MOVMSKPSrr")>; +def: InstRW<[SBWriteResGroup7], (instregex "MOVPDI2DIrr")>; +def: InstRW<[SBWriteResGroup7], (instregex "MOVPQIto64rr")>; +def: InstRW<[SBWriteResGroup7], (instregex "PMOVMSKBrr")>; +def: InstRW<[SBWriteResGroup7], (instregex "VMOVMSKPDYrr")>; +def: InstRW<[SBWriteResGroup7], (instregex "VMOVMSKPDrr")>; +def: InstRW<[SBWriteResGroup7], (instregex "VMOVMSKPSrr")>; +def: InstRW<[SBWriteResGroup7], (instregex "VMOVPDI2DIrr")>; +def: InstRW<[SBWriteResGroup7], (instregex "VMOVPQIto64rr")>; + +def SBWriteResGroup9 : SchedWriteRes<[SBPort0]> { + let Latency = 2; + let NumMicroOps = 2; + let ResourceCycles = [2]; +} +def: InstRW<[SBWriteResGroup9], (instregex "BLENDVPDrr0")>; +def: InstRW<[SBWriteResGroup9], (instregex "BLENDVPSrr0")>; +def: InstRW<[SBWriteResGroup9], (instregex "ROL32ri")>; +def: InstRW<[SBWriteResGroup9], (instregex "ROL8ri")>; +def: InstRW<[SBWriteResGroup9], (instregex "ROR32ri")>; +def: InstRW<[SBWriteResGroup9], (instregex "ROR8ri")>; +def: InstRW<[SBWriteResGroup9], (instregex "SETAr")>; +def: InstRW<[SBWriteResGroup9], (instregex "SETBEr")>; +def: InstRW<[SBWriteResGroup9], (instregex "VBLENDVPDYrr")>; +def: InstRW<[SBWriteResGroup9], (instregex "VBLENDVPDrr")>; +def: InstRW<[SBWriteResGroup9], (instregex "VBLENDVPSYrr")>; +def: InstRW<[SBWriteResGroup9], (instregex "VBLENDVPSrr")>; + +def SBWriteResGroup10 : SchedWriteRes<[SBPort15]> { + let Latency = 2; + let NumMicroOps = 2; + let 
ResourceCycles = [2]; +} +def: InstRW<[SBWriteResGroup10], (instregex "VPBLENDVBrr")>; + +def SBWriteResGroup11 : SchedWriteRes<[SBPort015]> { + let Latency = 2; + let NumMicroOps = 2; + let ResourceCycles = [2]; +} +def: InstRW<[SBWriteResGroup11], (instregex "SCASB")>; +def: InstRW<[SBWriteResGroup11], (instregex "SCASL")>; +def: InstRW<[SBWriteResGroup11], (instregex "SCASQ")>; +def: InstRW<[SBWriteResGroup11], (instregex "SCASW")>; + +def SBWriteResGroup12 : SchedWriteRes<[SBPort0,SBPort1]> { + let Latency = 2; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SBWriteResGroup12], (instregex "COMISDrr")>; +def: InstRW<[SBWriteResGroup12], (instregex "COMISSrr")>; +def: InstRW<[SBWriteResGroup12], (instregex "UCOMISDrr")>; +def: InstRW<[SBWriteResGroup12], (instregex "UCOMISSrr")>; +def: InstRW<[SBWriteResGroup12], (instregex "VCOMISDrr")>; +def: InstRW<[SBWriteResGroup12], (instregex "VCOMISSrr")>; +def: InstRW<[SBWriteResGroup12], (instregex "VUCOMISDrr")>; +def: InstRW<[SBWriteResGroup12], (instregex "VUCOMISSrr")>; + +def SBWriteResGroup13 : SchedWriteRes<[SBPort0,SBPort5]> { + let Latency = 2; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SBWriteResGroup13], (instregex "CVTPS2PDrr")>; +def: InstRW<[SBWriteResGroup13], (instregex "PTESTrr")>; +def: InstRW<[SBWriteResGroup13], (instregex "VCVTPS2PDYrr")>; +def: InstRW<[SBWriteResGroup13], (instregex "VCVTPS2PDrr")>; +def: InstRW<[SBWriteResGroup13], (instregex "VPTESTYrr")>; +def: InstRW<[SBWriteResGroup13], (instregex "VPTESTrr")>; + +def SBWriteResGroup14 : SchedWriteRes<[SBPort0,SBPort15]> { + let Latency = 2; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SBWriteResGroup14], (instregex "PSLLDrr")>; +def: InstRW<[SBWriteResGroup14], (instregex "PSLLQrr")>; +def: InstRW<[SBWriteResGroup14], (instregex "PSLLWrr")>; +def: InstRW<[SBWriteResGroup14], (instregex "PSRADrr")>; +def: InstRW<[SBWriteResGroup14], (instregex "PSRAWrr")>; +def: InstRW<[SBWriteResGroup14], (instregex "PSRLDrr")>; +def: InstRW<[SBWriteResGroup14], (instregex "PSRLQrr")>; +def: InstRW<[SBWriteResGroup14], (instregex "PSRLWrr")>; +def: InstRW<[SBWriteResGroup14], (instregex "VPSRADrr")>; +def: InstRW<[SBWriteResGroup14], (instregex "VPSRAWrr")>; +def: InstRW<[SBWriteResGroup14], (instregex "VPSRLDrr")>; +def: InstRW<[SBWriteResGroup14], (instregex "VPSRLQrr")>; +def: InstRW<[SBWriteResGroup14], (instregex "VPSRLWrr")>; + +def SBWriteResGroup15 : SchedWriteRes<[SBPort0,SBPort015]> { + let Latency = 2; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SBWriteResGroup15], (instregex "FNSTSW16r")>; + +def SBWriteResGroup16 : SchedWriteRes<[SBPort1,SBPort0]> { + let Latency = 2; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SBWriteResGroup16], (instregex "BSWAP32r")>; + +def SBWriteResGroup17 : SchedWriteRes<[SBPort5,SBPort15]> { + let Latency = 2; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SBWriteResGroup17], (instregex "PINSRBrr")>; +def: InstRW<[SBWriteResGroup17], (instregex "PINSRDrr")>; +def: InstRW<[SBWriteResGroup17], (instregex "PINSRQrr")>; +def: InstRW<[SBWriteResGroup17], (instregex "PINSRWrri")>; +def: InstRW<[SBWriteResGroup17], (instregex "VPINSRBrr")>; +def: InstRW<[SBWriteResGroup17], (instregex "VPINSRDrr")>; +def: InstRW<[SBWriteResGroup17], (instregex "VPINSRQrr")>; +def: InstRW<[SBWriteResGroup17], (instregex "VPINSRWrri")>; + +def SBWriteResGroup18 : SchedWriteRes<[SBPort5,SBPort015]> { + let Latency = 2; + let 
NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SBWriteResGroup18], (instregex "MMX_MOVDQ2Qrr")>; + +def SBWriteResGroup19 : SchedWriteRes<[SBPort0,SBPort015]> { + let Latency = 2; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SBWriteResGroup19], (instregex "ADC64ri8")>; +def: InstRW<[SBWriteResGroup19], (instregex "ADC64rr")>; +def: InstRW<[SBWriteResGroup19], (instregex "ADC8ri")>; +def: InstRW<[SBWriteResGroup19], (instregex "ADC8rr")>; +def: InstRW<[SBWriteResGroup19], (instregex "CMOVAE32rr")>; +def: InstRW<[SBWriteResGroup19], (instregex "CMOVB32rr")>; +def: InstRW<[SBWriteResGroup19], (instregex "CMOVE32rr")>; +def: InstRW<[SBWriteResGroup19], (instregex "CMOVG32rr")>; +def: InstRW<[SBWriteResGroup19], (instregex "CMOVGE32rr")>; +def: InstRW<[SBWriteResGroup19], (instregex "CMOVL32rr")>; +def: InstRW<[SBWriteResGroup19], (instregex "CMOVLE32rr")>; +def: InstRW<[SBWriteResGroup19], (instregex "CMOVNE32rr")>; +def: InstRW<[SBWriteResGroup19], (instregex "CMOVNO32rr")>; +def: InstRW<[SBWriteResGroup19], (instregex "CMOVNP32rr")>; +def: InstRW<[SBWriteResGroup19], (instregex "CMOVNS32rr")>; +def: InstRW<[SBWriteResGroup19], (instregex "CMOVO32rr")>; +def: InstRW<[SBWriteResGroup19], (instregex "CMOVP32rr")>; +def: InstRW<[SBWriteResGroup19], (instregex "CMOVS32rr")>; +def: InstRW<[SBWriteResGroup19], (instregex "SBB32rr")>; +def: InstRW<[SBWriteResGroup19], (instregex "SBB64ri8")>; +def: InstRW<[SBWriteResGroup19], (instregex "SBB8ri")>; +def: InstRW<[SBWriteResGroup19], (instregex "SBB8rr")>; +def: InstRW<[SBWriteResGroup19], (instregex "SHLD32rri8")>; +def: InstRW<[SBWriteResGroup19], (instregex "SHRD32rri8")>; + +def SBWriteResGroup20 : SchedWriteRes<[SBPort0]> { + let Latency = 3; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[SBWriteResGroup20], (instregex "MMX_PMADDUBSWrr64")>; +def: InstRW<[SBWriteResGroup20], (instregex "MMX_PMULHRSWrr64")>; +def: InstRW<[SBWriteResGroup20], (instregex "MMX_PMULUDQirr")>; +def: InstRW<[SBWriteResGroup20], (instregex "PMADDUBSWrr")>; +def: InstRW<[SBWriteResGroup20], (instregex "PMADDWDrr")>; +def: InstRW<[SBWriteResGroup20], (instregex "PMULDQrr")>; +def: InstRW<[SBWriteResGroup20], (instregex "PMULHRSWrr")>; +def: InstRW<[SBWriteResGroup20], (instregex "PMULHUWrr")>; +def: InstRW<[SBWriteResGroup20], (instregex "PMULHWrr")>; +def: InstRW<[SBWriteResGroup20], (instregex "PMULLDrr")>; +def: InstRW<[SBWriteResGroup20], (instregex "PMULLWrr")>; +def: InstRW<[SBWriteResGroup20], (instregex "PMULUDQrr")>; +def: InstRW<[SBWriteResGroup20], (instregex "PSADBWrr")>; +def: InstRW<[SBWriteResGroup20], (instregex "VMOVMSKPSYrr")>; +def: InstRW<[SBWriteResGroup20], (instregex "VPMADDUBSWrr")>; +def: InstRW<[SBWriteResGroup20], (instregex "VPMADDWDrr")>; +def: InstRW<[SBWriteResGroup20], (instregex "VPMULDQrr")>; +def: InstRW<[SBWriteResGroup20], (instregex "VPMULHRSWrr")>; +def: InstRW<[SBWriteResGroup20], (instregex "VPMULHWrr")>; +def: InstRW<[SBWriteResGroup20], (instregex "VPMULLDrr")>; +def: InstRW<[SBWriteResGroup20], (instregex "VPMULLWrr")>; +def: InstRW<[SBWriteResGroup20], (instregex "VPSADBWrr")>; + +def SBWriteResGroup21 : SchedWriteRes<[SBPort1]> { + let Latency = 3; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[SBWriteResGroup21], (instregex "ADDPDrr")>; +def: InstRW<[SBWriteResGroup21], (instregex "ADDPSrr")>; +def: InstRW<[SBWriteResGroup21], (instregex "ADDSDrr")>; +def: InstRW<[SBWriteResGroup21], (instregex "ADDSSrr")>; +def: InstRW<[SBWriteResGroup21], 
(instregex "ADDSUBPDrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "ADDSUBPSrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "ADD_FPrST0")>;
+def: InstRW<[SBWriteResGroup21], (instregex "ADD_FST0r")>;
+def: InstRW<[SBWriteResGroup21], (instregex "ADD_FrST0")>;
+def: InstRW<[SBWriteResGroup21], (instregex "BSF32rr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "BSR32rr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "CMPPDrri")>;
+def: InstRW<[SBWriteResGroup21], (instregex "CMPPSrri")>;
+def: InstRW<[SBWriteResGroup21], (instregex "CMPSDrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "CMPSSrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "CRC32r32r32")>;
+def: InstRW<[SBWriteResGroup21], (instregex "CRC32r32r8")>;
+def: InstRW<[SBWriteResGroup21], (instregex "CVTDQ2PSrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "CVTPS2DQrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "CVTTPS2DQrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "MAXPDrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "MAXPSrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "MAXSDrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "MAXSSrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "MINPDrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "MINPSrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "MINSDrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "MINSSrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "MMX_CVTPI2PSirr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "MMX_CVTPS2PIirr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "MMX_CVTTPS2PIirr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "MUL8r")>;
+def: InstRW<[SBWriteResGroup21], (instregex "POPCNT32rr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "ROUNDPDr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "ROUNDPSr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "ROUNDSDr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "ROUNDSSr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "SUBPDrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "SUBPSrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "SUBR_FPrST0")>;
+def: InstRW<[SBWriteResGroup21], (instregex "SUBR_FST0r")>;
+def: InstRW<[SBWriteResGroup21], (instregex "SUBR_FrST0")>;
+def: InstRW<[SBWriteResGroup21], (instregex "SUBSDrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "SUBSSrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "SUB_FPrST0")>;
+def: InstRW<[SBWriteResGroup21], (instregex "SUB_FST0r")>;
+def: InstRW<[SBWriteResGroup21], (instregex "SUB_FrST0")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VADDPDYrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VADDPDrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VADDPSYrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VADDPSrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VADDSDrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VADDSSrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VADDSUBPDYrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VADDSUBPDrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VADDSUBPSYrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VADDSUBPSrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VBROADCASTF128")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VCMPPDYrri")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VCMPPDrri")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VCMPPSYrri")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VCMPPSrri")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VCMPSDrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VCMPSSrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VCVTDQ2PSYrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VCVTDQ2PSrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VCVTPS2DQYrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VCVTPS2DQrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VCVTTPS2DQrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VMAXPDYrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VMAXPDrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VMAXPSYrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VMAXPSrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VMAXSDrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VMAXSSrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VMINPDrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VMINPSrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VMINSDrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VMINSSrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VROUNDPDr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VROUNDPSr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VROUNDSDr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VSUBPDYrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VSUBPDrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VSUBPSYrr")>;
+def: InstRW<[SBWriteResGroup21], (instregex "VSUBPSrr")>;
+
+def SBWriteResGroup22 : SchedWriteRes<[SBPort0,SBPort5]> {
+  let Latency = 3;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup22], (instregex "EXTRACTPSrr")>;
+def: InstRW<[SBWriteResGroup22], (instregex "VEXTRACTPSrr")>;
+
+def SBWriteResGroup23 : SchedWriteRes<[SBPort0,SBPort15]> {
+  let Latency = 3;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup23], (instregex "PEXTRBrr")>;
+def: InstRW<[SBWriteResGroup23], (instregex "PEXTRDrr")>;
+def: InstRW<[SBWriteResGroup23], (instregex "PEXTRQrr")>;
+def: InstRW<[SBWriteResGroup23], (instregex "PEXTRWri")>;
+def: InstRW<[SBWriteResGroup23], (instregex "VPEXTRBrr")>;
+def: InstRW<[SBWriteResGroup23], (instregex "VPEXTRDrr")>;
+def: InstRW<[SBWriteResGroup23], (instregex "VPEXTRQrr")>;
+def: InstRW<[SBWriteResGroup23], (instregex "VPEXTRWri")>;
+def: InstRW<[SBWriteResGroup23], (instregex "SHL64rCL")>;
+def: InstRW<[SBWriteResGroup23], (instregex "SHL8rCL")>;
+
+def SBWriteResGroup24 : SchedWriteRes<[SBPort15]> {
+  let Latency = 3;
+  let NumMicroOps = 3;
+  let ResourceCycles = [3];
+}
+def: InstRW<[SBWriteResGroup24], (instregex "MMX_PHADDSWrr64")>;
+def: InstRW<[SBWriteResGroup24], (instregex "MMX_PHADDWrr64")>;
+def: InstRW<[SBWriteResGroup24], (instregex "MMX_PHADDrr64")>;
+def: InstRW<[SBWriteResGroup24], (instregex "MMX_PHSUBDrr64")>;
+def: InstRW<[SBWriteResGroup24], (instregex "MMX_PHSUBSWrr64")>;
+def: InstRW<[SBWriteResGroup24], (instregex "MMX_PHSUBWrr64")>;
+def: InstRW<[SBWriteResGroup24], (instregex "PHADDDrr")>;
+def: InstRW<[SBWriteResGroup24], (instregex "PHADDSWrr128")>;
+def: InstRW<[SBWriteResGroup24], (instregex "PHADDWrr")>;
+def: InstRW<[SBWriteResGroup24], (instregex "PHSUBDrr")>;
+def: InstRW<[SBWriteResGroup24], (instregex "PHSUBSWrr128")>;
+def: InstRW<[SBWriteResGroup24], (instregex "PHSUBWrr")>;
+def: InstRW<[SBWriteResGroup24], (instregex "VPHADDDrr")>;
+def: InstRW<[SBWriteResGroup24], (instregex "VPHADDSWrr128")>;
+def: InstRW<[SBWriteResGroup24], (instregex "VPHADDWrr")>;
+def: InstRW<[SBWriteResGroup24], (instregex "VPHSUBDrr")>;
+def: InstRW<[SBWriteResGroup24], (instregex "VPHSUBSWrr128")>;
+def: InstRW<[SBWriteResGroup24], (instregex "VPHSUBWrr")>;
+
+def SBWriteResGroup25 : SchedWriteRes<[SBPort015]> {
+  let Latency = 3;
+  let NumMicroOps = 3;
+  let ResourceCycles = [3];
+}
+def: InstRW<[SBWriteResGroup25], (instregex "LEAVE64")>;
+def: InstRW<[SBWriteResGroup25], (instregex "XADD32rr")>;
+def: InstRW<[SBWriteResGroup25], (instregex "XADD8rr")>;
+
+def SBWriteResGroup26 : SchedWriteRes<[SBPort0,SBPort015]> {
+  let Latency = 3;
+  let NumMicroOps = 3;
+  let ResourceCycles = [2,1];
+}
+def: InstRW<[SBWriteResGroup26], (instregex "CMOVA32rr")>;
+def: InstRW<[SBWriteResGroup26], (instregex "CMOVBE32rr")>;
+
+def SBWriteResGroup27 : SchedWriteRes<[SBPort0,SBPort1]> {
+  let Latency = 4;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup27], (instregex "MUL64r")>;
+
+def SBWriteResGroup28 : SchedWriteRes<[SBPort1,SBPort5]> {
+  let Latency = 4;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup28], (instregex "CVTDQ2PDrr")>;
+def: InstRW<[SBWriteResGroup28], (instregex "CVTPD2DQrr")>;
+def: InstRW<[SBWriteResGroup28], (instregex "CVTPD2PSrr")>;
+def: InstRW<[SBWriteResGroup28], (instregex "CVTSD2SSrr")>;
+def: InstRW<[SBWriteResGroup28], (instregex "CVTSI2SD64rr")>;
+def: InstRW<[SBWriteResGroup28], (instregex "CVTSI2SDrr")>;
+def: InstRW<[SBWriteResGroup28], (instregex "CVTTPD2DQrr")>;
+def: InstRW<[SBWriteResGroup28], (instregex "MMX_CVTPD2PIirr")>;
+def: InstRW<[SBWriteResGroup28], (instregex "MMX_CVTPI2PDirr")>;
+def: InstRW<[SBWriteResGroup28], (instregex "MMX_CVTTPD2PIirr")>;
+def: InstRW<[SBWriteResGroup28], (instregex "VCVTDQ2PDYrr")>;
+def: InstRW<[SBWriteResGroup28], (instregex "VCVTDQ2PDrr")>;
+def: InstRW<[SBWriteResGroup28], (instregex "VCVTPD2DQYrr")>;
+def: InstRW<[SBWriteResGroup28], (instregex "VCVTPD2DQrr")>;
+def: InstRW<[SBWriteResGroup28], (instregex "VCVTPD2PSYrr")>;
+def: InstRW<[SBWriteResGroup28], (instregex "VCVTPD2PSrr")>;
+def: InstRW<[SBWriteResGroup28], (instregex "VCVTSI2SD64rr")>;
+def: InstRW<[SBWriteResGroup28], (instregex "VCVTSI2SDrr")>;
+def: InstRW<[SBWriteResGroup28], (instregex "VCVTTPD2DQYrr")>;
+def: InstRW<[SBWriteResGroup28], (instregex "VCVTTPD2DQrr")>;
+
+def SBWriteResGroup29 : SchedWriteRes<[SBPort1,SBPort015]> {
+  let Latency = 4;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup29], (instregex "MOV64sr")>;
+def: InstRW<[SBWriteResGroup29], (instregex "PAUSE")>;
+
+def SBWriteResGroup30 : SchedWriteRes<[SBPort0]> {
+  let Latency = 5;
+  let NumMicroOps = 1;
+  let ResourceCycles = [1];
+}
+def: InstRW<[SBWriteResGroup30], (instregex "MULPDrr")>;
+def: InstRW<[SBWriteResGroup30], (instregex "MULPSrr")>;
+def: InstRW<[SBWriteResGroup30], (instregex "MULSDrr")>;
+def: InstRW<[SBWriteResGroup30], (instregex "MULSSrr")>;
+def: InstRW<[SBWriteResGroup30], (instregex "MUL_FPrST0")>;
+def: InstRW<[SBWriteResGroup30], (instregex "MUL_FST0r")>;
+def: InstRW<[SBWriteResGroup30], (instregex "MUL_FrST0")>;
+def: InstRW<[SBWriteResGroup30], (instregex "PCMPGTQrr")>;
+def: InstRW<[SBWriteResGroup30], (instregex "PHMINPOSUWrr128")>;
+def: InstRW<[SBWriteResGroup30], (instregex "RCPPSr")>;
+def: InstRW<[SBWriteResGroup30], (instregex "RCPSSr")>;
+def: InstRW<[SBWriteResGroup30], (instregex "RSQRTPSr")>;
+def: InstRW<[SBWriteResGroup30], (instregex "RSQRTSSr")>;
+def: InstRW<[SBWriteResGroup30], (instregex "VMULPDYrr")>;
+def: InstRW<[SBWriteResGroup30], (instregex "VMULPDrr")>;
+def: InstRW<[SBWriteResGroup30], (instregex "VMULPSYrr")>;
+def: InstRW<[SBWriteResGroup30], (instregex "VMULPSrr")>;
+def: InstRW<[SBWriteResGroup30], (instregex "VMULSDrr")>;
+def: InstRW<[SBWriteResGroup30], (instregex "VMULSSrr")>;
+def: InstRW<[SBWriteResGroup30], (instregex "VPCMPGTQrr")>;
+def: InstRW<[SBWriteResGroup30], (instregex "VPHMINPOSUWrr128")>;
+def: InstRW<[SBWriteResGroup30], (instregex "VRSQRTPSr")>;
+def: InstRW<[SBWriteResGroup30], (instregex "VRSQRTSSr")>;
+
+def SBWriteResGroup31 : SchedWriteRes<[SBPort23]> {
+  let Latency = 5;
+  let NumMicroOps = 1;
+  let ResourceCycles = [1];
+}
+def: InstRW<[SBWriteResGroup31], (instregex "MOV32rm")>;
+def: InstRW<[SBWriteResGroup31], (instregex "MOV8rm")>;
+def: InstRW<[SBWriteResGroup31], (instregex "MOVSX32rm16")>;
+def: InstRW<[SBWriteResGroup31], (instregex "MOVSX32rm8")>;
+def: InstRW<[SBWriteResGroup31], (instregex "MOVZX32rm16")>;
+def: InstRW<[SBWriteResGroup31], (instregex "MOVZX32rm8")>;
+def: InstRW<[SBWriteResGroup31], (instregex "PREFETCH")>;
+
+def SBWriteResGroup32 : SchedWriteRes<[SBPort0,SBPort1]> {
+  let Latency = 5;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup32], (instregex "CVTSD2SI64rr")>;
+def: InstRW<[SBWriteResGroup32], (instregex "CVTSD2SIrr")>;
+def: InstRW<[SBWriteResGroup32], (instregex "CVTSS2SI64rr")>;
+def: InstRW<[SBWriteResGroup32], (instregex "CVTSS2SIrr")>;
+def: InstRW<[SBWriteResGroup32], (instregex "CVTTSD2SI64rr")>;
+def: InstRW<[SBWriteResGroup32], (instregex "CVTTSD2SIrr")>;
+def: InstRW<[SBWriteResGroup32], (instregex "CVTTSS2SI64rr")>;
+def: InstRW<[SBWriteResGroup32], (instregex "CVTTSS2SIrr")>;
+def: InstRW<[SBWriteResGroup32], (instregex "VCVTSD2SI64rr")>;
+def: InstRW<[SBWriteResGroup32], (instregex "VCVTSS2SI64rr")>;
+def: InstRW<[SBWriteResGroup32], (instregex "VCVTSS2SIrr")>;
+def: InstRW<[SBWriteResGroup32], (instregex "VCVTTSD2SI64rr")>;
+def: InstRW<[SBWriteResGroup32], (instregex "VCVTTSD2SIrr")>;
+def: InstRW<[SBWriteResGroup32], (instregex "VCVTTSS2SI64rr")>;
+def: InstRW<[SBWriteResGroup32], (instregex "VCVTTSS2SIrr")>;
+
+def SBWriteResGroup33 : SchedWriteRes<[SBPort4,SBPort23]> {
+  let Latency = 5;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup33], (instregex "MOV64mr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "MOV8mr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "MOVAPDmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "MOVAPSmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "MOVDQAmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "MOVDQUmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "MOVHPDmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "MOVHPSmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "MOVLPDmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "MOVLPSmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "MOVNTDQmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "MOVNTI_64mr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "MOVNTImr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "MOVNTPDmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "MOVNTPSmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "MOVPDI2DImr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "MOVPQI2QImr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "MOVPQIto64mr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "MOVSSmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "MOVUPDmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "MOVUPSmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "PUSH64i8")>;
+def: InstRW<[SBWriteResGroup33], (instregex "PUSH64r")>;
+def: InstRW<[SBWriteResGroup33], (instregex "VEXTRACTF128mr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "VMOVAPDYmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "VMOVAPDmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "VMOVAPSYmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "VMOVAPSmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "VMOVDQAYmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "VMOVDQAmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "VMOVDQUYmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "VMOVDQUmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "VMOVHPDmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "VMOVHPSmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "VMOVLPDmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "VMOVLPSmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "VMOVNTDQYmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "VMOVNTDQmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "VMOVNTPDYmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "VMOVNTPDmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "VMOVNTPSYmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "VMOVNTPSmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "VMOVPDI2DImr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "VMOVPQI2QImr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "VMOVPQIto64mr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "VMOVSDmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "VMOVSSmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "VMOVUPDYmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "VMOVUPDmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "VMOVUPSYmr")>;
+def: InstRW<[SBWriteResGroup33], (instregex "VMOVUPSmr")>;
+
+def SBWriteResGroup34 : SchedWriteRes<[SBPort0,SBPort15]> {
+  let Latency = 5;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,2];
+}
+def: InstRW<[SBWriteResGroup34], (instregex "MPSADBWrri")>;
+def: InstRW<[SBWriteResGroup34], (instregex "VMPSADBWrri")>;
+
+def SBWriteResGroup35 : SchedWriteRes<[SBPort1,SBPort5]> {
+  let Latency = 5;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,2];
+}
+def: InstRW<[SBWriteResGroup35], (instregex "CLI")>;
+def: InstRW<[SBWriteResGroup35], (instregex "CVTSI2SS64rr")>;
+def: InstRW<[SBWriteResGroup35], (instregex "CVTSI2SSrr")>;
+def: InstRW<[SBWriteResGroup35], (instregex "HADDPDrr")>;
+def: InstRW<[SBWriteResGroup35], (instregex "HADDPSrr")>;
+def: InstRW<[SBWriteResGroup35], (instregex "HSUBPDrr")>;
+def: InstRW<[SBWriteResGroup35], (instregex "HSUBPSrr")>;
+def: InstRW<[SBWriteResGroup35], (instregex "VCVTSI2SS64rr")>;
+def: InstRW<[SBWriteResGroup35], (instregex "VCVTSI2SSrr")>;
+def: InstRW<[SBWriteResGroup35], (instregex "VHADDPDrr")>;
+def: InstRW<[SBWriteResGroup35], (instregex "VHADDPSYrr")>;
+def: InstRW<[SBWriteResGroup35], (instregex "VHADDPSrr")>;
+def: InstRW<[SBWriteResGroup35], (instregex "VHSUBPDYrr")>;
+def: InstRW<[SBWriteResGroup35], (instregex "VHSUBPDrr")>;
+def: InstRW<[SBWriteResGroup35], (instregex "VHSUBPSYrr")>;
+def: InstRW<[SBWriteResGroup35], (instregex "VHSUBPSrr")>;
+
+def SBWriteResGroup36 : SchedWriteRes<[SBPort4,SBPort5,SBPort23]> {
+  let Latency = 5;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,1,1];
+}
+def: InstRW<[SBWriteResGroup36], (instregex "CALL64r")>;
+def: InstRW<[SBWriteResGroup36], (instregex "EXTRACTPSmr")>;
+def: InstRW<[SBWriteResGroup36], (instregex "VEXTRACTPSmr")>;
+
+def SBWriteResGroup37 : SchedWriteRes<[SBPort4,SBPort01,SBPort23]> {
+  let Latency = 5;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,1,1];
+}
+def: InstRW<[SBWriteResGroup37], (instregex "VMASKMOVPDYrm")>;
+def: InstRW<[SBWriteResGroup37], (instregex "VMASKMOVPDmr")>;
+def: InstRW<[SBWriteResGroup37], (instregex "VMASKMOVPSmr")>;
+
+def SBWriteResGroup38 : SchedWriteRes<[SBPort4,SBPort23,SBPort0]> {
+  let Latency = 5;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,1,1];
+}
+def: InstRW<[SBWriteResGroup38], (instregex "SETAEm")>;
+def: InstRW<[SBWriteResGroup38], (instregex "SETBm")>;
+def: InstRW<[SBWriteResGroup38], (instregex "SETEm")>;
+def: InstRW<[SBWriteResGroup38], (instregex "SETGEm")>;
+def: InstRW<[SBWriteResGroup38], (instregex "SETGm")>;
+def: InstRW<[SBWriteResGroup38], (instregex "SETLEm")>;
+def: InstRW<[SBWriteResGroup38], (instregex "SETLm")>;
+def: InstRW<[SBWriteResGroup38], (instregex "SETNEm")>;
+def: InstRW<[SBWriteResGroup38], (instregex "SETNOm")>;
+def: InstRW<[SBWriteResGroup38], (instregex "SETNPm")>;
+def: InstRW<[SBWriteResGroup38], (instregex "SETNSm")>;
+def: InstRW<[SBWriteResGroup38], (instregex "SETOm")>;
+def: InstRW<[SBWriteResGroup38], (instregex "SETPm")>;
+def: InstRW<[SBWriteResGroup38], (instregex "SETSm")>;
+
+def SBWriteResGroup39 : SchedWriteRes<[SBPort4,SBPort23,SBPort15]> {
+  let Latency = 5;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,1,1];
+}
+def: InstRW<[SBWriteResGroup39], (instregex "PEXTRBmr")>;
+def: InstRW<[SBWriteResGroup39], (instregex "VPEXTRBmr")>;
+def: InstRW<[SBWriteResGroup39], (instregex "VPEXTRDmr")>;
+def: InstRW<[SBWriteResGroup39], (instregex "VPEXTRWmr")>;
+
+def SBWriteResGroup40 : SchedWriteRes<[SBPort4,SBPort23,SBPort015]> {
+  let Latency = 5;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,1,1];
+}
+def: InstRW<[SBWriteResGroup40], (instregex "MOV8mi")>;
+def: InstRW<[SBWriteResGroup40], (instregex "STOSB")>;
+def: InstRW<[SBWriteResGroup40], (instregex "STOSL")>;
+def: InstRW<[SBWriteResGroup40], (instregex "STOSQ")>;
+def: InstRW<[SBWriteResGroup40], (instregex "STOSW")>;
+
+def SBWriteResGroup41 : SchedWriteRes<[SBPort5,SBPort015]> {
+  let Latency = 5;
+  let NumMicroOps = 4;
+  let ResourceCycles = [1,3];
+}
+def: InstRW<[SBWriteResGroup41], (instregex "FNINIT")>;
+
+def SBWriteResGroup42 : SchedWriteRes<[SBPort0,SBPort015]> {
+  let Latency = 5;
+  let NumMicroOps = 4;
+  let ResourceCycles = [1,3];
+}
+def: InstRW<[SBWriteResGroup42], (instregex "CMPXCHG32rr")>;
+def: InstRW<[SBWriteResGroup42], (instregex "CMPXCHG8rr")>;
+
+def SBWriteResGroup43 : SchedWriteRes<[SBPort4,SBPort23,SBPort0]> {
+  let Latency = 5;
+  let NumMicroOps = 4;
+  let ResourceCycles = [1,1,2];
+}
+def: InstRW<[SBWriteResGroup43], (instregex "SETAm")>;
+def: InstRW<[SBWriteResGroup43], (instregex "SETBEm")>;
+
+def SBWriteResGroup44 : SchedWriteRes<[SBPort0,SBPort4,SBPort5,SBPort23]> {
+  let Latency = 5;
+  let NumMicroOps = 4;
+  let ResourceCycles = [1,1,1,1];
+}
+def: InstRW<[SBWriteResGroup44], (instregex "LDMXCSR")>;
+def: InstRW<[SBWriteResGroup44], (instregex "STMXCSR")>;
+def: InstRW<[SBWriteResGroup44], (instregex "VLDMXCSR")>;
+def: InstRW<[SBWriteResGroup44], (instregex "VSTMXCSR")>;
+
+def SBWriteResGroup45 : SchedWriteRes<[SBPort0,SBPort4,SBPort23,SBPort15]> {
+  let Latency = 5;
+  let NumMicroOps = 4;
+  let ResourceCycles = [1,1,1,1];
+}
+def: InstRW<[SBWriteResGroup45], (instregex "PEXTRDmr")>;
+def: InstRW<[SBWriteResGroup45], (instregex "PEXTRQmr")>;
+def: InstRW<[SBWriteResGroup45], (instregex "VPEXTRQmr")>;
+def: InstRW<[SBWriteResGroup45], (instregex "PUSHF16")>;
+def: InstRW<[SBWriteResGroup45], (instregex "PUSHF64")>;
+
+def SBWriteResGroup46 : SchedWriteRes<[SBPort4,SBPort5,SBPort01,SBPort23]> {
+  let Latency = 5;
+  let NumMicroOps = 4;
+  let ResourceCycles = [1,1,1,1];
+}
+def: InstRW<[SBWriteResGroup46], (instregex "CLFLUSH")>;
+
+def SBWriteResGroup47 : SchedWriteRes<[SBPort4,SBPort5,SBPort01,SBPort23]> {
+  let Latency = 5;
+  let NumMicroOps = 5;
+  let ResourceCycles = [1,2,1,1];
+}
+def: InstRW<[SBWriteResGroup47], (instregex "FXRSTOR")>;
+
+def SBWriteResGroup48 : SchedWriteRes<[SBPort23]> {
+  let Latency = 6;
+  let NumMicroOps = 1;
+  let ResourceCycles = [1];
+}
+def: InstRW<[SBWriteResGroup48], (instregex "LDDQUrm")>;
+def: InstRW<[SBWriteResGroup48], (instregex "MMX_MOVD64from64rm")>;
+def: InstRW<[SBWriteResGroup48], (instregex "MOV64toPQIrm")>;
+def: InstRW<[SBWriteResGroup48], (instregex "MOVAPDrm")>;
+def: InstRW<[SBWriteResGroup48], (instregex "MOVAPSrm")>;
+def: InstRW<[SBWriteResGroup48], (instregex "MOVDDUPrm")>;
+def: InstRW<[SBWriteResGroup48], (instregex "MOVDI2PDIrm")>;
+def: InstRW<[SBWriteResGroup48], (instregex "MOVDQArm")>;
+def: InstRW<[SBWriteResGroup48], (instregex "MOVDQUrm")>;
+def: InstRW<[SBWriteResGroup48], (instregex "MOVNTDQArm")>;
+def: InstRW<[SBWriteResGroup48], (instregex "MOVSHDUPrm")>;
+def: InstRW<[SBWriteResGroup48], (instregex "MOVSLDUPrm")>;
+def: InstRW<[SBWriteResGroup48], (instregex "MOVSSrm")>;
+def: InstRW<[SBWriteResGroup48], (instregex "MOVUPDrm")>;
+def: InstRW<[SBWriteResGroup48], (instregex "MOVUPSrm")>;
+def: InstRW<[SBWriteResGroup48], (instregex "POP64r")>;
+def: InstRW<[SBWriteResGroup48], (instregex "VBROADCASTSSrm")>;
+def: InstRW<[SBWriteResGroup48], (instregex "VLDDQUYrm")>;
+def: InstRW<[SBWriteResGroup48], (instregex "VLDDQUrm")>;
+def: InstRW<[SBWriteResGroup48], (instregex "VMOV64toPQIrm")>;
+def: InstRW<[SBWriteResGroup48], (instregex "VMOVAPDrm")>;
+def: InstRW<[SBWriteResGroup48], (instregex "VMOVAPSrm")>;
+def: InstRW<[SBWriteResGroup48], (instregex "VMOVDDUPrm")>;
+def: InstRW<[SBWriteResGroup48], (instregex "VMOVDI2PDIrm")>;
+def: InstRW<[SBWriteResGroup48], (instregex "VMOVDQArm")>;
+def: InstRW<[SBWriteResGroup48], (instregex "VMOVDQUrm")>;
+def: InstRW<[SBWriteResGroup48], (instregex "VMOVNTDQArm")>;
+def: InstRW<[SBWriteResGroup48], (instregex "VMOVQI2PQIrm")>;
+def: InstRW<[SBWriteResGroup48], (instregex "VMOVSDrm")>;
+def: InstRW<[SBWriteResGroup48], (instregex "VMOVSHDUPrm")>;
+def: InstRW<[SBWriteResGroup48], (instregex "VMOVSLDUPrm")>;
+def: InstRW<[SBWriteResGroup48], (instregex "VMOVSSrm")>;
+def: InstRW<[SBWriteResGroup48], (instregex "VMOVUPDrm")>;
+def: InstRW<[SBWriteResGroup48], (instregex "VMOVUPSrm")>;
+
+def SBWriteResGroup49 : SchedWriteRes<[SBPort5,SBPort23]> {
+  let Latency = 6;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup49], (instregex "JMP64m")>;
+def: InstRW<[SBWriteResGroup49], (instregex "MOV64sm")>;
+
+def SBWriteResGroup50 : SchedWriteRes<[SBPort23,SBPort0]> {
+  let Latency = 6;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup50], (instregex "BT64mi8")>;
+
+def SBWriteResGroup51 : SchedWriteRes<[SBPort23,SBPort15]> {
+  let Latency = 6;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup51], (instregex "MMX_PABSBrm64")>;
+def: InstRW<[SBWriteResGroup51], (instregex "MMX_PABSDrm64")>;
+def: InstRW<[SBWriteResGroup51], (instregex "MMX_PABSWrm64")>;
+def: InstRW<[SBWriteResGroup51], (instregex "MMX_PALIGNR64irm")>;
+def: InstRW<[SBWriteResGroup51], (instregex "MMX_PSHUFBrm64")>;
+def: InstRW<[SBWriteResGroup51], (instregex "MMX_PSIGNBrm64")>;
+def: InstRW<[SBWriteResGroup51], (instregex "MMX_PSIGNDrm64")>;
+def: InstRW<[SBWriteResGroup51], (instregex "MMX_PSIGNWrm64")>;
+
+def SBWriteResGroup52 : SchedWriteRes<[SBPort23,SBPort015]> {
+  let Latency = 6;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup52], (instregex "ADD64rm")>;
+def: InstRW<[SBWriteResGroup52], (instregex "ADD8rm")>;
+def: InstRW<[SBWriteResGroup52], (instregex "AND64rm")>;
+def: InstRW<[SBWriteResGroup52], (instregex "AND8rm")>;
+def: InstRW<[SBWriteResGroup52], (instregex "CMP64mi8")>;
+def: InstRW<[SBWriteResGroup52], (instregex "CMP64mr")>;
+def: InstRW<[SBWriteResGroup52], (instregex "CMP64rm")>;
+def: InstRW<[SBWriteResGroup52], (instregex "CMP8mi")>;
+def: InstRW<[SBWriteResGroup52], (instregex "CMP8mr")>;
+def: InstRW<[SBWriteResGroup52], (instregex "CMP8rm")>;
+def: InstRW<[SBWriteResGroup52], (instregex "LODSL")>;
+def: InstRW<[SBWriteResGroup52], (instregex "LODSQ")>;
+def: InstRW<[SBWriteResGroup52], (instregex "OR64rm")>;
+def: InstRW<[SBWriteResGroup52], (instregex "OR8rm")>;
+def: InstRW<[SBWriteResGroup52], (instregex "SUB64rm")>;
+def: InstRW<[SBWriteResGroup52], (instregex "SUB8rm")>;
+def: InstRW<[SBWriteResGroup52], (instregex "XOR64rm")>;
+def: InstRW<[SBWriteResGroup52], (instregex "XOR8rm")>;
+
+def SBWriteResGroup53 : SchedWriteRes<[SBPort4,SBPort23]> {
+  let Latency = 6;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,2];
+}
+def: InstRW<[SBWriteResGroup53], (instregex "POP64rmm")>;
+def: InstRW<[SBWriteResGroup53], (instregex "PUSH64rmm")>;
+def: InstRW<[SBWriteResGroup53], (instregex "ST_F32m")>;
+def: InstRW<[SBWriteResGroup53], (instregex "ST_F64m")>;
+def: InstRW<[SBWriteResGroup53], (instregex "ST_FP32m")>;
+def: InstRW<[SBWriteResGroup53], (instregex "ST_FP64m")>;
+def: InstRW<[SBWriteResGroup53], (instregex "ST_FP80m")>;
+
+def SBWriteResGroup54 : SchedWriteRes<[SBPort23]> {
+  let Latency = 7;
+  let NumMicroOps = 1;
+  let ResourceCycles = [1];
+}
+def: InstRW<[SBWriteResGroup54], (instregex "VBROADCASTSDYrm")>;
+def: InstRW<[SBWriteResGroup54], (instregex "VBROADCASTSSrm")>;
+def: InstRW<[SBWriteResGroup54], (instregex "VMOVAPDYrm")>;
+def: InstRW<[SBWriteResGroup54], (instregex "VMOVAPSYrm")>;
+def: InstRW<[SBWriteResGroup54], (instregex "VMOVDDUPYrm")>;
+def: InstRW<[SBWriteResGroup54], (instregex "VMOVDQAYrm")>;
+def: InstRW<[SBWriteResGroup54], (instregex "VMOVDQUYrm")>;
+def: InstRW<[SBWriteResGroup54], (instregex "VMOVSHDUPYrm")>;
+def: InstRW<[SBWriteResGroup54], (instregex "VMOVSLDUPYrm")>;
+def: InstRW<[SBWriteResGroup54], (instregex "VMOVUPDYrm")>;
+def: InstRW<[SBWriteResGroup54], (instregex "VMOVUPSYrm")>;
+
+def SBWriteResGroup55 : SchedWriteRes<[SBPort0,SBPort23]> {
+  let Latency = 7;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup55], (instregex "CVTPS2PDrm")>;
+def: InstRW<[SBWriteResGroup55], (instregex "CVTSS2SDrm")>;
+def: InstRW<[SBWriteResGroup55], (instregex "VCVTPS2PDYrm")>;
+def: InstRW<[SBWriteResGroup55], (instregex "VCVTPS2PDrm")>;
+def: InstRW<[SBWriteResGroup55], (instregex "VCVTSS2SDrm")>;
+def: InstRW<[SBWriteResGroup55], (instregex "VTESTPDrm")>;
+def: InstRW<[SBWriteResGroup55], (instregex "VTESTPSrm")>;
+
+def SBWriteResGroup56 : SchedWriteRes<[SBPort5,SBPort23]> {
+  let Latency = 7;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup56], (instregex "ANDNPDrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "ANDNPSrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "ANDPDrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "ANDPSrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "INSERTPSrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "MOVHPDrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "MOVHPSrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "MOVLPDrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "MOVLPSrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "ORPDrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "ORPSrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "SHUFPDrmi")>;
+def: InstRW<[SBWriteResGroup56], (instregex "SHUFPSrmi")>;
+def: InstRW<[SBWriteResGroup56], (instregex "UNPCKHPDrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "UNPCKHPSrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "UNPCKLPDrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "UNPCKLPSrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "VANDNPDrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "VANDNPSrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "VANDPDrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "VANDPSrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "VBROADCASTF128")>;
+def: InstRW<[SBWriteResGroup56], (instregex "VINSERTPSrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "VMOVHPDrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "VMOVHPSrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "VMOVLPDrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "VMOVLPSrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "VORPDrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "VORPSrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "VPERMILPDmi")>;
+def: InstRW<[SBWriteResGroup56], (instregex "VPERMILPDri")>;
+def: InstRW<[SBWriteResGroup56], (instregex "VPERMILPSmi")>;
+def: InstRW<[SBWriteResGroup56], (instregex "VPERMILPSri")>;
+def: InstRW<[SBWriteResGroup56], (instregex "VSHUFPDrmi")>;
+def: InstRW<[SBWriteResGroup56], (instregex "VSHUFPSrmi")>;
+def: InstRW<[SBWriteResGroup56], (instregex "VUNPCKHPDrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "VUNPCKHPSrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "VUNPCKLPDrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "VUNPCKLPSrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "VXORPDrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "VXORPSrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "XORPDrm")>;
+def: InstRW<[SBWriteResGroup56], (instregex "XORPSrm")>;
+
+def SBWriteResGroup57 : SchedWriteRes<[SBPort5,SBPort015]> {
+  let Latency = 7;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup57], (instregex "AESDECLASTrr")>;
+def: InstRW<[SBWriteResGroup57], (instregex "AESDECrr")>;
+def: InstRW<[SBWriteResGroup57], (instregex "AESENCLASTrr")>;
+def: InstRW<[SBWriteResGroup57], (instregex "AESENCrr")>;
+def: InstRW<[SBWriteResGroup57], (instregex "KANDQrr")>;
+def: InstRW<[SBWriteResGroup57], (instregex "VAESDECLASTrr")>;
+def: InstRW<[SBWriteResGroup57], (instregex "VAESDECrr")>;
+def: InstRW<[SBWriteResGroup57], (instregex "VAESENCrr")>;
+
+def SBWriteResGroup58 : SchedWriteRes<[SBPort23,SBPort0]> {
+  let Latency = 7;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup58], (instregex "BLENDPDrmi")>;
+def: InstRW<[SBWriteResGroup58], (instregex "BLENDPSrmi")>;
+def: InstRW<[SBWriteResGroup58], (instregex "VBLENDPDrmi")>;
+def: InstRW<[SBWriteResGroup58], (instregex "VBLENDPSrmi")>;
+def: InstRW<[SBWriteResGroup58], (instregex "VINSERTF128rm")>;
+
+def SBWriteResGroup59 : SchedWriteRes<[SBPort23,SBPort15]> {
+  let Latency = 7;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup59], (instregex "MMX_PADDQirm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PABSBrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PABSDrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PABSWrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PACKSSDWrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PACKSSWBrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PACKUSDWrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PACKUSWBrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PADDBrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PADDDrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PADDQrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PADDSBrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PADDSWrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PADDUSBrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PADDUSWrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PADDWrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PALIGNRrmi")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PAVGBrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PAVGWrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PBLENDWrmi")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PCMPEQBrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PCMPEQDrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PCMPEQQrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PCMPEQWrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PCMPGTBrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PCMPGTDrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PCMPGTWrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PINSRBrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PINSRDrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PINSRQrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PINSRWrmi")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PMAXSBrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PMAXSDrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PMAXSWrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PMAXUBrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PMAXUDrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PMAXUWrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PMINSBrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PMINSDrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PMINSWrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PMINUBrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PMINUDrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PMINUWrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PMOVSXBDrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PMOVSXBQrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PMOVSXBWrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PMOVSXDQrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PMOVSXWDrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PMOVSXWQrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PMOVZXBDrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PMOVZXBQrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PMOVZXBWrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PMOVZXDQrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PMOVZXWDrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PMOVZXWQrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PSHUFBrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PSHUFDmi")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PSHUFHWmi")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PSHUFLWmi")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PSIGNBrm128")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PSIGNDrm128")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PSIGNWrm128")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PSUBBrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PSUBDrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PSUBQrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PSUBSBrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PSUBSWrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PSUBUSBrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PSUBUSWrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PSUBWrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PUNPCKHBWrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PUNPCKHDQrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PUNPCKHQDQrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PUNPCKHWDrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PUNPCKLBWrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PUNPCKLDQrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PUNPCKLQDQrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "PUNPCKLWDrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPABSBrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPABSDrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPABSWrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPACKSSDWrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPACKSSWBrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPACKUSDWrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPACKUSWBrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPADDBrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPADDDrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPADDQrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPADDSBrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPADDSWrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPADDUSBrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPADDUSWrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPADDWrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPALIGNRrmi")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPAVGBrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPAVGWrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPBLENDWrmi")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPCMPEQBrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPCMPEQDrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPCMPEQQrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPCMPEQWrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPCMPGTBrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPCMPGTDrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPCMPGTWrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPINSRBrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPINSRDrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPINSRQrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPINSRWrmi")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPMAXSBrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPMAXSDrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPMAXSWrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPMAXUBrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPMAXUDrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPMAXUWrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPMINSBrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPMINSDrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPMINSWrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPMINUBrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPMINUDrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPMINUWrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPMOVSXBDrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPMOVSXBQrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPMOVSXBWrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPMOVSXDQrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPMOVSXWDrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPMOVSXWQrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPMOVZXBDrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPMOVZXBQrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPMOVZXBWrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPMOVZXDQrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPMOVZXWDrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPMOVZXWQrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPSHUFBrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPSHUFDmi")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPSHUFHWmi")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPSHUFLWmi")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPSIGNBrm128")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPSIGNDrm128")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPSIGNWrm128")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPSUBBrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPSUBDrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPSUBQrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPSUBSBrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPSUBSWrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPSUBUSBrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPSUBUSWrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPSUBWrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPUNPCKHBWrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPUNPCKHDQrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPUNPCKHQDQrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPUNPCKHWDrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPUNPCKLBWrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPUNPCKLDQrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPUNPCKLQDQrm")>;
+def: InstRW<[SBWriteResGroup59], (instregex "VPUNPCKLWDrm")>;
+
+def SBWriteResGroup60 : SchedWriteRes<[SBPort23,SBPort015]> {
+  let Latency = 7;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup60], (instregex "PANDNrm")>;
+def: InstRW<[SBWriteResGroup60], (instregex "PANDrm")>;
+def: InstRW<[SBWriteResGroup60], (instregex "PORrm")>;
+def: InstRW<[SBWriteResGroup60], (instregex "PXORrm")>;
+def: InstRW<[SBWriteResGroup60], (instregex "VPANDNrm")>;
+def: InstRW<[SBWriteResGroup60], (instregex "VPANDrm")>;
+def: InstRW<[SBWriteResGroup60], (instregex "VPORrm")>;
+def: InstRW<[SBWriteResGroup60], (instregex "VPXORrm")>;
+
+def SBWriteResGroup61 : SchedWriteRes<[SBPort0,SBPort0]> {
+  let Latency = 7;
+  let NumMicroOps = 3;
+  let ResourceCycles = [2,1];
+}
+def: InstRW<[SBWriteResGroup61], (instregex "VRCPPSr")>;
+def: InstRW<[SBWriteResGroup61], (instregex "VRSQRTPSYr")>;
+
+def SBWriteResGroup62 : SchedWriteRes<[SBPort5,SBPort23]> {
+  let Latency = 7;
+  let NumMicroOps = 3;
+  let ResourceCycles = [2,1];
+}
+def: InstRW<[SBWriteResGroup62], (instregex "VERRm")>;
+def: InstRW<[SBWriteResGroup62], (instregex "VERWm")>;
+
+def SBWriteResGroup63 : SchedWriteRes<[SBPort23,SBPort015]> {
+  let Latency = 7;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,2];
+}
+def: InstRW<[SBWriteResGroup63], (instregex "LODSB")>;
+def: InstRW<[SBWriteResGroup63], (instregex "LODSW")>;
+
+def SBWriteResGroup64 : SchedWriteRes<[SBPort5,SBPort01,SBPort23]> {
+  let Latency = 7;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,1,1];
+}
+def: InstRW<[SBWriteResGroup64], (instregex "FARJMP64")>;
+
+def SBWriteResGroup65 : SchedWriteRes<[SBPort23,SBPort0,SBPort015]> {
+  let Latency = 7;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,1,1];
+}
+def: InstRW<[SBWriteResGroup65], (instregex "ADC64rm")>;
+def: InstRW<[SBWriteResGroup65], (instregex "ADC8rm")>;
+def: InstRW<[SBWriteResGroup65], (instregex "CMOVAE64rm")>;
+def: InstRW<[SBWriteResGroup65], (instregex "CMOVB64rm")>;
+def: InstRW<[SBWriteResGroup65], (instregex "CMOVE64rm")>;
+def: InstRW<[SBWriteResGroup65], (instregex "CMOVG64rm")>;
+def: InstRW<[SBWriteResGroup65], (instregex "CMOVGE64rm")>;
+def: InstRW<[SBWriteResGroup65], (instregex "CMOVL64rm")>;
+def: InstRW<[SBWriteResGroup65], (instregex "CMOVLE64rm")>;
+def: InstRW<[SBWriteResGroup65], (instregex "CMOVNE64rm")>;
+def: InstRW<[SBWriteResGroup65], (instregex "CMOVNO64rm")>;
+def: InstRW<[SBWriteResGroup65], (instregex "CMOVNP64rm")>;
+def: InstRW<[SBWriteResGroup65], (instregex "CMOVNS64rm")>;
+def: InstRW<[SBWriteResGroup65], (instregex "CMOVO64rm")>;
+def: InstRW<[SBWriteResGroup65], (instregex "CMOVP64rm")>;
+def: InstRW<[SBWriteResGroup65], (instregex "CMOVS64rm")>;
+def: InstRW<[SBWriteResGroup65], (instregex "SBB64rm")>;
+def: InstRW<[SBWriteResGroup65], (instregex "SBB8rm")>;
+
+def SBWriteResGroup66 : SchedWriteRes<[SBPort0,SBPort4,SBPort23]> {
+  let Latency = 7;
+  let NumMicroOps = 4;
+  let ResourceCycles = [1,1,2];
+}
+def: InstRW<[SBWriteResGroup66], (instregex "FNSTSWm")>;
+
+def SBWriteResGroup67 : SchedWriteRes<[SBPort1,SBPort5,SBPort015]> {
+  let Latency = 7;
+  let NumMicroOps = 4;
+  let ResourceCycles = [1,2,1];
+}
+def: InstRW<[SBWriteResGroup67], (instregex "SLDT32r")>;
+def: InstRW<[SBWriteResGroup67], (instregex "STR32r")>;
+
+def SBWriteResGroup68 : SchedWriteRes<[SBPort4,SBPort5,SBPort23]> {
+  let Latency = 7;
+  let NumMicroOps = 4;
+  let ResourceCycles = [1,1,2];
+}
+def: InstRW<[SBWriteResGroup68], (instregex "CALL64m")>;
+def: InstRW<[SBWriteResGroup68], (instregex "FNSTCW16m")>;
+
+def SBWriteResGroup69 : SchedWriteRes<[SBPort4,SBPort23,SBPort0]> {
+  let Latency = 7;
+  let NumMicroOps = 4;
+  let ResourceCycles = [1,2,1];
+}
+def: InstRW<[SBWriteResGroup69], (instregex "BTC64mi8")>;
+def: InstRW<[SBWriteResGroup69], (instregex "BTR64mi8")>;
+def: InstRW<[SBWriteResGroup69], (instregex "BTS64mi8")>;
+def: InstRW<[SBWriteResGroup69], (instregex "SAR64mi")>;
+def: InstRW<[SBWriteResGroup69], (instregex "SAR8mi")>;
+def: InstRW<[SBWriteResGroup69], (instregex "SHL64m1")>;
+def: InstRW<[SBWriteResGroup69], (instregex "SHL64mi")>;
+def: InstRW<[SBWriteResGroup69], (instregex "SHL8m1")>;
+def: InstRW<[SBWriteResGroup69], (instregex "SHL8mi")>;
+def: InstRW<[SBWriteResGroup69], (instregex "SHR64mi")>;
+def: InstRW<[SBWriteResGroup69], (instregex "SHR8mi")>;
+
+def SBWriteResGroup70 : SchedWriteRes<[SBPort4,SBPort23,SBPort015]> {
+  let Latency = 7;
+  let NumMicroOps = 4;
+  let ResourceCycles = [1,2,1];
+}
+def: InstRW<[SBWriteResGroup70], (instregex "ADD64mi8")>;
+def: InstRW<[SBWriteResGroup70], (instregex "ADD64mr")>;
+def: InstRW<[SBWriteResGroup70], (instregex "ADD8mi")>;
+def: InstRW<[SBWriteResGroup70], (instregex "ADD8mr")>;
+def: InstRW<[SBWriteResGroup70], (instregex "AND64mi8")>;
+def: InstRW<[SBWriteResGroup70], (instregex "AND64mr")>;
+def: InstRW<[SBWriteResGroup70], (instregex "AND8mi")>;
+def: InstRW<[SBWriteResGroup70], (instregex "AND8mr")>;
+def: InstRW<[SBWriteResGroup70], (instregex "DEC64m")>;
+def: InstRW<[SBWriteResGroup70], (instregex "DEC8m")>;
+def: InstRW<[SBWriteResGroup70], (instregex "INC64m")>;
+def: InstRW<[SBWriteResGroup70], (instregex "INC8m")>;
+def: InstRW<[SBWriteResGroup70], (instregex "NEG64m")>;
+def: InstRW<[SBWriteResGroup70], (instregex "NEG8m")>;
+def: InstRW<[SBWriteResGroup70], (instregex "NOT64m")>;
+def: InstRW<[SBWriteResGroup70], (instregex "NOT8m")>;
+def: InstRW<[SBWriteResGroup70], (instregex "OR64mi8")>;
+def: InstRW<[SBWriteResGroup70], (instregex "OR64mr")>;
+def: InstRW<[SBWriteResGroup70], (instregex "OR8mi")>;
+def: InstRW<[SBWriteResGroup70], (instregex "OR8mr")>;
+def: InstRW<[SBWriteResGroup70], (instregex "SUB64mi8")>;
+def: InstRW<[SBWriteResGroup70], (instregex "SUB64mr")>;
+def: InstRW<[SBWriteResGroup70], (instregex "SUB8mi")>;
+def: InstRW<[SBWriteResGroup70], (instregex "SUB8mr")>;
+def: InstRW<[SBWriteResGroup70], (instregex "TEST64rm")>;
+def: InstRW<[SBWriteResGroup70], (instregex "TEST8mi")>;
+def: InstRW<[SBWriteResGroup70], (instregex "TEST8rm")>;
+def: InstRW<[SBWriteResGroup70], (instregex "XOR64mi8")>;
+def: InstRW<[SBWriteResGroup70], (instregex "XOR64mr")>;
+def: InstRW<[SBWriteResGroup70], (instregex "XOR8mi")>;
+def: InstRW<[SBWriteResGroup70], (instregex "XOR8mr")>;
+
+def SBWriteResGroup71 : SchedWriteRes<[SBPort0,SBPort23]> {
+  let Latency = 8;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup71], (instregex "MMX_PMADDUBSWrm64")>;
+def: InstRW<[SBWriteResGroup71], (instregex "MMX_PMULHRSWrm64")>;
+def: InstRW<[SBWriteResGroup71], (instregex "VTESTPDYrm")>;
+def: InstRW<[SBWriteResGroup71], (instregex "VTESTPSYrm")>;
+
+def SBWriteResGroup72 : SchedWriteRes<[SBPort1,SBPort23]> {
+  let Latency = 8;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup72], (instregex "BSF64rm")>;
+def: InstRW<[SBWriteResGroup72], (instregex "BSR64rm")>;
+def: InstRW<[SBWriteResGroup72], (instregex "CRC32r32m16")>;
+def: InstRW<[SBWriteResGroup72], (instregex "CRC32r32m8")>;
+def: InstRW<[SBWriteResGroup72], (instregex "FCOM32m")>;
+def: InstRW<[SBWriteResGroup72], (instregex "FCOM64m")>;
+def: InstRW<[SBWriteResGroup72], (instregex "FCOMP32m")>;
+def: InstRW<[SBWriteResGroup72], (instregex "FCOMP64m")>;
+def: InstRW<[SBWriteResGroup72], (instregex "MUL8m")>;
+
+def SBWriteResGroup73 : SchedWriteRes<[SBPort5,SBPort23]> {
+  let Latency = 8;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup73], (instregex "VANDNPDYrm")>;
+def: InstRW<[SBWriteResGroup73], (instregex "VANDNPSYrm")>;
+def: InstRW<[SBWriteResGroup73], (instregex "VANDPDrm")>;
+def: InstRW<[SBWriteResGroup73], (instregex "VANDPSrm")>;
+def: InstRW<[SBWriteResGroup73], (instregex "VORPDYrm")>;
+def: InstRW<[SBWriteResGroup73], (instregex "VORPSYrm")>;
+def: InstRW<[SBWriteResGroup73], (instregex "VPERM2F128rm")>;
+def: InstRW<[SBWriteResGroup73], (instregex "VPERMILPDYri")>;
+def: InstRW<[SBWriteResGroup73], (instregex "VPERMILPDmi")>;
+def: InstRW<[SBWriteResGroup73], (instregex "VPERMILPSYri")>;
+def: InstRW<[SBWriteResGroup73], (instregex "VPERMILPSmi")>;
+def: InstRW<[SBWriteResGroup73], (instregex "VSHUFPDYrmi")>;
+def: InstRW<[SBWriteResGroup73], (instregex "VSHUFPSYrmi")>;
+def: InstRW<[SBWriteResGroup73], (instregex "VUNPCKHPDrm")>;
+def: InstRW<[SBWriteResGroup73], (instregex "VUNPCKHPSrm")>;
+def: InstRW<[SBWriteResGroup73], (instregex "VUNPCKLPDYrm")>;
+def: InstRW<[SBWriteResGroup73], (instregex "VUNPCKLPSYrm")>;
+def: InstRW<[SBWriteResGroup73], (instregex "VXORPDrm")>;
+def: InstRW<[SBWriteResGroup73], (instregex "VXORPSrm")>;
+
+def SBWriteResGroup74 : SchedWriteRes<[SBPort23,SBPort0]> {
+  let Latency = 8;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup74], (instregex "VBLENDPDYrmi")>;
+def: InstRW<[SBWriteResGroup74], (instregex "VBLENDPSYrmi")>;
+
+def SBWriteResGroup75 : SchedWriteRes<[SBPort23,SBPort0]> {
+  let Latency = 8;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,2];
+}
+def: InstRW<[SBWriteResGroup75], (instregex "BLENDVPDrm0")>;
+def: InstRW<[SBWriteResGroup75], (instregex "BLENDVPSrm0")>;
+def: InstRW<[SBWriteResGroup75], (instregex "VBLENDVPDrm")>;
+def: InstRW<[SBWriteResGroup75], (instregex "VBLENDVPSrm")>;
+def: InstRW<[SBWriteResGroup75], (instregex "VMASKMOVPDrm")>;
+def: InstRW<[SBWriteResGroup75], (instregex "VMASKMOVPSrm")>;
+
+def SBWriteResGroup76 : SchedWriteRes<[SBPort23,SBPort15]> {
+  let Latency = 8;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,2];
+}
+def: InstRW<[SBWriteResGroup76], (instregex "PBLENDVBrr0")>;
+def: InstRW<[SBWriteResGroup76], (instregex "VPBLENDVBrm")>;
+
+def SBWriteResGroup77 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> {
+  let Latency = 8;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,1,1];
+}
+def: InstRW<[SBWriteResGroup77], (instregex "COMISDrm")>;
+def: InstRW<[SBWriteResGroup77], (instregex "COMISSrm")>;
+def: InstRW<[SBWriteResGroup77], (instregex "UCOMISDrm")>;
+def: InstRW<[SBWriteResGroup77], (instregex "UCOMISSrm")>;
+def: InstRW<[SBWriteResGroup77], (instregex "VCOMISDrm")>;
+def: InstRW<[SBWriteResGroup77], (instregex "VCOMISSrm")>;
+def: InstRW<[SBWriteResGroup77], (instregex "VUCOMISDrm")>;
+def: InstRW<[SBWriteResGroup77], (instregex "VUCOMISSrm")>;
+
+def SBWriteResGroup78 : SchedWriteRes<[SBPort0,SBPort5,SBPort23]> {
+  let Latency = 8;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,1,1];
+}
+def: InstRW<[SBWriteResGroup78], (instregex "PTESTrm")>;
+def: InstRW<[SBWriteResGroup78], (instregex "VPTESTrm")>;
+
+def SBWriteResGroup79 : SchedWriteRes<[SBPort0,SBPort23,SBPort15]> {
+  let Latency = 8;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,1,1];
+}
+def: InstRW<[SBWriteResGroup79], (instregex "PSLLDrm")>;
+def: InstRW<[SBWriteResGroup79], (instregex "PSLLQrm")>;
+def: InstRW<[SBWriteResGroup79], (instregex "PSLLWrm")>;
+def: InstRW<[SBWriteResGroup79], (instregex "PSRADrm")>;
+def: InstRW<[SBWriteResGroup79], (instregex "PSRAWrm")>;
+def: InstRW<[SBWriteResGroup79], (instregex "PSRLDrm")>;
+def: InstRW<[SBWriteResGroup79], (instregex "PSRLQrm")>;
+def: InstRW<[SBWriteResGroup79], (instregex "PSRLWrm")>;
+def: InstRW<[SBWriteResGroup79], (instregex "VPSLLDri")>;
+def: InstRW<[SBWriteResGroup79], (instregex "VPSLLQri")>;
+def: InstRW<[SBWriteResGroup79], (instregex "VPSLLWri")>;
+def: InstRW<[SBWriteResGroup79], (instregex "VPSRADrm")>;
+def: InstRW<[SBWriteResGroup79], (instregex "VPSRAWrm")>;
+def: InstRW<[SBWriteResGroup79], (instregex "VPSRLDrm")>;
+def: InstRW<[SBWriteResGroup79], (instregex "VPSRLQrm")>;
+def: InstRW<[SBWriteResGroup79], (instregex "VPSRLWrm")>;
+
+def SBWriteResGroup80 : SchedWriteRes<[SBPort23,SBPort15]> {
+  let Latency = 8;
+  let NumMicroOps = 4;
+  let ResourceCycles = [1,3];
+}
+def: InstRW<[SBWriteResGroup80], (instregex "MMX_PHADDSWrm64")>;
+def: InstRW<[SBWriteResGroup80], (instregex "MMX_PHADDWrm64")>;
+def: InstRW<[SBWriteResGroup80], (instregex "MMX_PHADDrm64")>;
+def: InstRW<[SBWriteResGroup80], (instregex "MMX_PHSUBDrm64")>;
+def: InstRW<[SBWriteResGroup80], (instregex "MMX_PHSUBSWrm64")>;
+def: InstRW<[SBWriteResGroup80], (instregex "MMX_PHSUBWrm64")>;
+
+def SBWriteResGroup81 : SchedWriteRes<[SBPort23,SBPort015]> {
+  let Latency = 8;
+  let NumMicroOps = 4;
+  let ResourceCycles = [1,3];
+}
+def: InstRW<[SBWriteResGroup81], (instregex "CMPXCHG64rm")>;
+def: InstRW<[SBWriteResGroup81], (instregex "CMPXCHG8rm")>;
+
+def SBWriteResGroup82 : SchedWriteRes<[SBPort23,SBPort0,SBPort015]> {
+  let Latency = 8;
+  let NumMicroOps = 4;
+  let ResourceCycles = [1,2,1];
+}
+def: InstRW<[SBWriteResGroup82], (instregex "CMOVA64rm")>;
+def: InstRW<[SBWriteResGroup82], (instregex "CMOVBE64rm")>;
+
+def SBWriteResGroup83 : SchedWriteRes<[SBPort23,SBPort015]> {
+  let Latency = 8;
+  let NumMicroOps = 5;
+  let ResourceCycles = [2,3];
+}
+def: InstRW<[SBWriteResGroup83], (instregex "CMPSB")>;
+def: InstRW<[SBWriteResGroup83], (instregex "CMPSL")>;
+def: InstRW<[SBWriteResGroup83], (instregex "CMPSQ")>;
+def: InstRW<[SBWriteResGroup83], (instregex "CMPSW")>;
+
+def SBWriteResGroup84 : SchedWriteRes<[SBPort4,SBPort5,SBPort23]> {
+  let Latency = 8;
+  let NumMicroOps = 5;
+  let ResourceCycles = [1,2,2];
+}
+def: InstRW<[SBWriteResGroup84], (instregex "FLDCW16m")>;
+
+def SBWriteResGroup85 : SchedWriteRes<[SBPort4,SBPort23,SBPort0]> {
+  let Latency = 8;
+  let NumMicroOps = 5;
+  let ResourceCycles = [1,2,2];
+}
+def: InstRW<[SBWriteResGroup85], (instregex "ROL64mi")>;
+def: InstRW<[SBWriteResGroup85], (instregex "ROL8mi")>;
+def: InstRW<[SBWriteResGroup85], (instregex "ROR64mi")>;
+def: InstRW<[SBWriteResGroup85], (instregex "ROR8mi")>;
+
+def SBWriteResGroup86 : SchedWriteRes<[SBPort4,SBPort23,SBPort015]> {
+  let Latency = 8;
+  let NumMicroOps = 5;
+  let ResourceCycles = [1,2,2];
+}
+def: InstRW<[SBWriteResGroup86], (instregex "MOVSB")>;
+def: InstRW<[SBWriteResGroup86], (instregex "MOVSL")>;
+def: InstRW<[SBWriteResGroup86], (instregex "MOVSQ")>;
+def: InstRW<[SBWriteResGroup86], (instregex "MOVSW")>;
+def: InstRW<[SBWriteResGroup86], (instregex "XADD64rm")>;
+def: InstRW<[SBWriteResGroup86], (instregex "XADD8rm")>;
+
+def SBWriteResGroup87 : SchedWriteRes<[SBPort4,SBPort5,SBPort01,SBPort23]> {
+  let Latency = 8;
+  let NumMicroOps = 5;
+  let ResourceCycles = [1,1,1,2];
+}
+def: InstRW<[SBWriteResGroup87], (instregex "FARCALL64")>;
+
+def SBWriteResGroup88 : SchedWriteRes<[SBPort4,SBPort23,SBPort0,SBPort015]> {
+  let Latency = 8;
+  let NumMicroOps = 5;
+  let ResourceCycles = [1,2,1,1];
+}
+def: InstRW<[SBWriteResGroup88], (instregex "SHLD64mri8")>;
+def: InstRW<[SBWriteResGroup88], (instregex "SHRD64mri8")>;
+
+def SBWriteResGroup89 : SchedWriteRes<[SBPort0,SBPort23]> {
+  let Latency = 9;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup89], (instregex "MMX_PMULUDQirm")>;
+def: InstRW<[SBWriteResGroup89], (instregex "PMADDUBSWrm")>;
+def: InstRW<[SBWriteResGroup89], (instregex "PMADDWDrm")>;
+def: InstRW<[SBWriteResGroup89], (instregex "PMULDQrm")>;
+def: InstRW<[SBWriteResGroup89], (instregex "PMULHRSWrm")>;
+def: InstRW<[SBWriteResGroup89], (instregex "PMULHUWrm")>;
+def: InstRW<[SBWriteResGroup89], (instregex "PMULHWrm")>;
+def: InstRW<[SBWriteResGroup89], (instregex "PMULLDrm")>;
+def: InstRW<[SBWriteResGroup89], (instregex "PMULLWrm")>;
+def: InstRW<[SBWriteResGroup89], (instregex "PMULUDQrm")>;
+def: InstRW<[SBWriteResGroup89], (instregex "PSADBWrm")>;
+def: InstRW<[SBWriteResGroup89], (instregex "VPMADDUBSWrm")>;
+def: InstRW<[SBWriteResGroup89], (instregex "VPMADDWDrm")>;
+def: InstRW<[SBWriteResGroup89], (instregex "VPMULDQrm")>;
+def: InstRW<[SBWriteResGroup89], (instregex "VPMULHRSWrm")>;
+def: InstRW<[SBWriteResGroup89], (instregex "VPMULHUWrm")>;
+def: InstRW<[SBWriteResGroup89], (instregex "VPMULHWrm")>;
+def: InstRW<[SBWriteResGroup89], (instregex "VPMULLDrm")>;
+def: InstRW<[SBWriteResGroup89], (instregex "VPMULLWrm")>;
+def: InstRW<[SBWriteResGroup89], (instregex "VPMULUDQrm")>;
+def: InstRW<[SBWriteResGroup89], (instregex "VPSADBWrm")>;
+
+def SBWriteResGroup90 : SchedWriteRes<[SBPort1,SBPort23]> {
+  let Latency = 9;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup90], (instregex "ADDPDrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "ADDPSrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "ADDSDrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "ADDSSrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "ADDSUBPDrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "ADDSUBPSrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "CMPPDrmi")>;
+def: InstRW<[SBWriteResGroup90], (instregex "CMPPSrmi")>;
+def: InstRW<[SBWriteResGroup90], (instregex "CMPSSrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "CVTDQ2PSrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "CVTPS2DQrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "CVTSI2SD64rm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "CVTSI2SDrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "CVTTPS2DQrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "MAXPDrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "MAXPSrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "MAXSDrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "MAXSSrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "MINPDrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "MINPSrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "MINSDrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "MINSSrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "MMX_CVTPI2PSirm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "MMX_CVTPS2PIirm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "MMX_CVTTPS2PIirm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "POPCNT64rm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "ROUNDPDm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "ROUNDPSm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "ROUNDSDm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "ROUNDSSm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "SUBPDrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "SUBPSrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "SUBSDrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "SUBSSrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "VADDPDrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "VADDPSrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "VADDSDrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "VADDSSrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "VADDSUBPDrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "VADDSUBPSrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "VCMPPDrmi")>;
+def: InstRW<[SBWriteResGroup90], (instregex "VCMPPSrmi")>;
+def: InstRW<[SBWriteResGroup90], (instregex "VCMPSDrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "VCMPSSrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "VCVTDQ2PSrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "VCVTPS2DQrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "VCVTSI2SD64rm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "VCVTSI2SDrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "VCVTTPS2DQrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "VMAXPDrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "VMAXPSrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "VMAXSDrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "VMAXSSrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "VMINPDrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "VMINPSrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "VMINSDrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "VMINSSrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "VROUNDPDm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "VROUNDPSm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "VROUNDSDm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "VROUNDSSm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "VSUBPDrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "VSUBPSrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "VSUBSDrm")>;
+def: InstRW<[SBWriteResGroup90], (instregex "VSUBSSrm")>;
+
+def SBWriteResGroup91 : SchedWriteRes<[SBPort23,SBPort0]> {
+  let Latency = 9;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,2];
+}
+def: InstRW<[SBWriteResGroup91], (instregex "VBLENDVPDYrm")>;
+def: InstRW<[SBWriteResGroup91], (instregex "VBLENDVPSYrm")>;
+def: InstRW<[SBWriteResGroup91], (instregex "VMASKMOVPDrm")>;
+def: InstRW<[SBWriteResGroup91], (instregex "VMASKMOVPSrm")>;
+
+def SBWriteResGroup92 : SchedWriteRes<[SBPort0,SBPort1,SBPort5]> {
+  let Latency = 9;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,1,1];
+}
+def: InstRW<[SBWriteResGroup92], (instregex "DPPDrri")>;
+def: InstRW<[SBWriteResGroup92], (instregex "VDPPDrri")>;
+
+def SBWriteResGroup93 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> {
+  let Latency = 9;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,1,1];
+}
+def: InstRW<[SBWriteResGroup93], (instregex "CVTSD2SI64rm")>;
+def: InstRW<[SBWriteResGroup93], (instregex "CVTSD2SIrm")>;
+def: InstRW<[SBWriteResGroup93], (instregex "CVTSS2SI64rm")>;
+def: InstRW<[SBWriteResGroup93], (instregex "CVTSS2SIrm")>;
+def: InstRW<[SBWriteResGroup93], (instregex "CVTTSD2SI64rm")>;
+def: InstRW<[SBWriteResGroup93], (instregex "CVTTSD2SIrm")>;
+def: InstRW<[SBWriteResGroup93], (instregex "CVTTSS2SI64rm")>;
+def: InstRW<[SBWriteResGroup93], (instregex "CVTTSS2SIrm")>;
+def: InstRW<[SBWriteResGroup93], (instregex "MUL64m")>;
+
+def SBWriteResGroup94 : SchedWriteRes<[SBPort0,SBPort5,SBPort23]> {
+  let Latency = 9;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,1,1];
+}
+def: InstRW<[SBWriteResGroup94], (instregex "VPTESTYrm")>;
+
+def SBWriteResGroup95 : SchedWriteRes<[SBPort5,SBPort01,SBPort23]> {
+  let Latency = 9;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,1,1];
+}
+def: InstRW<[SBWriteResGroup95], (instregex "LD_F32m")>;
+def: InstRW<[SBWriteResGroup95], (instregex "LD_F64m")>;
+def: InstRW<[SBWriteResGroup95], (instregex "LD_F80m")>;
+
+def SBWriteResGroup96 : SchedWriteRes<[SBPort23,SBPort15]> {
+  let Latency = 9;
+  let NumMicroOps = 4;
+  let ResourceCycles = [1,3];
+}
+def: InstRW<[SBWriteResGroup96], (instregex "PHADDDrm")>;
+def: InstRW<[SBWriteResGroup96], (instregex "PHADDSWrm128")>;
+def: InstRW<[SBWriteResGroup96], (instregex "PHADDWrm")>;
+def: InstRW<[SBWriteResGroup96], (instregex "PHSUBDrm")>;
+def: InstRW<[SBWriteResGroup96], (instregex "PHSUBSWrm128")>;
+def: InstRW<[SBWriteResGroup96], (instregex "PHSUBWrm")>;
+def: InstRW<[SBWriteResGroup96], (instregex "VPHADDDrm")>;
+def: InstRW<[SBWriteResGroup96], (instregex "VPHADDSWrm128")>;
+def: InstRW<[SBWriteResGroup96], (instregex "VPHADDWrm")>;
+def: InstRW<[SBWriteResGroup96], (instregex "VPHSUBDrm")>;
+def: InstRW<[SBWriteResGroup96], (instregex "VPHSUBSWrm128")>;
+def: InstRW<[SBWriteResGroup96], (instregex "VPHSUBWrm")>;
+
+def SBWriteResGroup97 : SchedWriteRes<[SBPort1,SBPort4,SBPort23]> {
+  let Latency = 9;
+  let NumMicroOps = 4;
+  let ResourceCycles = [1,1,2];
+}
+def: InstRW<[SBWriteResGroup97], (instregex "IST_F16m")>;
+def: InstRW<[SBWriteResGroup97], (instregex "IST_F32m")>;
+def: InstRW<[SBWriteResGroup97], (instregex "IST_FP16m")>;
+def: InstRW<[SBWriteResGroup97], (instregex "IST_FP32m")>;
+def: InstRW<[SBWriteResGroup97], (instregex "IST_FP64m")>;
+def: InstRW<[SBWriteResGroup97], (instregex "SHL64mCL")>;
+def: InstRW<[SBWriteResGroup97], (instregex "SHL8mCL")>;
+
+def SBWriteResGroup98 : SchedWriteRes<[SBPort4,SBPort23,SBPort015]> {
+  let Latency = 9;
+  let NumMicroOps = 6;
+  let ResourceCycles = [1,2,3];
+}
+def: InstRW<[SBWriteResGroup98], (instregex "ADC64mi8")>;
+def: InstRW<[SBWriteResGroup98], (instregex "ADC8mi")>;
+def: InstRW<[SBWriteResGroup98], (instregex "SBB64mi8")>;
+def: InstRW<[SBWriteResGroup98], (instregex "SBB8mi")>;
+
+def SBWriteResGroup99 : SchedWriteRes<[SBPort4,SBPort23,SBPort0,SBPort015]> {
+  let Latency = 9;
+  let NumMicroOps = 6;
+  let ResourceCycles = [1,2,2,1];
+}
+def: InstRW<[SBWriteResGroup99], (instregex "ADC64mr")>;
+def: InstRW<[SBWriteResGroup99], (instregex "ADC8mr")>;
+def: InstRW<[SBWriteResGroup99], (instregex "SBB64mr")>;
+def: InstRW<[SBWriteResGroup99], (instregex "SBB8mr")>;
+
+def SBWriteResGroup100 : SchedWriteRes<[SBPort4,SBPort5,SBPort23,SBPort0,SBPort015]> {
+  let Latency = 9;
+  let NumMicroOps = 6;
+  let ResourceCycles = [1,1,2,1,1];
+}
+def: InstRW<[SBWriteResGroup100], (instregex "BT64mr")>;
+def: InstRW<[SBWriteResGroup100], (instregex "BTC64mr")>;
+def: InstRW<[SBWriteResGroup100], (instregex "BTR64mr")>;
+def: InstRW<[SBWriteResGroup100], (instregex "BTS64mr")>;
+
+def SBWriteResGroup101 : SchedWriteRes<[SBPort1,SBPort23]> {
+  let Latency = 10;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup101], (instregex "ADD_F32m")>;
+def: InstRW<[SBWriteResGroup101], (instregex "ADD_F64m")>;
+def: InstRW<[SBWriteResGroup101], (instregex "ILD_F16m")>;
+def: InstRW<[SBWriteResGroup101], (instregex "ILD_F32m")>;
+def: InstRW<[SBWriteResGroup101], (instregex "ILD_F64m")>;
+def: InstRW<[SBWriteResGroup101], (instregex "SUBR_F32m")>;
+def: InstRW<[SBWriteResGroup101], (instregex "SUBR_F64m")>;
+def: InstRW<[SBWriteResGroup101], (instregex "SUB_F32m")>;
+def: InstRW<[SBWriteResGroup101], (instregex "SUB_F64m")>;
+def: InstRW<[SBWriteResGroup101], (instregex "VADDPDYrm")>;
+def: InstRW<[SBWriteResGroup101], (instregex "VADDPSYrm")>;
+def: InstRW<[SBWriteResGroup101], (instregex "VADDSUBPDYrm")>;
+def: InstRW<[SBWriteResGroup101], (instregex "VADDSUBPSYrm")>;
+def: InstRW<[SBWriteResGroup101], (instregex "VCMPPDYrmi")>;
+def: InstRW<[SBWriteResGroup101], (instregex "VCMPPSYrmi")>;
+def: InstRW<[SBWriteResGroup101], (instregex "VCVTDQ2PSYrm")>;
+def: InstRW<[SBWriteResGroup101], (instregex "VCVTPS2DQYrm")>;
+def: InstRW<[SBWriteResGroup101], (instregex "VCVTTPS2DQrm")>;
+def: InstRW<[SBWriteResGroup101], (instregex "VMAXPDYrm")>;
+def: InstRW<[SBWriteResGroup101], (instregex "VMAXPSYrm")>;
+def: InstRW<[SBWriteResGroup101], (instregex "VMINPDrm")>;
+def: InstRW<[SBWriteResGroup101], (instregex "VMINPSrm")>;
+def: InstRW<[SBWriteResGroup101], (instregex "VROUNDPDm")>;
+def: InstRW<[SBWriteResGroup101], (instregex "VROUNDPSm")>;
+def: InstRW<[SBWriteResGroup101], (instregex "VSUBPDYrm")>;
+def: InstRW<[SBWriteResGroup101], (instregex "VSUBPSYrm")>;
+
+def SBWriteResGroup102 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> {
+  let Latency = 10;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,1,1];
+}
+def: InstRW<[SBWriteResGroup102], (instregex "VCVTSD2SI64rm")>;
+def: InstRW<[SBWriteResGroup102], (instregex "VCVTSD2SI64rr")>;
+def: InstRW<[SBWriteResGroup102], (instregex "VCVTSS2SI64rm")>;
+def: InstRW<[SBWriteResGroup102], (instregex "VCVTSS2SIrm")>;
+def: InstRW<[SBWriteResGroup102], (instregex "VCVTTSD2SI64rm")>;
+def: InstRW<[SBWriteResGroup102], (instregex "VCVTTSD2SI64rr")>;
+def: InstRW<[SBWriteResGroup102], (instregex "VCVTTSS2SI64rm")>;
+def: InstRW<[SBWriteResGroup102], (instregex "VCVTTSS2SIrm")>;
+
+def SBWriteResGroup103 : SchedWriteRes<[SBPort1,SBPort5,SBPort23]> {
+  let Latency = 10;
+  let NumMicroOps = 3;
+  let ResourceCycles = [1,1,1];
+}
+def: InstRW<[SBWriteResGroup103], (instregex "CVTDQ2PDrm")>;
+def: InstRW<[SBWriteResGroup103], (instregex "CVTPD2DQrm")>;
+def: InstRW<[SBWriteResGroup103], (instregex "CVTPD2PSrm")>;
+def: InstRW<[SBWriteResGroup103], (instregex "CVTSD2SSrm")>;
+def: InstRW<[SBWriteResGroup103], (instregex "CVTSI2SS64rm")>;
+def: InstRW<[SBWriteResGroup103], (instregex "CVTSI2SSrm")>;
+def: InstRW<[SBWriteResGroup103], (instregex "CVTTPD2DQrm")>;
+def: InstRW<[SBWriteResGroup103], (instregex "MMX_CVTPD2PIirm")>;
+def: InstRW<[SBWriteResGroup103], (instregex "MMX_CVTPI2PDirm")>;
+def: InstRW<[SBWriteResGroup103], (instregex "MMX_CVTTPD2PIirm")>;
+def: InstRW<[SBWriteResGroup103], (instregex "VCVTDQ2PDYrm")>;
+def: InstRW<[SBWriteResGroup103], (instregex "VCVTDQ2PDrm")>;
+def: InstRW<[SBWriteResGroup103], (instregex "VCVTPD2DQrm")>;
+def: InstRW<[SBWriteResGroup103], (instregex "VCVTPD2PSrm")>;
+def: InstRW<[SBWriteResGroup103], (instregex "VCVTSD2SSrm")>;
+def: InstRW<[SBWriteResGroup103], (instregex "VCVTSI2SS64rm")>;
+def: InstRW<[SBWriteResGroup103], (instregex "VCVTSI2SSrm")>;
+def: InstRW<[SBWriteResGroup103], (instregex "VCVTTPD2DQrm")>;
+
+def SBWriteResGroup104 : SchedWriteRes<[SBPort0,SBPort23]> {
+  let Latency = 11;
+  let NumMicroOps = 2;
+  let ResourceCycles = [1,1];
+}
+def: InstRW<[SBWriteResGroup104], (instregex "MULPDrm")>;
+def: InstRW<[SBWriteResGroup104], (instregex "MULPSrm")>;
+def: InstRW<[SBWriteResGroup104], (instregex "MULSDrm")>;
+def: InstRW<[SBWriteResGroup104], (instregex "MULSSrm")>;
+def: InstRW<[SBWriteResGroup104], (instregex "PCMPGTQrm")>;
+def: InstRW<[SBWriteResGroup104], (instregex "PHMINPOSUWrm128")>;
+def: InstRW<[SBWriteResGroup104], (instregex "RCPPSm")>;
+def: InstRW<[SBWriteResGroup104], (instregex "RCPSSm")>;
+def: InstRW<[SBWriteResGroup104], (instregex "RSQRTPSm")>;
+def: InstRW<[SBWriteResGroup104], (instregex "RSQRTSSm")>;
+def: InstRW<[SBWriteResGroup104], (instregex "VMULPDrm")>;
+def: InstRW<[SBWriteResGroup104], (instregex "VMULPSrm")>;
+def: InstRW<[SBWriteResGroup104], (instregex "VMULSDrm")>;
+def: InstRW<[SBWriteResGroup104], (instregex "VMULSSrm")>;
+def: InstRW<[SBWriteResGroup104], (instregex "VPCMPGTQrm")>;
+def: InstRW<[SBWriteResGroup104], (instregex "VPHMINPOSUWrm128")>;
+def: InstRW<[SBWriteResGroup104], (instregex "VRCPPSm")>;
+def: InstRW<[SBWriteResGroup104], (instregex "VRCPSSm")>;
+def: InstRW<[SBWriteResGroup104], (instregex "VRSQRTPSm")>;
+def: InstRW<[SBWriteResGroup104], (instregex "VRSQRTSSm")>; + +def SBWriteResGroup105 : SchedWriteRes<[SBPort0]> { + let Latency = 11; + let NumMicroOps = 3; + let ResourceCycles = [3]; +} +def: InstRW<[SBWriteResGroup105], (instregex "PCMPISTRIrr")>; +def: InstRW<[SBWriteResGroup105], (instregex "PCMPISTRM128rr")>; +def: InstRW<[SBWriteResGroup105], (instregex "VPCMPISTRIrr")>; +def: InstRW<[SBWriteResGroup105], (instregex "VPCMPISTRM128rr")>; + +def SBWriteResGroup106 : SchedWriteRes<[SBPort1,SBPort23]> { + let Latency = 11; + let NumMicroOps = 3; + let ResourceCycles = [2,1]; +} +def: InstRW<[SBWriteResGroup106], (instregex "FICOM16m")>; +def: InstRW<[SBWriteResGroup106], (instregex "FICOM32m")>; +def: InstRW<[SBWriteResGroup106], (instregex "FICOMP16m")>; +def: InstRW<[SBWriteResGroup106], (instregex "FICOMP32m")>; + +def SBWriteResGroup107 : SchedWriteRes<[SBPort1,SBPort5,SBPort23]> { + let Latency = 11; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[SBWriteResGroup107], (instregex "VCVTPD2DQYrm")>; +def: InstRW<[SBWriteResGroup107], (instregex "VCVTPD2PSYrm")>; +def: InstRW<[SBWriteResGroup107], (instregex "VCVTTPD2DQYrm")>; + +def SBWriteResGroup108 : SchedWriteRes<[SBPort0,SBPort23,SBPort15]> { + let Latency = 11; + let NumMicroOps = 4; + let ResourceCycles = [1,1,2]; +} +def: InstRW<[SBWriteResGroup108], (instregex "MPSADBWrmi")>; +def: InstRW<[SBWriteResGroup108], (instregex "VMPSADBWrmi")>; + +def SBWriteResGroup109 : SchedWriteRes<[SBPort1,SBPort5,SBPort23]> { + let Latency = 11; + let NumMicroOps = 4; + let ResourceCycles = [1,2,1]; +} +def: InstRW<[SBWriteResGroup109], (instregex "HADDPDrm")>; +def: InstRW<[SBWriteResGroup109], (instregex "HADDPSrm")>; +def: InstRW<[SBWriteResGroup109], (instregex "HSUBPDrm")>; +def: InstRW<[SBWriteResGroup109], (instregex "HSUBPSrm")>; +def: InstRW<[SBWriteResGroup109], (instregex "VHADDPDrm")>; +def: InstRW<[SBWriteResGroup109], (instregex "VHADDPSrm")>; +def: InstRW<[SBWriteResGroup109], (instregex "VHSUBPDrm")>; +def: InstRW<[SBWriteResGroup109], (instregex "VHSUBPSrm")>; + +def SBWriteResGroup110 : SchedWriteRes<[SBPort5]> { + let Latency = 12; + let NumMicroOps = 2; + let ResourceCycles = [2]; +} +def: InstRW<[SBWriteResGroup110], (instregex "AESIMCrr")>; +def: InstRW<[SBWriteResGroup110], (instregex "VAESIMCrr")>; + +def SBWriteResGroup111 : SchedWriteRes<[SBPort0,SBPort23]> { + let Latency = 12; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SBWriteResGroup111], (instregex "MUL_F32m")>; +def: InstRW<[SBWriteResGroup111], (instregex "MUL_F64m")>; +def: InstRW<[SBWriteResGroup111], (instregex "VMULPDYrm")>; +def: InstRW<[SBWriteResGroup111], (instregex "VMULPSYrm")>; + +def SBWriteResGroup112 : SchedWriteRes<[SBPort0,SBPort1,SBPort5]> { + let Latency = 12; + let NumMicroOps = 4; + let ResourceCycles = [1,2,1]; +} +def: InstRW<[SBWriteResGroup112], (instregex "DPPSrri")>; +def: InstRW<[SBWriteResGroup112], (instregex "VDPPSYrri")>; +def: InstRW<[SBWriteResGroup112], (instregex "VDPPSrri")>; + +def SBWriteResGroup113 : SchedWriteRes<[SBPort1,SBPort5,SBPort23]> { + let Latency = 12; + let NumMicroOps = 4; + let ResourceCycles = [1,2,1]; +} +def: InstRW<[SBWriteResGroup113], (instregex "VHADDPDrm")>; +def: InstRW<[SBWriteResGroup113], (instregex "VHADDPSYrm")>; +def: InstRW<[SBWriteResGroup113], (instregex "VHSUBPDYrm")>; +def: InstRW<[SBWriteResGroup113], (instregex "VHSUBPSYrm")>; + +def SBWriteResGroup114 : SchedWriteRes<[SBPort1,SBPort23]> { + let Latency = 13; + let NumMicroOps = 3; + 
let ResourceCycles = [2,1]; +} +def: InstRW<[SBWriteResGroup114], (instregex "ADD_FI16m")>; +def: InstRW<[SBWriteResGroup114], (instregex "ADD_FI32m")>; +def: InstRW<[SBWriteResGroup114], (instregex "SUBR_FI16m")>; +def: InstRW<[SBWriteResGroup114], (instregex "SUBR_FI32m")>; +def: InstRW<[SBWriteResGroup114], (instregex "SUB_FI16m")>; +def: InstRW<[SBWriteResGroup114], (instregex "SUB_FI32m")>; + +def SBWriteResGroup115 : SchedWriteRes<[SBPort5,SBPort23,SBPort015]> { + let Latency = 13; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[SBWriteResGroup115], (instregex "AESDECLASTrm")>; +def: InstRW<[SBWriteResGroup115], (instregex "AESDECrm")>; +def: InstRW<[SBWriteResGroup115], (instregex "AESENCLASTrm")>; +def: InstRW<[SBWriteResGroup115], (instregex "AESENCrm")>; +def: InstRW<[SBWriteResGroup115], (instregex "VAESDECLASTrm")>; +def: InstRW<[SBWriteResGroup115], (instregex "VAESDECrm")>; +def: InstRW<[SBWriteResGroup115], (instregex "VAESENCLASTrm")>; +def: InstRW<[SBWriteResGroup115], (instregex "VAESENCrm")>; + +def SBWriteResGroup116 : SchedWriteRes<[SBPort0]> { + let Latency = 14; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[SBWriteResGroup116], (instregex "DIVPSrr")>; +def: InstRW<[SBWriteResGroup116], (instregex "DIVSSrr")>; +def: InstRW<[SBWriteResGroup116], (instregex "SQRTPSr")>; +def: InstRW<[SBWriteResGroup116], (instregex "SQRTSSr")>; +def: InstRW<[SBWriteResGroup116], (instregex "VDIVPSrr")>; +def: InstRW<[SBWriteResGroup116], (instregex "VDIVSSrr")>; +def: InstRW<[SBWriteResGroup116], (instregex "VSQRTPSr")>; + +def SBWriteResGroup117 : SchedWriteRes<[SBPort0,SBPort23]> { + let Latency = 14; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SBWriteResGroup117], (instregex "VSQRTSSm")>; + +def SBWriteResGroup118 : SchedWriteRes<[SBPort0,SBPort23,SBPort0]> { + let Latency = 14; + let NumMicroOps = 4; + let ResourceCycles = [2,1,1]; +} +def: InstRW<[SBWriteResGroup118], (instregex "VRCPPSm")>; +def: InstRW<[SBWriteResGroup118], (instregex "VRSQRTPSYm")>; + +def SBWriteResGroup119 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> { + let Latency = 15; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[SBWriteResGroup119], (instregex "MUL_FI16m")>; +def: InstRW<[SBWriteResGroup119], (instregex "MUL_FI32m")>; + +def SBWriteResGroup120 : SchedWriteRes<[SBPort0,SBPort1,SBPort5,SBPort23]> { + let Latency = 15; + let NumMicroOps = 4; + let ResourceCycles = [1,1,1,1]; +} +def: InstRW<[SBWriteResGroup120], (instregex "DPPDrmi")>; +def: InstRW<[SBWriteResGroup120], (instregex "VDPPDrmi")>; + +def SBWriteResGroup121 : SchedWriteRes<[SBPort0,SBPort23]> { + let Latency = 17; + let NumMicroOps = 4; + let ResourceCycles = [3,1]; +} +def: InstRW<[SBWriteResGroup121], (instregex "PCMPISTRIrm")>; +def: InstRW<[SBWriteResGroup121], (instregex "PCMPISTRM128rm")>; +def: InstRW<[SBWriteResGroup121], (instregex "VPCMPISTRIrm")>; +def: InstRW<[SBWriteResGroup121], (instregex "VPCMPISTRM128rm")>; + +def SBWriteResGroup122 : SchedWriteRes<[SBPort5,SBPort23]> { + let Latency = 18; + let NumMicroOps = 3; + let ResourceCycles = [2,1]; +} +def: InstRW<[SBWriteResGroup122], (instregex "AESIMCrm")>; +def: InstRW<[SBWriteResGroup122], (instregex "VAESIMCrm")>; + +def SBWriteResGroup123 : SchedWriteRes<[SBPort0,SBPort23]> { + let Latency = 20; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SBWriteResGroup123], (instregex "DIVPSrm")>; +def: InstRW<[SBWriteResGroup123], (instregex "DIVSSrm")>; +def: 
InstRW<[SBWriteResGroup123], (instregex "SQRTPSm")>; +def: InstRW<[SBWriteResGroup123], (instregex "SQRTSSm")>; +def: InstRW<[SBWriteResGroup123], (instregex "VDIVPSrm")>; +def: InstRW<[SBWriteResGroup123], (instregex "VDIVSSrm")>; +def: InstRW<[SBWriteResGroup123], (instregex "VSQRTPSm")>; + +def SBWriteResGroup124 : SchedWriteRes<[SBPort0]> { + let Latency = 21; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[SBWriteResGroup124], (instregex "VSQRTSDr")>; + +def SBWriteResGroup125 : SchedWriteRes<[SBPort0,SBPort23]> { + let Latency = 21; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SBWriteResGroup125], (instregex "VSQRTSDm")>; + +def SBWriteResGroup126 : SchedWriteRes<[SBPort0]> { + let Latency = 22; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[SBWriteResGroup126], (instregex "DIVPDrr")>; +def: InstRW<[SBWriteResGroup126], (instregex "DIVSDrr")>; +def: InstRW<[SBWriteResGroup126], (instregex "SQRTPDr")>; +def: InstRW<[SBWriteResGroup126], (instregex "SQRTSDr")>; +def: InstRW<[SBWriteResGroup126], (instregex "VDIVPDrr")>; +def: InstRW<[SBWriteResGroup126], (instregex "VDIVSDrr")>; +def: InstRW<[SBWriteResGroup126], (instregex "VSQRTPDr")>; + +def SBWriteResGroup127 : SchedWriteRes<[SBPort0]> { + let Latency = 24; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[SBWriteResGroup127], (instregex "DIVR_FPrST0")>; +def: InstRW<[SBWriteResGroup127], (instregex "DIVR_FST0r")>; +def: InstRW<[SBWriteResGroup127], (instregex "DIVR_FrST0")>; +def: InstRW<[SBWriteResGroup127], (instregex "DIV_FPrST0")>; +def: InstRW<[SBWriteResGroup127], (instregex "DIV_FST0r")>; +def: InstRW<[SBWriteResGroup127], (instregex "DIV_FrST0")>; + +def SBWriteResGroup128 : SchedWriteRes<[SBPort0,SBPort23]> { + let Latency = 28; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SBWriteResGroup128], (instregex "DIVPDrm")>; +def: InstRW<[SBWriteResGroup128], (instregex "DIVSDrm")>; +def: InstRW<[SBWriteResGroup128], (instregex "SQRTPDm")>; +def: InstRW<[SBWriteResGroup128], (instregex "SQRTSDm")>; +def: InstRW<[SBWriteResGroup128], (instregex "VDIVPDrm")>; +def: InstRW<[SBWriteResGroup128], (instregex "VDIVSDrm")>; +def: InstRW<[SBWriteResGroup128], (instregex "VSQRTPDm")>; + +def SBWriteResGroup129 : SchedWriteRes<[SBPort0,SBPort0]> { + let Latency = 29; + let NumMicroOps = 3; + let ResourceCycles = [2,1]; +} +def: InstRW<[SBWriteResGroup129], (instregex "VDIVPSYrr")>; +def: InstRW<[SBWriteResGroup129], (instregex "VSQRTPSYr")>; + +def SBWriteResGroup130 : SchedWriteRes<[SBPort0,SBPort23]> { + let Latency = 31; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SBWriteResGroup130], (instregex "DIVR_F32m")>; +def: InstRW<[SBWriteResGroup130], (instregex "DIVR_F64m")>; +def: InstRW<[SBWriteResGroup130], (instregex "DIV_F32m")>; +def: InstRW<[SBWriteResGroup130], (instregex "DIV_F64m")>; + +def SBWriteResGroup131 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> { + let Latency = 34; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[SBWriteResGroup131], (instregex "DIVR_FI16m")>; +def: InstRW<[SBWriteResGroup131], (instregex "DIVR_FI32m")>; +def: InstRW<[SBWriteResGroup131], (instregex "DIV_FI16m")>; +def: InstRW<[SBWriteResGroup131], (instregex "DIV_FI32m")>; + +def SBWriteResGroup132 : SchedWriteRes<[SBPort0,SBPort23,SBPort0]> { + let Latency = 36; + let NumMicroOps = 4; + let ResourceCycles = [2,1,1]; +} +def: InstRW<[SBWriteResGroup132], (instregex "VDIVPSYrm")>; +def: 
InstRW<[SBWriteResGroup132], (instregex "VSQRTPSYm")>; + +def SBWriteResGroup133 : SchedWriteRes<[SBPort0,SBPort0]> { + let Latency = 45; + let NumMicroOps = 3; + let ResourceCycles = [2,1]; +} +def: InstRW<[SBWriteResGroup133], (instregex "VDIVPDYrr")>; +def: InstRW<[SBWriteResGroup133], (instregex "VSQRTPDYr")>; + +def SBWriteResGroup134 : SchedWriteRes<[SBPort0,SBPort23,SBPort0]> { + let Latency = 52; + let NumMicroOps = 4; + let ResourceCycles = [2,1,1]; +} +def: InstRW<[SBWriteResGroup134], (instregex "VDIVPDYrm")>; +def: InstRW<[SBWriteResGroup134], (instregex "VSQRTPDYm")>; + +def SBWriteResGroup135 : SchedWriteRes<[SBPort0]> { + let Latency = 114; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[SBWriteResGroup135], (instregex "VSQRTSSr")>; + } // SchedModel diff --git a/lib/Target/X86/X86ScheduleBtVer2.td b/lib/Target/X86/X86ScheduleBtVer2.td index 6cb2a3694d92e..ed53893b779ce 100644 --- a/lib/Target/X86/X86ScheduleBtVer2.td +++ b/lib/Target/X86/X86ScheduleBtVer2.td @@ -369,5 +369,82 @@ def : WriteRes<WriteSystem, [JAny]> { let Latency = 100; } def : WriteRes<WriteMicrocoded, [JAny]> { let Latency = 100; } def : WriteRes<WriteFence, [JSAGU]>; def : WriteRes<WriteNop, []>; + +//////////////////////////////////////////////////////////////////////////////// +// AVX instructions. +//////////////////////////////////////////////////////////////////////////////// + +def WriteFAddY: SchedWriteRes<[JFPU0]> { + let Latency = 3; + let ResourceCycles = [2]; +} +def : InstRW<[WriteFAddY], (instregex "VADD(SUB)?P(S|D)Yrr", "VSUBP(S|D)Yrr")>; + +def WriteFAddYLd: SchedWriteRes<[JLAGU, JFPU0]> { + let Latency = 8; + let ResourceCycles = [1, 2]; +} +def : InstRW<[WriteFAddYLd, ReadAfterLd], (instregex "VADD(SUB)?P(S|D)Yrm", "VSUBP(S|D)Yrm")>; + +def WriteFDivY: SchedWriteRes<[JFPU1]> { + let Latency = 38; + let ResourceCycles = [38]; +} +def : InstRW<[WriteFDivY], (instregex "VDIVP(D|S)Yrr")>; + +def WriteFDivYLd: SchedWriteRes<[JLAGU, JFPU1]> { + let Latency = 43; + let ResourceCycles = [1, 38]; +} +def : InstRW<[WriteFDivYLd, ReadAfterLd], (instregex "VDIVP(S|D)Yrm")>; + +def WriteVMULYPD: SchedWriteRes<[JFPU1]> { + let Latency = 4; + let ResourceCycles = [4]; +} +def : InstRW<[WriteVMULYPD], (instregex "VMULPDYrr")>; + +def WriteVMULYPDLd: SchedWriteRes<[JLAGU, JFPU1]> { + let Latency = 9; + let ResourceCycles = [1, 4]; +} +def : InstRW<[WriteVMULYPDLd, ReadAfterLd], (instregex "VMULPDYrm")>; + +def WriteVMULYPS: SchedWriteRes<[JFPU1]> { + let Latency = 2; + let ResourceCycles = [2]; +} +def : InstRW<[WriteVMULYPS], (instregex "VMULPSYrr", "VRCPPSYr", "VRSQRTPSYr")>; + +def WriteVMULYPSLd: SchedWriteRes<[JLAGU, JFPU1]> { + let Latency = 7; + let ResourceCycles = [1, 2]; +} +def : InstRW<[WriteVMULYPSLd, ReadAfterLd], (instregex "VMULPSYrm", "VRCPPSYm", "VRSQRTPSYm")>; + +def WriteVSQRTYPD: SchedWriteRes<[JFPU1]> { + let Latency = 54; + let ResourceCycles = [54]; +} +def : InstRW<[WriteVSQRTYPD], (instregex "VSQRTPDYr")>; + +def WriteVSQRTYPDLd: SchedWriteRes<[JLAGU, JFPU1]> { + let Latency = 59; + let ResourceCycles = [1, 54]; +} +def : InstRW<[WriteVSQRTYPDLd], (instregex "VSQRTPDYm")>; + +def WriteVSQRTYPS: SchedWriteRes<[JFPU1]> { + let Latency = 42; + let ResourceCycles = [42]; +} +def : InstRW<[WriteVSQRTYPS], (instregex "VSQRTPSYr")>; + +def WriteVSQRTYPSLd: SchedWriteRes<[JLAGU, JFPU1]> { + let Latency = 47; + let ResourceCycles = [1, 42]; +} +def : InstRW<[WriteVSQRTYPSLd], (instregex "VSQRTPSYm")>; + } // SchedModel diff --git 
diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp
index 5ba8534d32d33..c9924f264939d 100644
--- a/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -142,10 +142,15 @@ int X86TTIImpl::getArithmeticInstrCost(
     { ISD::FDIV, MVT::v2f64, 69 }, // divpd
     { ISD::FADD, MVT::v2f64,  2 }, // addpd
     { ISD::FSUB, MVT::v2f64,  2 }, // subpd
-    // v2i64/v4i64 mul is custom lowered as a series of long
-    // multiplies(3), shifts(3) and adds(2).
-    // slm muldq version throughput is 2
-    { ISD::MUL, MVT::v2i64, 11 },
+    // v2i64/v4i64 mul is custom lowered as a series of long
+    // multiplies(3), shifts(3) and adds(2);
+    // slm muldq version throughput is 2 and addq throughput is 4,
+    // thus: 3X2 (muldq throughput) + 3X1 (shift throughput) +
+    // 2X4 (addq throughput) = 17
+    { ISD::MUL, MVT::v2i64, 17 },
+    // slm addq/subq throughput is 4
+    { ISD::ADD, MVT::v2i64, 4 },
+    { ISD::SUB, MVT::v2i64, 4 },
   };
 
   if (ST->isSLM()) {
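Aside: the cost comment above is easy to sanity-check. Below is the same arithmetic as a self-contained C++ snippet (my illustration, not part of the patch; the uop counts and reciprocal throughputs are the ones the comment assumes):

    // Sanity check of the SLM v2i64 MUL cost derived in the comment above.
    // Assumed lowering: 3 long multiplies (rthroughput 2), 3 shifts
    // (rthroughput 1) and 2 adds (rthroughput 4).
    constexpr int SLMV2I64MulCost = 3 * 2 + 3 * 1 + 2 * 4;
    static_assert(SLMV2I64MulCost == 17,
                  "matches the { ISD::MUL, MVT::v2i64, 17 } entry");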
diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp
index d8cf8d3f5da21..53223ab443161 100644
--- a/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -124,6 +124,10 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
     } else if (I->use_empty()) {
       // Dead argument (which are always marked as promotable)
       ++NumArgumentsDead;
+
+      // There may be remaining metadata uses of the argument for things like
+      // llvm.dbg.value. Replace them with undef.
+      I->replaceAllUsesWith(UndefValue::get(I->getType()));
     } else {
       // Okay, this is being promoted. This means that the only uses are loads
       // or GEPs which are only used by loads
diff --git a/lib/Transforms/IPO/FunctionImport.cpp b/lib/Transforms/IPO/FunctionImport.cpp
index 6d34ab8b0d960..233a36d2bc543 100644
--- a/lib/Transforms/IPO/FunctionImport.cpp
+++ b/lib/Transforms/IPO/FunctionImport.cpp
@@ -64,6 +64,12 @@ static cl::opt<float> ImportHotMultiplier(
     "import-hot-multiplier", cl::init(3.0), cl::Hidden, cl::value_desc("x"),
     cl::desc("Multiply the `import-instr-limit` threshold for hot callsites"));
 
+static cl::opt<float> ImportCriticalMultiplier(
+    "import-critical-multiplier", cl::init(100.0), cl::Hidden,
+    cl::value_desc("x"),
+    cl::desc(
+        "Multiply the `import-instr-limit` threshold for critical callsites"));
+
 // FIXME: This multiplier was not really tuned up.
 static cl::opt<float> ImportColdMultiplier(
     "import-cold-multiplier", cl::init(0), cl::Hidden, cl::value_desc("N"),
@@ -207,6 +213,8 @@ static void computeImportForFunction(
       return ImportHotMultiplier;
     if (Hotness == CalleeInfo::HotnessType::Cold)
       return ImportColdMultiplier;
+    if (Hotness == CalleeInfo::HotnessType::Critical)
+      return ImportCriticalMultiplier;
     return 1.0;
   };
 
@@ -537,8 +545,6 @@ void llvm::thinLTOResolveWeakForLinkerModule(
   };
 
   auto updateLinkage = [&](GlobalValue &GV) {
-    if (!GlobalValue::isWeakForLinker(GV.getLinkage()))
-      return;
     // See if the global summary analysis computed a new resolved linkage.
     const auto &GS = DefinedGlobals.find(GV.getGUID());
     if (GS == DefinedGlobals.end())
@@ -546,6 +552,21 @@ void llvm::thinLTOResolveWeakForLinkerModule(
     auto NewLinkage = GS->second->linkage();
     if (NewLinkage == GV.getLinkage())
       return;
+
+    // Switch the linkage to weakany if asked for, e.g. we do this for
+    // linker redefined symbols (via --wrap or --defsym).
+    // We record that the linkage should be changed here in `addThinLTO`
+    // as we need access to the resolution vectors for each input file in
+    // order to find which symbols have been redefined.
+    // We may consider reorganizing this code and moving the linkage recording
+    // somewhere else, e.g. in thinLTOResolveWeakForLinkerInIndex.
+    if (NewLinkage == GlobalValue::WeakAnyLinkage) {
+      GV.setLinkage(NewLinkage);
+      return;
+    }
+
+    if (!GlobalValue::isWeakForLinker(GV.getLinkage()))
+      return;
     // Check for a non-prevailing def that has interposable linkage
     // (e.g. non-odr weak or linkonce). In that case we can't simply
     // convert to available_externally, since it would lose the
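Aside: a minimal sketch of how these multipliers feed the importing threshold (the enum, function names and the threshold formula here are mine, simplified from computeImportForFunction; the constants match the cl::opt defaults above):

    #include <cstdint>

    enum class Hotness { Unknown, Cold, Hot, Critical };

    // Multiplier selection mirroring the hunk above, using option defaults.
    double getBonusMultiplier(Hotness H) {
      switch (H) {
      case Hotness::Hot:      return 3.0;   // -import-hot-multiplier
      case Hotness::Cold:     return 0.0;   // -import-cold-multiplier
      case Hotness::Critical: return 100.0; // -import-critical-multiplier
      default:                return 1.0;
      }
    }

    // E.g. with an instruction limit of 100, a critical callsite would accept
    // callees of up to 100 * 100.0 = 10000 instructions.
    uint64_t importThreshold(uint64_t InstrLimit, Hotness H) {
      return static_cast<uint64_t>(InstrLimit * getBonusMultiplier(H));
    }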
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index f277a51ae659a..3d57acf06e746 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -837,7 +837,7 @@ OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, Type *AllocTy,
     if (StoreInst *SI = dyn_cast<StoreInst>(GV->user_back())) {
       // The global is initialized when the store to it occurs.
       new StoreInst(ConstantInt::getTrue(GV->getContext()), InitBool, false, 0,
-                    SI->getOrdering(), SI->getSynchScope(), SI);
+                    SI->getOrdering(), SI->getSyncScopeID(), SI);
       SI->eraseFromParent();
       continue;
     }
@@ -854,7 +854,7 @@ OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, Type *AllocTy,
       // Replace the cmp X, 0 with a use of the bool value.
       // Sink the load to where the compare was, if atomic rules allow us to.
       Value *LV = new LoadInst(InitBool, InitBool->getName()+".val", false, 0,
-                               LI->getOrdering(), LI->getSynchScope(),
+                               LI->getOrdering(), LI->getSyncScopeID(),
                                LI->isUnordered() ? (Instruction*)ICI : LI);
       InitBoolUsed = true;
       switch (ICI->getPredicate()) {
@@ -1605,7 +1605,7 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
           assert(LI->getOperand(0) == GV && "Not a copy!");
           // Insert a new load, to preserve the saved value.
           StoreVal = new LoadInst(NewGV, LI->getName()+".b", false, 0,
-                                  LI->getOrdering(), LI->getSynchScope(), LI);
+                                  LI->getOrdering(), LI->getSyncScopeID(), LI);
         } else {
           assert((isa<CastInst>(StoredVal) || isa<SelectInst>(StoredVal)) &&
                  "This is not a form that we understand!");
@@ -1614,12 +1614,12 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
         }
       }
       new StoreInst(StoreVal, NewGV, false, 0,
-                    SI->getOrdering(), SI->getSynchScope(), SI);
+                    SI->getOrdering(), SI->getSyncScopeID(), SI);
     } else {
       // Change the load into a load of bool then a select.
       LoadInst *LI = cast<LoadInst>(UI);
       LoadInst *NLI = new LoadInst(NewGV, LI->getName()+".b", false, 0,
-                                   LI->getOrdering(), LI->getSynchScope(), LI);
+                                   LI->getOrdering(), LI->getSyncScopeID(), LI);
       Value *NSI;
       if (IsOneZero)
         NSI = new ZExtInst(NLI, LI->getType(), "", LI);
diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp
index ad89e40661c67..00ddb93df830a 100644
--- a/lib/Transforms/IPO/Inliner.cpp
+++ b/lib/Transforms/IPO/Inliner.cpp
@@ -989,5 +989,13 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
     // And delete the actual function from the module.
     M.getFunctionList().erase(DeadF);
   }
-  return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
+
+  if (!Changed)
+    return PreservedAnalyses::all();
+
+  // Even if we change the IR, we update the core CGSCC data structures and so
+  // can preserve the proxy to the function analysis manager.
+  PreservedAnalyses PA;
+  PA.preserve<FunctionAnalysisManagerCGSCCProxy>();
+  return PA;
 }
diff --git a/lib/Transforms/IPO/LowerTypeTests.cpp b/lib/Transforms/IPO/LowerTypeTests.cpp
index b406c22c69d7a..693df5e7ba925 100644
--- a/lib/Transforms/IPO/LowerTypeTests.cpp
+++ b/lib/Transforms/IPO/LowerTypeTests.cpp
@@ -855,15 +855,20 @@ void LowerTypeTestsModule::importFunction(Function *F, bool isDefinition) {
     FDecl = Function::Create(F->getFunctionType(), GlobalValue::ExternalLinkage,
                              Name + ".cfi_jt", &M);
     FDecl->setVisibility(GlobalValue::HiddenVisibility);
-  } else {
-    // Definition.
-    assert(isDefinition);
+  } else if (isDefinition) {
     F->setName(Name + ".cfi");
     F->setLinkage(GlobalValue::ExternalLinkage);
    F->setVisibility(GlobalValue::HiddenVisibility);
     FDecl = Function::Create(F->getFunctionType(), GlobalValue::ExternalLinkage,
                              Name, &M);
     FDecl->setVisibility(Visibility);
+  } else {
+    // Function definition without type metadata, where some other translation
+    // unit contained a declaration with type metadata. This normally happens
+    // during mixed CFI + non-CFI compilation. We do nothing with the function
+    // so that it is treated the same way as a function defined outside of the
+    // LTO unit.
+    return;
   }
 
   if (F->isWeakForLinker())
diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp
index 5b1b58b89c32e..0b319f6a488b3 100644
--- a/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -188,6 +188,13 @@ PassManagerBuilder::~PassManagerBuilder() {
 static ManagedStatic<SmallVector<std::pair<PassManagerBuilder::ExtensionPointTy,
    PassManagerBuilder::ExtensionFn>, 8> > GlobalExtensions;
 
+/// Check if GlobalExtensions is constructed and not empty.
+/// Since GlobalExtensions is a managed static, calling 'empty()' will trigger
+/// the construction of the object.
+static bool GlobalExtensionsNotEmpty() {
+  return GlobalExtensions.isConstructed() && !GlobalExtensions->empty();
+}
+
 void PassManagerBuilder::addGlobalExtension(
     PassManagerBuilder::ExtensionPointTy Ty,
     PassManagerBuilder::ExtensionFn Fn) {
@@ -200,9 +207,12 @@ void PassManagerBuilder::addExtension(ExtensionPointTy Ty, ExtensionFn Fn) {
 
 void PassManagerBuilder::addExtensionsToPM(ExtensionPointTy ETy,
                                            legacy::PassManagerBase &PM) const {
-  for (unsigned i = 0, e = GlobalExtensions->size(); i != e; ++i)
-    if ((*GlobalExtensions)[i].first == ETy)
-      (*GlobalExtensions)[i].second(*this, PM);
+  if (GlobalExtensionsNotEmpty()) {
+    for (auto &Ext : *GlobalExtensions) {
+      if (Ext.first == ETy)
+        Ext.second(*this, PM);
+    }
+  }
 
   for (unsigned i = 0, e = Extensions.size(); i != e; ++i)
     if (Extensions[i].first == ETy)
       Extensions[i].second(*this, PM);
@@ -415,7 +425,7 @@ void PassManagerBuilder::populateModulePassManager(
     // builds. The function merging pass is
     if (MergeFunctions)
       MPM.add(createMergeFunctionsPass());
-    else if (!GlobalExtensions->empty() || !Extensions.empty())
+    else if (GlobalExtensionsNotEmpty() || !Extensions.empty())
       MPM.add(createBarrierNoopPass());
 
     addExtensionsToPM(EP_EnabledOnOptLevel0, MPM);
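Aside: the isConstructed() check above matters because of ManagedStatic's lazy construction. A toy stand-in (simplified, not thread-safe, all names mine) showing why a bare empty() call is not free:

    #include <memory>
    #include <vector>

    // Toy stand-in for llvm::ManagedStatic: the wrapped object is built on
    // first dereference, so even a read-only query like empty() allocates it.
    template <typename T> class LazyStatic {
      mutable std::unique_ptr<T> Obj;
    public:
      bool isConstructed() const { return Obj != nullptr; }
      T &operator*() const {
        if (!Obj)
          Obj.reset(new T()); // construction happens here, on first use
        return *Obj;
      }
      T *operator->() const { return &**this; }
    };

    LazyStatic<std::vector<int>> Exts;

    bool extsNotEmpty() {
      // Same shape as GlobalExtensionsNotEmpty(): never construct the object
      // just to learn that it is empty.
      return Exts.isConstructed() && !Exts->empty();
    }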
diff --git a/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp b/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
index 8d494fe9cde28..8ef6bb6523093 100644
--- a/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
+++ b/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
@@ -271,7 +271,8 @@ void splitAndWriteThinLTOBitcode(
         if (!ArgT || ArgT->getBitWidth() > 64)
           return;
       }
-      if (computeFunctionBodyMemoryAccess(*F, AARGetter(*F)) == MAK_ReadNone)
+      if (!F->isDeclaration() &&
+          computeFunctionBodyMemoryAccess(*F, AARGetter(*F)) == MAK_ReadNone)
         EligibleVirtualFns.insert(F);
     });
   }
diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index d5f0dd1914157..809471cfd74f0 100644
--- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -164,7 +164,7 @@ namespace {
   ///
   class FAddCombine {
   public:
-    FAddCombine(InstCombiner::BuilderTy *B) : Builder(B), Instr(nullptr) {}
+    FAddCombine(InstCombiner::BuilderTy &B) : Builder(B), Instr(nullptr) {}
 
     Value *simplify(Instruction *FAdd);
   private:
@@ -187,7 +187,7 @@ namespace {
     Value *createNaryFAdd(const AddendVect& Opnds, unsigned InstrQuota);
     void createInstPostProc(Instruction *NewInst, bool NoNumber = false);
 
-    InstCombiner::BuilderTy *Builder;
+    InstCombiner::BuilderTy &Builder;
     Instruction *Instr;
 
     // Debugging stuff are clustered here.
@@ -735,7 +735,7 @@ Value *FAddCombine::createNaryFAdd
 }
 
 Value *FAddCombine::createFSub(Value *Opnd0, Value *Opnd1) {
-  Value *V = Builder->CreateFSub(Opnd0, Opnd1);
+  Value *V = Builder.CreateFSub(Opnd0, Opnd1);
   if (Instruction *I = dyn_cast<Instruction>(V))
     createInstPostProc(I);
   return V;
@@ -750,21 +750,21 @@ Value *FAddCombine::createFNeg(Value *V) {
 }
 
 Value *FAddCombine::createFAdd(Value *Opnd0, Value *Opnd1) {
-  Value *V = Builder->CreateFAdd(Opnd0, Opnd1);
+  Value *V = Builder.CreateFAdd(Opnd0, Opnd1);
   if (Instruction *I = dyn_cast<Instruction>(V))
     createInstPostProc(I);
   return V;
 }
 
 Value *FAddCombine::createFMul(Value *Opnd0, Value *Opnd1) {
-  Value *V = Builder->CreateFMul(Opnd0, Opnd1);
+  Value *V = Builder.CreateFMul(Opnd0, Opnd1);
   if (Instruction *I = dyn_cast<Instruction>(V))
     createInstPostProc(I);
   return V;
 }
 
 Value *FAddCombine::createFDiv(Value *Opnd0, Value *Opnd1) {
-  Value *V = Builder->CreateFDiv(Opnd0, Opnd1);
+  Value *V = Builder.CreateFDiv(Opnd0, Opnd1);
   if (Instruction *I = dyn_cast<Instruction>(V))
     createInstPostProc(I);
   return V;
@@ -895,7 +895,7 @@ bool InstCombiner::willNotOverflowUnsignedSub(const Value *LHS,
 // ADD(XOR(AND(Z, C), C), 1) == NEG(OR(Z, ~C))
 // XOR(AND(Z, C), (C + 1)) == NEG(OR(Z, ~C)) if C is even
 static Value *checkForNegativeOperand(BinaryOperator &I,
-                                      InstCombiner::BuilderTy *Builder) {
+                                      InstCombiner::BuilderTy &Builder) {
   Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
 
   // This function creates 2 instructions to replace ADD, we need at least one
@@ -919,13 +919,13 @@ static Value *checkForNegativeOperand(BinaryOperator &I,
     // X = XOR(Y, C1), Y = OR(Z, C2), C2 = NOT(C1) ==> X == NOT(AND(Z, C1))
     // ADD(ADD(X, 1), RHS) == ADD(X, ADD(RHS, 1)) == SUB(RHS, AND(Z, C1))
     if (match(Y, m_Or(m_Value(Z), m_APInt(C2))) && (*C2 == ~(*C1))) {
-      Value *NewAnd = Builder->CreateAnd(Z, *C1);
-      return Builder->CreateSub(RHS, NewAnd, "sub");
+      Value *NewAnd = Builder.CreateAnd(Z, *C1);
+      return Builder.CreateSub(RHS, NewAnd, "sub");
     } else if (match(Y, m_And(m_Value(Z), m_APInt(C2))) && (*C1 == *C2)) {
       // X = XOR(Y, C1), Y = AND(Z, C2), C2 == C1 ==> X == NOT(OR(Z, ~C1))
       // ADD(ADD(X, 1), RHS) == ADD(X, ADD(RHS, 1)) == SUB(RHS, OR(Z, ~C1))
-      Value *NewOr = Builder->CreateOr(Z, ~(*C1));
-      return Builder->CreateSub(RHS, NewOr, "sub");
+      Value *NewOr = Builder.CreateOr(Z, ~(*C1));
+      return Builder.CreateSub(RHS, NewOr, "sub");
     }
   }
 }
@@ -944,8 +944,8 @@ static Value *checkForNegativeOperand(BinaryOperator &I,
   if (match(LHS, m_Xor(m_Value(Y), m_APInt(C1))))
     if (C1->countTrailingZeros() == 0)
       if (match(Y, m_And(m_Value(Z), m_APInt(C2))) && *C1 == (*C2 + 1)) {
-        Value *NewOr = Builder->CreateOr(Z, ~(*C2));
-        return Builder->CreateSub(RHS, NewOr, "sub");
+        Value *NewOr = Builder.CreateOr(Z, ~(*C2));
+        return Builder.CreateSub(RHS, NewOr, "sub");
       }
   return nullptr;
 }
@@ -1027,7 +1027,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
   if (Value *V = SimplifyUsingDistributiveLaws(I))
     return replaceInstUsesWith(I, V);
 
-  if (Instruction *X = foldAddWithConstant(I, *Builder))
+  if (Instruction *X = foldAddWithConstant(I, Builder))
     return X;
 
   // FIXME: This should be moved into the above helper function to allow these
@@ -1060,7 +1060,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
 
       if (ExtendAmt) {
         Constant *ShAmt = ConstantInt::get(I.getType(), ExtendAmt);
-        Value *NewShl = Builder->CreateShl(XorLHS, ShAmt, "sext");
+        Value *NewShl = Builder.CreateShl(XorLHS, ShAmt, "sext");
         return BinaryOperator::CreateAShr(NewShl, ShAmt);
       }
 
@@ -1084,7 +1084,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
   if (Instruction *NV = foldOpWithConstantIntoOperand(I))
     return NV;
 
-  if (I.getType()->getScalarType()->isIntegerTy(1))
+  if (I.getType()->isIntOrIntVectorTy(1))
     return BinaryOperator::CreateXor(LHS, RHS);
 
   // X + X --> X << 1
@@ -1101,7 +1101,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
   if (Value *LHSV = dyn_castNegVal(LHS)) {
     if (!isa<Constant>(RHS))
       if (Value *RHSV = dyn_castNegVal(RHS)) {
-        Value *NewAdd = Builder->CreateAdd(LHSV, RHSV, "sum");
+        Value *NewAdd = Builder.CreateAdd(LHSV, RHSV, "sum");
        return BinaryOperator::CreateNeg(NewAdd);
       }
 
@@ -1148,7 +1148,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
 
         if (AddRHSHighBits == AddRHSHighBitsAnd) {
           // Okay, the xform is safe.  Insert the new add pronto.
-          Value *NewAdd = Builder->CreateAdd(X, CRHS, LHS->getName());
+          Value *NewAdd = Builder.CreateAdd(X, CRHS, LHS->getName());
           return BinaryOperator::CreateAnd(NewAdd, C2);
         }
       }
@@ -1191,7 +1191,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
         willNotOverflowSignedAdd(LHSConv->getOperand(0), CI, I)) {
       // Insert the new, smaller add.
       Value *NewAdd =
-          Builder->CreateNSWAdd(LHSConv->getOperand(0), CI, "addconv");
+          Builder.CreateNSWAdd(LHSConv->getOperand(0), CI, "addconv");
       return new SExtInst(NewAdd, I.getType());
     }
   }
@@ -1208,7 +1208,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
         willNotOverflowSignedAdd(LHSConv->getOperand(0),
                                  RHSConv->getOperand(0), I)) {
       // Insert the new integer add.
-      Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
+      Value *NewAdd = Builder.CreateNSWAdd(LHSConv->getOperand(0),
                                             RHSConv->getOperand(0), "addconv");
       return new SExtInst(NewAdd, I.getType());
     }
@@ -1227,7 +1227,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
         willNotOverflowUnsignedAdd(LHSConv->getOperand(0), CI, I)) {
       // Insert the new, smaller add.
       Value *NewAdd =
-          Builder->CreateNUWAdd(LHSConv->getOperand(0), CI, "addconv");
+          Builder.CreateNUWAdd(LHSConv->getOperand(0), CI, "addconv");
       return new ZExtInst(NewAdd, I.getType());
     }
   }
@@ -1244,7 +1244,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
         willNotOverflowUnsignedAdd(LHSConv->getOperand(0),
                                    RHSConv->getOperand(0), I)) {
       // Insert the new integer add.
-      Value *NewAdd = Builder->CreateNUWAdd(
+      Value *NewAdd = Builder.CreateNUWAdd(
           LHSConv->getOperand(0), RHSConv->getOperand(0), "addconv");
       return new ZExtInst(NewAdd, I.getType());
     }
@@ -1362,8 +1362,7 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
           ConstantExpr::getSIToFP(CI, I.getType()) == CFP &&
           willNotOverflowSignedAdd(LHSIntVal, CI, I)) {
         // Insert the new integer add.
-        Value *NewAdd = Builder->CreateNSWAdd(LHSIntVal,
-                                              CI, "addconv");
+        Value *NewAdd = Builder.CreateNSWAdd(LHSIntVal, CI, "addconv");
         return new SIToFPInst(NewAdd, I.getType());
       }
     }
@@ -1381,8 +1380,7 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
           (LHSConv->hasOneUse() || RHSConv->hasOneUse()) &&
           willNotOverflowSignedAdd(LHSIntVal, RHSIntVal, I)) {
         // Insert the new integer add.
-        Value *NewAdd = Builder->CreateNSWAdd(LHSIntVal,
-                                              RHSIntVal, "addconv");
+        Value *NewAdd = Builder.CreateNSWAdd(LHSIntVal, RHSIntVal, "addconv");
         return new SIToFPInst(NewAdd, I.getType());
       }
     }
@@ -1480,14 +1478,14 @@ Value *InstCombiner::OptimizePointerDifference(Value *LHS, Value *RHS,
   // pointer, subtract it from the offset we have.
   if (GEP2) {
     Value *Offset = EmitGEPOffset(GEP2);
-    Result = Builder->CreateSub(Result, Offset);
+    Result = Builder.CreateSub(Result, Offset);
   }
 
   // If we have p - gep(p, ...)  then we have to negate the result.
   if (Swapped)
-    Result = Builder->CreateNeg(Result, "diff.neg");
+    Result = Builder.CreateNeg(Result, "diff.neg");
 
-  return Builder->CreateIntCast(Result, Ty, true);
+  return Builder.CreateIntCast(Result, Ty, true);
 }
 
 Instruction *InstCombiner::visitSub(BinaryOperator &I) {
@@ -1522,7 +1520,7 @@
     return Res;
   }
 
-  if (I.getType()->getScalarType()->isIntegerTy(1))
+  if (I.getType()->isIntOrIntVectorTy(1))
     return BinaryOperator::CreateXor(Op0, Op1);
 
   // Replace (-1 - A) with (~A).
@@ -1552,12 +1550,12 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
 
     // Fold (sub 0, (zext bool to B)) --> (sext bool to B)
     if (C->isNullValue() && match(Op1, m_ZExt(m_Value(X))))
-      if (X->getType()->getScalarType()->isIntegerTy(1))
+      if (X->getType()->isIntOrIntVectorTy(1))
         return CastInst::CreateSExtOrBitCast(X, Op1->getType());
 
     // Fold (sub 0, (sext bool to B)) --> (zext bool to B)
     if (C->isNullValue() && match(Op1, m_SExt(m_Value(X))))
-      if (X->getType()->getScalarType()->isIntegerTy(1))
+      if (X->getType()->isIntOrIntVectorTy(1))
         return CastInst::CreateZExtOrBitCast(X, Op1->getType());
   }
@@ -1615,7 +1613,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
     // ((X | Y) - X) --> (~X & Y)
     if (match(Op0, m_OneUse(m_c_Or(m_Value(Y), m_Specific(Op1)))))
       return BinaryOperator::CreateAnd(
-          Y, Builder->CreateNot(Op1, Op1->getName() + ".not"));
+          Y, Builder.CreateNot(Op1, Op1->getName() + ".not"));
   }
 
   if (Op1->hasOneUse()) {
@@ -1625,13 +1623,13 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
     // (X - (Y - Z))  -->  (X + (Z - Y)).
     if (match(Op1, m_Sub(m_Value(Y), m_Value(Z))))
       return BinaryOperator::CreateAdd(Op0,
-                                      Builder->CreateSub(Z, Y, Op1->getName()));
+                                      Builder.CreateSub(Z, Y, Op1->getName()));
 
     // (X - (X & Y))   -->   (X & ~Y)
     //
     if (match(Op1, m_c_And(m_Value(Y), m_Specific(Op0))))
       return BinaryOperator::CreateAnd(Op0,
-                                  Builder->CreateNot(Y, Y->getName() + ".not"));
+                                  Builder.CreateNot(Y, Y->getName() + ".not"));
 
     // 0 - (X sdiv C)  -> (X sdiv -C)  provided the negation doesn't overflow.
     if (match(Op1, m_SDiv(m_Value(X), m_Constant(C))) && match(Op0, m_Zero()) &&
@@ -1648,7 +1646,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
     // 'nuw' is dropped in favor of the canonical form.
     if (match(Op1, m_SExt(m_Value(Y))) &&
         Y->getType()->getScalarSizeInBits() == 1) {
-      Value *Zext = Builder->CreateZExt(Y, I.getType());
+      Value *Zext = Builder.CreateZExt(Y, I.getType());
       BinaryOperator *Add = BinaryOperator::CreateAdd(Op0, Zext);
       Add->setHasNoSignedWrap(I.hasNoSignedWrap());
       return Add;
@@ -1659,13 +1657,13 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
     Value *A, *B;
     Constant *CI;
     if (match(Op1, m_c_Mul(m_Value(A), m_Neg(m_Value(B)))))
-      return BinaryOperator::CreateAdd(Op0, Builder->CreateMul(A, B));
+      return BinaryOperator::CreateAdd(Op0, Builder.CreateMul(A, B));
 
     // X - A*CI -> X + A*-CI
     // No need to handle commuted multiply because multiply handling will
     // ensure constant will be move to the right hand side.
     if (match(Op1, m_Mul(m_Value(A), m_Constant(CI)))) {
-      Value *NewMul = Builder->CreateMul(A, ConstantExpr::getNeg(CI));
+      Value *NewMul = Builder.CreateMul(A, ConstantExpr::getNeg(CI));
       return BinaryOperator::CreateAdd(Op0, NewMul);
     }
   }
@@ -1729,14 +1727,14 @@ Instruction *InstCombiner::visitFSub(BinaryOperator &I) {
   }
 
   if (FPTruncInst *FPTI = dyn_cast<FPTruncInst>(Op1)) {
     if (Value *V = dyn_castFNegVal(FPTI->getOperand(0))) {
-      Value *NewTrunc = Builder->CreateFPTrunc(V, I.getType());
+      Value *NewTrunc = Builder.CreateFPTrunc(V, I.getType());
       Instruction *NewI = BinaryOperator::CreateFAdd(Op0, NewTrunc);
       NewI->copyFastMathFlags(&I);
       return NewI;
     }
   } else if (FPExtInst *FPEI = dyn_cast<FPExtInst>(Op1)) {
     if (Value *V = dyn_castFNegVal(FPEI->getOperand(0))) {
-      Value *NewExt = Builder->CreateFPExt(V, I.getType());
+      Value *NewExt = Builder.CreateFPExt(V, I.getType());
       Instruction *NewI = BinaryOperator::CreateFAdd(Op0, NewExt);
       NewI->copyFastMathFlags(&I);
       return NewI;
diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index db98be2c98f51..773c86e23707f 100644
--- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -54,17 +54,17 @@ static unsigned getFCmpCode(FCmpInst::Predicate CC) {
 /// instruction. The sign is passed in to determine which kind of predicate to
 /// use in the new icmp instruction.
 static Value *getNewICmpValue(bool Sign, unsigned Code, Value *LHS, Value *RHS,
-                              InstCombiner::BuilderTy *Builder) {
+                              InstCombiner::BuilderTy &Builder) {
   ICmpInst::Predicate NewPred;
   if (Value *NewConstant = getICmpValue(Sign, Code, LHS, RHS, NewPred))
     return NewConstant;
-  return Builder->CreateICmp(NewPred, LHS, RHS);
+  return Builder.CreateICmp(NewPred, LHS, RHS);
 }
 
 /// This is the complement of getFCmpCode, which turns an opcode and two
 /// operands into either a FCmp instruction, or a true/false constant.
 static Value *getFCmpValue(unsigned Code, Value *LHS, Value *RHS,
-                           InstCombiner::BuilderTy *Builder) {
+                           InstCombiner::BuilderTy &Builder) {
   const auto Pred = static_cast<FCmpInst::Predicate>(Code);
   assert(FCmpInst::FCMP_FALSE <= Pred && Pred <= FCmpInst::FCMP_TRUE &&
          "Unexpected FCmp predicate!");
@@ -72,53 +72,45 @@ static Value *getFCmpValue(unsigned Code, Value *LHS, Value *RHS,
     return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0);
   if (Pred == FCmpInst::FCMP_TRUE)
     return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 1);
-  return Builder->CreateFCmp(Pred, LHS, RHS);
+  return Builder.CreateFCmp(Pred, LHS, RHS);
 }
 
-/// \brief Transform BITWISE_OP(BSWAP(A),BSWAP(B)) to BSWAP(BITWISE_OP(A, B))
+/// \brief Transform BITWISE_OP(BSWAP(A),BSWAP(B)) or
+/// BITWISE_OP(BSWAP(A), Constant) to BSWAP(BITWISE_OP(A, B))
 /// \param I Binary operator to transform.
 /// \return Pointer to node that must replace the original binary operator, or
 ///         null pointer if no transformation was made.
-Value *InstCombiner::SimplifyBSwap(BinaryOperator &I) {
-  IntegerType *ITy = dyn_cast<IntegerType>(I.getType());
-
-  // Can't do vectors.
-  if (I.getType()->isVectorTy())
-    return nullptr;
-
-  // Can only do bitwise ops.
-  if (!I.isBitwiseLogicOp())
-    return nullptr;
+static Value *SimplifyBSwap(BinaryOperator &I,
+                            InstCombiner::BuilderTy &Builder) {
+  assert(I.isBitwiseLogicOp() && "Unexpected opcode for bswap simplifying");
 
   Value *OldLHS = I.getOperand(0);
   Value *OldRHS = I.getOperand(1);
-  ConstantInt *ConstLHS = dyn_cast<ConstantInt>(OldLHS);
-  ConstantInt *ConstRHS = dyn_cast<ConstantInt>(OldRHS);
-  IntrinsicInst *IntrLHS = dyn_cast<IntrinsicInst>(OldLHS);
-  IntrinsicInst *IntrRHS = dyn_cast<IntrinsicInst>(OldRHS);
-  bool IsBswapLHS = (IntrLHS && IntrLHS->getIntrinsicID() == Intrinsic::bswap);
-  bool IsBswapRHS = (IntrRHS && IntrRHS->getIntrinsicID() == Intrinsic::bswap);
-
-  if (!IsBswapLHS && !IsBswapRHS)
-    return nullptr;
-
-  if (!IsBswapLHS && !ConstLHS)
-    return nullptr;
-
-  if (!IsBswapRHS && !ConstRHS)
+  Value *NewLHS;
+  if (!match(OldLHS, m_BSwap(m_Value(NewLHS))))
     return nullptr;
 
-  /// OP( BSWAP(x), BSWAP(y) ) -> BSWAP( OP(x, y) )
-  /// OP( BSWAP(x), CONSTANT ) -> BSWAP( OP(x, BSWAP(CONSTANT) ) )
-  Value *NewLHS = IsBswapLHS ? IntrLHS->getOperand(0) :
-                  Builder->getInt(ConstLHS->getValue().byteSwap());
+  Value *NewRHS;
+  const APInt *C;
 
-  Value *NewRHS = IsBswapRHS ? IntrRHS->getOperand(0) :
-                  Builder->getInt(ConstRHS->getValue().byteSwap());
+  if (match(OldRHS, m_BSwap(m_Value(NewRHS)))) {
+    // OP( BSWAP(x), BSWAP(y) ) -> BSWAP( OP(x, y) )
+    if (!OldLHS->hasOneUse() && !OldRHS->hasOneUse())
+      return nullptr;
+    // NewRHS initialized by the matcher.
+  } else if (match(OldRHS, m_APInt(C))) {
+    // OP( BSWAP(x), CONSTANT ) -> BSWAP( OP(x, BSWAP(CONSTANT) ) )
+    if (!OldLHS->hasOneUse())
+      return nullptr;
+    NewRHS = ConstantInt::get(I.getType(), C->byteSwap());
+  } else
+    return nullptr;
 
-  Value *BinOp = Builder->CreateBinOp(I.getOpcode(), NewLHS, NewRHS);
-  Function *F = Intrinsic::getDeclaration(I.getModule(), Intrinsic::bswap, ITy);
-  return Builder->CreateCall(F, BinOp);
+  Value *BinOp = Builder.CreateBinOp(I.getOpcode(), NewLHS, NewRHS);
+  Function *F = Intrinsic::getDeclaration(I.getModule(), Intrinsic::bswap,
                                          I.getType());
+  return Builder.CreateCall(F, BinOp);
 }
 
 /// This handles expressions of the form ((val OP C1) & C2).  Where
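Aside: the identity behind both SimplifyBSwap patterns can be checked directly in C++ (my example, not from the patch; __builtin_bswap32 is the GCC/Clang builtin standing in for the llvm.bswap intrinsic):

    #include <cassert>
    #include <cstdint>
    #include <initializer_list>

    int main() {
      const uint32_t C = 0x00FF1234u; // an arbitrary constant operand
      for (uint32_t X : {0x12345678u, 0xDEADBEEFu, 0u, 0xFFFFFFFFu}) {
        // OP(bswap(x), C) == bswap(OP(x, bswap(C))) for any bitwise OP,
        // because bswap permutes bytes and bitwise ops act per byte.
        assert((__builtin_bswap32(X) & C) ==
               __builtin_bswap32(X & __builtin_bswap32(C)));
        assert((__builtin_bswap32(X) | C) ==
               __builtin_bswap32(X | __builtin_bswap32(C)));
      }
      return 0;
    }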
@@ -137,7 +129,7 @@ Instruction *InstCombiner::OptAndOp(BinaryOperator *Op,
   case Instruction::Xor:
     if (Op->hasOneUse()) {
       // (X ^ C1) & C2 --> (X & C2) ^ (C1&C2)
-      Value *And = Builder->CreateAnd(X, AndRHS);
+      Value *And = Builder.CreateAnd(X, AndRHS);
       And->takeName(Op);
       return BinaryOperator::CreateXor(And, Together);
     }
@@ -150,7 +142,7 @@ Instruction *InstCombiner::OptAndOp(BinaryOperator *Op,
       // NOTE: This reduces the number of bits set in the & mask, which
       // can expose opportunities for store narrowing.
       Together = ConstantExpr::getXor(AndRHS, Together);
-      Value *And = Builder->CreateAnd(X, Together);
+      Value *And = Builder.CreateAnd(X, Together);
       And->takeName(Op);
       return BinaryOperator::CreateOr(And, OpRHS);
     }
@@ -182,7 +174,7 @@ Instruction *InstCombiner::OptAndOp(BinaryOperator *Op,
         return &TheAnd;
       } else {
        // Pull the XOR out of the AND.
-        Value *NewAnd = Builder->CreateAnd(X, AndRHS);
+        Value *NewAnd = Builder.CreateAnd(X, AndRHS);
         NewAnd->takeName(Op);
         return BinaryOperator::CreateXor(NewAnd, AndRHS);
       }
@@ -198,7 +190,7 @@ Instruction *InstCombiner::OptAndOp(BinaryOperator *Op,
       uint32_t BitWidth = AndRHS->getType()->getBitWidth();
       uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth);
       APInt ShlMask(APInt::getHighBitsSet(BitWidth, BitWidth-OpRHSVal));
-      ConstantInt *CI = Builder->getInt(AndRHS->getValue() & ShlMask);
+      ConstantInt *CI = Builder.getInt(AndRHS->getValue() & ShlMask);
 
       if (CI->getValue() == ShlMask)
         // Masking out bits that the shift already masks.
@@ -218,7 +210,7 @@ Instruction *InstCombiner::OptAndOp(BinaryOperator *Op,
       uint32_t BitWidth = AndRHS->getType()->getBitWidth();
       uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth);
       APInt ShrMask(APInt::getLowBitsSet(BitWidth, BitWidth - OpRHSVal));
-      ConstantInt *CI = Builder->getInt(AndRHS->getValue() & ShrMask);
+      ConstantInt *CI = Builder.getInt(AndRHS->getValue() & ShrMask);
 
       if (CI->getValue() == ShrMask)
         // Masking out bits that the shift already masks.
@@ -238,12 +230,12 @@ Instruction *InstCombiner::OptAndOp(BinaryOperator *Op,
         uint32_t BitWidth = AndRHS->getType()->getBitWidth();
         uint32_t OpRHSVal = OpRHS->getLimitedValue(BitWidth);
         APInt ShrMask(APInt::getLowBitsSet(BitWidth, BitWidth - OpRHSVal));
-        Constant *C = Builder->getInt(AndRHS->getValue() & ShrMask);
+        Constant *C = Builder.getInt(AndRHS->getValue() & ShrMask);
         if (C == AndRHS) {          // Masking out bits shifted in.
           // (Val ashr C1) & C2 -> (Val lshr C1) & C2
           // Make the argument unsigned.
           Value *ShVal = Op->getOperand(0);
-          ShVal = Builder->CreateLShr(ShVal, OpRHS, Op->getName());
+          ShVal = Builder.CreateLShr(ShVal, OpRHS, Op->getName());
           return BinaryOperator::CreateAnd(ShVal, AndRHS, TheAnd.getName());
         }
       }
@@ -269,15 +261,15 @@ Value *InstCombiner::insertRangeTest(Value *V, const APInt &Lo, const APInt &Hi,
   ICmpInst::Predicate Pred = Inside ? ICmpInst::ICMP_ULT : ICmpInst::ICMP_UGE;
   if (isSigned ? Lo.isMinSignedValue() : Lo.isMinValue()) {
     Pred = isSigned ? ICmpInst::getSignedPredicate(Pred) : Pred;
-    return Builder->CreateICmp(Pred, V, ConstantInt::get(Ty, Hi));
+    return Builder.CreateICmp(Pred, V, ConstantInt::get(Ty, Hi));
   }
 
   // V >= Lo && V <  Hi --> V - Lo u<  Hi - Lo
   // V <  Lo || V >= Hi --> V - Lo u>= Hi - Lo
   Value *VMinusLo =
-      Builder->CreateSub(V, ConstantInt::get(Ty, Lo), V->getName() + ".off");
+      Builder.CreateSub(V, ConstantInt::get(Ty, Lo), V->getName() + ".off");
   Constant *HiMinusLo = ConstantInt::get(Ty, Hi - Lo);
-  return Builder->CreateICmp(Pred, VMinusLo, HiMinusLo);
+  return Builder.CreateICmp(Pred, VMinusLo, HiMinusLo);
 }
 
 /// Classify (icmp eq (A & B), C) and (icmp ne (A & B), C) as matching patterns
@@ -523,7 +515,7 @@
 /// Try to fold (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E)
 /// into a single (icmp(A & X) ==/!= Y).
 static Value *foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd,
-                                     llvm::InstCombiner::BuilderTy *Builder) {
+                                     llvm::InstCombiner::BuilderTy &Builder) {
   Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr, *E = nullptr;
   ICmpInst::Predicate PredL = LHS->getPredicate(), PredR = RHS->getPredicate();
   unsigned Mask =
@@ -556,27 +548,27 @@ static Value *foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd,
   if (Mask & Mask_AllZeros) {
     // (icmp eq (A & B), 0) & (icmp eq (A & D), 0)
     // -> (icmp eq (A & (B|D)), 0)
-    Value *NewOr = Builder->CreateOr(B, D);
-    Value *NewAnd = Builder->CreateAnd(A, NewOr);
+    Value *NewOr = Builder.CreateOr(B, D);
+    Value *NewAnd = Builder.CreateAnd(A, NewOr);
     // We can't use C as zero because we might actually handle
     //   (icmp ne (A & B), B) & (icmp ne (A & D), D)
     // with B and D, having a single bit set.
     Value *Zero = Constant::getNullValue(A->getType());
-    return Builder->CreateICmp(NewCC, NewAnd, Zero);
+    return Builder.CreateICmp(NewCC, NewAnd, Zero);
   }
   if (Mask & BMask_AllOnes) {
     // (icmp eq (A & B), B) & (icmp eq (A & D), D)
     // -> (icmp eq (A & (B|D)), (B|D))
-    Value *NewOr = Builder->CreateOr(B, D);
-    Value *NewAnd = Builder->CreateAnd(A, NewOr);
-    return Builder->CreateICmp(NewCC, NewAnd, NewOr);
+    Value *NewOr = Builder.CreateOr(B, D);
+    Value *NewAnd = Builder.CreateAnd(A, NewOr);
+    return Builder.CreateICmp(NewCC, NewAnd, NewOr);
   }
   if (Mask & AMask_AllOnes) {
     // (icmp eq (A & B), A) & (icmp eq (A & D), A)
     // -> (icmp eq (A & (B&D)), A)
-    Value *NewAnd1 = Builder->CreateAnd(B, D);
-    Value *NewAnd2 = Builder->CreateAnd(A, NewAnd1);
-    return Builder->CreateICmp(NewCC, NewAnd2, A);
+    Value *NewAnd1 = Builder.CreateAnd(B, D);
+    Value *NewAnd2 = Builder.CreateAnd(A, NewAnd1);
+    return Builder.CreateICmp(NewCC, NewAnd2, A);
   }
 
   // Remaining cases assume at least that B and D are constant, and depend on
@@ -644,10 +636,10 @@ static Value *foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd,
          (CCst->getValue() ^ ECst->getValue())).getBoolValue())
       return ConstantInt::get(LHS->getType(), !IsAnd);
 
-    Value *NewOr1 = Builder->CreateOr(B, D);
+    Value *NewOr1 = Builder.CreateOr(B, D);
     Value *NewOr2 = ConstantExpr::getOr(CCst, ECst);
-    Value *NewAnd = Builder->CreateAnd(A, NewOr1);
-    return Builder->CreateICmp(NewCC, NewAnd, NewOr2);
+    Value *NewAnd = Builder.CreateAnd(A, NewOr1);
+    return Builder.CreateICmp(NewCC, NewAnd, NewOr2);
   }
 
   return nullptr;
@@ -705,13 +697,13 @@ Value *InstCombiner::simplifyRangeCheck(ICmpInst *Cmp0, ICmpInst *Cmp1,
   if (Inverted)
     NewPred = ICmpInst::getInversePredicate(NewPred);
 
-  return Builder->CreateICmp(NewPred, Input, RangeEnd);
+  return Builder.CreateICmp(NewPred, Input, RangeEnd);
 }
 
 static Value *
 foldAndOrOfEqualityCmpsWithConstants(ICmpInst *LHS, ICmpInst *RHS,
                                      bool JoinedByAnd,
-                                     InstCombiner::BuilderTy *Builder) {
+                                     InstCombiner::BuilderTy &Builder) {
   Value *X = LHS->getOperand(0);
   if (X != RHS->getOperand(0))
     return nullptr;
@@ -742,8 +734,8 @@
     // (X != C1 && X != C2) --> (X | (C1 ^ C2)) != C2
     // We choose an 'or' with a Pow2 constant rather than the inverse mask with
    // 'and' because that may lead to smaller codegen from a smaller constant.
-    Value *Or = Builder->CreateOr(X, ConstantInt::get(X->getType(), Xor));
-    return Builder->CreateICmp(Pred, Or, ConstantInt::get(X->getType(), *C2));
+    Value *Or = Builder.CreateOr(X, ConstantInt::get(X->getType(), Xor));
+    return Builder.CreateICmp(Pred, Or, ConstantInt::get(X->getType(), *C2));
   }
 
   // Special case: get the ordering right when the values wrap around zero.
@@ -755,9 +747,9 @@
     // (X == 13 || X == 14) --> X - 13 <=u 1
    // (X != 13 && X != 14) --> X - 13  >u 1
     // An 'add' is the canonical IR form, so favor that over a 'sub'.
-    Value *Add = Builder->CreateAdd(X, ConstantInt::get(X->getType(), -(*C1)));
+    Value *Add = Builder.CreateAdd(X, ConstantInt::get(X->getType(), -(*C1)));
     auto NewPred = JoinedByAnd ? ICmpInst::ICMP_UGT : ICmpInst::ICMP_ULE;
-    return Builder->CreateICmp(NewPred, Add, ConstantInt::get(X->getType(), 1));
+    return Builder.CreateICmp(NewPred, Add, ConstantInt::get(X->getType(), 1));
   }
 
   return nullptr;
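Aside: the wrap-around trick used by the fold above is worth seeing concretely; a quick check in C++ (my example, not from the patch):

    #include <cassert>
    #include <cstdint>

    // (X == 13 || X == 14)  -->  (X - 13) <=u 1: unsigned wrap-around pushes
    // every X < 13 far above the limit, so one compare replaces two.
    static bool orig(uint32_t X) { return X == 13 || X == 14; }
    static bool folded(uint32_t X) { return X - 13u <= 1u; }

    int main() {
      for (uint32_t X = 0; X < 100000; ++X)
        assert(orig(X) == folded(X));
      assert(!folded(12) && folded(13) && folded(14) && !folded(15));
      assert(!folded(0xFFFFFFFFu)); // wrapped value stays well above 1
      return 0;
    }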
@@ -963,7 +955,7 @@ Value *InstCombiner::foldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS, llvm_unreachable("Unknown integer condition code!"); case ICmpInst::ICMP_NE: if (RHSC == AddOne(LHSC)) // (X u> 13 & X != 14) -> X u> 14 - return Builder->CreateICmp(PredL, LHS0, RHSC); + return Builder.CreateICmp(PredL, LHS0, RHSC); break; // (X u> 13 & X != 15) -> no change case ICmpInst::ICMP_ULT: // (X u> 13 & X u< 15) -> (X-14) <u 1 return insertRangeTest(LHS0, LHSC->getValue() + 1, RHSC->getValue(), @@ -976,7 +968,7 @@ Value *InstCombiner::foldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS, llvm_unreachable("Unknown integer condition code!"); case ICmpInst::ICMP_NE: if (RHSC == AddOne(LHSC)) // (X s> 13 & X != 14) -> X s> 14 - return Builder->CreateICmp(PredL, LHS0, RHSC); + return Builder.CreateICmp(PredL, LHS0, RHSC); break; // (X s> 13 & X != 15) -> no change case ICmpInst::ICMP_SLT: // (X s> 13 & X s< 15) -> (X-14) s< 1 return insertRangeTest(LHS0, LHSC->getValue() + 1, RHSC->getValue(), true, @@ -1025,15 +1017,15 @@ Value *InstCombiner::foldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS) { // If either of the constants are nans, then the whole thing returns // false. if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN()) - return Builder->getFalse(); - return Builder->CreateFCmpORD(LHS->getOperand(0), RHS->getOperand(0)); + return Builder.getFalse(); + return Builder.CreateFCmpORD(LHS->getOperand(0), RHS->getOperand(0)); } // Handle vector zeros. This occurs because the canonical form of // "fcmp ord x,x" is "fcmp ord x, 0". if (isa<ConstantAggregateZero>(LHS->getOperand(1)) && isa<ConstantAggregateZero>(RHS->getOperand(1))) - return Builder->CreateFCmpORD(LHS->getOperand(0), RHS->getOperand(0)); + return Builder.CreateFCmpORD(LHS->getOperand(0), RHS->getOperand(0)); return nullptr; } @@ -1088,7 +1080,7 @@ bool InstCombiner::shouldOptimizeCast(CastInst *CI) { /// Fold {and,or,xor} (cast X), C. 
static Instruction *foldLogicCastConstant(BinaryOperator &Logic, CastInst *Cast, - InstCombiner::BuilderTy *Builder) { + InstCombiner::BuilderTy &Builder) { Constant *C; if (!match(Logic.getOperand(1), m_Constant(C))) return nullptr; @@ -1107,7 +1099,7 @@ static Instruction *foldLogicCastConstant(BinaryOperator &Logic, CastInst *Cast, Constant *ZextTruncC = ConstantExpr::getZExt(TruncC, DestTy); if (ZextTruncC == C) { // LogicOpc (zext X), C --> zext (LogicOpc X, C) - Value *NewOp = Builder->CreateBinOp(LogicOpc, X, TruncC); + Value *NewOp = Builder.CreateBinOp(LogicOpc, X, TruncC); return new ZExtInst(NewOp, DestTy); } } @@ -1150,7 +1142,7 @@ Instruction *InstCombiner::foldCastedBitwiseLogic(BinaryOperator &I) { // fold logic(cast(A), cast(B)) -> cast(logic(A, B)) if (shouldOptimizeCast(Cast0) && shouldOptimizeCast(Cast1)) { - Value *NewOp = Builder->CreateBinOp(LogicOpc, Cast0Src, Cast1Src, + Value *NewOp = Builder.CreateBinOp(LogicOpc, Cast0Src, Cast1Src, I.getName()); return CastInst::Create(CastOpcode, NewOp, DestTy); } @@ -1196,15 +1188,14 @@ static Instruction *foldBoolSextMaskToSelect(BinaryOperator &I) { // Fold (and (sext bool to A), B) --> (select bool, B, 0) Value *X = nullptr; - if (match(Op0, m_SExt(m_Value(X))) && - X->getType()->getScalarType()->isIntegerTy(1)) { + if (match(Op0, m_SExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1)) { Value *Zero = Constant::getNullValue(Op1->getType()); return SelectInst::Create(X, Op1, Zero); } // Fold (and ~(sext bool to A), B) --> (select bool, 0, B) if (match(Op0, m_Not(m_SExt(m_Value(X)))) && - X->getType()->getScalarType()->isIntegerTy(1)) { + X->getType()->isIntOrIntVectorTy(1)) { Value *Zero = Constant::getNullValue(Op0->getType()); return SelectInst::Create(X, Zero, Op1); } @@ -1283,14 +1274,14 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { return &I; // Do this before using distributive laws to catch simple and/or/not patterns. - if (Instruction *Xor = foldAndToXor(I, *Builder)) + if (Instruction *Xor = foldAndToXor(I, Builder)) return Xor; // (A|B)&(A|C) -> A|(B&C) etc if (Value *V = SimplifyUsingDistributiveLaws(I)) return replaceInstUsesWith(I, V); - if (Value *V = SimplifyBSwap(I)) + if (Value *V = SimplifyBSwap(I, Builder)) return replaceInstUsesWith(I, V); if (ConstantInt *AndRHS = dyn_cast<ConstantInt>(Op1)) { @@ -1310,15 +1301,15 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { APInt NotAndRHS(~AndRHSMask); if (MaskedValueIsZero(Op0LHS, NotAndRHS, 0, &I)) { // Not masking anything out for the LHS, move to RHS. - Value *NewRHS = Builder->CreateAnd(Op0RHS, AndRHS, - Op0RHS->getName()+".masked"); + Value *NewRHS = Builder.CreateAnd(Op0RHS, AndRHS, + Op0RHS->getName()+".masked"); return BinaryOperator::Create(Op0I->getOpcode(), Op0LHS, NewRHS); } if (!isa<Constant>(Op0RHS) && MaskedValueIsZero(Op0RHS, NotAndRHS, 0, &I)) { // Not masking anything out for the RHS, move to LHS. 
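// A sign-extended i1 is all-ones or all-zeros, which is why the
// foldBoolSextMaskToSelect transform above can turn the AND into a select.
// Minimal standalone illustration (assumes nothing beyond two's complement):
#include <cassert>
#include <cstdint>

int main() {
  int32_t B = 0x12345678;                  // arbitrary demo value
  for (int Bit = 0; Bit <= 1; ++Bit) {
    int32_t Mask = Bit ? -1 : 0;           // sext i1 %bit to i32
    assert((Mask & B) == (Bit ? B : 0));   // (and (sext %bit), B) == select
  }
  return 0;
}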
- Value *NewLHS = Builder->CreateAnd(Op0LHS, AndRHS, - Op0LHS->getName()+".masked"); + Value *NewLHS = Builder.CreateAnd(Op0LHS, AndRHS, + Op0LHS->getName()+".masked"); return BinaryOperator::Create(Op0I->getOpcode(), NewLHS, Op0RHS); } @@ -1337,7 +1328,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { // (1 >> x) & 1 --> zext(x == 0) if (AndRHSMask.isOneValue() && Op0LHS == AndRHS) { Value *NewICmp = - Builder->CreateICmpEQ(Op0RHS, Constant::getNullValue(I.getType())); + Builder.CreateICmpEQ(Op0RHS, Constant::getNullValue(I.getType())); return new ZExtInst(NewICmp, I.getType()); } break; @@ -1360,11 +1351,11 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { auto *TruncC1 = ConstantExpr::getTrunc(C1, X->getType()); Value *BinOp; if (isa<ZExtInst>(Op0LHS)) - BinOp = Builder->CreateBinOp(Op0I->getOpcode(), X, TruncC1); + BinOp = Builder.CreateBinOp(Op0I->getOpcode(), X, TruncC1); else - BinOp = Builder->CreateBinOp(Op0I->getOpcode(), TruncC1, X); + BinOp = Builder.CreateBinOp(Op0I->getOpcode(), TruncC1, X); auto *TruncC2 = ConstantExpr::getTrunc(AndRHS, X->getType()); - auto *And = Builder->CreateAnd(BinOp, TruncC2); + auto *And = Builder.CreateAnd(BinOp, TruncC2); return new ZExtInst(And, I.getType()); } } @@ -1384,7 +1375,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { // into : and (trunc X to T), trunc(YC) & C2 // This will fold the two constants together, which may allow // other simplifications. - Value *NewCast = Builder->CreateTrunc(X, I.getType(), "and.shrunk"); + Value *NewCast = Builder.CreateTrunc(X, I.getType(), "and.shrunk"); Constant *C3 = ConstantExpr::getTrunc(YC, I.getType()); C3 = ConstantExpr::getAnd(C3, AndRHS); return BinaryOperator::CreateAnd(NewCast, C3); @@ -1396,7 +1387,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { if (Instruction *FoldedLogic = foldOpWithConstantIntoOperand(I)) return FoldedLogic; - if (Instruction *DeMorgan = matchDeMorgansLaws(I, *Builder)) + if (Instruction *DeMorgan = matchDeMorgansLaws(I, Builder)) return DeMorgan; { @@ -1422,7 +1413,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { // an endless loop. By checking that A is non-constant we ensure that // we will never get to the loop. 
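// The A & (A ^ B) --> A & ~B fold matched just below holds bit-by-bit:
// where A is 0 both sides are 0, where A is 1 both reduce to ~B.
// Exhaustive i8 check, illustrative only:
#include <cassert>
#include <cstdint>

int main() {
  for (unsigned a = 0; a <= 255; ++a)
    for (unsigned b = 0; b <= 255; ++b) {
      uint8_t A = (uint8_t)a, B = (uint8_t)b;
      assert((A & (A ^ B)) == (A & (uint8_t)~B));
    }
  return 0;
}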
if (A == tmpOp0 && !isa<Constant>(A)) // A&(A^B) -> A & ~B - return BinaryOperator::CreateAnd(A, Builder->CreateNot(B)); + return BinaryOperator::CreateAnd(A, Builder.CreateNot(B)); } } @@ -1436,13 +1427,13 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { if (match(Op0, m_Xor(m_Value(A), m_Value(B)))) if (match(Op1, m_Xor(m_Xor(m_Specific(B), m_Value(C)), m_Specific(A)))) if (Op1->hasOneUse() || IsFreeToInvert(C, C->hasOneUse())) - return BinaryOperator::CreateAnd(Op0, Builder->CreateNot(C)); + return BinaryOperator::CreateAnd(Op0, Builder.CreateNot(C)); // ((A ^ C) ^ B) & (B ^ A) -> (B ^ A) & ~C if (match(Op0, m_Xor(m_Xor(m_Value(A), m_Value(C)), m_Value(B)))) if (match(Op1, m_Xor(m_Specific(B), m_Specific(A)))) if (Op0->hasOneUse() || IsFreeToInvert(C, C->hasOneUse())) - return BinaryOperator::CreateAnd(Op1, Builder->CreateNot(C)); + return BinaryOperator::CreateAnd(Op1, Builder.CreateNot(C)); // (A | B) & ((~A) ^ B) -> (A & B) // (A | B) & (B ^ (~A)) -> (A & B) @@ -1474,18 +1465,18 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { if (LHS && match(Op1, m_OneUse(m_And(m_Value(X), m_Value(Y))))) { if (auto *Cmp = dyn_cast<ICmpInst>(X)) if (Value *Res = foldAndOfICmps(LHS, Cmp, I)) - return replaceInstUsesWith(I, Builder->CreateAnd(Res, Y)); + return replaceInstUsesWith(I, Builder.CreateAnd(Res, Y)); if (auto *Cmp = dyn_cast<ICmpInst>(Y)) if (Value *Res = foldAndOfICmps(LHS, Cmp, I)) - return replaceInstUsesWith(I, Builder->CreateAnd(Res, X)); + return replaceInstUsesWith(I, Builder.CreateAnd(Res, X)); } if (RHS && match(Op0, m_OneUse(m_And(m_Value(X), m_Value(Y))))) { if (auto *Cmp = dyn_cast<ICmpInst>(X)) if (Value *Res = foldAndOfICmps(Cmp, RHS, I)) - return replaceInstUsesWith(I, Builder->CreateAnd(Res, Y)); + return replaceInstUsesWith(I, Builder.CreateAnd(Res, Y)); if (auto *Cmp = dyn_cast<ICmpInst>(Y)) if (Value *Res = foldAndOfICmps(Cmp, RHS, I)) - return replaceInstUsesWith(I, Builder->CreateAnd(Res, X)); + return replaceInstUsesWith(I, Builder.CreateAnd(Res, X)); } } @@ -1567,14 +1558,14 @@ static Value *getSelectCondition(Value *A, Value *B, InstCombiner::BuilderTy &Builder) { // If these are scalars or vectors of i1, A can be used directly. Type *Ty = A->getType(); - if (match(A, m_Not(m_Specific(B))) && Ty->getScalarType()->isIntegerTy(1)) + if (match(A, m_Not(m_Specific(B))) && Ty->isIntOrIntVectorTy(1)) return A; // If A and B are sign-extended, look through the sexts to find the booleans. Value *Cond; Value *NotB; if (match(A, m_SExt(m_Value(Cond))) && - Cond->getType()->getScalarType()->isIntegerTy(1) && + Cond->getType()->isIntOrIntVectorTy(1) && match(B, m_OneUse(m_Not(m_Value(NotB))))) { NotB = peekThroughBitcast(NotB, true); if (match(NotB, m_SExt(m_Specific(Cond)))) @@ -1596,7 +1587,7 @@ static Value *getSelectCondition(Value *A, Value *B, // operand, see if the constants are inverse bitmasks. 
if (match(A, (m_Xor(m_SExt(m_Value(Cond)), m_Constant(AC)))) && match(B, (m_Xor(m_SExt(m_Specific(Cond)), m_Constant(BC)))) && - Cond->getType()->getScalarType()->isIntegerTy(1) && + Cond->getType()->isIntOrIntVectorTy(1) && areInverseVectorBitmasks(AC, BC)) { AC = ConstantExpr::getTrunc(AC, CmpInst::makeCmpResultType(Ty)); return Builder.CreateXor(Cond, AC); @@ -1687,9 +1678,9 @@ Value *InstCombiner::foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, RangeDiff.ugt(LHSC->getValue())) { Value *MaskC = ConstantInt::get(LAddC->getType(), ~DiffC); - Value *NewAnd = Builder->CreateAnd(LAddOpnd, MaskC); - Value *NewAdd = Builder->CreateAdd(NewAnd, MaxAddC); - return (Builder->CreateICmp(LHS->getPredicate(), NewAdd, LHSC)); + Value *NewAnd = Builder.CreateAnd(LAddOpnd, MaskC); + Value *NewAdd = Builder.CreateAdd(NewAnd, MaxAddC); + return Builder.CreateICmp(LHS->getPredicate(), NewAdd, LHSC); } } } @@ -1736,9 +1727,9 @@ Value *InstCombiner::foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, A = LHS->getOperand(1); } if (A && B) - return Builder->CreateICmp( + return Builder.CreateICmp( ICmpInst::ICMP_UGE, - Builder->CreateAdd(B, ConstantInt::getSigned(B->getType(), -1)), A); + Builder.CreateAdd(B, ConstantInt::getSigned(B->getType(), -1)), A); } // E.g. (icmp slt x, 0) | (icmp sgt x, n) --> icmp ugt x, n @@ -1759,8 +1750,8 @@ Value *InstCombiner::foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, if (LHSC == RHSC && PredL == PredR) { // (icmp ne A, 0) | (icmp ne B, 0) --> (icmp ne (A|B), 0) if (PredL == ICmpInst::ICMP_NE && LHSC->isZero()) { - Value *NewOr = Builder->CreateOr(LHS0, RHS0); - return Builder->CreateICmp(PredL, NewOr, LHSC); + Value *NewOr = Builder.CreateOr(LHS0, RHS0); + return Builder.CreateICmp(PredL, NewOr, LHSC); } } @@ -1770,7 +1761,7 @@ Value *InstCombiner::foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, ConstantInt *AddC; if (match(LHS0, m_Add(m_Specific(RHS0), m_ConstantInt(AddC)))) if (RHSC->getValue() + AddC->getValue() == LHSC->getValue()) - return Builder->CreateICmpULE(LHS0, LHSC); + return Builder.CreateICmpULE(LHS0, LHSC); } // From here on, we only handle: @@ -1886,18 +1877,18 @@ Value *InstCombiner::foldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS) { // If either of the constants are nans, then the whole thing returns // true. if (LHSC->getValueAPF().isNaN() || RHSC->getValueAPF().isNaN()) - return Builder->getTrue(); + return Builder.getTrue(); // Otherwise, no need to compare the two constants, compare the // rest. - return Builder->CreateFCmpUNO(LHS->getOperand(0), RHS->getOperand(0)); + return Builder.CreateFCmpUNO(LHS->getOperand(0), RHS->getOperand(0)); } // Handle vector zeros. This occurs because the canonical form of // "fcmp uno x,x" is "fcmp uno x, 0". if (isa<ConstantAggregateZero>(LHS->getOperand(1)) && isa<ConstantAggregateZero>(RHS->getOperand(1))) - return Builder->CreateFCmpUNO(LHS->getOperand(0), RHS->getOperand(0)); + return Builder.CreateFCmpUNO(LHS->getOperand(0), RHS->getOperand(0)); return nullptr; } @@ -1916,7 +1907,7 @@ Value *InstCombiner::foldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS) { /// when the XOR of the two constants is "all ones" (-1). 
static Instruction *FoldOrWithConstants(BinaryOperator &I, Value *Op, Value *A, Value *B, Value *C, - InstCombiner::BuilderTy *Builder) { + InstCombiner::BuilderTy &Builder) { ConstantInt *CI1 = dyn_cast<ConstantInt>(C); if (!CI1) return nullptr; @@ -1928,7 +1919,7 @@ static Instruction *FoldOrWithConstants(BinaryOperator &I, Value *Op, if (!Xor.isAllOnesValue()) return nullptr; if (V1 == A || V1 == B) { - Value *NewOp = Builder->CreateAnd((V1 == A) ? B : A, CI1); + Value *NewOp = Builder.CreateAnd((V1 == A) ? B : A, CI1); return BinaryOperator::CreateOr(NewOp, V1); } @@ -1946,7 +1937,7 @@ static Instruction *FoldOrWithConstants(BinaryOperator &I, Value *Op, /// when the XOR of the two constants is "all ones" (-1). static Instruction *FoldXorWithConstants(BinaryOperator &I, Value *Op, Value *A, Value *B, Value *C, - InstCombiner::BuilderTy *Builder) { + InstCombiner::BuilderTy &Builder) { ConstantInt *CI1 = dyn_cast<ConstantInt>(C); if (!CI1) return nullptr; @@ -1961,7 +1952,7 @@ static Instruction *FoldXorWithConstants(BinaryOperator &I, Value *Op, return nullptr; if (V1 == A || V1 == B) { - Value *NewOp = Builder->CreateAnd(V1 == A ? B : A, CI1); + Value *NewOp = Builder.CreateAnd(V1 == A ? B : A, CI1); return BinaryOperator::CreateXor(NewOp, V1); } @@ -1987,14 +1978,14 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { return &I; // Do this before using distributive laws to catch simple and/or/not patterns. - if (Instruction *Xor = foldOrToXor(I, *Builder)) + if (Instruction *Xor = foldOrToXor(I, Builder)) return Xor; // (A&B)|(A&C) -> A&(B|C) etc if (Value *V = SimplifyUsingDistributiveLaws(I)) return replaceInstUsesWith(I, V); - if (Value *V = SimplifyBSwap(I)) + if (Value *V = SimplifyBSwap(I, Builder)) return replaceInstUsesWith(I, V); if (isa<Constant>(Op1)) @@ -2011,7 +2002,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { // (X^C)|Y -> (X|Y)^C iff Y&C == 0 if (match(Op0, m_OneUse(m_Xor(m_Value(A), m_APInt(C)))) && MaskedValueIsZero(Op1, *C, 0, &I)) { - Value *NOr = Builder->CreateOr(A, Op1); + Value *NOr = Builder.CreateOr(A, Op1); NOr->takeName(Op0); return BinaryOperator::CreateXor(NOr, ConstantInt::get(NOr->getType(), *C)); @@ -2020,7 +2011,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { // Y|(X^C) -> (X|Y)^C iff Y&C == 0 if (match(Op1, m_OneUse(m_Xor(m_Value(A), m_APInt(C)))) && MaskedValueIsZero(Op0, *C, 0, &I)) { - Value *NOr = Builder->CreateOr(A, Op0); + Value *NOr = Builder.CreateOr(A, Op0); NOr->takeName(Op0); return BinaryOperator::CreateXor(NOr, ConstantInt::get(NOr->getType(), *C)); @@ -2058,7 +2049,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { (V2 == B && MaskedValueIsZero(V1, ~C1->getValue(), 0, &I)))) // (N|V) return BinaryOperator::CreateAnd(A, - Builder->getInt(C1->getValue()|C2->getValue())); + Builder.getInt(C1->getValue()|C2->getValue())); // Or commutes, try both ways. if (match(B, m_Or(m_Value(V1), m_Value(V2))) && ((V1 == A && @@ -2066,7 +2057,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { (V2 == A && MaskedValueIsZero(V1, ~C2->getValue(), 0, &I)))) // (N|V) return BinaryOperator::CreateAnd(B, - Builder->getInt(C1->getValue()|C2->getValue())); + Builder.getInt(C1->getValue()|C2->getValue())); // ((V|C3)&C1) | ((V|C4)&C2) --> (V|C3|C4)&(C1|C2) // iff (C1&C2) == 0 and (C3&~C1) == 0 and (C4&~C2) == 0. 
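// The (X ^ C) | Y --> (X | Y) ^ C rewrite above is only valid when
// MaskedValueIsZero proves Y & C == 0; an i8 check with that precondition
// enforced (C = 0x0f is an arbitrary demo constant):
#include <cassert>
#include <cstdint>

int main() {
  const uint8_t C = 0x0f;
  for (unsigned x = 0; x <= 255; ++x)
    for (unsigned y = 0; y <= 255; ++y) {
      uint8_t X = (uint8_t)x;
      uint8_t Y = (uint8_t)(y & ~C);                 // force Y & C == 0
      assert(((X ^ C) | Y) == (uint8_t)((X | Y) ^ C));
    }
  return 0;
}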
@@ -2075,9 +2066,9 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { (C3->getValue() & ~C1->getValue()).isNullValue() && match(B, m_Or(m_Specific(V1), m_ConstantInt(C4))) && (C4->getValue() & ~C2->getValue()).isNullValue()) { - V2 = Builder->CreateOr(V1, ConstantExpr::getOr(C3, C4), "bitfield"); + V2 = Builder.CreateOr(V1, ConstantExpr::getOr(C3, C4), "bitfield"); return BinaryOperator::CreateAnd(V2, - Builder->getInt(C1->getValue()|C2->getValue())); + Builder.getInt(C1->getValue()|C2->getValue())); } } } @@ -2087,21 +2078,21 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { // 'or' that it is replacing. if (Op0->hasOneUse() || Op1->hasOneUse()) { // (Cond & C) | (~Cond & D) -> Cond ? C : D, and commuted variants. - if (Value *V = matchSelectFromAndOr(A, C, B, D, *Builder)) + if (Value *V = matchSelectFromAndOr(A, C, B, D, Builder)) return replaceInstUsesWith(I, V); - if (Value *V = matchSelectFromAndOr(A, C, D, B, *Builder)) + if (Value *V = matchSelectFromAndOr(A, C, D, B, Builder)) return replaceInstUsesWith(I, V); - if (Value *V = matchSelectFromAndOr(C, A, B, D, *Builder)) + if (Value *V = matchSelectFromAndOr(C, A, B, D, Builder)) return replaceInstUsesWith(I, V); - if (Value *V = matchSelectFromAndOr(C, A, D, B, *Builder)) + if (Value *V = matchSelectFromAndOr(C, A, D, B, Builder)) return replaceInstUsesWith(I, V); - if (Value *V = matchSelectFromAndOr(B, D, A, C, *Builder)) + if (Value *V = matchSelectFromAndOr(B, D, A, C, Builder)) return replaceInstUsesWith(I, V); - if (Value *V = matchSelectFromAndOr(B, D, C, A, *Builder)) + if (Value *V = matchSelectFromAndOr(B, D, C, A, Builder)) return replaceInstUsesWith(I, V); - if (Value *V = matchSelectFromAndOr(D, B, A, C, *Builder)) + if (Value *V = matchSelectFromAndOr(D, B, A, C, Builder)) return replaceInstUsesWith(I, V); - if (Value *V = matchSelectFromAndOr(D, B, C, A, *Builder)) + if (Value *V = matchSelectFromAndOr(D, B, C, A, Builder)) return replaceInstUsesWith(I, V); } @@ -2139,9 +2130,9 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { // ((B | C) & A) | B -> B | (A & C) if (match(Op0, m_And(m_Or(m_Specific(Op1), m_Value(C)), m_Value(A)))) - return BinaryOperator::CreateOr(Op1, Builder->CreateAnd(A, C)); + return BinaryOperator::CreateOr(Op1, Builder.CreateAnd(A, C)); - if (Instruction *DeMorgan = matchDeMorgansLaws(I, *Builder)) + if (Instruction *DeMorgan = matchDeMorgansLaws(I, Builder)) return DeMorgan; // Canonicalize xor to the RHS. @@ -2163,11 +2154,11 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { return BinaryOperator::CreateOr(A, B); if (Op1->hasOneUse() && match(A, m_Not(m_Specific(Op0)))) { - Value *Not = Builder->CreateNot(B, B->getName()+".not"); + Value *Not = Builder.CreateNot(B, B->getName() + ".not"); return BinaryOperator::CreateOr(Not, Op0); } if (Op1->hasOneUse() && match(B, m_Not(m_Specific(Op0)))) { - Value *Not = Builder->CreateNot(A, A->getName()+".not"); + Value *Not = Builder.CreateNot(A, A->getName() + ".not"); return BinaryOperator::CreateOr(Not, Op0); } } @@ -2181,7 +2172,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { B->getOpcode() == Instruction::Xor)) { Value *NotOp = Op0 == B->getOperand(0) ? B->getOperand(1) : B->getOperand(0); - Value *Not = Builder->CreateNot(NotOp, NotOp->getName()+".not"); + Value *Not = Builder.CreateNot(NotOp, NotOp->getName() + ".not"); return BinaryOperator::CreateOr(Not, Op0); } @@ -2194,7 +2185,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { // xor was canonicalized to Op1 above. 
if (match(Op1, m_c_Xor(m_Not(m_Value(A)), m_Value(B))) && match(Op0, m_c_And(m_Specific(A), m_Specific(B)))) - return BinaryOperator::CreateXor(Builder->CreateNot(A), B); + return BinaryOperator::CreateXor(Builder.CreateNot(A), B); if (SwappedForXor) std::swap(Op0, Op1); @@ -2212,18 +2203,18 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { if (LHS && match(Op1, m_OneUse(m_Or(m_Value(X), m_Value(Y))))) { if (auto *Cmp = dyn_cast<ICmpInst>(X)) if (Value *Res = foldOrOfICmps(LHS, Cmp, I)) - return replaceInstUsesWith(I, Builder->CreateOr(Res, Y)); + return replaceInstUsesWith(I, Builder.CreateOr(Res, Y)); if (auto *Cmp = dyn_cast<ICmpInst>(Y)) if (Value *Res = foldOrOfICmps(LHS, Cmp, I)) - return replaceInstUsesWith(I, Builder->CreateOr(Res, X)); + return replaceInstUsesWith(I, Builder.CreateOr(Res, X)); } if (RHS && match(Op0, m_OneUse(m_Or(m_Value(X), m_Value(Y))))) { if (auto *Cmp = dyn_cast<ICmpInst>(X)) if (Value *Res = foldOrOfICmps(Cmp, RHS, I)) - return replaceInstUsesWith(I, Builder->CreateOr(Res, Y)); + return replaceInstUsesWith(I, Builder.CreateOr(Res, Y)); if (auto *Cmp = dyn_cast<ICmpInst>(Y)) if (Value *Res = foldOrOfICmps(Cmp, RHS, I)) - return replaceInstUsesWith(I, Builder->CreateOr(Res, X)); + return replaceInstUsesWith(I, Builder.CreateOr(Res, X)); } } @@ -2238,10 +2229,10 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { // or(sext(A), B) / or(B, sext(A)) --> A ? -1 : B, where A is i1 or <N x i1>. if (match(Op0, m_OneUse(m_SExt(m_Value(A)))) && - A->getType()->getScalarType()->isIntegerTy(1)) + A->getType()->isIntOrIntVectorTy(1)) return SelectInst::Create(A, ConstantInt::getSigned(I.getType(), -1), Op1); if (match(Op1, m_OneUse(m_SExt(m_Value(A)))) && - A->getType()->getScalarType()->isIntegerTy(1)) + A->getType()->isIntOrIntVectorTy(1)) return SelectInst::Create(A, ConstantInt::getSigned(I.getType(), -1), Op0); // Note: If we've gotten to the point of visiting the outer OR, then the @@ -2252,7 +2243,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { ConstantInt *C1; if (Op0->hasOneUse() && !isa<ConstantInt>(Op1) && match(Op0, m_Or(m_Value(A), m_ConstantInt(C1)))) { - Value *Inner = Builder->CreateOr(A, Op1); + Value *Inner = Builder.CreateOr(A, Op1); Inner->takeName(Op0); return BinaryOperator::CreateOr(Inner, C1); } @@ -2265,8 +2256,8 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { if (Op0->hasOneUse() && Op1->hasOneUse() && match(Op0, m_Select(m_Value(X), m_Value(A), m_Value(B))) && match(Op1, m_Select(m_Value(Y), m_Value(C), m_Value(D))) && X == Y) { - Value *orTrue = Builder->CreateOr(A, C); - Value *orFalse = Builder->CreateOr(B, D); + Value *orTrue = Builder.CreateOr(A, C); + Value *orFalse = Builder.CreateOr(B, D); return SelectInst::Create(X, orTrue, orFalse); } } @@ -2276,7 +2267,8 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { /// A ^ B can be specified using other logic ops in a variety of patterns. We /// can fold these early and efficiently by morphing an existing instruction. -static Instruction *foldXorToXor(BinaryOperator &I) { +static Instruction *foldXorToXor(BinaryOperator &I, + InstCombiner::BuilderTy &Builder) { assert(I.getOpcode() == Instruction::Xor); Value *Op0 = I.getOperand(0); Value *Op1 = I.getOperand(1); @@ -2323,6 +2315,21 @@ static Instruction *foldXorToXor(BinaryOperator &I) { return &I; } + // For the remaining cases we need to get rid of one of the operands. 
+ if (!Op0->hasOneUse() && !Op1->hasOneUse()) + return nullptr; + + // (A | B) ^ ~(A & B) -> ~(A ^ B) + // (A | B) ^ ~(B & A) -> ~(A ^ B) + // (A & B) ^ ~(A | B) -> ~(A ^ B) + // (A & B) ^ ~(B | A) -> ~(A ^ B) + // Complexity sorting ensures the not will be on the right side. + if ((match(Op0, m_Or(m_Value(A), m_Value(B))) && + match(Op1, m_Not(m_c_And(m_Specific(A), m_Specific(B))))) || + (match(Op0, m_And(m_Value(A), m_Value(B))) && + match(Op1, m_Not(m_c_Or(m_Specific(A), m_Specific(B)))))) + return BinaryOperator::CreateNot(Builder.CreateXor(A, B)); + return nullptr; } @@ -2355,12 +2362,12 @@ Value *InstCombiner::foldXorOfICmps(ICmpInst *LHS, ICmpInst *RHS) { if (OrICmp == LHS && AndICmp == RHS && RHS->hasOneUse()) { // (LHS | RHS) & !(LHS & RHS) --> LHS & !RHS RHS->setPredicate(RHS->getInversePredicate()); - return Builder->CreateAnd(LHS, RHS); + return Builder.CreateAnd(LHS, RHS); } if (OrICmp == RHS && AndICmp == LHS && LHS->hasOneUse()) { // !(LHS & RHS) & (LHS | RHS) --> !LHS & RHS LHS->setPredicate(LHS->getInversePredicate()); - return Builder->CreateAnd(LHS, RHS); + return Builder.CreateAnd(LHS, RHS); } } } @@ -2381,7 +2388,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { if (Value *V = SimplifyXorInst(Op0, Op1, SQ.getWithInstruction(&I))) return replaceInstUsesWith(I, V); - if (Instruction *NewXor = foldXorToXor(I)) + if (Instruction *NewXor = foldXorToXor(I, Builder)) return NewXor; // (A&B)^(A&C) -> A&(B^C) etc @@ -2393,7 +2400,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { if (SimplifyDemandedInstructionBits(I)) return &I; - if (Value *V = SimplifyBSwap(I)) + if (Value *V = SimplifyBSwap(I, Builder)) return replaceInstUsesWith(I, V); // Apply DeMorgan's Law for 'nand' / 'nor' logic with an inverted operand. 
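// The new foldXorToXor cases added above rest on a pair of identities:
// (A | B) ^ ~(A & B) == ~(A ^ B) and (A & B) ^ ~(A | B) == ~(A ^ B).
// Exhaustive i8 verification, illustrative only:
#include <cassert>
#include <cstdint>

int main() {
  for (unsigned a = 0; a <= 255; ++a)
    for (unsigned b = 0; b <= 255; ++b) {
      uint8_t A = (uint8_t)a, B = (uint8_t)b;
      uint8_t NotXor = (uint8_t)~(A ^ B);
      assert(((A | B) ^ (uint8_t)~(A & B)) == NotXor);
      assert(((A & B) ^ (uint8_t)~(A | B)) == NotXor);
    }
  return 0;
}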
@@ -2404,13 +2411,13 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { // ~(~X & Y) --> (X | ~Y) // ~(Y & ~X) --> (X | ~Y) if (match(&I, m_Not(m_OneUse(m_c_And(m_Not(m_Value(X)), m_Value(Y)))))) { - Value *NotY = Builder->CreateNot(Y, Y->getName() + ".not"); + Value *NotY = Builder.CreateNot(Y, Y->getName() + ".not"); return BinaryOperator::CreateOr(X, NotY); } // ~(~X | Y) --> (X & ~Y) // ~(Y | ~X) --> (X & ~Y) if (match(&I, m_Not(m_OneUse(m_c_Or(m_Not(m_Value(X)), m_Value(Y)))))) { - Value *NotY = Builder->CreateNot(Y, Y->getName() + ".not"); + Value *NotY = Builder.CreateNot(Y, Y->getName() + ".not"); return BinaryOperator::CreateAnd(X, NotY); } @@ -2426,8 +2433,8 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { NotVal->getOperand(0)->hasOneUse()) && IsFreeToInvert(NotVal->getOperand(1), NotVal->getOperand(1)->hasOneUse())) { - Value *NotX = Builder->CreateNot(NotVal->getOperand(0), "notlhs"); - Value *NotY = Builder->CreateNot(NotVal->getOperand(1), "notrhs"); + Value *NotX = Builder.CreateNot(NotVal->getOperand(0), "notlhs"); + Value *NotY = Builder.CreateNot(NotVal->getOperand(1), "notrhs"); if (NotVal->getOpcode() == Instruction::And) return BinaryOperator::CreateOr(NotX, NotY); return BinaryOperator::CreateAnd(NotX, NotY); @@ -2457,7 +2464,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { } // not (cmp A, B) = !cmp A, B - ICmpInst::Predicate Pred; + CmpInst::Predicate Pred; if (match(&I, m_Not(m_OneUse(m_Cmp(Pred, m_Value(), m_Value()))))) { cast<CmpInst>(Op0)->setPredicate(CmpInst::getInversePredicate(Pred)); return replaceInstUsesWith(I, Op0); @@ -2470,8 +2477,8 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { if (CI->hasOneUse() && Op0C->hasOneUse()) { Instruction::CastOps Opcode = Op0C->getOpcode(); if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) && - (RHSC == ConstantExpr::getCast(Opcode, Builder->getTrue(), - Op0C->getDestTy()))) { + (RHSC == ConstantExpr::getCast(Opcode, Builder.getTrue(), + Op0C->getDestTy()))) { CI->setPredicate(CI->getInversePredicate()); return CastInst::Create(Opcode, CI, Op0C->getType()); } @@ -2481,7 +2488,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) { // ~(c-X) == X-c-1 == X+(-c-1) - if (Op0I->getOpcode() == Instruction::Sub && RHSC->isAllOnesValue()) + if (Op0I->getOpcode() == Instruction::Sub && RHSC->isMinusOne()) if (Constant *Op0I0C = dyn_cast<Constant>(Op0I->getOperand(0))) { Constant *NegOp0I0C = ConstantExpr::getNeg(Op0I0C); return BinaryOperator::CreateAdd(Op0I->getOperand(1), @@ -2491,13 +2498,13 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { if (ConstantInt *Op0CI = dyn_cast<ConstantInt>(Op0I->getOperand(1))) { if (Op0I->getOpcode() == Instruction::Add) { // ~(X-c) --> (-c-1)-X - if (RHSC->isAllOnesValue()) { + if (RHSC->isMinusOne()) { Constant *NegOp0CI = ConstantExpr::getNeg(Op0CI); return BinaryOperator::CreateSub(SubOne(NegOp0CI), Op0I->getOperand(0)); } else if (RHSC->getValue().isSignMask()) { // (X + C) ^ signmask -> (X + C + signmask) - Constant *C = Builder->getInt(RHSC->getValue() + Op0CI->getValue()); + Constant *C = Builder.getInt(RHSC->getValue() + Op0CI->getValue()); return BinaryOperator::CreateAdd(Op0I->getOperand(0), C); } @@ -2530,7 +2537,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { APInt FoldConst = C1->getValue().lshr(C2->getValue()); FoldConst ^= C3->getValue(); // Prepare the two operands. 
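// The (X + C) ^ signmask --> (X + C + signmask) case above works because
// xor with the sign bit equals a wrapping add of it: flipping the top bit
// never produces a carry that stays in range. i8 check (C = 42 arbitrary):
#include <cassert>
#include <cstdint>

int main() {
  const uint8_t SignMask = 0x80, C = 42;
  for (unsigned x = 0; x <= 255; ++x) {
    uint8_t X = (uint8_t)x;
    uint8_t Folded = (uint8_t)(X + (uint8_t)(C + SignMask));
    assert((uint8_t)((uint8_t)(X + C) ^ SignMask) == Folded);
  }
  return 0;
}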
- Value *Opnd0 = Builder->CreateLShr(E1->getOperand(0), C2); + Value *Opnd0 = Builder.CreateLShr(E1->getOperand(0), C2); Opnd0->takeName(Op0I); cast<Instruction>(Opnd0)->setDebugLoc(I.getDebugLoc()); Value *FoldVal = ConstantInt::get(Opnd0->getType(), FoldConst); @@ -2575,14 +2582,14 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { if (A == Op1) // (B|A)^B == (A|B)^B std::swap(A, B); if (B == Op1) // (A|B)^B == A & ~B - return BinaryOperator::CreateAnd(A, Builder->CreateNot(Op1)); + return BinaryOperator::CreateAnd(A, Builder.CreateNot(Op1)); } else if (match(Op0, m_OneUse(m_And(m_Value(A), m_Value(B))))) { if (A == Op1) // (A&B)^A -> (B&A)^A std::swap(A, B); const APInt *C; if (B == Op1 && // (B&A)^A == ~B & A !match(Op1, m_APInt(C))) { // Canonical form is (B&C)^C - return BinaryOperator::CreateAnd(Builder->CreateNot(A), Op1); + return BinaryOperator::CreateAnd(Builder.CreateNot(A), Op1); } } } @@ -2594,20 +2601,20 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { match(Op1, m_Or(m_Value(A), m_Value(B)))) { if (D == A) return BinaryOperator::CreateXor( - Builder->CreateAnd(Builder->CreateNot(A), B), C); + Builder.CreateAnd(Builder.CreateNot(A), B), C); if (D == B) return BinaryOperator::CreateXor( - Builder->CreateAnd(Builder->CreateNot(B), A), C); + Builder.CreateAnd(Builder.CreateNot(B), A), C); } // (A | B)^(A ^ C) -> ((~A) & B) ^ C if (match(Op0, m_Or(m_Value(A), m_Value(B))) && match(Op1, m_Xor(m_Value(D), m_Value(C)))) { if (D == A) return BinaryOperator::CreateXor( - Builder->CreateAnd(Builder->CreateNot(A), B), C); + Builder.CreateAnd(Builder.CreateNot(A), B), C); if (D == B) return BinaryOperator::CreateXor( - Builder->CreateAnd(Builder->CreateNot(B), A), C); + Builder.CreateAnd(Builder.CreateNot(B), A), C); } // (A & B) ^ (A ^ B) -> (A | B) if (match(Op0, m_And(m_Value(A), m_Value(B))) && @@ -2624,7 +2631,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { Value *A, *B; if (match(Op0, m_c_And(m_Value(A), m_Not(m_Value(B)))) && match(Op1, m_Not(m_Specific(A)))) - return BinaryOperator::CreateNot(Builder->CreateAnd(A, B)); + return BinaryOperator::CreateNot(Builder.CreateAnd(A, B)); if (auto *LHS = dyn_cast<ICmpInst>(I.getOperand(0))) if (auto *RHS = dyn_cast<ICmpInst>(I.getOperand(1))) diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index 3770021de1002..391c430dab75d 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -128,23 +128,23 @@ Instruction *InstCombiner::SimplifyElementUnorderedAtomicMemCpy( Type::getIntNPtrTy(AMI->getContext(), ElementSizeInBits, Src->getType()->getPointerAddressSpace()); - Value *SrcCasted = Builder->CreatePointerCast(Src, ElementPointerType, - "memcpy_unfold.src_casted"); - Value *DstCasted = Builder->CreatePointerCast(Dst, ElementPointerType, - "memcpy_unfold.dst_casted"); + Value *SrcCasted = Builder.CreatePointerCast(Src, ElementPointerType, + "memcpy_unfold.src_casted"); + Value *DstCasted = Builder.CreatePointerCast(Dst, ElementPointerType, + "memcpy_unfold.dst_casted"); for (uint64_t i = 0; i < NumElements; ++i) { // Get current element addresses ConstantInt *ElementIdxCI = ConstantInt::get(AMI->getContext(), APInt(64, i)); Value *SrcElementAddr = - Builder->CreateGEP(SrcCasted, ElementIdxCI, "memcpy_unfold.src_addr"); + Builder.CreateGEP(SrcCasted, ElementIdxCI, "memcpy_unfold.src_addr"); Value *DstElementAddr = - Builder->CreateGEP(DstCasted, ElementIdxCI, "memcpy_unfold.dst_addr"); + 
Builder.CreateGEP(DstCasted, ElementIdxCI, "memcpy_unfold.dst_addr"); // Load from the source. Transfer alignment information and mark load as // unordered atomic. - LoadInst *Load = Builder->CreateLoad(SrcElementAddr, "memcpy_unfold.val"); + LoadInst *Load = Builder.CreateLoad(SrcElementAddr, "memcpy_unfold.val"); Load->setOrdering(AtomicOrdering::Unordered); // We know alignment of the first element. It is also guaranteed by the // verifier that element size is less or equal than first element @@ -157,7 +157,7 @@ Instruction *InstCombiner::SimplifyElementUnorderedAtomicMemCpy( Load->setDebugLoc(AMI->getDebugLoc()); // Store loaded value via unordered atomic store. - StoreInst *Store = Builder->CreateStore(Load, DstElementAddr); + StoreInst *Store = Builder.CreateStore(Load, DstElementAddr); Store->setOrdering(AtomicOrdering::Unordered); Store->setAlignment(i == 0 ? AMI->getParamAlignment(0) : ElementSizeInBytes); @@ -213,7 +213,7 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { if (MDNode *M = MI->getMetadata(LLVMContext::MD_tbaa_struct)) { if (M->getNumOperands() == 3 && M->getOperand(0) && mdconst::hasa<ConstantInt>(M->getOperand(0)) && - mdconst::extract<ConstantInt>(M->getOperand(0))->isNullValue() && + mdconst::extract<ConstantInt>(M->getOperand(0))->isZero() && M->getOperand(1) && mdconst::hasa<ConstantInt>(M->getOperand(1)) && mdconst::extract<ConstantInt>(M->getOperand(1))->getValue() == @@ -227,9 +227,9 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { SrcAlign = std::max(SrcAlign, CopyAlign); DstAlign = std::max(DstAlign, CopyAlign); - Value *Src = Builder->CreateBitCast(MI->getArgOperand(1), NewSrcPtrTy); - Value *Dest = Builder->CreateBitCast(MI->getArgOperand(0), NewDstPtrTy); - LoadInst *L = Builder->CreateLoad(Src, MI->isVolatile()); + Value *Src = Builder.CreateBitCast(MI->getArgOperand(1), NewSrcPtrTy); + Value *Dest = Builder.CreateBitCast(MI->getArgOperand(0), NewDstPtrTy); + LoadInst *L = Builder.CreateLoad(Src, MI->isVolatile()); L->setAlignment(SrcAlign); if (CopyMD) L->setMetadata(LLVMContext::MD_tbaa, CopyMD); @@ -238,7 +238,7 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { if (LoopMemParallelMD) L->setMetadata(LLVMContext::MD_mem_parallel_loop_access, LoopMemParallelMD); - StoreInst *S = Builder->CreateStore(L, Dest, MI->isVolatile()); + StoreInst *S = Builder.CreateStore(L, Dest, MI->isVolatile()); S->setAlignment(DstAlign); if (CopyMD) S->setMetadata(LLVMContext::MD_tbaa, CopyMD); @@ -274,15 +274,15 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) { Value *Dest = MI->getDest(); unsigned DstAddrSp = cast<PointerType>(Dest->getType())->getAddressSpace(); Type *NewDstPtrTy = PointerType::get(ITy, DstAddrSp); - Dest = Builder->CreateBitCast(Dest, NewDstPtrTy); + Dest = Builder.CreateBitCast(Dest, NewDstPtrTy); // Alignment 0 is identity for alignment 1 for memset, but not store. if (Alignment == 0) Alignment = 1; // Extract the fill value and store. uint64_t Fill = FillC->getZExtValue()*0x0101010101010101ULL; - StoreInst *S = Builder->CreateStore(ConstantInt::get(ITy, Fill), Dest, - MI->isVolatile()); + StoreInst *S = Builder.CreateStore(ConstantInt::get(ITy, Fill), Dest, + MI->isVolatile()); S->setAlignment(Alignment); // Set the size of the copy to 0, it will be deleted on the next iteration. 
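// SimplifyMemSet above replaces a constant-length memset with one wide
// store; multiplying the fill byte by 0x0101010101010101 splats it into
// every byte of the i64. Standalone illustration (0xAB is arbitrary):
#include <cassert>
#include <cstdint>

int main() {
  const uint8_t FillByte = 0xAB;
  uint64_t Fill = (uint64_t)FillByte * 0x0101010101010101ULL;
  for (int i = 0; i < 8; ++i)
    assert((uint8_t)(Fill >> (8 * i)) == FillByte);
  return 0;
}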
@@ -600,8 +600,7 @@ static Value *simplifyX86muldq(const IntrinsicInst &II, return Builder.CreateMul(LHS, RHS); } -static Value *simplifyX86pack(IntrinsicInst &II, InstCombiner &IC, - InstCombiner::BuilderTy &Builder, bool IsSigned) { +static Value *simplifyX86pack(IntrinsicInst &II, bool IsSigned) { Value *Arg0 = II.getArgOperand(0); Value *Arg1 = II.getArgOperand(1); Type *ResTy = II.getType(); @@ -676,8 +675,7 @@ static Value *simplifyX86pack(IntrinsicInst &II, InstCombiner &IC, return ConstantVector::get(Vals); } -static Value *simplifyX86movmsk(const IntrinsicInst &II, - InstCombiner::BuilderTy &Builder) { +static Value *simplifyX86movmsk(const IntrinsicInst &II) { Value *Arg = II.getArgOperand(0); Type *ResTy = II.getType(); Type *ArgTy = Arg->getType(); @@ -860,7 +858,7 @@ static Value *simplifyX86extrq(IntrinsicInst &II, Value *Op0, } // Constant Fold - extraction from zero is always {zero, undef}. - if (CI0 && CI0->equalsInt(0)) + if (CI0 && CI0->isZero()) return LowConstantHighUndef(0); return nullptr; @@ -1404,7 +1402,7 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombiner &IC) { isKnownNonZero(Op0, IC.getDataLayout(), 0, &IC.getAssumptionCache(), &II, &IC.getDominatorTree())) { if (!match(II.getArgOperand(1), m_One())) { - II.setOperand(1, IC.Builder->getTrue()); + II.setOperand(1, IC.Builder.getTrue()); return &II; } } @@ -1477,7 +1475,7 @@ static Instruction *simplifyX86MaskedLoad(IntrinsicInst &II, InstCombiner &IC) { // the LLVM intrinsic definition for the pointer argument. unsigned AddrSpace = cast<PointerType>(Ptr->getType())->getAddressSpace(); PointerType *VecPtrTy = PointerType::get(II.getType(), AddrSpace); - Value *PtrCast = IC.Builder->CreateBitCast(Ptr, VecPtrTy, "castvec"); + Value *PtrCast = IC.Builder.CreateBitCast(Ptr, VecPtrTy, "castvec"); // Second, convert the x86 XMM integer vector mask to a vector of bools based // on each element's most significant bit (the sign bit). @@ -1485,7 +1483,7 @@ static Instruction *simplifyX86MaskedLoad(IntrinsicInst &II, InstCombiner &IC) { // The pass-through vector for an x86 masked load is a zero vector. CallInst *NewMaskedLoad = - IC.Builder->CreateMaskedLoad(PtrCast, 1, BoolMask, ZeroVec); + IC.Builder.CreateMaskedLoad(PtrCast, 1, BoolMask, ZeroVec); return IC.replaceInstUsesWith(II, NewMaskedLoad); } @@ -1520,13 +1518,13 @@ static bool simplifyX86MaskedStore(IntrinsicInst &II, InstCombiner &IC) { // the LLVM intrinsic definition for the pointer argument. unsigned AddrSpace = cast<PointerType>(Ptr->getType())->getAddressSpace(); PointerType *VecPtrTy = PointerType::get(Vec->getType(), AddrSpace); - Value *PtrCast = IC.Builder->CreateBitCast(Ptr, VecPtrTy, "castvec"); + Value *PtrCast = IC.Builder.CreateBitCast(Ptr, VecPtrTy, "castvec"); // Second, convert the x86 XMM integer vector mask to a vector of bools based // on each element's most significant bit (the sign bit). Constant *BoolMask = getNegativeIsTrueBoolVec(ConstMask); - IC.Builder->CreateMaskedStore(Vec, PtrCast, 1, BoolMask); + IC.Builder.CreateMaskedStore(Vec, PtrCast, 1, BoolMask); // 'Replace uses' doesn't work for stores. Erase the original masked store. IC.eraseInstFromFunction(II); @@ -1764,7 +1762,7 @@ static Instruction *SimplifyNVVMIntrinsic(IntrinsicInst *II, InstCombiner &IC) { // The remainder of cases are NVVM intrinsics that map to LLVM idioms, but // need special handling. 
// - // We seem to be mising intrinsics for rcp.approx.{ftz.}f32, which is just + // We seem to be missing intrinsics for rcp.approx.{ftz.}f32, which is just // as well. case Intrinsic::nvvm_rcp_rn_d: return {SPC_Reciprocal, FTZ_Any}; @@ -1965,16 +1963,17 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { Value *IIOperand = II->getArgOperand(0); Value *X = nullptr; + // TODO should this be in InstSimplify? // bswap(bswap(x)) -> x if (match(IIOperand, m_BSwap(m_Value(X)))) - return replaceInstUsesWith(CI, X); + return replaceInstUsesWith(CI, X); // bswap(trunc(bswap(x))) -> trunc(lshr(x, c)) if (match(IIOperand, m_Trunc(m_BSwap(m_Value(X))))) { unsigned C = X->getType()->getPrimitiveSizeInBits() - IIOperand->getType()->getPrimitiveSizeInBits(); Value *CV = ConstantInt::get(X->getType(), C); - Value *V = Builder->CreateLShr(X, CV); + Value *V = Builder.CreateLShr(X, CV); return new TruncInst(V, IIOperand->getType()); } break; @@ -1984,6 +1983,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { Value *IIOperand = II->getArgOperand(0); Value *X = nullptr; + // TODO should this be in InstSimplify? // bitreverse(bitreverse(x)) -> x if (match(IIOperand, m_BitReverse(m_Value(X)))) return replaceInstUsesWith(CI, X); @@ -1991,7 +1991,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { } case Intrinsic::masked_load: - if (Value *SimplifiedMaskedOp = simplifyMaskedLoad(*II, *Builder)) + if (Value *SimplifiedMaskedOp = simplifyMaskedLoad(*II, Builder)) return replaceInstUsesWith(CI, SimplifiedMaskedOp); break; case Intrinsic::masked_store: @@ -2010,7 +2010,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { if (Power->isOne()) return replaceInstUsesWith(CI, II->getArgOperand(0)); // powi(x, -1) -> 1/x - if (Power->isAllOnesValue()) + if (Power->isMinusOne()) return BinaryOperator::CreateFDiv(ConstantFP::get(CI.getType(), 1.0), II->getArgOperand(0)); } @@ -2073,11 +2073,11 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::fmuladd: { // Canonicalize fast fmuladd to the separate fmul + fadd. 
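// The bswap(trunc(bswap(x))) --> trunc(lshr(x, c)) fold above keeps the
// high bytes of x, where c is the number of truncated bits. Sketch of the
// i32 -> i16 case with hand-rolled byte swaps (demo value is arbitrary):
#include <cassert>
#include <cstdint>

static uint32_t bswap32(uint32_t V) {
  return (V >> 24) | ((V >> 8) & 0xff00u) | ((V << 8) & 0xff0000u) | (V << 24);
}
static uint16_t bswap16(uint16_t V) { return (uint16_t)((V >> 8) | (V << 8)); }

int main() {
  uint32_t X = 0x11223344u;
  // bswap(trunc(bswap(X))) == trunc(X >> (32 - 16))
  assert(bswap16((uint16_t)bswap32(X)) == (uint16_t)(X >> 16));
  return 0;
}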
if (II->hasUnsafeAlgebra()) { - BuilderTy::FastMathFlagGuard Guard(*Builder); - Builder->setFastMathFlags(II->getFastMathFlags()); - Value *Mul = Builder->CreateFMul(II->getArgOperand(0), - II->getArgOperand(1)); - Value *Add = Builder->CreateFAdd(Mul, II->getArgOperand(2)); + BuilderTy::FastMathFlagGuard Guard(Builder); + Builder.setFastMathFlags(II->getFastMathFlags()); + Value *Mul = Builder.CreateFMul(II->getArgOperand(0), + II->getArgOperand(1)); + Value *Add = Builder.CreateFAdd(Mul, II->getArgOperand(2)); Add->takeName(II); return replaceInstUsesWith(*II, Add); } @@ -2128,8 +2128,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { Constant *LHS, *RHS; if (match(II->getArgOperand(0), m_Select(m_Value(Cond), m_Constant(LHS), m_Constant(RHS)))) { - CallInst *Call0 = Builder->CreateCall(II->getCalledFunction(), {LHS}); - CallInst *Call1 = Builder->CreateCall(II->getCalledFunction(), {RHS}); + CallInst *Call0 = Builder.CreateCall(II->getCalledFunction(), {LHS}); + CallInst *Call1 = Builder.CreateCall(II->getCalledFunction(), {RHS}); return SelectInst::Create(Cond, Call0, Call1); } @@ -2147,7 +2147,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // fabs (fpext x) -> fpext (fabs x) Value *F = Intrinsic::getDeclaration(II->getModule(), II->getIntrinsicID(), { ExtSrc->getType() }); - CallInst *NewFabs = Builder->CreateCall(F, ExtSrc); + CallInst *NewFabs = Builder.CreateCall(F, ExtSrc); NewFabs->copyFastMathFlags(II); NewFabs->takeName(II); return new FPExtInst(NewFabs, II->getType()); @@ -2174,7 +2174,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // Turn PPC lvx -> load if the pointer is known aligned. if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, II, &AC, &DT) >= 16) { - Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0), + Value *Ptr = Builder.CreateBitCast(II->getArgOperand(0), PointerType::getUnqual(II->getType())); return new LoadInst(Ptr); } @@ -2182,8 +2182,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::ppc_vsx_lxvw4x: case Intrinsic::ppc_vsx_lxvd2x: { // Turn PPC VSX loads into normal loads. - Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0), - PointerType::getUnqual(II->getType())); + Value *Ptr = Builder.CreateBitCast(II->getArgOperand(0), + PointerType::getUnqual(II->getType())); return new LoadInst(Ptr, Twine(""), false, 1); } case Intrinsic::ppc_altivec_stvx: @@ -2193,7 +2193,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { &DT) >= 16) { Type *OpPtrTy = PointerType::getUnqual(II->getArgOperand(0)->getType()); - Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy); + Value *Ptr = Builder.CreateBitCast(II->getArgOperand(1), OpPtrTy); return new StoreInst(II->getArgOperand(0), Ptr); } break; @@ -2201,18 +2201,18 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::ppc_vsx_stxvd2x: { // Turn PPC VSX stores into normal stores. Type *OpPtrTy = PointerType::getUnqual(II->getArgOperand(0)->getType()); - Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy); + Value *Ptr = Builder.CreateBitCast(II->getArgOperand(1), OpPtrTy); return new StoreInst(II->getArgOperand(0), Ptr, false, 1); } case Intrinsic::ppc_qpx_qvlfs: // Turn PPC QPX qvlfs -> load if the pointer is known aligned. 
if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, II, &AC, &DT) >= 16) { - Type *VTy = VectorType::get(Builder->getFloatTy(), + Type *VTy = VectorType::get(Builder.getFloatTy(), II->getType()->getVectorNumElements()); - Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0), + Value *Ptr = Builder.CreateBitCast(II->getArgOperand(0), PointerType::getUnqual(VTy)); - Value *Load = Builder->CreateLoad(Ptr); + Value *Load = Builder.CreateLoad(Ptr); return new FPExtInst(Load, II->getType()); } break; @@ -2220,7 +2220,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // Turn PPC QPX qvlfd -> load if the pointer is known aligned. if (getOrEnforceKnownAlignment(II->getArgOperand(0), 32, DL, II, &AC, &DT) >= 32) { - Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0), + Value *Ptr = Builder.CreateBitCast(II->getArgOperand(0), PointerType::getUnqual(II->getType())); return new LoadInst(Ptr); } @@ -2229,11 +2229,11 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // Turn PPC QPX qvstfs -> store if the pointer is known aligned. if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, DL, II, &AC, &DT) >= 16) { - Type *VTy = VectorType::get(Builder->getFloatTy(), + Type *VTy = VectorType::get(Builder.getFloatTy(), II->getArgOperand(0)->getType()->getVectorNumElements()); - Value *TOp = Builder->CreateFPTrunc(II->getArgOperand(0), VTy); + Value *TOp = Builder.CreateFPTrunc(II->getArgOperand(0), VTy); Type *OpPtrTy = PointerType::getUnqual(VTy); - Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy); + Value *Ptr = Builder.CreateBitCast(II->getArgOperand(1), OpPtrTy); return new StoreInst(TOp, Ptr); } break; @@ -2243,7 +2243,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { &DT) >= 32) { Type *OpPtrTy = PointerType::getUnqual(II->getArgOperand(0)->getType()); - Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy); + Value *Ptr = Builder.CreateBitCast(II->getArgOperand(1), OpPtrTy); return new StoreInst(II->getArgOperand(0), Ptr); } break; @@ -2272,15 +2272,15 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { SmallVector<uint32_t, 8> SubVecMask; for (unsigned i = 0; i != RetWidth; ++i) SubVecMask.push_back((int)i); - VectorHalfAsShorts = Builder->CreateShuffleVector( + VectorHalfAsShorts = Builder.CreateShuffleVector( Arg, UndefValue::get(ArgType), SubVecMask); } auto VectorHalfType = VectorType::get(Type::getHalfTy(II->getContext()), RetWidth); auto VectorHalfs = - Builder->CreateBitCast(VectorHalfAsShorts, VectorHalfType); - auto VectorFloats = Builder->CreateFPExt(VectorHalfs, RetType); + Builder.CreateBitCast(VectorHalfAsShorts, VectorHalfType); + auto VectorFloats = Builder.CreateFPExt(VectorHalfs, RetType); return replaceInstUsesWith(*II, VectorFloats); } @@ -2334,7 +2334,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::x86_avx_movmsk_pd_256: case Intrinsic::x86_avx_movmsk_ps_256: case Intrinsic::x86_avx2_pmovmskb: { - if (Value *V = simplifyX86movmsk(*II, *Builder)) + if (Value *V = simplifyX86movmsk(*II)) return replaceInstUsesWith(*II, V); break; } @@ -2437,25 +2437,25 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { default: llvm_unreachable("Case stmts out of sync!"); case Intrinsic::x86_avx512_mask_add_ps_512: case Intrinsic::x86_avx512_mask_add_pd_512: - V = Builder->CreateFAdd(Arg0, Arg1); + V = Builder.CreateFAdd(Arg0, Arg1); break; case Intrinsic::x86_avx512_mask_sub_ps_512: case Intrinsic::x86_avx512_mask_sub_pd_512: - V = Builder->CreateFSub(Arg0, Arg1); + V = 
Builder.CreateFSub(Arg0, Arg1); break; case Intrinsic::x86_avx512_mask_mul_ps_512: case Intrinsic::x86_avx512_mask_mul_pd_512: - V = Builder->CreateFMul(Arg0, Arg1); + V = Builder.CreateFMul(Arg0, Arg1); break; case Intrinsic::x86_avx512_mask_div_ps_512: case Intrinsic::x86_avx512_mask_div_pd_512: - V = Builder->CreateFDiv(Arg0, Arg1); + V = Builder.CreateFDiv(Arg0, Arg1); break; } // Create a select for the masking. V = emitX86MaskSelect(II->getArgOperand(3), V, II->getArgOperand(2), - *Builder); + Builder); return replaceInstUsesWith(*II, V); } } @@ -2476,27 +2476,27 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // Extract the element as scalars. Value *Arg0 = II->getArgOperand(0); Value *Arg1 = II->getArgOperand(1); - Value *LHS = Builder->CreateExtractElement(Arg0, (uint64_t)0); - Value *RHS = Builder->CreateExtractElement(Arg1, (uint64_t)0); + Value *LHS = Builder.CreateExtractElement(Arg0, (uint64_t)0); + Value *RHS = Builder.CreateExtractElement(Arg1, (uint64_t)0); Value *V; switch (II->getIntrinsicID()) { default: llvm_unreachable("Case stmts out of sync!"); case Intrinsic::x86_avx512_mask_add_ss_round: case Intrinsic::x86_avx512_mask_add_sd_round: - V = Builder->CreateFAdd(LHS, RHS); + V = Builder.CreateFAdd(LHS, RHS); break; case Intrinsic::x86_avx512_mask_sub_ss_round: case Intrinsic::x86_avx512_mask_sub_sd_round: - V = Builder->CreateFSub(LHS, RHS); + V = Builder.CreateFSub(LHS, RHS); break; case Intrinsic::x86_avx512_mask_mul_ss_round: case Intrinsic::x86_avx512_mask_mul_sd_round: - V = Builder->CreateFMul(LHS, RHS); + V = Builder.CreateFMul(LHS, RHS); break; case Intrinsic::x86_avx512_mask_div_ss_round: case Intrinsic::x86_avx512_mask_div_sd_round: - V = Builder->CreateFDiv(LHS, RHS); + V = Builder.CreateFDiv(LHS, RHS); break; } @@ -2506,18 +2506,18 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // We don't need a select if we know the mask bit is a 1. if (!C || !C->getValue()[0]) { // Cast the mask to an i1 vector and then extract the lowest element. - auto *MaskTy = VectorType::get(Builder->getInt1Ty(), + auto *MaskTy = VectorType::get(Builder.getInt1Ty(), cast<IntegerType>(Mask->getType())->getBitWidth()); - Mask = Builder->CreateBitCast(Mask, MaskTy); - Mask = Builder->CreateExtractElement(Mask, (uint64_t)0); + Mask = Builder.CreateBitCast(Mask, MaskTy); + Mask = Builder.CreateExtractElement(Mask, (uint64_t)0); // Extract the lowest element from the passthru operand. - Value *Passthru = Builder->CreateExtractElement(II->getArgOperand(2), + Value *Passthru = Builder.CreateExtractElement(II->getArgOperand(2), (uint64_t)0); - V = Builder->CreateSelect(Mask, V, Passthru); + V = Builder.CreateSelect(Mask, V, Passthru); } // Insert the result back into the original argument 0. 
- V = Builder->CreateInsertElement(Arg0, V, (uint64_t)0); + V = Builder.CreateInsertElement(Arg0, V, (uint64_t)0); return replaceInstUsesWith(*II, V); } @@ -2598,7 +2598,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::x86_avx512_pslli_d_512: case Intrinsic::x86_avx512_pslli_q_512: case Intrinsic::x86_avx512_pslli_w_512: - if (Value *V = simplifyX86immShift(*II, *Builder)) + if (Value *V = simplifyX86immShift(*II, Builder)) return replaceInstUsesWith(*II, V); break; @@ -2629,7 +2629,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::x86_avx512_psll_d_512: case Intrinsic::x86_avx512_psll_q_512: case Intrinsic::x86_avx512_psll_w_512: { - if (Value *V = simplifyX86immShift(*II, *Builder)) + if (Value *V = simplifyX86immShift(*II, Builder)) return replaceInstUsesWith(*II, V); // SSE2/AVX2 uses only the first 64-bits of the 128-bit vector @@ -2673,7 +2673,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::x86_avx512_psrlv_w_128: case Intrinsic::x86_avx512_psrlv_w_256: case Intrinsic::x86_avx512_psrlv_w_512: - if (Value *V = simplifyX86varShift(*II, *Builder)) + if (Value *V = simplifyX86varShift(*II, Builder)) return replaceInstUsesWith(*II, V); break; @@ -2683,7 +2683,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::x86_avx2_pmulu_dq: case Intrinsic::x86_avx512_pmul_dq_512: case Intrinsic::x86_avx512_pmulu_dq_512: { - if (Value *V = simplifyX86muldq(*II, *Builder)) + if (Value *V = simplifyX86muldq(*II, Builder)) return replaceInstUsesWith(*II, V); unsigned VWidth = II->getType()->getVectorNumElements(); @@ -2703,7 +2703,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::x86_avx2_packsswb: case Intrinsic::x86_avx512_packssdw_512: case Intrinsic::x86_avx512_packsswb_512: - if (Value *V = simplifyX86pack(*II, *this, *Builder, true)) + if (Value *V = simplifyX86pack(*II, true)) return replaceInstUsesWith(*II, V); break; @@ -2713,7 +2713,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::x86_avx2_packuswb: case Intrinsic::x86_avx512_packusdw_512: case Intrinsic::x86_avx512_packuswb_512: - if (Value *V = simplifyX86pack(*II, *this, *Builder, false)) + if (Value *V = simplifyX86pack(*II, false)) return replaceInstUsesWith(*II, V); break; @@ -2756,7 +2756,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { } case Intrinsic::x86_sse41_insertps: - if (Value *V = simplifyX86insertps(*II, *Builder)) + if (Value *V = simplifyX86insertps(*II, Builder)) return replaceInstUsesWith(*II, V); break; @@ -2779,7 +2779,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { : nullptr; // Attempt to simplify to a constant, shuffle vector or EXTRQI call. - if (Value *V = simplifyX86extrq(*II, Op0, CILength, CIIndex, *Builder)) + if (Value *V = simplifyX86extrq(*II, Op0, CILength, CIIndex, Builder)) return replaceInstUsesWith(*II, V); // EXTRQ only uses the lowest 64-bits of the first 128-bit vector @@ -2811,7 +2811,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { ConstantInt *CIIndex = dyn_cast<ConstantInt>(II->getArgOperand(2)); // Attempt to simplify to a constant or shuffle vector. 
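// A scalar model of the avx512 masked scalar ops handled above: only lane 0
// is computed, and mask bit 0 selects between the result and the passthru's
// lane 0, while the upper lanes come from the first operand. Illustrative
// only; the values are arbitrary:
#include <cassert>

int main() {
  float A0 = 2.0f, B0 = 3.0f, Passthru0 = -1.0f;
  for (int MaskBit0 = 0; MaskBit0 <= 1; ++MaskBit0) {
    float Lane0 = MaskBit0 ? (A0 + B0) : Passthru0;  // add_ss_round, masked
    assert(Lane0 == (MaskBit0 ? 5.0f : -1.0f));
  }
  return 0;
}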
- if (Value *V = simplifyX86extrq(*II, Op0, CILength, CIIndex, *Builder)) + if (Value *V = simplifyX86extrq(*II, Op0, CILength, CIIndex, Builder)) return replaceInstUsesWith(*II, V); // EXTRQI only uses the lowest 64-bits of the first 128-bit vector @@ -2843,7 +2843,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { const APInt &V11 = CI11->getValue(); APInt Len = V11.zextOrTrunc(6); APInt Idx = V11.lshr(8).zextOrTrunc(6); - if (Value *V = simplifyX86insertq(*II, Op0, Op1, Len, Idx, *Builder)) + if (Value *V = simplifyX86insertq(*II, Op0, Op1, Len, Idx, Builder)) return replaceInstUsesWith(*II, V); } @@ -2876,7 +2876,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { if (CILength && CIIndex) { APInt Len = CILength->getValue().zextOrTrunc(6); APInt Idx = CIIndex->getValue().zextOrTrunc(6); - if (Value *V = simplifyX86insertq(*II, Op0, Op1, Len, Idx, *Builder)) + if (Value *V = simplifyX86insertq(*II, Op0, Op1, Len, Idx, Builder)) return replaceInstUsesWith(*II, V); } @@ -2930,7 +2930,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::x86_ssse3_pshuf_b_128: case Intrinsic::x86_avx2_pshuf_b: case Intrinsic::x86_avx512_pshuf_b_512: - if (Value *V = simplifyX86pshufb(*II, *Builder)) + if (Value *V = simplifyX86pshufb(*II, Builder)) return replaceInstUsesWith(*II, V); break; @@ -2940,13 +2940,13 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::x86_avx_vpermilvar_pd: case Intrinsic::x86_avx_vpermilvar_pd_256: case Intrinsic::x86_avx512_vpermilvar_pd_512: - if (Value *V = simplifyX86vpermilvar(*II, *Builder)) + if (Value *V = simplifyX86vpermilvar(*II, Builder)) return replaceInstUsesWith(*II, V); break; case Intrinsic::x86_avx2_permd: case Intrinsic::x86_avx2_permps: - if (Value *V = simplifyX86vpermv(*II, *Builder)) + if (Value *V = simplifyX86vpermv(*II, Builder)) return replaceInstUsesWith(*II, V); break; @@ -2964,10 +2964,10 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::x86_avx512_mask_permvar_sf_512: case Intrinsic::x86_avx512_mask_permvar_si_256: case Intrinsic::x86_avx512_mask_permvar_si_512: - if (Value *V = simplifyX86vpermv(*II, *Builder)) { + if (Value *V = simplifyX86vpermv(*II, Builder)) { // We simplified the permuting, now create a select for the masking. 
V = emitX86MaskSelect(II->getArgOperand(3), V, II->getArgOperand(2), - *Builder); + Builder); return replaceInstUsesWith(*II, V); } break; @@ -2976,7 +2976,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::x86_avx_vperm2f128_ps_256: case Intrinsic::x86_avx_vperm2f128_si_256: case Intrinsic::x86_avx2_vperm2i128: - if (Value *V = simplifyX86vperm2(*II, *Builder)) + if (Value *V = simplifyX86vperm2(*II, Builder)) return replaceInstUsesWith(*II, V); break; @@ -3009,7 +3009,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::x86_xop_vpcomd: case Intrinsic::x86_xop_vpcomq: case Intrinsic::x86_xop_vpcomw: - if (Value *V = simplifyX86vpcom(*II, *Builder, true)) + if (Value *V = simplifyX86vpcom(*II, Builder, true)) return replaceInstUsesWith(*II, V); break; @@ -3017,7 +3017,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::x86_xop_vpcomud: case Intrinsic::x86_xop_vpcomuq: case Intrinsic::x86_xop_vpcomuw: - if (Value *V = simplifyX86vpcom(*II, *Builder, false)) + if (Value *V = simplifyX86vpcom(*II, Builder, false)) return replaceInstUsesWith(*II, V); break; @@ -3044,10 +3044,10 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { if (AllEltsOk) { // Cast the input vectors to byte vectors. - Value *Op0 = Builder->CreateBitCast(II->getArgOperand(0), - Mask->getType()); - Value *Op1 = Builder->CreateBitCast(II->getArgOperand(1), - Mask->getType()); + Value *Op0 = Builder.CreateBitCast(II->getArgOperand(0), + Mask->getType()); + Value *Op1 = Builder.CreateBitCast(II->getArgOperand(1), + Mask->getType()); Value *Result = UndefValue::get(Op0->getType()); // Only extract each element once. @@ -3067,13 +3067,13 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { Value *Op0ToUse = (DL.isLittleEndian()) ? Op1 : Op0; Value *Op1ToUse = (DL.isLittleEndian()) ? Op0 : Op1; ExtractedElts[Idx] = - Builder->CreateExtractElement(Idx < 16 ? Op0ToUse : Op1ToUse, - Builder->getInt32(Idx&15)); + Builder.CreateExtractElement(Idx < 16 ? Op0ToUse : Op1ToUse, + Builder.getInt32(Idx&15)); } // Insert this value into the result vector. - Result = Builder->CreateInsertElement(Result, ExtractedElts[Idx], - Builder->getInt32(i)); + Result = Builder.CreateInsertElement(Result, ExtractedElts[Idx], + Builder.getInt32(i)); } return CastInst::Create(Instruction::BitCast, Result, CI.getType()); } @@ -3238,7 +3238,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { if (Mask == (S_NAN | Q_NAN)) { // Equivalent of isnan. Replace with standard fcmp. - Value *FCmp = Builder->CreateFCmpUNO(Src0, Src0); + Value *FCmp = Builder.CreateFCmpUNO(Src0, Src0); FCmp->takeName(II); return replaceInstUsesWith(*II, FCmp); } @@ -3250,7 +3250,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // Clamp mask to used bits if ((Mask & FullMask) != Mask) { - CallInst *NewCall = Builder->CreateCall(II->getCalledFunction(), + CallInst *NewCall = Builder.CreateCall(II->getCalledFunction(), { Src0, ConstantInt::get(Src1->getType(), Mask & FullMask) } ); @@ -3343,13 +3343,12 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // TODO: Also emit sub if only width is constant. 
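// The class-intrinsic case above lowers an S_NAN|Q_NAN mask to
// 'fcmp uno x, x': NaN is the only value that compares unordered with
// itself. Standalone illustration:
#include <cassert>
#include <limits>

int main() {
  float NaN = std::numeric_limits<float>::quiet_NaN();
  assert(NaN != NaN);          // unordered self-compare: true only for NaN
  assert(!(1.0f != 1.0f));
  return 0;
}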
if (!CWidth && COffset && Offset == 0) { Constant *KSize = ConstantInt::get(COffset->getType(), IntSize); - Value *ShiftVal = Builder->CreateSub(KSize, II->getArgOperand(2)); - ShiftVal = Builder->CreateZExt(ShiftVal, II->getType()); + Value *ShiftVal = Builder.CreateSub(KSize, II->getArgOperand(2)); + ShiftVal = Builder.CreateZExt(ShiftVal, II->getType()); - Value *Shl = Builder->CreateShl(Src, ShiftVal); - Value *RightShift = Signed ? - Builder->CreateAShr(Shl, ShiftVal) : - Builder->CreateLShr(Shl, ShiftVal); + Value *Shl = Builder.CreateShl(Src, ShiftVal); + Value *RightShift = Signed ? Builder.CreateAShr(Shl, ShiftVal) + : Builder.CreateLShr(Shl, ShiftVal); RightShift->takeName(II); return replaceInstUsesWith(*II, RightShift); } @@ -3360,17 +3359,15 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // TODO: This allows folding to undef when the hardware has specific // behavior? if (Offset + Width < IntSize) { - Value *Shl = Builder->CreateShl(Src, IntSize - Offset - Width); - Value *RightShift = Signed ? - Builder->CreateAShr(Shl, IntSize - Width) : - Builder->CreateLShr(Shl, IntSize - Width); + Value *Shl = Builder.CreateShl(Src, IntSize - Offset - Width); + Value *RightShift = Signed ? Builder.CreateAShr(Shl, IntSize - Width) + : Builder.CreateLShr(Shl, IntSize - Width); RightShift->takeName(II); return replaceInstUsesWith(*II, RightShift); } - Value *RightShift = Signed ? - Builder->CreateAShr(Src, Offset) : - Builder->CreateLShr(Src, Offset); + Value *RightShift = Signed ? Builder.CreateAShr(Src, Offset) + : Builder.CreateLShr(Src, Offset); RightShift->takeName(II); return replaceInstUsesWith(*II, RightShift); @@ -3439,7 +3436,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { } if (match(Src2, m_NaN()) || isa<UndefValue>(Src2)) { - CallInst *NewCall = Builder->CreateMinNum(Src0, Src1); + CallInst *NewCall = Builder.CreateMinNum(Src0, Src1); NewCall->copyFastMathFlags(II); NewCall->takeName(II); return replaceInstUsesWith(*II, NewCall); @@ -3451,7 +3448,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { APFloat Result = fmed3AMDGCN(C0->getValueAPF(), C1->getValueAPF(), C2->getValueAPF()); return replaceInstUsesWith(*II, - ConstantFP::get(Builder->getContext(), Result)); + ConstantFP::get(Builder.getContext(), Result)); } } } @@ -3494,7 +3491,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { Metadata *MDArgs[] = {MDString::get(II->getContext(), "exec")}; MDNode *MD = MDNode::get(II->getContext(), MDArgs); Value *Args[] = {MetadataAsValue::get(II->getContext(), MD)}; - CallInst *NewCall = Builder->CreateCall(NewF, Args); + CallInst *NewCall = Builder.CreateCall(NewF, Args); NewCall->addAttribute(AttributeList::FunctionIndex, Attribute::Convergent); NewCall->takeName(II); @@ -3556,7 +3553,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { SrcLHS->getType()); Value *Args[] = { SrcLHS, SrcRHS, ConstantInt::get(CC->getType(), SrcPred) }; - CallInst *NewCall = Builder->CreateCall(NewF, Args); + CallInst *NewCall = Builder.CreateCall(NewF, Args); NewCall->takeName(II); return replaceInstUsesWith(*II, NewCall); } @@ -3633,16 +3630,14 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // the InstCombineIRInserter object. 
Value *AssumeIntrinsic = II->getCalledValue(), *A, *B; if (match(IIOperand, m_And(m_Value(A), m_Value(B)))) { - Builder->CreateCall(AssumeIntrinsic, A, II->getName()); - Builder->CreateCall(AssumeIntrinsic, B, II->getName()); + Builder.CreateCall(AssumeIntrinsic, A, II->getName()); + Builder.CreateCall(AssumeIntrinsic, B, II->getName()); return eraseInstFromFunction(*II); } // assume(!(a || b)) -> assume(!a); assume(!b); if (match(IIOperand, m_Not(m_Or(m_Value(A), m_Value(B))))) { - Builder->CreateCall(AssumeIntrinsic, Builder->CreateNot(A), - II->getName()); - Builder->CreateCall(AssumeIntrinsic, Builder->CreateNot(B), - II->getName()); + Builder.CreateCall(AssumeIntrinsic, Builder.CreateNot(A), II->getName()); + Builder.CreateCall(AssumeIntrinsic, Builder.CreateNot(B), II->getName()); return eraseInstFromFunction(*II); } @@ -3726,7 +3721,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { return eraseInstFromFunction(*NextInst); // Otherwise canonicalize guard(a); guard(b) -> guard(a & b). - II->setArgOperand(0, Builder->CreateAnd(CurrCond, NextCond)); + II->setArgOperand(0, Builder.CreateAnd(CurrCond, NextCond)); return eraseInstFromFunction(*NextInst); } break; @@ -4163,7 +4158,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { Value *NewArg = *AI; if ((*AI)->getType() != ParamTy) - NewArg = Builder->CreateBitOrPointerCast(*AI, ParamTy); + NewArg = Builder.CreateBitOrPointerCast(*AI, ParamTy); Args.push_back(NewArg); // Add any parameter attributes. @@ -4189,7 +4184,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { // Must promote to pass through va_arg area! Instruction::CastOps opcode = CastInst::getCastOpcode(*AI, false, PTy, false); - NewArg = Builder->CreateCast(opcode, *AI, PTy); + NewArg = Builder.CreateCast(opcode, *AI, PTy); } Args.push_back(NewArg); @@ -4215,10 +4210,10 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { CallSite NewCS; if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) { - NewCS = Builder->CreateInvoke(Callee, II->getNormalDest(), - II->getUnwindDest(), Args, OpBundles); + NewCS = Builder.CreateInvoke(Callee, II->getNormalDest(), + II->getUnwindDest(), Args, OpBundles); } else { - NewCS = Builder->CreateCall(Callee, Args, OpBundles); + NewCS = Builder.CreateCall(Callee, Args, OpBundles); cast<CallInst>(NewCS.getInstruction()) ->setTailCallKind(cast<CallInst>(Caller)->getTailCallKind()); } @@ -4328,7 +4323,7 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS, // Add the chain argument and attributes. Value *NestVal = Tramp->getArgOperand(2); if (NestVal->getType() != NestTy) - NestVal = Builder->CreateBitCast(NestVal, NestTy, "nest"); + NestVal = Builder.CreateBitCast(NestVal, NestTy, "nest"); NewArgs.push_back(NestVal); NewArgAttrs.push_back(NestAttr); } diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp index d3049389dfb9f..dfdfd3e9da840 100644 --- a/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -84,7 +84,7 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, AllocaInst &AI) { PointerType *PTy = cast<PointerType>(CI.getType()); - BuilderTy AllocaBuilder(*Builder); + BuilderTy AllocaBuilder(Builder); AllocaBuilder.SetInsertPoint(&AI); // Get the type really allocated and the type casted to. 
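The hunks above and below are instances of a single mechanical change: InstCombiner now stores its IRBuilder by reference rather than by pointer, so every `Builder->Foo(...)` becomes `Builder.Foo(...)` and helpers take `InstCombiner::BuilderTy &` instead of dereferencing a pointer. A minimal sketch of the before/after shape, using a hypothetical `foldExample` helper rather than code from this patch:

#include "llvm/IR/IRBuilder.h"
using namespace llvm;

// Before: the builder arrives by pointer and every use dereferences it.
static Value *foldExampleOld(Value *A, Value *B, IRBuilder<> *Builder) {
  return Builder->CreateAnd(A, Builder->CreateNot(B));
}

// After: the builder arrives by reference; the non-null invariant is now
// carried in the type, and call sites read without the constant '->'.
static Value *foldExampleNew(Value *A, Value *B, IRBuilder<> &Builder) {
  return Builder.CreateAnd(A, Builder.CreateNot(B));
}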
@@ -406,8 +406,7 @@ static bool canEvaluateTruncated(Value *V, Type *Ty, InstCombiner &IC, /// trunc (lshr (bitcast <4 x i32> %X to i128), 32) to i32 /// ---> /// extractelement <4 x i32> %X, 1 -static Instruction *foldVecTruncToExtElt(TruncInst &Trunc, InstCombiner &IC, - const DataLayout &DL) { +static Instruction *foldVecTruncToExtElt(TruncInst &Trunc, InstCombiner &IC) { Value *TruncOp = Trunc.getOperand(0); Type *DestType = Trunc.getType(); if (!TruncOp->hasOneUse() || !isa<IntegerType>(DestType)) @@ -434,14 +433,14 @@ static Instruction *foldVecTruncToExtElt(TruncInst &Trunc, InstCombiner &IC, unsigned NumVecElts = VecWidth / DestWidth; if (VecType->getElementType() != DestType) { VecType = VectorType::get(DestType, NumVecElts); - VecInput = IC.Builder->CreateBitCast(VecInput, VecType, "bc"); + VecInput = IC.Builder.CreateBitCast(VecInput, VecType, "bc"); } unsigned Elt = ShiftAmount / DestWidth; - if (DL.isBigEndian()) + if (IC.getDataLayout().isBigEndian()) Elt = NumVecElts - 1 - Elt; - return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt)); + return ExtractElementInst::Create(VecInput, IC.Builder.getInt32(Elt)); } /// Try to narrow the width of bitwise logic instructions with constants. @@ -460,7 +459,7 @@ Instruction *InstCombiner::shrinkBitwiseLogic(TruncInst &Trunc) { // trunc (logic X, C) --> logic (trunc X, C') Constant *NarrowC = ConstantExpr::getTrunc(C, DestTy); - Value *NarrowOp0 = Builder->CreateTrunc(LogicOp->getOperand(0), DestTy); + Value *NarrowOp0 = Builder.CreateTrunc(LogicOp->getOperand(0), DestTy); return BinaryOperator::Create(LogicOp->getOpcode(), NarrowOp0, NarrowC); } @@ -554,7 +553,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) { // Canonicalize trunc x to i1 -> (icmp ne (and x, 1), 0), likewise for vector. if (DestTy->getScalarSizeInBits() == 1) { Constant *One = ConstantInt::get(SrcTy, 1); - Src = Builder->CreateAnd(Src, One); + Src = Builder.CreateAnd(Src, One); Value *Zero = Constant::getNullValue(Src->getType()); return new ICmpInst(ICmpInst::ICMP_NE, Src, Zero); } @@ -580,7 +579,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) { // Since we're doing an lshr and a zero extend, and know that the shift // amount is smaller than ASize, it is always safe to do the shift in A's // type, then zero extend or truncate to the result. - Value *Shift = Builder->CreateLShr(A, Cst->getZExtValue()); + Value *Shift = Builder.CreateLShr(A, Cst->getZExtValue()); Shift->takeName(Src); return CastInst::CreateIntegerCast(Shift, DestTy, false); } @@ -610,7 +609,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) { return BinaryOperator::CreateAShr(A, ConstantInt::get(CI.getType(), std::min(ShiftAmt, ASize - 1))); if (SExt->hasOneUse()) { - Value *Shift = Builder->CreateAShr(A, std::min(ShiftAmt, ASize-1)); + Value *Shift = Builder.CreateAShr(A, std::min(ShiftAmt, ASize - 1)); Shift->takeName(Src); return CastInst::CreateIntegerCast(Shift, CI.getType(), true); } @@ -620,10 +619,10 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) { if (Instruction *I = shrinkBitwiseLogic(CI)) return I; - if (Instruction *I = shrinkSplatShuffle(CI, *Builder)) + if (Instruction *I = shrinkSplatShuffle(CI, Builder)) return I; - if (Instruction *I = shrinkInsertElt(CI, *Builder)) + if (Instruction *I = shrinkInsertElt(CI, Builder)) return I; if (Src->hasOneUse() && isa<IntegerType>(SrcTy) && @@ -636,7 +635,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) { // FoldShiftByConstant and is the extend in reg pattern. 
const unsigned DestSize = DestTy->getScalarSizeInBits(); if (Cst->getValue().ult(DestSize)) { - Value *NewTrunc = Builder->CreateTrunc(A, DestTy, A->getName() + ".tr"); + Value *NewTrunc = Builder.CreateTrunc(A, DestTy, A->getName() + ".tr"); return BinaryOperator::Create( Instruction::Shl, NewTrunc, @@ -645,7 +644,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) { } } - if (Instruction *I = foldVecTruncToExtElt(CI, *this, DL)) + if (Instruction *I = foldVecTruncToExtElt(CI, *this)) return I; return nullptr; @@ -668,13 +667,13 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, ZExtInst &CI, Value *In = ICI->getOperand(0); Value *Sh = ConstantInt::get(In->getType(), In->getType()->getScalarSizeInBits() - 1); - In = Builder->CreateLShr(In, Sh, In->getName() + ".lobit"); + In = Builder.CreateLShr(In, Sh, In->getName() + ".lobit"); if (In->getType() != CI.getType()) - In = Builder->CreateIntCast(In, CI.getType(), false/*ZExt*/); + In = Builder.CreateIntCast(In, CI.getType(), false /*ZExt*/); if (ICI->getPredicate() == ICmpInst::ICMP_SGT) { Constant *One = ConstantInt::get(In->getType(), 1); - In = Builder->CreateXor(In, One, In->getName() + ".not"); + In = Builder.CreateXor(In, One, In->getName() + ".not"); } return replaceInstUsesWith(CI, In); @@ -713,19 +712,19 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, ZExtInst &CI, if (ShAmt) { // Perform a logical shr by shiftamt. // Insert the shift to put the result in the low bit. - In = Builder->CreateLShr(In, ConstantInt::get(In->getType(), ShAmt), - In->getName() + ".lobit"); + In = Builder.CreateLShr(In, ConstantInt::get(In->getType(), ShAmt), + In->getName() + ".lobit"); } if (!Op1CV.isNullValue() == isNE) { // Toggle the low bit. Constant *One = ConstantInt::get(In->getType(), 1); - In = Builder->CreateXor(In, One); + In = Builder.CreateXor(In, One); } if (CI.getType() == In->getType()) return replaceInstUsesWith(CI, In); - Value *IntCast = Builder->CreateIntCast(In, CI.getType(), false); + Value *IntCast = Builder.CreateIntCast(In, CI.getType(), false); return replaceInstUsesWith(CI, IntCast); } } @@ -748,19 +747,19 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, ZExtInst &CI, if (UnknownBit.countPopulation() == 1) { if (!DoTransform) return ICI; - Value *Result = Builder->CreateXor(LHS, RHS); + Value *Result = Builder.CreateXor(LHS, RHS); // Mask off any bits that are set and won't be shifted away. if (KnownLHS.One.uge(UnknownBit)) - Result = Builder->CreateAnd(Result, + Result = Builder.CreateAnd(Result, ConstantInt::get(ITy, UnknownBit)); // Shift the bit we're testing down to the lsb. 
- Result = Builder->CreateLShr( + Result = Builder.CreateLShr( Result, ConstantInt::get(ITy, UnknownBit.countTrailingZeros())); if (ICI->getPredicate() == ICmpInst::ICMP_EQ) - Result = Builder->CreateXor(Result, ConstantInt::get(ITy, 1)); + Result = Builder.CreateXor(Result, ConstantInt::get(ITy, 1)); Result->takeName(ICI); return replaceInstUsesWith(CI, Result); } @@ -960,7 +959,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) { if (SrcSize < DstSize) { APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize)); Constant *AndConst = ConstantInt::get(A->getType(), AndValue); - Value *And = Builder->CreateAnd(A, AndConst, CSrc->getName()+".mask"); + Value *And = Builder.CreateAnd(A, AndConst, CSrc->getName() + ".mask"); return new ZExtInst(And, CI.getType()); } @@ -970,7 +969,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) { AndValue)); } if (SrcSize > DstSize) { - Value *Trunc = Builder->CreateTrunc(A, CI.getType()); + Value *Trunc = Builder.CreateTrunc(A, CI.getType()); APInt AndValue(APInt::getLowBitsSet(DstSize, MidSize)); return BinaryOperator::CreateAnd(Trunc, ConstantInt::get(Trunc->getType(), @@ -992,8 +991,8 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) { (transformZExtICmp(LHS, CI, false) || transformZExtICmp(RHS, CI, false))) { // zext (or icmp, icmp) -> or (zext icmp), (zext icmp) - Value *LCast = Builder->CreateZExt(LHS, CI.getType(), LHS->getName()); - Value *RCast = Builder->CreateZExt(RHS, CI.getType(), RHS->getName()); + Value *LCast = Builder.CreateZExt(LHS, CI.getType(), LHS->getName()); + Value *RCast = Builder.CreateZExt(RHS, CI.getType(), RHS->getName()); BinaryOperator *Or = BinaryOperator::Create(Instruction::Or, LCast, RCast); // Perform the elimination. @@ -1020,7 +1019,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) { match(And, m_OneUse(m_And(m_Trunc(m_Value(X)), m_Specific(C)))) && X->getType() == CI.getType()) { Constant *ZC = ConstantExpr::getZExt(C, CI.getType()); - return BinaryOperator::CreateXor(Builder->CreateAnd(X, ZC), ZC); + return BinaryOperator::CreateXor(Builder.CreateAnd(X, ZC), ZC); } return nullptr; @@ -1043,12 +1042,12 @@ Instruction *InstCombiner::transformSExtICmp(ICmpInst *ICI, Instruction &CI) { Value *Sh = ConstantInt::get(Op0->getType(), Op0->getType()->getScalarSizeInBits()-1); - Value *In = Builder->CreateAShr(Op0, Sh, Op0->getName()+".lobit"); + Value *In = Builder.CreateAShr(Op0, Sh, Op0->getName() + ".lobit"); if (In->getType() != CI.getType()) - In = Builder->CreateIntCast(In, CI.getType(), true/*SExt*/); + In = Builder.CreateIntCast(In, CI.getType(), true /*SExt*/); if (Pred == ICmpInst::ICMP_SGT) - In = Builder->CreateNot(In, In->getName()+".not"); + In = Builder.CreateNot(In, In->getName() + ".not"); return replaceInstUsesWith(CI, In); } } @@ -1079,26 +1078,26 @@ Instruction *InstCombiner::transformSExtICmp(ICmpInst *ICI, Instruction &CI) { unsigned ShiftAmt = KnownZeroMask.countTrailingZeros(); // Perform a right shift to place the desired bit in the LSB. if (ShiftAmt) - In = Builder->CreateLShr(In, - ConstantInt::get(In->getType(), ShiftAmt)); + In = Builder.CreateLShr(In, + ConstantInt::get(In->getType(), ShiftAmt)); // At this point "In" is either 1 or 0. Subtract 1 to turn // {1, 0} -> {0, -1}. 
- In = Builder->CreateAdd(In, - ConstantInt::getAllOnesValue(In->getType()), - "sext"); + In = Builder.CreateAdd(In, + ConstantInt::getAllOnesValue(In->getType()), + "sext"); } else { // sext ((x & 2^n) != 0) -> (x << bitwidth-n) a>> bitwidth-1 // sext ((x & 2^n) == 2^n) -> (x << bitwidth-n) a>> bitwidth-1 unsigned ShiftAmt = KnownZeroMask.countLeadingZeros(); // Perform a left shift to place the desired bit in the MSB. if (ShiftAmt) - In = Builder->CreateShl(In, - ConstantInt::get(In->getType(), ShiftAmt)); + In = Builder.CreateShl(In, + ConstantInt::get(In->getType(), ShiftAmt)); // Distribute the bit over the whole bit width. - In = Builder->CreateAShr(In, ConstantInt::get(In->getType(), - KnownZeroMask.getBitWidth() - 1), "sext"); + In = Builder.CreateAShr(In, ConstantInt::get(In->getType(), + KnownZeroMask.getBitWidth() - 1), "sext"); } if (CI.getType() == In->getType()) @@ -1191,7 +1190,7 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) { // instead. KnownBits Known = computeKnownBits(Src, 0, &CI); if (Known.isNonNegative()) { - Value *ZExt = Builder->CreateZExt(Src, DestTy); + Value *ZExt = Builder.CreateZExt(Src, DestTy); return replaceInstUsesWith(CI, ZExt); } @@ -1217,7 +1216,7 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) { // We need to emit a shl + ashr to do the sign extend. Value *ShAmt = ConstantInt::get(DestTy, DestBitSize-SrcBitSize); - return BinaryOperator::CreateAShr(Builder->CreateShl(Res, ShAmt, "sext"), + return BinaryOperator::CreateAShr(Builder.CreateShl(Res, ShAmt, "sext"), ShAmt); } @@ -1229,7 +1228,7 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) { unsigned SrcBitSize = SrcTy->getScalarSizeInBits(); unsigned DestBitSize = DestTy->getScalarSizeInBits(); Constant *ShAmt = ConstantInt::get(DestTy, DestBitSize - SrcBitSize); - return BinaryOperator::CreateAShr(Builder->CreateShl(X, ShAmt), ShAmt); + return BinaryOperator::CreateAShr(Builder.CreateShl(X, ShAmt), ShAmt); } if (ICmpInst *ICI = dyn_cast<ICmpInst>(Src)) @@ -1258,7 +1257,7 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) { unsigned SrcDstSize = CI.getType()->getScalarSizeInBits(); unsigned ShAmt = CA->getZExtValue()+SrcDstSize-MidSize; Constant *ShAmtV = ConstantInt::get(CI.getType(), ShAmt); - A = Builder->CreateShl(A, ShAmtV, CI.getName()); + A = Builder.CreateShl(A, ShAmtV, CI.getName()); return BinaryOperator::CreateAShr(A, ShAmtV); } @@ -1347,9 +1346,9 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { // case of interest here is (float)((double)float + float)). if (OpWidth >= 2*DstWidth+1 && DstWidth >= SrcWidth) { if (LHSOrig->getType() != CI.getType()) - LHSOrig = Builder->CreateFPExt(LHSOrig, CI.getType()); + LHSOrig = Builder.CreateFPExt(LHSOrig, CI.getType()); if (RHSOrig->getType() != CI.getType()) - RHSOrig = Builder->CreateFPExt(RHSOrig, CI.getType()); + RHSOrig = Builder.CreateFPExt(RHSOrig, CI.getType()); Instruction *RI = BinaryOperator::Create(OpI->getOpcode(), LHSOrig, RHSOrig); RI->copyFastMathFlags(OpI); @@ -1364,9 +1363,9 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { // in the destination format if it can represent both sources. 
if (OpWidth >= LHSWidth + RHSWidth && DstWidth >= SrcWidth) { if (LHSOrig->getType() != CI.getType()) - LHSOrig = Builder->CreateFPExt(LHSOrig, CI.getType()); + LHSOrig = Builder.CreateFPExt(LHSOrig, CI.getType()); if (RHSOrig->getType() != CI.getType()) - RHSOrig = Builder->CreateFPExt(RHSOrig, CI.getType()); + RHSOrig = Builder.CreateFPExt(RHSOrig, CI.getType()); Instruction *RI = BinaryOperator::CreateFMul(LHSOrig, RHSOrig); RI->copyFastMathFlags(OpI); @@ -1382,9 +1381,9 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { // TODO: Tighten bound via rigorous analysis of the unbalanced case. if (OpWidth >= 2*DstWidth && DstWidth >= SrcWidth) { if (LHSOrig->getType() != CI.getType()) - LHSOrig = Builder->CreateFPExt(LHSOrig, CI.getType()); + LHSOrig = Builder.CreateFPExt(LHSOrig, CI.getType()); if (RHSOrig->getType() != CI.getType()) - RHSOrig = Builder->CreateFPExt(RHSOrig, CI.getType()); + RHSOrig = Builder.CreateFPExt(RHSOrig, CI.getType()); Instruction *RI = BinaryOperator::CreateFDiv(LHSOrig, RHSOrig); RI->copyFastMathFlags(OpI); @@ -1399,11 +1398,11 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { if (SrcWidth == OpWidth) break; if (LHSWidth < SrcWidth) - LHSOrig = Builder->CreateFPExt(LHSOrig, RHSOrig->getType()); + LHSOrig = Builder.CreateFPExt(LHSOrig, RHSOrig->getType()); else if (RHSWidth <= SrcWidth) - RHSOrig = Builder->CreateFPExt(RHSOrig, LHSOrig->getType()); + RHSOrig = Builder.CreateFPExt(RHSOrig, LHSOrig->getType()); if (LHSOrig != OpI->getOperand(0) || RHSOrig != OpI->getOperand(1)) { - Value *ExactResult = Builder->CreateFRem(LHSOrig, RHSOrig); + Value *ExactResult = Builder.CreateFRem(LHSOrig, RHSOrig); if (Instruction *RI = dyn_cast<Instruction>(ExactResult)) RI->copyFastMathFlags(OpI); return CastInst::CreateFPCast(ExactResult, CI.getType()); @@ -1412,8 +1411,8 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { // (fptrunc (fneg x)) -> (fneg (fptrunc x)) if (BinaryOperator::isFNeg(OpI)) { - Value *InnerTrunc = Builder->CreateFPTrunc(OpI->getOperand(1), - CI.getType()); + Value *InnerTrunc = Builder.CreateFPTrunc(OpI->getOperand(1), + CI.getType()); Instruction *RI = BinaryOperator::CreateFNeg(InnerTrunc); RI->copyFastMathFlags(OpI); return RI; @@ -1432,10 +1431,8 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { (isa<ConstantFP>(SI->getOperand(1)) || isa<ConstantFP>(SI->getOperand(2))) && matchSelectPattern(SI, LHS, RHS).Flavor == SPF_UNKNOWN) { - Value *LHSTrunc = Builder->CreateFPTrunc(SI->getOperand(1), - CI.getType()); - Value *RHSTrunc = Builder->CreateFPTrunc(SI->getOperand(2), - CI.getType()); + Value *LHSTrunc = Builder.CreateFPTrunc(SI->getOperand(1), CI.getType()); + Value *RHSTrunc = Builder.CreateFPTrunc(SI->getOperand(2), CI.getType()); return SelectInst::Create(SI->getOperand(0), LHSTrunc, RHSTrunc); } @@ -1465,7 +1462,7 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { // Do unary FP operation on smaller type. 
// (fptrunc (fabs x)) -> (fabs (fptrunc x)) - Value *InnerTrunc = Builder->CreateFPTrunc(Src, CI.getType()); + Value *InnerTrunc = Builder.CreateFPTrunc(Src, CI.getType()); Type *IntrinsicType[] = { CI.getType() }; Function *Overload = Intrinsic::getDeclaration( CI.getModule(), II->getIntrinsicID(), IntrinsicType); @@ -1482,7 +1479,7 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { } } - if (Instruction *I = shrinkInsertElt(CI, *Builder)) + if (Instruction *I = shrinkInsertElt(CI, Builder)) return I; return nullptr; @@ -1577,7 +1574,7 @@ Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) { if (CI.getType()->isVectorTy()) // Handle vectors of pointers. Ty = VectorType::get(Ty, CI.getType()->getVectorNumElements()); - Value *P = Builder->CreateZExtOrTrunc(CI.getOperand(0), Ty); + Value *P = Builder.CreateZExtOrTrunc(CI.getOperand(0), Ty); return new IntToPtrInst(P, CI.getType()); } @@ -1627,7 +1624,7 @@ Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) { if (Ty->isVectorTy()) // Handle vectors of pointers. PtrTy = VectorType::get(PtrTy, Ty->getVectorNumElements()); - Value *P = Builder->CreatePtrToInt(CI.getOperand(0), PtrTy); + Value *P = Builder.CreatePtrToInt(CI.getOperand(0), PtrTy); return CastInst::CreateIntegerCast(P, Ty, /*isSigned=*/false); } @@ -1653,7 +1650,7 @@ static Instruction *optimizeVectorResize(Value *InVal, VectorType *DestTy, return nullptr; SrcTy = VectorType::get(DestTy->getElementType(), SrcTy->getNumElements()); - InVal = IC.Builder->CreateBitCast(InVal, SrcTy); + InVal = IC.Builder.CreateBitCast(InVal, SrcTy); } // Now that the element types match, get the shuffle mask and RHS of the @@ -1833,8 +1830,8 @@ static Value *optimizeIntegerToVectorInsertions(BitCastInst &CI, for (unsigned i = 0, e = Elements.size(); i != e; ++i) { if (!Elements[i]) continue; // Unset element. - Result = IC.Builder->CreateInsertElement(Result, Elements[i], - IC.Builder->getInt32(i)); + Result = IC.Builder.CreateInsertElement(Result, Elements[i], + IC.Builder.getInt32(i)); } return Result; @@ -1845,8 +1842,7 @@ static Value *optimizeIntegerToVectorInsertions(BitCastInst &CI, /// vectors better than bitcasts of scalars because vector registers are /// usually not type-specific like scalar integer or scalar floating-point. static Instruction *canonicalizeBitCastExtElt(BitCastInst &BitCast, - InstCombiner &IC, - const DataLayout &DL) { + InstCombiner &IC) { // TODO: Create and use a pattern matcher for ExtractElementInst. 
auto *ExtElt = dyn_cast<ExtractElementInst>(BitCast.getOperand(0)); if (!ExtElt || !ExtElt->hasOneUse()) @@ -1860,8 +1856,8 @@ static Instruction *canonicalizeBitCastExtElt(BitCastInst &BitCast, unsigned NumElts = ExtElt->getVectorOperandType()->getNumElements(); auto *NewVecType = VectorType::get(DestType, NumElts); - auto *NewBC = IC.Builder->CreateBitCast(ExtElt->getVectorOperand(), - NewVecType, "bc"); + auto *NewBC = IC.Builder.CreateBitCast(ExtElt->getVectorOperand(), + NewVecType, "bc"); return ExtractElementInst::Create(NewBC, ExtElt->getIndexOperand()); } @@ -1870,7 +1866,7 @@ static Instruction *foldBitCastBitwiseLogic(BitCastInst &BitCast, InstCombiner::BuilderTy &Builder) { Type *DestTy = BitCast.getType(); BinaryOperator *BO; - if (!DestTy->getScalarType()->isIntegerTy() || + if (!DestTy->isIntOrIntVectorTy() || !match(BitCast.getOperand(0), m_OneUse(m_BinOp(BO))) || !BO->isBitwiseLogicOp()) return nullptr; @@ -2033,8 +2029,8 @@ Instruction *InstCombiner::optimizeBitCastFromPhi(CastInst &CI, PHINode *PN) { // For each old PHI node, create a corresponding new PHI node with a type A. SmallDenseMap<PHINode *, PHINode *> NewPNodes; for (auto *OldPN : OldPhiNodes) { - Builder->SetInsertPoint(OldPN); - PHINode *NewPN = Builder->CreatePHI(DestTy, OldPN->getNumOperands()); + Builder.SetInsertPoint(OldPN); + PHINode *NewPN = Builder.CreatePHI(DestTy, OldPN->getNumOperands()); NewPNodes[OldPN] = NewPN; } @@ -2047,8 +2043,8 @@ Instruction *InstCombiner::optimizeBitCastFromPhi(CastInst &CI, PHINode *PN) { if (auto *C = dyn_cast<Constant>(V)) { NewV = ConstantExpr::getBitCast(C, DestTy); } else if (auto *LI = dyn_cast<LoadInst>(V)) { - Builder->SetInsertPoint(LI->getNextNode()); - NewV = Builder->CreateBitCast(LI, DestTy); + Builder.SetInsertPoint(LI->getNextNode()); + NewV = Builder.CreateBitCast(LI, DestTy); Worklist.Add(LI); } else if (auto *BCI = dyn_cast<BitCastInst>(V)) { NewV = BCI->getOperand(0); @@ -2064,9 +2060,9 @@ Instruction *InstCombiner::optimizeBitCastFromPhi(CastInst &CI, PHINode *PN) { for (User *U : PN->users()) { auto *SI = dyn_cast<StoreInst>(U); if (SI && SI->isSimple() && SI->getOperand(0) == PN) { - Builder->SetInsertPoint(SI); + Builder.SetInsertPoint(SI); auto *NewBC = - cast<BitCastInst>(Builder->CreateBitCast(NewPNodes[PN], SrcTy)); + cast<BitCastInst>(Builder.CreateBitCast(NewPNodes[PN], SrcTy)); SI->setOperand(0, NewBC); Worklist.Add(SI); assert(hasStoreUsersOnly(*NewBC)); @@ -2121,14 +2117,14 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { // If we found a path from the src to dest, create the getelementptr now. if (SrcElTy == DstElTy) { - SmallVector<Value *, 8> Idxs(NumZeros + 1, Builder->getInt32(0)); + SmallVector<Value *, 8> Idxs(NumZeros + 1, Builder.getInt32(0)); return GetElementPtrInst::CreateInBounds(Src, Idxs); } } if (VectorType *DestVTy = dyn_cast<VectorType>(DestTy)) { if (DestVTy->getNumElements() == 1 && !SrcTy->isVectorTy()) { - Value *Elem = Builder->CreateBitCast(Src, DestVTy->getElementType()); + Value *Elem = Builder.CreateBitCast(Src, DestVTy->getElementType()); return InsertElementInst::Create(UndefValue::get(DestTy), Elem, Constant::getNullValue(Type::getInt32Ty(CI.getContext()))); // FIXME: Canonicalize bitcast(insertelement) -> insertelement(bitcast) @@ -2161,7 +2157,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { // scalar-scalar cast. 
if (!DestTy->isVectorTy()) { Value *Elem = - Builder->CreateExtractElement(Src, + Builder.CreateExtractElement(Src, Constant::getNullValue(Type::getInt32Ty(CI.getContext()))); return CastInst::Create(Instruction::BitCast, Elem, DestTy); } @@ -2190,8 +2186,8 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { Tmp->getOperand(0)->getType() == DestTy) || ((Tmp = dyn_cast<BitCastInst>(SVI->getOperand(1))) && Tmp->getOperand(0)->getType() == DestTy)) { - Value *LHS = Builder->CreateBitCast(SVI->getOperand(0), DestTy); - Value *RHS = Builder->CreateBitCast(SVI->getOperand(1), DestTy); + Value *LHS = Builder.CreateBitCast(SVI->getOperand(0), DestTy); + Value *RHS = Builder.CreateBitCast(SVI->getOperand(1), DestTy); // Return a new shuffle vector. Use the same element ID's, as we // know the vector types match #elts. return new ShuffleVectorInst(LHS, RHS, SVI->getOperand(2)); @@ -2204,13 +2200,13 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { if (Instruction *I = optimizeBitCastFromPhi(CI, PN)) return I; - if (Instruction *I = canonicalizeBitCastExtElt(CI, *this, DL)) + if (Instruction *I = canonicalizeBitCastExtElt(CI, *this)) return I; - if (Instruction *I = foldBitCastBitwiseLogic(CI, *Builder)) + if (Instruction *I = foldBitCastBitwiseLogic(CI, Builder)) return I; - if (Instruction *I = foldBitCastSelect(CI, *Builder)) + if (Instruction *I = foldBitCastSelect(CI, Builder)) return I; if (SrcTy->isPointerTy()) @@ -2234,7 +2230,7 @@ Instruction *InstCombiner::visitAddrSpaceCast(AddrSpaceCastInst &CI) { MidTy = VectorType::get(MidTy, VT->getNumElements()); } - Value *NewBitCast = Builder->CreateBitCast(Src, MidTy); + Value *NewBitCast = Builder.CreateBitCast(Src, MidTy); return new AddrSpaceCastInst(NewBitCast, CI.getType()); } diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp index 58b8b2f526299..60d1cde971dd4 100644 --- a/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -392,7 +392,7 @@ Instruction *InstCombiner::foldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, Type *IntPtrTy = DL.getIntPtrType(GEP->getType()); unsigned PtrSize = IntPtrTy->getIntegerBitWidth(); if (Idx->getType()->getPrimitiveSizeInBits() > PtrSize) - Idx = Builder->CreateTrunc(Idx, IntPtrTy); + Idx = Builder.CreateTrunc(Idx, IntPtrTy); } // If the comparison is only true for one or two elements, emit direct @@ -400,7 +400,7 @@ Instruction *InstCombiner::foldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, if (SecondTrueElement != Overdefined) { // None true -> false. if (FirstTrueElement == Undefined) - return replaceInstUsesWith(ICI, Builder->getFalse()); + return replaceInstUsesWith(ICI, Builder.getFalse()); Value *FirstTrueIdx = ConstantInt::get(Idx->getType(), FirstTrueElement); @@ -409,9 +409,9 @@ Instruction *InstCombiner::foldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, return new ICmpInst(ICmpInst::ICMP_EQ, Idx, FirstTrueIdx); // True for two elements -> 'i == 47 | i == 72'. 
- Value *C1 = Builder->CreateICmpEQ(Idx, FirstTrueIdx); + Value *C1 = Builder.CreateICmpEQ(Idx, FirstTrueIdx); Value *SecondTrueIdx = ConstantInt::get(Idx->getType(), SecondTrueElement); - Value *C2 = Builder->CreateICmpEQ(Idx, SecondTrueIdx); + Value *C2 = Builder.CreateICmpEQ(Idx, SecondTrueIdx); return BinaryOperator::CreateOr(C1, C2); } @@ -420,7 +420,7 @@ Instruction *InstCombiner::foldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, if (SecondFalseElement != Overdefined) { // None false -> true. if (FirstFalseElement == Undefined) - return replaceInstUsesWith(ICI, Builder->getTrue()); + return replaceInstUsesWith(ICI, Builder.getTrue()); Value *FirstFalseIdx = ConstantInt::get(Idx->getType(), FirstFalseElement); @@ -429,9 +429,9 @@ Instruction *InstCombiner::foldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, return new ICmpInst(ICmpInst::ICMP_NE, Idx, FirstFalseIdx); // False for two elements -> 'i != 47 & i != 72'. - Value *C1 = Builder->CreateICmpNE(Idx, FirstFalseIdx); + Value *C1 = Builder.CreateICmpNE(Idx, FirstFalseIdx); Value *SecondFalseIdx = ConstantInt::get(Idx->getType(),SecondFalseElement); - Value *C2 = Builder->CreateICmpNE(Idx, SecondFalseIdx); + Value *C2 = Builder.CreateICmpNE(Idx, SecondFalseIdx); return BinaryOperator::CreateAnd(C1, C2); } @@ -443,7 +443,7 @@ Instruction *InstCombiner::foldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, // Generate (i-FirstTrue) <u (TrueRangeEnd-FirstTrue+1). if (FirstTrueElement) { Value *Offs = ConstantInt::get(Idx->getType(), -FirstTrueElement); - Idx = Builder->CreateAdd(Idx, Offs); + Idx = Builder.CreateAdd(Idx, Offs); } Value *End = ConstantInt::get(Idx->getType(), @@ -457,7 +457,7 @@ Instruction *InstCombiner::foldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, // Generate (i-FirstFalse) >u (FalseRangeEnd-FirstFalse). if (FirstFalseElement) { Value *Offs = ConstantInt::get(Idx->getType(), -FirstFalseElement); - Idx = Builder->CreateAdd(Idx, Offs); + Idx = Builder.CreateAdd(Idx, Offs); } Value *End = ConstantInt::get(Idx->getType(), @@ -481,9 +481,9 @@ Instruction *InstCombiner::foldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, Ty = DL.getSmallestLegalIntType(Init->getContext(), ArrayElementCount); if (Ty) { - Value *V = Builder->CreateIntCast(Idx, Ty, false); - V = Builder->CreateLShr(ConstantInt::get(Ty, MagicBitvector), V); - V = Builder->CreateAnd(ConstantInt::get(Ty, 1), V); + Value *V = Builder.CreateIntCast(Idx, Ty, false); + V = Builder.CreateLShr(ConstantInt::get(Ty, MagicBitvector), V); + V = Builder.CreateAnd(ConstantInt::get(Ty, 1), V); return new ICmpInst(ICmpInst::ICMP_NE, V, ConstantInt::get(Ty, 0)); } } @@ -566,7 +566,7 @@ static Value *evaluateGEPOffsetExpression(User *GEP, InstCombiner &IC, // we don't need to bother extending: the extension won't affect where the // computation crosses zero. if (VariableIdx->getType()->getPrimitiveSizeInBits() > IntPtrWidth) { - VariableIdx = IC.Builder->CreateTrunc(VariableIdx, IntPtrTy); + VariableIdx = IC.Builder.CreateTrunc(VariableIdx, IntPtrTy); } return VariableIdx; } @@ -588,10 +588,10 @@ static Value *evaluateGEPOffsetExpression(User *GEP, InstCombiner &IC, // Okay, we can do this evaluation. Start by converting the index to intptr. 
if (VariableIdx->getType() != IntPtrTy) - VariableIdx = IC.Builder->CreateIntCast(VariableIdx, IntPtrTy, + VariableIdx = IC.Builder.CreateIntCast(VariableIdx, IntPtrTy, true /*Signed*/); Constant *OffsetVal = ConstantInt::get(IntPtrTy, NewOffs); - return IC.Builder->CreateAdd(VariableIdx, OffsetVal, "offset"); + return IC.Builder.CreateAdd(VariableIdx, OffsetVal, "offset"); } /// Returns true if we can rewrite Start as a GEP with pointer Base @@ -981,13 +981,13 @@ Instruction *InstCombiner::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS, if (LHSIndexTy != RHSIndexTy) { if (LHSIndexTy->getPrimitiveSizeInBits() < RHSIndexTy->getPrimitiveSizeInBits()) { - ROffset = Builder->CreateTrunc(ROffset, LHSIndexTy); + ROffset = Builder.CreateTrunc(ROffset, LHSIndexTy); } else - LOffset = Builder->CreateTrunc(LOffset, RHSIndexTy); + LOffset = Builder.CreateTrunc(LOffset, RHSIndexTy); } - Value *Cmp = Builder->CreateICmp(ICmpInst::getSignedPredicate(Cond), - LOffset, ROffset); + Value *Cmp = Builder.CreateICmp(ICmpInst::getSignedPredicate(Cond), + LOffset, ROffset); return replaceInstUsesWith(I, Cmp); } @@ -1026,7 +1026,7 @@ Instruction *InstCombiner::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS, if (NumDifferences == 0) // SAME GEP? return replaceInstUsesWith(I, // No comparison is needed here. - Builder->getInt1(ICmpInst::isTrueWhenEqual(Cond))); + Builder.getInt1(ICmpInst::isTrueWhenEqual(Cond))); else if (NumDifferences == 1 && GEPsInBounds) { Value *LHSV = GEPLHS->getOperand(DiffOperand); @@ -1174,7 +1174,7 @@ Instruction *InstCombiner::foldICmpAddOpConst(Instruction &ICI, // (X+ -1) >s X --> X <s (MAXSINT-(-1-1)) --> X == -128 assert(Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE); - Constant *C = Builder->getInt(CI->getValue()-1); + Constant *C = Builder.getInt(CI->getValue() - 1); return new ICmpInst(ICmpInst::ICMP_SLT, X, ConstantExpr::getSub(SMax, C)); } @@ -1347,17 +1347,17 @@ static Instruction *processUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B, Value *F = Intrinsic::getDeclaration(I.getModule(), Intrinsic::sadd_with_overflow, NewType); - InstCombiner::BuilderTy *Builder = IC.Builder; + InstCombiner::BuilderTy &Builder = IC.Builder; // Put the new code above the original add, in case there are any uses of the // add between the add and the compare. - Builder->SetInsertPoint(OrigAdd); + Builder.SetInsertPoint(OrigAdd); - Value *TruncA = Builder->CreateTrunc(A, NewType, A->getName() + ".trunc"); - Value *TruncB = Builder->CreateTrunc(B, NewType, B->getName() + ".trunc"); - CallInst *Call = Builder->CreateCall(F, {TruncA, TruncB}, "sadd"); - Value *Add = Builder->CreateExtractValue(Call, 0, "sadd.result"); - Value *ZExt = Builder->CreateZExt(Add, OrigAdd->getType()); + Value *TruncA = Builder.CreateTrunc(A, NewType, A->getName() + ".trunc"); + Value *TruncB = Builder.CreateTrunc(B, NewType, B->getName() + ".trunc"); + CallInst *Call = Builder.CreateCall(F, {TruncA, TruncB}, "sadd"); + Value *Add = Builder.CreateExtractValue(Call, 0, "sadd.result"); + Value *ZExt = Builder.CreateZExt(Add, OrigAdd->getType()); // The inner add was the result of the narrow add, zero extended to the // wider type. Replace it with the result computed by the intrinsic. 
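The next hunk touches the dominating-condition reasoning in foldICmpWithConstant, which is easiest to read with a concrete instance. A sketch under assumed values, not taken from this patch: if a dominating branch proves X u< 8 and the guarded compare is X u< 16, then no value allowed by the dominating branch fails the compare, so the compare folds to true.

#include "llvm/ADT/APInt.h"
#include "llvm/IR/ConstantRange.h"
using namespace llvm;

// Range implied by the dominating branch: X in [0, 8).
ConstantRange DominatingCR(APInt(32, 0), APInt(32, 8));
// Range in which the compare below it (X u< 16) holds: [0, 16).
ConstantRange CR(APInt(32, 0), APInt(32, 16));

ConstantRange Intersection = DominatingCR.intersectWith(CR); // [0, 8)
ConstantRange Difference = DominatingCR.difference(CR);      // empty set

// Empty difference: every value the dominating branch permits satisfies the
// compare, so it folds to true. Symmetrically, an empty intersection would
// fold it to false, and a single surviving element on either side becomes an
// equality or inequality against that constant, as the hunk below does.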
@@ -1434,9 +1434,9 @@ Instruction *InstCombiner::foldICmpWithConstant(ICmpInst &Cmp) { ConstantRange Intersection = DominatingCR.intersectWith(CR); ConstantRange Difference = DominatingCR.difference(CR); if (Intersection.isEmptySet()) - return replaceInstUsesWith(Cmp, Builder->getFalse()); + return replaceInstUsesWith(Cmp, Builder.getFalse()); if (Difference.isEmptySet()) - return replaceInstUsesWith(Cmp, Builder->getTrue()); + return replaceInstUsesWith(Cmp, Builder.getTrue()); // If this is a normal comparison, it demands all bits. If it is a sign // bit comparison, it only demands the sign bit. @@ -1452,9 +1452,9 @@ Instruction *InstCombiner::foldICmpWithConstant(ICmpInst &Cmp) { return nullptr; if (auto *AI = Intersection.getSingleElement()) - return new ICmpInst(ICmpInst::ICMP_EQ, X, Builder->getInt(*AI)); + return new ICmpInst(ICmpInst::ICMP_EQ, X, Builder.getInt(*AI)); if (auto *AD = Difference.getSingleElement()) - return new ICmpInst(ICmpInst::ICMP_NE, X, Builder->getInt(*AD)); + return new ICmpInst(ICmpInst::ICMP_NE, X, Builder.getInt(*AD)); } return nullptr; @@ -1628,11 +1628,11 @@ Instruction *InstCombiner::foldICmpAndShift(ICmpInst &Cmp, BinaryOperator *And, !Shift->isArithmeticShift() && !isa<Constant>(Shift->getOperand(0))) { // Compute C2 << Y. Value *NewShift = - IsShl ? Builder->CreateLShr(And->getOperand(1), Shift->getOperand(1)) - : Builder->CreateShl(And->getOperand(1), Shift->getOperand(1)); + IsShl ? Builder.CreateLShr(And->getOperand(1), Shift->getOperand(1)) + : Builder.CreateShl(And->getOperand(1), Shift->getOperand(1)); // Compute X & (C2 << Y). - Value *NewAnd = Builder->CreateAnd(Shift->getOperand(0), NewShift); + Value *NewAnd = Builder.CreateAnd(Shift->getOperand(0), NewShift); Cmp.setOperand(0, NewAnd); return &Cmp; } @@ -1670,7 +1670,7 @@ Instruction *InstCombiner::foldICmpAndConstConst(ICmpInst &Cmp, unsigned WideScalarBits = WideType->getScalarSizeInBits(); Constant *ZextC1 = ConstantInt::get(WideType, C1->zext(WideScalarBits)); Constant *ZextC2 = ConstantInt::get(WideType, C2->zext(WideScalarBits)); - Value *NewAnd = Builder->CreateAnd(W, ZextC2, And->getName()); + Value *NewAnd = Builder.CreateAnd(W, ZextC2, And->getName()); return new ICmpInst(Cmp.getPredicate(), NewAnd, ZextC1); } } @@ -1704,12 +1704,12 @@ Instruction *InstCombiner::foldICmpAndConstConst(ICmpInst &Cmp, NewOr = ConstantExpr::getOr(ConstantExpr::getNUWShl(One, C), One); } else { if (UsesRemoved >= 3) - NewOr = Builder->CreateOr(Builder->CreateShl(One, B, LShr->getName(), - /*HasNUW=*/true), - One, Or->getName()); + NewOr = Builder.CreateOr(Builder.CreateShl(One, B, LShr->getName(), + /*HasNUW=*/true), + One, Or->getName()); } if (NewOr) { - Value *NewAnd = Builder->CreateAnd(A, NewOr, And->getName()); + Value *NewAnd = Builder.CreateAnd(A, NewOr, And->getName()); Cmp.setOperand(0, NewAnd); return &Cmp; } @@ -1772,7 +1772,7 @@ Instruction *InstCombiner::foldICmpAndConstant(ICmpInst &Cmp, Type *NTy = IntegerType::get(Cmp.getContext(), ExactLogBase2 + 1); if (And->getType()->isVectorTy()) NTy = VectorType::get(NTy, And->getType()->getVectorNumElements()); - Value *Trunc = Builder->CreateTrunc(X, NTy); + Value *Trunc = Builder.CreateTrunc(X, NTy); auto NewPred = Cmp.getPredicate() == CmpInst::ICMP_EQ ? 
CmpInst::ICMP_SGE : CmpInst::ICMP_SLT; return new ICmpInst(NewPred, Trunc, Constant::getNullValue(NTy)); @@ -1811,9 +1811,9 @@ Instruction *InstCombiner::foldICmpOrConstant(ICmpInst &Cmp, BinaryOperator *Or, // Simplify icmp eq (or (ptrtoint P), (ptrtoint Q)), 0 // -> and (icmp eq P, null), (icmp eq Q, null). Value *CmpP = - Builder->CreateICmp(Pred, P, ConstantInt::getNullValue(P->getType())); + Builder.CreateICmp(Pred, P, ConstantInt::getNullValue(P->getType())); Value *CmpQ = - Builder->CreateICmp(Pred, Q, ConstantInt::getNullValue(Q->getType())); + Builder.CreateICmp(Pred, Q, ConstantInt::getNullValue(Q->getType())); auto LogicOpc = Pred == ICmpInst::Predicate::ICMP_EQ ? Instruction::And : Instruction::Or; return BinaryOperator::Create(LogicOpc, CmpP, CmpQ); @@ -1993,7 +1993,7 @@ Instruction *InstCombiner::foldICmpShlConstant(ICmpInst &Cmp, Constant *Mask = ConstantInt::get( ShType, APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt->getZExtValue())); - Value *And = Builder->CreateAnd(X, Mask, Shl->getName() + ".mask"); + Value *And = Builder.CreateAnd(X, Mask, Shl->getName() + ".mask"); Constant *LShrC = ConstantInt::get(ShType, C->lshr(*ShiftAmt)); return new ICmpInst(Pred, And, LShrC); } @@ -2005,7 +2005,7 @@ Instruction *InstCombiner::foldICmpShlConstant(ICmpInst &Cmp, Constant *Mask = ConstantInt::get( ShType, APInt::getOneBitSet(TypeBits, TypeBits - ShiftAmt->getZExtValue() - 1)); - Value *And = Builder->CreateAnd(X, Mask, Shl->getName() + ".mask"); + Value *And = Builder.CreateAnd(X, Mask, Shl->getName() + ".mask"); return new ICmpInst(TrueIfSigned ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ, And, Constant::getNullValue(ShType)); } @@ -2024,7 +2024,7 @@ Instruction *InstCombiner::foldICmpShlConstant(ICmpInst &Cmp, TruncTy = VectorType::get(TruncTy, ShType->getVectorNumElements()); Constant *NewC = ConstantInt::get(TruncTy, C->ashr(*ShiftAmt).trunc(TypeBits - Amt)); - return new ICmpInst(Pred, Builder->CreateTrunc(X, TruncTy), NewC); + return new ICmpInst(Pred, Builder.CreateTrunc(X, TruncTy), NewC); } return nullptr; @@ -2076,8 +2076,8 @@ Instruction *InstCombiner::foldICmpShrConstant(ICmpInst &Cmp, Constant *DivCst = ConstantInt::get( Shr->getType(), APInt::getOneBitSet(TypeBits, ShAmtVal)); - Value *Tmp = IsAShr ? Builder->CreateSDiv(X, DivCst, "", Shr->isExact()) - : Builder->CreateUDiv(X, DivCst, "", Shr->isExact()); + Value *Tmp = IsAShr ? Builder.CreateSDiv(X, DivCst, "", Shr->isExact()) + : Builder.CreateUDiv(X, DivCst, "", Shr->isExact()); Cmp.setOperand(0, Tmp); @@ -2115,7 +2115,7 @@ Instruction *InstCombiner::foldICmpShrConstant(ICmpInst &Cmp, // Otherwise strength reduce the shift into an 'and'. APInt Val(APInt::getHighBitsSet(TypeBits, TypeBits - ShAmtVal)); Constant *Mask = ConstantInt::get(Shr->getType(), Val); - Value *And = Builder->CreateAnd(X, Mask, Shr->getName() + ".mask"); + Value *And = Builder.CreateAnd(X, Mask, Shr->getName() + ".mask"); return new ICmpInst(Pred, And, ShiftedCmpRHS); } @@ -2279,7 +2279,7 @@ Instruction *InstCombiner::foldICmpDivConstant(ICmpInst &Cmp, default: llvm_unreachable("Unhandled icmp opcode!"); case ICmpInst::ICMP_EQ: if (LoOverflow && HiOverflow) - return replaceInstUsesWith(Cmp, Builder->getFalse()); + return replaceInstUsesWith(Cmp, Builder.getFalse()); if (HiOverflow) return new ICmpInst(DivIsSigned ? 
ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE, X, LoBound); @@ -2291,7 +2291,7 @@ Instruction *InstCombiner::foldICmpDivConstant(ICmpInst &Cmp, HiBound->getUniqueInteger(), DivIsSigned, true)); case ICmpInst::ICMP_NE: if (LoOverflow && HiOverflow) - return replaceInstUsesWith(Cmp, Builder->getTrue()); + return replaceInstUsesWith(Cmp, Builder.getTrue()); if (HiOverflow) return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, X, LoBound); @@ -2305,16 +2305,16 @@ Instruction *InstCombiner::foldICmpDivConstant(ICmpInst &Cmp, case ICmpInst::ICMP_ULT: case ICmpInst::ICMP_SLT: if (LoOverflow == +1) // Low bound is greater than input range. - return replaceInstUsesWith(Cmp, Builder->getTrue()); + return replaceInstUsesWith(Cmp, Builder.getTrue()); if (LoOverflow == -1) // Low bound is less than input range. - return replaceInstUsesWith(Cmp, Builder->getFalse()); + return replaceInstUsesWith(Cmp, Builder.getFalse()); return new ICmpInst(Pred, X, LoBound); case ICmpInst::ICMP_UGT: case ICmpInst::ICMP_SGT: if (HiOverflow == +1) // High bound greater than input range. - return replaceInstUsesWith(Cmp, Builder->getFalse()); + return replaceInstUsesWith(Cmp, Builder.getFalse()); if (HiOverflow == -1) // High bound less than input range. - return replaceInstUsesWith(Cmp, Builder->getTrue()); + return replaceInstUsesWith(Cmp, Builder.getTrue()); if (Pred == ICmpInst::ICMP_UGT) return new ICmpInst(ICmpInst::ICMP_UGE, X, HiBound); return new ICmpInst(ICmpInst::ICMP_SGE, X, HiBound); @@ -2361,12 +2361,12 @@ Instruction *InstCombiner::foldICmpSubConstant(ICmpInst &Cmp, // iff (C2 & (C - 1)) == C - 1 and C is a power of 2 if (Pred == ICmpInst::ICMP_ULT && C->isPowerOf2() && (*C2 & (*C - 1)) == (*C - 1)) - return new ICmpInst(ICmpInst::ICMP_EQ, Builder->CreateOr(Y, *C - 1), X); + return new ICmpInst(ICmpInst::ICMP_EQ, Builder.CreateOr(Y, *C - 1), X); // C2 - Y >u C -> (Y | C) != C2 // iff C2 & C == C and C + 1 is a power of 2 if (Pred == ICmpInst::ICMP_UGT && (*C + 1).isPowerOf2() && (*C2 & *C) == *C) - return new ICmpInst(ICmpInst::ICMP_NE, Builder->CreateOr(Y, *C), X); + return new ICmpInst(ICmpInst::ICMP_NE, Builder.CreateOr(Y, *C), X); return nullptr; } @@ -2422,14 +2422,14 @@ Instruction *InstCombiner::foldICmpAddConstant(ICmpInst &Cmp, // iff C & (C2-1) == 0 // C2 is a power of 2 if (Pred == ICmpInst::ICMP_ULT && C->isPowerOf2() && (*C2 & (*C - 1)) == 0) - return new ICmpInst(ICmpInst::ICMP_EQ, Builder->CreateAnd(X, -(*C)), + return new ICmpInst(ICmpInst::ICMP_EQ, Builder.CreateAnd(X, -(*C)), ConstantExpr::getNeg(cast<Constant>(Y))); // X+C >u C2 -> (X & ~C2) != C // iff C & C2 == 0 // C2+1 is a power of 2 if (Pred == ICmpInst::ICMP_UGT && (*C + 1).isPowerOf2() && (*C2 & *C) == 0) - return new ICmpInst(ICmpInst::ICMP_NE, Builder->CreateAnd(X, ~(*C)), + return new ICmpInst(ICmpInst::ICMP_NE, Builder.CreateAnd(X, ~(*C)), ConstantExpr::getNeg(cast<Constant>(Y))); return nullptr; @@ -2493,13 +2493,13 @@ Instruction *InstCombiner::foldICmpSelectConstant(ICmpInst &Cmp, // When none of the three constants satisfy the predicate for the RHS (C), // the entire original Cmp can be simplified to a false. 
- Value *Cond = Builder->getFalse(); + Value *Cond = Builder.getFalse(); if (TrueWhenLessThan) - Cond = Builder->CreateOr(Cond, Builder->CreateICmp(ICmpInst::ICMP_SLT, OrigLHS, OrigRHS)); + Cond = Builder.CreateOr(Cond, Builder.CreateICmp(ICmpInst::ICMP_SLT, OrigLHS, OrigRHS)); if (TrueWhenEqual) - Cond = Builder->CreateOr(Cond, Builder->CreateICmp(ICmpInst::ICMP_EQ, OrigLHS, OrigRHS)); + Cond = Builder.CreateOr(Cond, Builder.CreateICmp(ICmpInst::ICMP_EQ, OrigLHS, OrigRHS)); if (TrueWhenGreaterThan) - Cond = Builder->CreateOr(Cond, Builder->CreateICmp(ICmpInst::ICMP_SGT, OrigLHS, OrigRHS)); + Cond = Builder.CreateOr(Cond, Builder.CreateICmp(ICmpInst::ICMP_SGT, OrigLHS, OrigRHS)); return replaceInstUsesWith(Cmp, Cond); } @@ -2615,7 +2615,7 @@ Instruction *InstCombiner::foldICmpBinOpEqualityWithConstant(ICmpInst &Cmp, if (C->isNullValue() && BO->hasOneUse()) { const APInt *BOC; if (match(BOp1, m_APInt(BOC)) && BOC->sgt(1) && BOC->isPowerOf2()) { - Value *NewRem = Builder->CreateURem(BOp0, BOp1, BO->getName()); + Value *NewRem = Builder.CreateURem(BOp0, BOp1, BO->getName()); return new ICmpInst(Pred, NewRem, Constant::getNullValue(BO->getType())); } @@ -2637,7 +2637,7 @@ Instruction *InstCombiner::foldICmpBinOpEqualityWithConstant(ICmpInst &Cmp, if (Value *NegVal = dyn_castNegVal(BOp0)) return new ICmpInst(Pred, NegVal, BOp1); if (BO->hasOneUse()) { - Value *Neg = Builder->CreateNeg(BOp1); + Value *Neg = Builder.CreateNeg(BOp1); Neg->takeName(BO); return new ICmpInst(Pred, BOp0, Neg); } @@ -2676,7 +2676,7 @@ Instruction *InstCombiner::foldICmpBinOpEqualityWithConstant(ICmpInst &Cmp, // Replace (X | C) == -1 with (X & ~C) == ~C. // This removes the -1 constant. Constant *NotBOC = ConstantExpr::getNot(cast<Constant>(BOp1)); - Value *And = Builder->CreateAnd(BOp0, NotBOC); + Value *And = Builder.CreateAnd(BOp0, NotBOC); return new ICmpInst(Pred, And, NotBOC); } break; @@ -2740,23 +2740,26 @@ Instruction *InstCombiner::foldICmpIntrinsicWithConstant(ICmpInst &Cmp, if (!II || !Cmp.isEquality()) return nullptr; - // Handle icmp {eq|ne} <intrinsic>, intcst. + // Handle icmp {eq|ne} <intrinsic>, Constant. + Type *Ty = II->getType(); switch (II->getIntrinsicID()) { case Intrinsic::bswap: Worklist.Add(II); Cmp.setOperand(0, II->getArgOperand(0)); - Cmp.setOperand(1, Builder->getInt(C->byteSwap())); + Cmp.setOperand(1, ConstantInt::get(Ty, C->byteSwap())); return &Cmp; + case Intrinsic::ctlz: case Intrinsic::cttz: // ctz(A) == bitwidth(A) -> A == 0 and likewise for != if (*C == C->getBitWidth()) { Worklist.Add(II); Cmp.setOperand(0, II->getArgOperand(0)); - Cmp.setOperand(1, ConstantInt::getNullValue(II->getType())); + Cmp.setOperand(1, ConstantInt::getNullValue(Ty)); return &Cmp; } break; + case Intrinsic::ctpop: { // popcount(A) == 0 -> A == 0 and likewise for != // popcount(A) == bitwidth(A) -> A == -1 and likewise for != @@ -2764,8 +2767,8 @@ Instruction *InstCombiner::foldICmpIntrinsicWithConstant(ICmpInst &Cmp, if (IsZero || *C == C->getBitWidth()) { Worklist.Add(II); Cmp.setOperand(0, II->getArgOperand(0)); - auto *NewOp = IsZero ? Constant::getNullValue(II->getType()) - : Constant::getAllOnesValue(II->getType()); + auto *NewOp = + IsZero ? 
Constant::getNullValue(Ty) : Constant::getAllOnesValue(Ty); Cmp.setOperand(1, NewOp); return &Cmp; } @@ -2774,6 +2777,7 @@ Instruction *InstCombiner::foldICmpIntrinsicWithConstant(ICmpInst &Cmp, default: break; } + return nullptr; } @@ -2841,11 +2845,11 @@ Instruction *InstCombiner::foldICmpInstWithConstantNotInt(ICmpInst &I) { } if (Transform) { if (!Op1) - Op1 = Builder->CreateICmp(I.getPredicate(), LHSI->getOperand(1), RHSC, - I.getName()); + Op1 = Builder.CreateICmp(I.getPredicate(), LHSI->getOperand(1), RHSC, + I.getName()); if (!Op2) - Op2 = Builder->CreateICmp(I.getPredicate(), LHSI->getOperand(2), RHSC, - I.getName()); + Op2 = Builder.CreateICmp(I.getPredicate(), LHSI->getOperand(2), RHSC, + I.getName()); return SelectInst::Create(LHSI->getOperand(0), Op1, Op2); } break; @@ -3029,12 +3033,12 @@ Instruction *InstCombiner::foldICmpBinOp(ICmpInst &I) { APInt AP1Abs = C1->getValue().abs(); APInt AP2Abs = C2->getValue().abs(); if (AP1Abs.uge(AP2Abs)) { - ConstantInt *C3 = Builder->getInt(AP1 - AP2); - Value *NewAdd = Builder->CreateNSWAdd(A, C3); + ConstantInt *C3 = Builder.getInt(AP1 - AP2); + Value *NewAdd = Builder.CreateNSWAdd(A, C3); return new ICmpInst(Pred, NewAdd, C); } else { - ConstantInt *C3 = Builder->getInt(AP2 - AP1); - Value *NewAdd = Builder->CreateNSWAdd(C, C3); + ConstantInt *C3 = Builder.getInt(AP2 - AP1); + Value *NewAdd = Builder.CreateNSWAdd(C, C3); return new ICmpInst(Pred, A, NewAdd); } } @@ -3157,8 +3161,8 @@ Instruction *InstCombiner::foldICmpBinOp(ICmpInst &I) { Constant *Mask = ConstantInt::get( BO0->getType(), APInt::getLowBitsSet(C->getBitWidth(), C->getBitWidth() - TZs)); - Value *And1 = Builder->CreateAnd(BO0->getOperand(0), Mask); - Value *And2 = Builder->CreateAnd(BO1->getOperand(0), Mask); + Value *And1 = Builder.CreateAnd(BO0->getOperand(0), Mask); + Value *And2 = Builder.CreateAnd(BO1->getOperand(0), Mask); return new ICmpInst(Pred, And1, And2); } // If there are no trailing zeros in the multiplier, just eliminate @@ -3315,8 +3319,8 @@ Instruction *InstCombiner::foldICmpEquality(ICmpInst &I) { ConstantInt *C1, *C2; if (match(B, m_ConstantInt(C1)) && match(D, m_ConstantInt(C2)) && Op1->hasOneUse()) { - Constant *NC = Builder->getInt(C1->getValue() ^ C2->getValue()); - Value *Xor = Builder->CreateXor(C, NC); + Constant *NC = Builder.getInt(C1->getValue() ^ C2->getValue()); + Value *Xor = Builder.CreateXor(C, NC); return new ICmpInst(Pred, A, Xor); } @@ -3362,8 +3366,8 @@ Instruction *InstCombiner::foldICmpEquality(ICmpInst &I) { } if (X) { // Build (X^Y) & Z - Op1 = Builder->CreateXor(X, Y); - Op1 = Builder->CreateAnd(Op1, Z); + Op1 = Builder.CreateXor(X, Y); + Op1 = Builder.CreateAnd(Op1, Z); I.setOperand(0, Op1); I.setOperand(1, Constant::getNullValue(Op1->getType())); return &I; @@ -3380,7 +3384,7 @@ Instruction *InstCombiner::foldICmpEquality(ICmpInst &I) { APInt Pow2 = Cst1->getValue() + 1; if (Pow2.isPowerOf2() && isa<IntegerType>(A->getType()) && Pow2.logBase2() == cast<IntegerType>(A->getType())->getBitWidth()) - return new ICmpInst(Pred, A, Builder->CreateTrunc(B, A->getType())); + return new ICmpInst(Pred, A, Builder.CreateTrunc(B, A->getType())); } // (A >> C) == (B >> C) --> (A^B) u< (1 << C) @@ -3394,9 +3398,9 @@ Instruction *InstCombiner::foldICmpEquality(ICmpInst &I) { if (ShAmt < TypeBits && ShAmt != 0) { ICmpInst::Predicate NewPred = Pred == ICmpInst::ICMP_NE ? 
ICmpInst::ICMP_UGE : ICmpInst::ICMP_ULT; - Value *Xor = Builder->CreateXor(A, B, I.getName() + ".unshifted"); + Value *Xor = Builder.CreateXor(A, B, I.getName() + ".unshifted"); APInt CmpVal = APInt::getOneBitSet(TypeBits, ShAmt); - return new ICmpInst(NewPred, Xor, Builder->getInt(CmpVal)); + return new ICmpInst(NewPred, Xor, Builder.getInt(CmpVal)); } } @@ -3406,9 +3410,9 @@ Instruction *InstCombiner::foldICmpEquality(ICmpInst &I) { unsigned TypeBits = Cst1->getBitWidth(); unsigned ShAmt = (unsigned)Cst1->getLimitedValue(TypeBits); if (ShAmt < TypeBits && ShAmt != 0) { - Value *Xor = Builder->CreateXor(A, B, I.getName() + ".unshifted"); + Value *Xor = Builder.CreateXor(A, B, I.getName() + ".unshifted"); APInt AndVal = APInt::getLowBitsSet(TypeBits, TypeBits - ShAmt); - Value *And = Builder->CreateAnd(Xor, Builder->getInt(AndVal), + Value *And = Builder.CreateAnd(Xor, Builder.getInt(AndVal), I.getName() + ".mask"); return new ICmpInst(Pred, And, Constant::getNullValue(Cst1->getType())); } @@ -3433,11 +3437,20 @@ Instruction *InstCombiner::foldICmpEquality(ICmpInst &I) { APInt CmpV = Cst1->getValue().zext(ASize); CmpV <<= ShAmt; - Value *Mask = Builder->CreateAnd(A, Builder->getInt(MaskV)); - return new ICmpInst(Pred, Mask, Builder->getInt(CmpV)); + Value *Mask = Builder.CreateAnd(A, Builder.getInt(MaskV)); + return new ICmpInst(Pred, Mask, Builder.getInt(CmpV)); } } + // If both operands are byte-swapped or bit-reversed, just compare the + // original values. + // TODO: Move this to a function similar to foldICmpIntrinsicWithConstant() + // and handle more intrinsics. + if ((match(Op0, m_BSwap(m_Value(A))) && match(Op1, m_BSwap(m_Value(B)))) || + (match(Op0, m_BitReverse(m_Value(A))) && + match(Op1, m_BitReverse(m_Value(B))))) + return new ICmpInst(Pred, A, B); + return nullptr; } @@ -3462,7 +3475,7 @@ Instruction *InstCombiner::foldICmpWithCastAndCast(ICmpInst &ICmp) { RHSOp = RHSC->getOperand(0); // If the pointer types don't match, insert a bitcast. if (LHSCIOp->getType() != RHSOp->getType()) - RHSOp = Builder->CreateBitCast(RHSOp, LHSCIOp->getType()); + RHSOp = Builder.CreateBitCast(RHSOp, LHSCIOp->getType()); } } else if (auto *RHSC = dyn_cast<Constant>(ICmp.getOperand(1))) { RHSOp = ConstantExpr::getIntToPtr(RHSC, SrcTy); @@ -3546,7 +3559,7 @@ Instruction *InstCombiner::foldICmpWithCastAndCast(ICmpInst &ICmp) { // We're performing an unsigned comp with a sign extended value. // This is true if the input is >= 0. [aka >s -1] Constant *NegOne = Constant::getAllOnesValue(SrcTy); - Value *Result = Builder->CreateICmpSGT(LHSCIOp, NegOne, ICmp.getName()); + Value *Result = Builder.CreateICmpSGT(LHSCIOp, NegOne, ICmp.getName()); // Finally, return the value computed. if (ICmp.getPredicate() == ICmpInst::ICMP_ULT) @@ -3574,7 +3587,7 @@ bool InstCombiner::OptimizeOverflowCheck(OverflowCheckFlavor OCF, Value *LHS, // may be pointing to the compare. We want to insert the new instructions // before the add in case there are uses of the add between the add and the // compare. 
- Builder->SetInsertPoint(&OrigI); + Builder.SetInsertPoint(&OrigI); switch (OCF) { case OCF_INVALID: @@ -3583,11 +3596,11 @@ bool InstCombiner::OptimizeOverflowCheck(OverflowCheckFlavor OCF, Value *LHS, case OCF_UNSIGNED_ADD: { OverflowResult OR = computeOverflowForUnsignedAdd(LHS, RHS, &OrigI); if (OR == OverflowResult::NeverOverflows) - return SetResult(Builder->CreateNUWAdd(LHS, RHS), Builder->getFalse(), + return SetResult(Builder.CreateNUWAdd(LHS, RHS), Builder.getFalse(), true); if (OR == OverflowResult::AlwaysOverflows) - return SetResult(Builder->CreateAdd(LHS, RHS), Builder->getTrue(), true); + return SetResult(Builder.CreateAdd(LHS, RHS), Builder.getTrue(), true); // Fall through uadd into sadd LLVM_FALLTHROUGH; @@ -3595,13 +3608,13 @@ bool InstCombiner::OptimizeOverflowCheck(OverflowCheckFlavor OCF, Value *LHS, case OCF_SIGNED_ADD: { // X + 0 -> {X, false} if (match(RHS, m_Zero())) - return SetResult(LHS, Builder->getFalse(), false); + return SetResult(LHS, Builder.getFalse(), false); // We can strength reduce this signed add into a regular add if we can prove // that it will never overflow. if (OCF == OCF_SIGNED_ADD) if (willNotOverflowSignedAdd(LHS, RHS, OrigI)) - return SetResult(Builder->CreateNSWAdd(LHS, RHS), Builder->getFalse(), + return SetResult(Builder.CreateNSWAdd(LHS, RHS), Builder.getFalse(), true); break; } @@ -3610,15 +3623,15 @@ bool InstCombiner::OptimizeOverflowCheck(OverflowCheckFlavor OCF, Value *LHS, case OCF_SIGNED_SUB: { // X - 0 -> {X, false} if (match(RHS, m_Zero())) - return SetResult(LHS, Builder->getFalse(), false); + return SetResult(LHS, Builder.getFalse(), false); if (OCF == OCF_SIGNED_SUB) { if (willNotOverflowSignedSub(LHS, RHS, OrigI)) - return SetResult(Builder->CreateNSWSub(LHS, RHS), Builder->getFalse(), + return SetResult(Builder.CreateNSWSub(LHS, RHS), Builder.getFalse(), true); } else { if (willNotOverflowUnsignedSub(LHS, RHS, OrigI)) - return SetResult(Builder->CreateNUWSub(LHS, RHS), Builder->getFalse(), + return SetResult(Builder.CreateNUWSub(LHS, RHS), Builder.getFalse(), true); } break; @@ -3627,28 +3640,28 @@ bool InstCombiner::OptimizeOverflowCheck(OverflowCheckFlavor OCF, Value *LHS, case OCF_UNSIGNED_MUL: { OverflowResult OR = computeOverflowForUnsignedMul(LHS, RHS, &OrigI); if (OR == OverflowResult::NeverOverflows) - return SetResult(Builder->CreateNUWMul(LHS, RHS), Builder->getFalse(), + return SetResult(Builder.CreateNUWMul(LHS, RHS), Builder.getFalse(), true); if (OR == OverflowResult::AlwaysOverflows) - return SetResult(Builder->CreateMul(LHS, RHS), Builder->getTrue(), true); + return SetResult(Builder.CreateMul(LHS, RHS), Builder.getTrue(), true); LLVM_FALLTHROUGH; } case OCF_SIGNED_MUL: // X * undef -> undef if (isa<UndefValue>(RHS)) - return SetResult(RHS, UndefValue::get(Builder->getInt1Ty()), false); + return SetResult(RHS, UndefValue::get(Builder.getInt1Ty()), false); // X * 0 -> {0, false} if (match(RHS, m_Zero())) - return SetResult(RHS, Builder->getFalse(), false); + return SetResult(RHS, Builder.getFalse(), false); // X * 1 -> {X, false} if (match(RHS, m_One())) - return SetResult(LHS, Builder->getFalse(), false); + return SetResult(LHS, Builder.getFalse(), false); if (OCF == OCF_SIGNED_MUL) if (willNotOverflowSignedMul(LHS, RHS, OrigI)) - return SetResult(Builder->CreateNSWMul(LHS, RHS), Builder->getFalse(), + return SetResult(Builder.CreateNSWMul(LHS, RHS), Builder.getFalse(), true); break; } @@ -3813,25 +3826,25 @@ static Instruction *processUMulZExtIdiom(ICmpInst &I, Value *MulVal, return nullptr; } - 
InstCombiner::BuilderTy *Builder = IC.Builder; - Builder->SetInsertPoint(MulInstr); + InstCombiner::BuilderTy &Builder = IC.Builder; + Builder.SetInsertPoint(MulInstr); // Replace: mul(zext A, zext B) --> mul.with.overflow(A, B) Value *MulA = A, *MulB = B; if (WidthA < MulWidth) - MulA = Builder->CreateZExt(A, MulType); + MulA = Builder.CreateZExt(A, MulType); if (WidthB < MulWidth) - MulB = Builder->CreateZExt(B, MulType); + MulB = Builder.CreateZExt(B, MulType); Value *F = Intrinsic::getDeclaration(I.getModule(), Intrinsic::umul_with_overflow, MulType); - CallInst *Call = Builder->CreateCall(F, {MulA, MulB}, "umul"); + CallInst *Call = Builder.CreateCall(F, {MulA, MulB}, "umul"); IC.Worklist.Add(MulInstr); // If there are uses of mul result other than the comparison, we know that // they are truncation or binary AND. Change them to use result of // mul.with.overflow and adjust properly mask/size. if (MulVal->hasNUsesOrMore(2)) { - Value *Mul = Builder->CreateExtractValue(Call, 0, "umul.value"); + Value *Mul = Builder.CreateExtractValue(Call, 0, "umul.value"); for (User *U : MulVal->users()) { if (U == &I || U == OtherVal) continue; @@ -3843,17 +3856,18 @@ static Instruction *processUMulZExtIdiom(ICmpInst &I, Value *MulVal, } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(U)) { assert(BO->getOpcode() == Instruction::And); // Replace (mul & mask) --> zext (mul.with.overflow & short_mask) - ConstantInt *CI = cast<ConstantInt>(BO->getOperand(1)); - APInt ShortMask = CI->getValue().trunc(MulWidth); - Value *ShortAnd = Builder->CreateAnd(Mul, ShortMask); - Instruction *Zext = - cast<Instruction>(Builder->CreateZExt(ShortAnd, BO->getType())); - IC.Worklist.Add(Zext); + Value *ShortMask = + Builder.CreateTrunc(BO->getOperand(1), Builder.getIntNTy(MulWidth)); + Value *ShortAnd = Builder.CreateAnd(Mul, ShortMask); + Value *Zext = Builder.CreateZExt(ShortAnd, BO->getType()); + if (auto *ZextI = dyn_cast<Instruction>(Zext)) + IC.Worklist.Add(ZextI); IC.replaceInstUsesWith(*BO, Zext); } else { llvm_unreachable("Unexpected Binary operation"); } - IC.Worklist.Add(cast<Instruction>(U)); + if (auto *UI = dyn_cast<Instruction>(U)) + IC.Worklist.Add(UI); } } if (isa<Instruction>(OtherVal)) @@ -3884,7 +3898,7 @@ static Instruction *processUMulZExtIdiom(ICmpInst &I, Value *MulVal, llvm_unreachable("Unexpected predicate"); } if (Inverse) { - Value *Res = Builder->CreateExtractValue(Call, 1); + Value *Res = Builder.CreateExtractValue(Call, 1); return BinaryOperator::CreateNot(Res); } @@ -4239,7 +4253,7 @@ Instruction *InstCombiner::foldICmpUsingKnownBits(ICmpInst &I) { if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) { if (Op1Max == Op0Min + 1) // A <s C -> A == C-1 if min(A)+1 == C return new ICmpInst(ICmpInst::ICMP_EQ, Op0, - Builder->getInt(CI->getValue() - 1)); + Builder.getInt(CI->getValue() - 1)); } break; case ICmpInst::ICMP_SGT: @@ -4253,7 +4267,7 @@ Instruction *InstCombiner::foldICmpUsingKnownBits(ICmpInst &I) { if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) { if (Op1Min == Op0Max - 1) // A >s C -> A == C+1 if max(A)-1 == C return new ICmpInst(ICmpInst::ICMP_EQ, Op0, - Builder->getInt(CI->getValue() + 1)); + Builder.getInt(CI->getValue() + 1)); } break; case ICmpInst::ICMP_SGE: @@ -4358,7 +4372,7 @@ static ICmpInst *canonicalizeCmpWithConstant(ICmpInst &I) { static Instruction *canonicalizeICmpBool(ICmpInst &I, InstCombiner::BuilderTy &Builder) { Value *A = I.getOperand(0), *B = I.getOperand(1); - assert(A->getType()->getScalarType()->isIntegerTy(1) && "Bools only"); + 
assert(A->getType()->isIntOrIntVectorTy(1) && "Bools only"); // A boolean compared to true/false can be simplified to Op0/true/false in // 14 out of the 20 (10 predicates * 2 constants) possible combinations. @@ -4465,8 +4479,8 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { } } - if (Op0->getType()->getScalarType()->isIntegerTy(1)) - if (Instruction *Res = canonicalizeICmpBool(I, *Builder)) + if (Op0->getType()->isIntOrIntVectorTy(1)) + if (Instruction *Res = canonicalizeICmpBool(I, Builder)) return Res; if (ICmpInst *NewICmp = canonicalizeCmpWithConstant(I)) @@ -4559,7 +4573,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { Op1 = ConstantExpr::getBitCast(Op1C, Op0->getType()); } else { // Otherwise, cast the RHS right before the icmp - Op1 = Builder->CreateBitCast(Op1, Op0->getType()); + Op1 = Builder.CreateBitCast(Op1, Op0->getType()); } } return new ICmpInst(I.getPredicate(), Op0, Op1); @@ -4592,8 +4606,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { if (match(Op0, m_And(m_Value(A), m_Not(m_Value(B)))) && match(Op1, m_Zero()) && isKnownToBeAPowerOfTwo(A, false, 0, &I) && I.isEquality()) - return new ICmpInst(I.getInversePredicate(), - Builder->CreateAnd(A, B), + return new ICmpInst(I.getInversePredicate(), Builder.CreateAnd(A, B), Op1); // ~X < ~Y --> Y < X @@ -4693,10 +4706,10 @@ Instruction *InstCombiner::foldFCmpIntToFPConst(FCmpInst &I, Instruction *LHSI, RHSRoundInt.roundToIntegral(APFloat::rmNearestTiesToEven); if (RHS.compare(RHSRoundInt) != APFloat::cmpEqual) { if (P == FCmpInst::FCMP_OEQ || P == FCmpInst::FCMP_UEQ) - return replaceInstUsesWith(I, Builder->getFalse()); + return replaceInstUsesWith(I, Builder.getFalse()); assert(P == FCmpInst::FCMP_ONE || P == FCmpInst::FCMP_UNE); - return replaceInstUsesWith(I, Builder->getTrue()); + return replaceInstUsesWith(I, Builder.getTrue()); } } @@ -4762,9 +4775,9 @@ Instruction *InstCombiner::foldFCmpIntToFPConst(FCmpInst &I, Instruction *LHSI, Pred = ICmpInst::ICMP_NE; break; case FCmpInst::FCMP_ORD: - return replaceInstUsesWith(I, Builder->getTrue()); + return replaceInstUsesWith(I, Builder.getTrue()); case FCmpInst::FCMP_UNO: - return replaceInstUsesWith(I, Builder->getFalse()); + return replaceInstUsesWith(I, Builder.getFalse()); } // Now we know that the APFloat is a normal number, zero or inf. @@ -4782,8 +4795,8 @@ Instruction *InstCombiner::foldFCmpIntToFPConst(FCmpInst &I, Instruction *LHSI, if (SMax.compare(RHS) == APFloat::cmpLessThan) { // smax < 13123.0 if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE) - return replaceInstUsesWith(I, Builder->getTrue()); - return replaceInstUsesWith(I, Builder->getFalse()); + return replaceInstUsesWith(I, Builder.getTrue()); + return replaceInstUsesWith(I, Builder.getFalse()); } } else { // If the RHS value is > UnsignedMax, fold the comparison. 
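// What foldFCmpIntToFPConst exploits here, shown at the source level (a
// sketch; 32-bit ints convert to double exactly, so the cast is lossless):
// a converted integer is always integral, so it can never equal a
// non-integral constant like 4.4 -- OEQ folds to false, ONE/UNE to true.
#include <cassert>

static bool eq44(int I) { return static_cast<double>(I) == 4.4; } // -> false
static bool ne44(int I) { return static_cast<double>(I) != 4.4; } // -> true

int main() {
  for (int I : {-1000, -5, 0, 4, 5, 1000}) {
    assert(!eq44(I));
    assert(ne44(I));
  }
  return 0;
}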
This handles @@ -4794,8 +4807,8 @@ Instruction *InstCombiner::foldFCmpIntToFPConst(FCmpInst &I, Instruction *LHSI, if (UMax.compare(RHS) == APFloat::cmpLessThan) { // umax < 13123.0 if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE) - return replaceInstUsesWith(I, Builder->getTrue()); - return replaceInstUsesWith(I, Builder->getFalse()); + return replaceInstUsesWith(I, Builder.getTrue()); + return replaceInstUsesWith(I, Builder.getFalse()); } } @@ -4807,8 +4820,8 @@ Instruction *InstCombiner::foldFCmpIntToFPConst(FCmpInst &I, Instruction *LHSI, if (SMin.compare(RHS) == APFloat::cmpGreaterThan) { // smin > 12312.0 if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE) - return replaceInstUsesWith(I, Builder->getTrue()); - return replaceInstUsesWith(I, Builder->getFalse()); + return replaceInstUsesWith(I, Builder.getTrue()); + return replaceInstUsesWith(I, Builder.getFalse()); } } else { // See if the RHS value is < UnsignedMin. @@ -4818,8 +4831,8 @@ Instruction *InstCombiner::foldFCmpIntToFPConst(FCmpInst &I, Instruction *LHSI, if (SMin.compare(RHS) == APFloat::cmpGreaterThan) { // umin > 12312.0 if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE) - return replaceInstUsesWith(I, Builder->getTrue()); - return replaceInstUsesWith(I, Builder->getFalse()); + return replaceInstUsesWith(I, Builder.getTrue()); + return replaceInstUsesWith(I, Builder.getFalse()); } } @@ -4841,14 +4854,14 @@ Instruction *InstCombiner::foldFCmpIntToFPConst(FCmpInst &I, Instruction *LHSI, switch (Pred) { default: llvm_unreachable("Unexpected integer comparison!"); case ICmpInst::ICMP_NE: // (float)int != 4.4 --> true - return replaceInstUsesWith(I, Builder->getTrue()); + return replaceInstUsesWith(I, Builder.getTrue()); case ICmpInst::ICMP_EQ: // (float)int == 4.4 --> false - return replaceInstUsesWith(I, Builder->getFalse()); + return replaceInstUsesWith(I, Builder.getFalse()); case ICmpInst::ICMP_ULE: // (float)int <= 4.4 --> int <= 4 // (float)int <= -4.4 --> false if (RHS.isNegative()) - return replaceInstUsesWith(I, Builder->getFalse()); + return replaceInstUsesWith(I, Builder.getFalse()); break; case ICmpInst::ICMP_SLE: // (float)int <= 4.4 --> int <= 4 @@ -4860,7 +4873,7 @@ Instruction *InstCombiner::foldFCmpIntToFPConst(FCmpInst &I, Instruction *LHSI, // (float)int < -4.4 --> false // (float)int < 4.4 --> int <= 4 if (RHS.isNegative()) - return replaceInstUsesWith(I, Builder->getFalse()); + return replaceInstUsesWith(I, Builder.getFalse()); Pred = ICmpInst::ICMP_ULE; break; case ICmpInst::ICMP_SLT: @@ -4873,7 +4886,7 @@ Instruction *InstCombiner::foldFCmpIntToFPConst(FCmpInst &I, Instruction *LHSI, // (float)int > 4.4 --> int > 4 // (float)int > -4.4 --> true if (RHS.isNegative()) - return replaceInstUsesWith(I, Builder->getTrue()); + return replaceInstUsesWith(I, Builder.getTrue()); break; case ICmpInst::ICMP_SGT: // (float)int > 4.4 --> int > 4 @@ -4885,7 +4898,7 @@ Instruction *InstCombiner::foldFCmpIntToFPConst(FCmpInst &I, Instruction *LHSI, // (float)int >= -4.4 --> true // (float)int >= 4.4 --> int > 4 if (RHS.isNegative()) - return replaceInstUsesWith(I, Builder->getTrue()); + return replaceInstUsesWith(I, Builder.getTrue()); Pred = ICmpInst::ICMP_UGT; break; case ICmpInst::ICMP_SGE: diff --git a/lib/Transforms/InstCombine/InstCombineInternal.h b/lib/Transforms/InstCombine/InstCombineInternal.h index 87f11467b95e2..c38a4981bf1dc 100644 --- a/lib/Transforms/InstCombine/InstCombineInternal.h 
+++ b/lib/Transforms/InstCombine/InstCombineInternal.h @@ -21,8 +21,6 @@ #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/TargetFolder.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/BinaryFormat/Dwarf.h" -#include "llvm/IR/DIBuilder.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstVisitor.h" @@ -212,7 +210,7 @@ public: /// \brief An IRBuilder that automatically inserts new instructions into the /// worklist. typedef IRBuilder<TargetFolder, IRBuilderCallbackInserter> BuilderTy; - BuilderTy *Builder; + BuilderTy &Builder; private: // Mode in which we are running the combiner. @@ -235,7 +233,7 @@ private: bool MadeIRChange; public: - InstCombiner(InstCombineWorklist &Worklist, BuilderTy *Builder, + InstCombiner(InstCombineWorklist &Worklist, BuilderTy &Builder, bool MinimizeSize, bool ExpensiveCombines, AliasAnalysis *AA, AssumptionCache &AC, TargetLibraryInfo &TLI, DominatorTree &DT, const DataLayout &DL, LoopInfo *LI) @@ -598,9 +596,8 @@ private: /// This tries to simplify binary operations by factorizing out common terms /// (e. g. "(A*B)+(A*C)" -> "A*(B+C)"). - Value *tryFactorization(InstCombiner::BuilderTy *, BinaryOperator &, - Instruction::BinaryOps, Value *, Value *, Value *, - Value *); + Value *tryFactorization(BinaryOperator &, Instruction::BinaryOps, Value *, + Value *, Value *, Value *); /// Match a select chain which produces one of three values based on whether /// the LHS is less than, equal to, or greater than RHS respectively. @@ -639,7 +636,6 @@ private: APInt &UndefElts, unsigned Depth = 0); Value *SimplifyVectorOp(BinaryOperator &Inst); - Value *SimplifyBSwap(BinaryOperator &Inst); /// Given a binary operator, cast instruction, or select which has a PHI node diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index 26bee204e5a44..c59e1ce69ac22 100644 --- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -189,7 +189,7 @@ static Instruction *simplifyAllocaArraySize(InstCombiner &IC, AllocaInst &AI) { return nullptr; // Canonicalize it. - Value *V = IC.Builder->getInt32(1); + Value *V = IC.Builder.getInt32(1); AI.setOperand(0, V); return &AI; } @@ -197,7 +197,7 @@ static Instruction *simplifyAllocaArraySize(InstCombiner &IC, AllocaInst &AI) { // Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1 if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) { Type *NewTy = ArrayType::get(AI.getAllocatedType(), C->getZExtValue()); - AllocaInst *New = IC.Builder->CreateAlloca(NewTy, nullptr, AI.getName()); + AllocaInst *New = IC.Builder.CreateAlloca(NewTy, nullptr, AI.getName()); New->setAlignment(AI.getAlignment()); // Scan to the end of the allocation instructions, to skip over a block of @@ -229,7 +229,7 @@ static Instruction *simplifyAllocaArraySize(InstCombiner &IC, AllocaInst &AI) { // any casting is exposed early. 
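// A minimal stand-in for the member change above (hypothetical Builder and
// Combiner types, not LLVM's): holding the builder by reference removes the
// null state a pointer member implies, and turns every Builder->CreateFoo()
// call site into Builder.CreateFoo(), which is the bulk of this diff.
struct Builder {
  int createAdd(int A, int B) const { return A + B; }
};

struct Combiner {
  Builder &B;                                   // was: Builder *B;
  explicit Combiner(Builder &Bld) : B(Bld) {}
  int run() const { return B.createAdd(2, 3); } // was: B->createAdd(2, 3)
};

int main() {
  Builder Bld;
  return Combiner(Bld).run() == 5 ? 0 : 1;
}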
Type *IntPtrTy = IC.getDataLayout().getIntPtrType(AI.getType()); if (AI.getArraySize()->getType() != IntPtrTy) { - Value *V = IC.Builder->CreateIntCast(AI.getArraySize(), IntPtrTy, false); + Value *V = IC.Builder.CreateIntCast(AI.getArraySize(), IntPtrTy, false); AI.setOperand(0, V); return &AI; } @@ -458,10 +458,10 @@ static LoadInst *combineLoadToNewType(InstCombiner &IC, LoadInst &LI, Type *NewT SmallVector<std::pair<unsigned, MDNode *>, 8> MD; LI.getAllMetadata(MD); - LoadInst *NewLoad = IC.Builder->CreateAlignedLoad( - IC.Builder->CreateBitCast(Ptr, NewTy->getPointerTo(AS)), + LoadInst *NewLoad = IC.Builder.CreateAlignedLoad( + IC.Builder.CreateBitCast(Ptr, NewTy->getPointerTo(AS)), LI.getAlignment(), LI.isVolatile(), LI.getName() + Suffix); - NewLoad->setAtomic(LI.getOrdering(), LI.getSynchScope()); + NewLoad->setAtomic(LI.getOrdering(), LI.getSyncScopeID()); MDBuilder MDB(NewLoad->getContext()); for (const auto &MDPair : MD) { unsigned ID = MDPair.first; @@ -518,10 +518,10 @@ static StoreInst *combineStoreToNewValue(InstCombiner &IC, StoreInst &SI, Value SmallVector<std::pair<unsigned, MDNode *>, 8> MD; SI.getAllMetadata(MD); - StoreInst *NewStore = IC.Builder->CreateAlignedStore( - V, IC.Builder->CreateBitCast(Ptr, V->getType()->getPointerTo(AS)), + StoreInst *NewStore = IC.Builder.CreateAlignedStore( + V, IC.Builder.CreateBitCast(Ptr, V->getType()->getPointerTo(AS)), SI.getAlignment(), SI.isVolatile()); - NewStore->setAtomic(SI.getOrdering(), SI.getSynchScope()); + NewStore->setAtomic(SI.getOrdering(), SI.getSyncScopeID()); for (const auto &MDPair : MD) { unsigned ID = MDPair.first; MDNode *N = MDPair.second; @@ -613,7 +613,7 @@ static Instruction *combineLoadToOperationType(InstCombiner &IC, LoadInst &LI) { // Replace all the stores with stores of the newly loaded value. for (auto UI = LI.user_begin(), UE = LI.user_end(); UI != UE;) { auto *SI = cast<StoreInst>(*UI++); - IC.Builder->SetInsertPoint(SI); + IC.Builder.SetInsertPoint(SI); combineStoreToNewValue(IC, *SI, NewLoad); IC.eraseInstFromFunction(*SI); } @@ -664,7 +664,7 @@ static Instruction *unpackLoadToAggregate(InstCombiner &IC, LoadInst &LI) { AAMDNodes AAMD; LI.getAAMetadata(AAMD); NewLoad->setAAMetadata(AAMD); - return IC.replaceInstUsesWith(LI, IC.Builder->CreateInsertValue( + return IC.replaceInstUsesWith(LI, IC.Builder.CreateInsertValue( UndefValue::get(T), NewLoad, 0, Name)); } @@ -689,15 +689,15 @@ static Instruction *unpackLoadToAggregate(InstCombiner &IC, LoadInst &LI) { Zero, ConstantInt::get(IdxType, i), }; - auto *Ptr = IC.Builder->CreateInBoundsGEP(ST, Addr, makeArrayRef(Indices), - Name + ".elt"); + auto *Ptr = IC.Builder.CreateInBoundsGEP(ST, Addr, makeArrayRef(Indices), + Name + ".elt"); auto EltAlign = MinAlign(Align, SL->getElementOffset(i)); - auto *L = IC.Builder->CreateAlignedLoad(Ptr, EltAlign, Name + ".unpack"); + auto *L = IC.Builder.CreateAlignedLoad(Ptr, EltAlign, Name + ".unpack"); // Propagate AA metadata. It'll still be valid on the narrowed load. 
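// The per-element load rewrite above, as a C++ sketch (hypothetical Pair
// type; the real transform edits IR GEPs and loads): one aggregate load
// becomes a scalar load per field -- the "<name>.elt" pointers and
// "<name>.unpack" loads -- plus a rebuild of the whole value.
#include <cassert>

struct Pair { int A; int B; };

static Pair loadUnpacked(const Pair *P) {
  Pair V;
  V.A = P->A; // GEP (0, 0) + scalar load
  V.B = P->B; // GEP (0, 1) + scalar load
  return V;
}

int main() {
  Pair S{1, 2};
  Pair V = loadUnpacked(&S);
  assert(V.A == 1 && V.B == 2);
  return 0;
}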
AAMDNodes AAMD; LI.getAAMetadata(AAMD); L->setAAMetadata(AAMD); - V = IC.Builder->CreateInsertValue(V, L, i); + V = IC.Builder.CreateInsertValue(V, L, i); } V->setName(Name); @@ -712,7 +712,7 @@ static Instruction *unpackLoadToAggregate(InstCombiner &IC, LoadInst &LI) { AAMDNodes AAMD; LI.getAAMetadata(AAMD); NewLoad->setAAMetadata(AAMD); - return IC.replaceInstUsesWith(LI, IC.Builder->CreateInsertValue( + return IC.replaceInstUsesWith(LI, IC.Builder.CreateInsertValue( UndefValue::get(T), NewLoad, 0, Name)); } @@ -740,14 +740,14 @@ static Instruction *unpackLoadToAggregate(InstCombiner &IC, LoadInst &LI) { Zero, ConstantInt::get(IdxType, i), }; - auto *Ptr = IC.Builder->CreateInBoundsGEP(AT, Addr, makeArrayRef(Indices), - Name + ".elt"); - auto *L = IC.Builder->CreateAlignedLoad(Ptr, MinAlign(Align, Offset), - Name + ".unpack"); + auto *Ptr = IC.Builder.CreateInBoundsGEP(AT, Addr, makeArrayRef(Indices), + Name + ".elt"); + auto *L = IC.Builder.CreateAlignedLoad(Ptr, MinAlign(Align, Offset), + Name + ".unpack"); AAMDNodes AAMD; LI.getAAMetadata(AAMD); L->setAAMetadata(AAMD); - V = IC.Builder->CreateInsertValue(V, L, i); + V = IC.Builder.CreateInsertValue(V, L, i); Offset += EltSize; } @@ -982,8 +982,8 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { combineMetadataForCSE(cast<LoadInst>(AvailableVal), &LI); return replaceInstUsesWith( - LI, Builder->CreateBitOrPointerCast(AvailableVal, LI.getType(), - LI.getName() + ".cast")); + LI, Builder.CreateBitOrPointerCast(AvailableVal, LI.getType(), + LI.getName() + ".cast")); } // None of the following transforms are legal for volatile/ordered atomic @@ -1019,15 +1019,15 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { unsigned Align = LI.getAlignment(); if (isSafeToLoadUnconditionally(SI->getOperand(1), Align, DL, SI) && isSafeToLoadUnconditionally(SI->getOperand(2), Align, DL, SI)) { - LoadInst *V1 = Builder->CreateLoad(SI->getOperand(1), - SI->getOperand(1)->getName()+".val"); - LoadInst *V2 = Builder->CreateLoad(SI->getOperand(2), - SI->getOperand(2)->getName()+".val"); + LoadInst *V1 = Builder.CreateLoad(SI->getOperand(1), + SI->getOperand(1)->getName()+".val"); + LoadInst *V2 = Builder.CreateLoad(SI->getOperand(2), + SI->getOperand(2)->getName()+".val"); assert(LI.isUnordered() && "implied by above"); V1->setAlignment(Align); - V1->setAtomic(LI.getOrdering(), LI.getSynchScope()); + V1->setAtomic(LI.getOrdering(), LI.getSyncScopeID()); V2->setAlignment(Align); - V2->setAtomic(LI.getOrdering(), LI.getSynchScope()); + V2->setAtomic(LI.getOrdering(), LI.getSyncScopeID()); return SelectInst::Create(SI->getCondition(), V1, V2); } @@ -1172,7 +1172,7 @@ static bool unpackStoreToAggregate(InstCombiner &IC, StoreInst &SI) { // If the struct only have one element, we unpack. 
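// unpackStoreToAggregate, which this hunk continues into, mirrors the load
// case; a sketch with the same hypothetical Pair type: storing an aggregate
// becomes one scalar store per element through its own element pointer.
#include <cassert>

struct Pair { int A; int B; };

static void storeUnpacked(Pair *P, const Pair &V) {
  P->A = V.A; // extractvalue 0 + scalar store
  P->B = V.B; // extractvalue 1 + scalar store
}

int main() {
  Pair S{0, 0};
  storeUnpacked(&S, Pair{3, 4});
  assert(S.A == 3 && S.B == 4);
  return 0;
}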
unsigned Count = ST->getNumElements(); if (Count == 1) { - V = IC.Builder->CreateExtractValue(V, 0); + V = IC.Builder.CreateExtractValue(V, 0); combineStoreToNewValue(IC, SI, V); return true; } @@ -1201,12 +1201,11 @@ static bool unpackStoreToAggregate(InstCombiner &IC, StoreInst &SI) { Zero, ConstantInt::get(IdxType, i), }; - auto *Ptr = IC.Builder->CreateInBoundsGEP(ST, Addr, makeArrayRef(Indices), - AddrName); - auto *Val = IC.Builder->CreateExtractValue(V, i, EltName); + auto *Ptr = IC.Builder.CreateInBoundsGEP(ST, Addr, makeArrayRef(Indices), + AddrName); + auto *Val = IC.Builder.CreateExtractValue(V, i, EltName); auto EltAlign = MinAlign(Align, SL->getElementOffset(i)); - llvm::Instruction *NS = - IC.Builder->CreateAlignedStore(Val, Ptr, EltAlign); + llvm::Instruction *NS = IC.Builder.CreateAlignedStore(Val, Ptr, EltAlign); AAMDNodes AAMD; SI.getAAMetadata(AAMD); NS->setAAMetadata(AAMD); @@ -1219,7 +1218,7 @@ static bool unpackStoreToAggregate(InstCombiner &IC, StoreInst &SI) { // If the array only have one element, we unpack. auto NumElements = AT->getNumElements(); if (NumElements == 1) { - V = IC.Builder->CreateExtractValue(V, 0); + V = IC.Builder.CreateExtractValue(V, 0); combineStoreToNewValue(IC, SI, V); return true; } @@ -1252,11 +1251,11 @@ static bool unpackStoreToAggregate(InstCombiner &IC, StoreInst &SI) { Zero, ConstantInt::get(IdxType, i), }; - auto *Ptr = IC.Builder->CreateInBoundsGEP(AT, Addr, makeArrayRef(Indices), - AddrName); - auto *Val = IC.Builder->CreateExtractValue(V, i, EltName); + auto *Ptr = IC.Builder.CreateInBoundsGEP(AT, Addr, makeArrayRef(Indices), + AddrName); + auto *Val = IC.Builder.CreateExtractValue(V, i, EltName); auto EltAlign = MinAlign(Align, Offset); - Instruction *NS = IC.Builder->CreateAlignedStore(Val, Ptr, EltAlign); + Instruction *NS = IC.Builder.CreateAlignedStore(Val, Ptr, EltAlign); AAMDNodes AAMD; SI.getAAMetadata(AAMD); NS->setAAMetadata(AAMD); @@ -1541,7 +1540,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) { SI.isVolatile(), SI.getAlignment(), SI.getOrdering(), - SI.getSynchScope()); + SI.getSyncScopeID()); InsertNewInstBefore(NewSI, *BBI); // The debug locations of the original instructions might differ; merge them. 
NewSI->setDebugLoc(DILocation::getMergedLocation(SI.getDebugLoc(), diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index 579639a6194e9..e3a50220f94e2 100644 --- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -39,8 +39,8 @@ static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC, Value *A = nullptr, *B = nullptr, *One = nullptr; if (match(V, m_LShr(m_OneUse(m_Shl(m_Value(One), m_Value(A))), m_Value(B))) && match(One, m_One())) { - A = IC.Builder->CreateSub(A, B); - return IC.Builder->CreateShl(One, A); + A = IC.Builder.CreateSub(A, B); + return IC.Builder.CreateShl(One, A); } // (PowerOfTwo >>u B) --> isExact since shifting out the result would make it @@ -250,9 +250,9 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { ConstantInt *C1; Value *Sub = nullptr; if (match(Op0, m_Sub(m_Value(Y), m_Value(X)))) - Sub = Builder->CreateSub(X, Y, "suba"); + Sub = Builder.CreateSub(X, Y, "suba"); else if (match(Op0, m_Add(m_Value(Y), m_ConstantInt(C1)))) - Sub = Builder->CreateSub(Builder->CreateNeg(C1), Y, "subc"); + Sub = Builder.CreateSub(Builder.CreateNeg(C1), Y, "subc"); if (Sub) return BinaryOperator::CreateMul(Sub, @@ -272,11 +272,11 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { Value *X; Constant *C1; if (match(Op0, m_OneUse(m_Add(m_Value(X), m_Constant(C1))))) { - Value *Mul = Builder->CreateMul(C1, Op1); + Value *Mul = Builder.CreateMul(C1, Op1); // Only go forward with the transform if C1*CI simplifies to a tidier // constant. if (!match(Mul, m_Mul(m_Value(), m_Value()))) - return BinaryOperator::CreateAdd(Builder->CreateMul(X, Op1), Mul); + return BinaryOperator::CreateAdd(Builder.CreateMul(X, Op1), Mul); } } } @@ -318,7 +318,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { auto RemOpc = Div->getOpcode() == Instruction::UDiv ? Instruction::URem : Instruction::SRem; - Value *Rem = Builder->CreateBinOp(RemOpc, X, DivOp1); + Value *Rem = Builder.CreateBinOp(RemOpc, X, DivOp1); if (DivOp1 == Y) return BinaryOperator::CreateSub(X, Rem); return BinaryOperator::CreateSub(Rem, X); @@ -326,7 +326,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { } /// i1 mul -> i1 and. - if (I.getType()->getScalarType()->isIntegerTy(1)) + if (I.getType()->isIntOrIntVectorTy(1)) return BinaryOperator::CreateAnd(Op0, Op1); // X*(1 << Y) --> X << Y @@ -368,7 +368,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { } if (BoolCast) { - Value *V = Builder->CreateSub(Constant::getNullValue(I.getType()), + Value *V = Builder.CreateSub(Constant::getNullValue(I.getType()), BoolCast); return BinaryOperator::CreateAnd(V, OtherOp); } @@ -386,7 +386,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { willNotOverflowSignedMul(Op0Conv->getOperand(0), CI, I)) { // Insert the new, smaller mul. Value *NewMul = - Builder->CreateNSWMul(Op0Conv->getOperand(0), CI, "mulconv"); + Builder.CreateNSWMul(Op0Conv->getOperand(0), CI, "mulconv"); return new SExtInst(NewMul, I.getType()); } } @@ -403,7 +403,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { willNotOverflowSignedMul(Op0Conv->getOperand(0), Op1Conv->getOperand(0), I)) { // Insert the new integer mul. 
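// One of the visitMul rewrites above, checked in C++ (illustrative only):
// multiplying by a bool widened to an integer is a mask by its negation,
// X * (int)B == (-(int)B) & X, which is exactly the CreateSub-then-CreateAnd
// sequence the combiner emits for the BoolCast case.
#include <cassert>

static int mulByBool(int X, bool B) { return (-static_cast<int>(B)) & X; }

int main() {
  for (bool B : {false, true})
    for (int X : {0, 5, -7, 1 << 20})
      assert(mulByBool(X, B) == X * static_cast<int>(B));
  return 0;
}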
- Value *NewMul = Builder->CreateNSWMul( + Value *NewMul = Builder.CreateNSWMul( Op0Conv->getOperand(0), Op1Conv->getOperand(0), "mulconv"); return new SExtInst(NewMul, I.getType()); } @@ -422,7 +422,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { willNotOverflowUnsignedMul(Op0Conv->getOperand(0), CI, I)) { // Insert the new, smaller mul. Value *NewMul = - Builder->CreateNUWMul(Op0Conv->getOperand(0), CI, "mulconv"); + Builder.CreateNUWMul(Op0Conv->getOperand(0), CI, "mulconv"); return new ZExtInst(NewMul, I.getType()); } } @@ -439,7 +439,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { willNotOverflowUnsignedMul(Op0Conv->getOperand(0), Op1Conv->getOperand(0), I)) { // Insert the new integer mul. - Value *NewMul = Builder->CreateNUWMul( + Value *NewMul = Builder.CreateNUWMul( Op0Conv->getOperand(0), Op1Conv->getOperand(0), "mulconv"); return new ZExtInst(NewMul, I.getType()); } @@ -698,11 +698,11 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { } // if pattern detected emit alternate sequence if (OpX && OpY) { - BuilderTy::FastMathFlagGuard Guard(*Builder); - Builder->setFastMathFlags(Log2->getFastMathFlags()); + BuilderTy::FastMathFlagGuard Guard(Builder); + Builder.setFastMathFlags(Log2->getFastMathFlags()); Log2->setArgOperand(0, OpY); - Value *FMulVal = Builder->CreateFMul(OpX, Log2); - Value *FSub = Builder->CreateFSub(FMulVal, OpX); + Value *FMulVal = Builder.CreateFMul(OpX, Log2); + Value *FSub = Builder.CreateFSub(FMulVal, OpX); FSub->takeName(&I); return replaceInstUsesWith(I, FSub); } @@ -714,23 +714,23 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { for (int i = 0; i < 2; i++) { bool IgnoreZeroSign = I.hasNoSignedZeros(); if (BinaryOperator::isFNeg(Opnd0, IgnoreZeroSign)) { - BuilderTy::FastMathFlagGuard Guard(*Builder); - Builder->setFastMathFlags(I.getFastMathFlags()); + BuilderTy::FastMathFlagGuard Guard(Builder); + Builder.setFastMathFlags(I.getFastMathFlags()); Value *N0 = dyn_castFNegVal(Opnd0, IgnoreZeroSign); Value *N1 = dyn_castFNegVal(Opnd1, IgnoreZeroSign); // -X * -Y => X*Y if (N1) { - Value *FMul = Builder->CreateFMul(N0, N1); + Value *FMul = Builder.CreateFMul(N0, N1); FMul->takeName(&I); return replaceInstUsesWith(I, FMul); } if (Opnd0->hasOneUse()) { // -X * Y => -(X*Y) (Promote negation as high as possible) - Value *T = Builder->CreateFMul(N0, Opnd1); - Value *Neg = Builder->CreateFNeg(T); + Value *T = Builder.CreateFMul(N0, Opnd1); + Value *Neg = Builder.CreateFNeg(T); Neg->takeName(&I); return replaceInstUsesWith(I, Neg); } @@ -755,10 +755,10 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { Y = Opnd0_0; if (Y) { - BuilderTy::FastMathFlagGuard Guard(*Builder); - Builder->setFastMathFlags(I.getFastMathFlags()); - Value *T = Builder->CreateFMul(Opnd1, Opnd1); - Value *R = Builder->CreateFMul(T, Y); + BuilderTy::FastMathFlagGuard Guard(Builder); + Builder.setFastMathFlags(I.getFastMathFlags()); + Value *T = Builder.CreateFMul(Opnd1, Opnd1); + Value *R = Builder.CreateFMul(T, Y); R->takeName(&I); return replaceInstUsesWith(I, R); } @@ -824,7 +824,7 @@ bool InstCombiner::SimplifyDivRemOfSelect(BinaryOperator &I) { *I = SI->getOperand(NonNullOperand); Worklist.Add(&*BBI); } else if (*I == SelectCond) { - *I = Builder->getInt1(NonNullOperand == 1); + *I = Builder.getInt1(NonNullOperand == 1); Worklist.Add(&*BBI); } } @@ -938,20 +938,18 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) { } if (match(Op0, m_One())) { - assert(!I.getType()->getScalarType()->isIntegerTy(1) && - "i1 divide 
not removed?"); + assert(!I.getType()->isIntOrIntVectorTy(1) && "i1 divide not removed?"); if (I.getOpcode() == Instruction::SDiv) { // If Op1 is 0 then it's undefined behaviour, if Op1 is 1 then the // result is one, if Op1 is -1 then the result is minus one, otherwise // it's zero. - Value *Inc = Builder->CreateAdd(Op1, Op0); - Value *Cmp = Builder->CreateICmpULT( - Inc, ConstantInt::get(I.getType(), 3)); + Value *Inc = Builder.CreateAdd(Op1, Op0); + Value *Cmp = Builder.CreateICmpULT(Inc, ConstantInt::get(I.getType(), 3)); return SelectInst::Create(Cmp, Op1, ConstantInt::get(I.getType(), 0)); } else { // If Op1 is 0 then it's undefined behaviour. If Op1 is 1 then the // result is one, otherwise it's zero. - return new ZExtInst(Builder->CreateICmpEQ(Op1, Op0), I.getType()); + return new ZExtInst(Builder.CreateICmpEQ(Op1, Op0), I.getType()); } } @@ -1026,7 +1024,7 @@ static Instruction *foldUDivPow2Cst(Value *Op0, Value *Op1, // X udiv C, where C >= signbit static Instruction *foldUDivNegCst(Value *Op0, Value *Op1, const BinaryOperator &I, InstCombiner &IC) { - Value *ICI = IC.Builder->CreateICmpULT(Op0, cast<ConstantInt>(Op1)); + Value *ICI = IC.Builder.CreateICmpULT(Op0, cast<ConstantInt>(Op1)); return SelectInst::Create(ICI, Constant::getNullValue(I.getType()), ConstantInt::get(I.getType(), 1)); @@ -1045,10 +1043,9 @@ static Instruction *foldUDivShl(Value *Op0, Value *Op1, const BinaryOperator &I, if (!match(ShiftLeft, m_Shl(m_APInt(CI), m_Value(N)))) llvm_unreachable("match should never fail here!"); if (*CI != 1) - N = IC.Builder->CreateAdd(N, - ConstantInt::get(N->getType(), CI->logBase2())); + N = IC.Builder.CreateAdd(N, ConstantInt::get(N->getType(), CI->logBase2())); if (Op1 != ShiftLeft) - N = IC.Builder->CreateZExt(N, Op1->getType()); + N = IC.Builder.CreateZExt(N, Op1->getType()); BinaryOperator *LShr = BinaryOperator::CreateLShr(Op0, N); if (I.isExact()) LShr->setIsExact(); @@ -1134,7 +1131,7 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) { if (ZExtInst *ZOp0 = dyn_cast<ZExtInst>(Op0)) if (Value *ZOp1 = dyn_castZExtVal(Op1, ZOp0->getSrcTy())) return new ZExtInst( - Builder->CreateUDiv(ZOp0->getOperand(0), ZOp1, "div", I.isExact()), + Builder.CreateUDiv(ZOp0->getOperand(0), ZOp1, "div", I.isExact()), I.getType()); // (LHS udiv (select (select (...)))) -> (LHS >> (select (select (...)))) @@ -1209,7 +1206,7 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) { Constant *NarrowDivisor = ConstantExpr::getTrunc(cast<Constant>(Op1), Op0Src->getType()); - Value *NarrowOp = Builder->CreateSDiv(Op0Src, NarrowDivisor); + Value *NarrowOp = Builder.CreateSDiv(Op0Src, NarrowDivisor); return new SExtInst(NarrowOp, Op0->getType()); } } @@ -1217,7 +1214,7 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) { if (Constant *RHS = dyn_cast<Constant>(Op1)) { // X/INT_MIN -> X == INT_MIN if (RHS->isMinSignedValue()) - return new ZExtInst(Builder->CreateICmpEQ(Op0, Op1), I.getType()); + return new ZExtInst(Builder.CreateICmpEQ(Op0, Op1), I.getType()); // -X/C --> X/-C provided the negation doesn't overflow. 
Value *X; @@ -1380,7 +1377,7 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) { // (X/Y) / Z => X / (Y*Z) // if (!isa<Constant>(Y) || !isa<Constant>(Op1)) { - NewInst = Builder->CreateFMul(Y, Op1); + NewInst = Builder.CreateFMul(Y, Op1); if (Instruction *RI = dyn_cast<Instruction>(NewInst)) { FastMathFlags Flags = I.getFastMathFlags(); Flags &= cast<Instruction>(Op0)->getFastMathFlags(); @@ -1392,7 +1389,7 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) { // Z / (X/Y) => Z*Y / X // if (!isa<Constant>(Y) || !isa<Constant>(Op0)) { - NewInst = Builder->CreateFMul(Op0, Y); + NewInst = Builder.CreateFMul(Op0, Y); if (Instruction *RI = dyn_cast<Instruction>(NewInst)) { FastMathFlags Flags = I.getFastMathFlags(); Flags &= cast<Instruction>(Op1)->getFastMathFlags(); @@ -1483,28 +1480,28 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) { // (zext A) urem (zext B) --> zext (A urem B) if (ZExtInst *ZOp0 = dyn_cast<ZExtInst>(Op0)) if (Value *ZOp1 = dyn_castZExtVal(Op1, ZOp0->getSrcTy())) - return new ZExtInst(Builder->CreateURem(ZOp0->getOperand(0), ZOp1), + return new ZExtInst(Builder.CreateURem(ZOp0->getOperand(0), ZOp1), I.getType()); // X urem Y -> X and Y-1, where Y is a power of 2, if (isKnownToBeAPowerOfTwo(Op1, /*OrZero*/ true, 0, &I)) { Constant *N1 = Constant::getAllOnesValue(I.getType()); - Value *Add = Builder->CreateAdd(Op1, N1); + Value *Add = Builder.CreateAdd(Op1, N1); return BinaryOperator::CreateAnd(Op0, Add); } // 1 urem X -> zext(X != 1) if (match(Op0, m_One())) { - Value *Cmp = Builder->CreateICmpNE(Op1, Op0); - Value *Ext = Builder->CreateZExt(Cmp, I.getType()); + Value *Cmp = Builder.CreateICmpNE(Op1, Op0); + Value *Ext = Builder.CreateZExt(Cmp, I.getType()); return replaceInstUsesWith(I, Ext); } // X urem C -> X < C ? X : X - C, where C >= signbit. const APInt *DivisorC; if (match(Op1, m_APInt(DivisorC)) && DivisorC->isNegative()) { - Value *Cmp = Builder->CreateICmpULT(Op0, Op1); - Value *Sub = Builder->CreateSub(Op0, Op1); + Value *Cmp = Builder.CreateICmpULT(Op0, Op1); + Value *Sub = Builder.CreateSub(Op0, Op1); return SelectInst::Create(Cmp, Op0, Sub); } diff --git a/lib/Transforms/InstCombine/InstCombinePHI.cpp b/lib/Transforms/InstCombine/InstCombinePHI.cpp index 5dbf1e85b05b9..0011412c2bf47 100644 --- a/lib/Transforms/InstCombine/InstCombinePHI.cpp +++ b/lib/Transforms/InstCombine/InstCombinePHI.cpp @@ -636,10 +636,10 @@ static bool PHIsEqualValue(PHINode *PN, Value *NonPhiInVal, /// Return an existing non-zero constant if this phi node has one, otherwise /// return constant 1. static ConstantInt *GetAnyNonZeroConstInt(PHINode &PN) { - assert(isa<IntegerType>(PN.getType()) && "Expect only intger type phi"); + assert(isa<IntegerType>(PN.getType()) && "Expect only integer type phi"); for (Value *V : PN.operands()) if (auto *ConstVA = dyn_cast<ConstantInt>(V)) - if (!ConstVA->isZeroValue()) + if (!ConstVA->isZero()) return ConstVA; return ConstantInt::get(cast<IntegerType>(PN.getType()), 1); } @@ -836,12 +836,12 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) { } // Otherwise, do an extract in the predecessor. 
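// visitURem's power-of-two case above, as a quick C++ check (illustrative):
// when the divisor is a power of two, X urem Y equals X & (Y - 1), which is
// what the CreateAdd(Op1, AllOnes) + CreateAnd sequence produces.
#include <cassert>
#include <cstdint>

static uint32_t uremPow2(uint32_t X, uint32_t YPow2) {
  return X & (YPow2 - 1);
}

int main() {
  for (uint32_t X : {0u, 1u, 7u, 100u, 0xFFFFFFFFu})
    for (uint32_t Y : {1u, 2u, 8u, 1024u})
      assert(uremPow2(X, Y) == X % Y);
  return 0;
}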
- Builder->SetInsertPoint(Pred->getTerminator()); + Builder.SetInsertPoint(Pred->getTerminator()); Value *Res = InVal; if (Offset) - Res = Builder->CreateLShr(Res, ConstantInt::get(InVal->getType(), + Res = Builder.CreateLShr(Res, ConstantInt::get(InVal->getType(), Offset), "extract"); - Res = Builder->CreateTrunc(Res, Ty, "extract.t"); + Res = Builder.CreateTrunc(Res, Ty, "extract.t"); PredVal = Res; EltPHI->addIncoming(Res, Pred); diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp index 80c6595904e11..4eebe8255998c 100644 --- a/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -61,12 +61,12 @@ static CmpInst::Predicate getCmpPredicateForMinMax(SelectPatternFlavor SPF, } } -static Value *generateMinMaxSelectPattern(InstCombiner::BuilderTy *Builder, +static Value *generateMinMaxSelectPattern(InstCombiner::BuilderTy &Builder, SelectPatternFlavor SPF, Value *A, Value *B) { CmpInst::Predicate Pred = getCmpPredicateForMinMax(SPF); assert(CmpInst::isIntPredicate(Pred)); - return Builder->CreateSelect(Builder->CreateICmp(Pred, A, B), A, B); + return Builder.CreateSelect(Builder.CreateICmp(Pred, A, B), A, B); } /// We want to turn code that looks like this: @@ -167,8 +167,8 @@ Instruction *InstCombiner::foldSelectOpOp(SelectInst &SI, Instruction *TI, // Fold this by inserting a select from the input values. Value *NewSI = - Builder->CreateSelect(SI.getCondition(), TI->getOperand(0), - FI->getOperand(0), SI.getName() + ".v", &SI); + Builder.CreateSelect(SI.getCondition(), TI->getOperand(0), + FI->getOperand(0), SI.getName() + ".v", &SI); return CastInst::Create(Instruction::CastOps(TI->getOpcode()), NewSI, TI->getType()); } @@ -211,8 +211,8 @@ Instruction *InstCombiner::foldSelectOpOp(SelectInst &SI, Instruction *TI, } // If we reach here, they do have operations in common. - Value *NewSI = Builder->CreateSelect(SI.getCondition(), OtherOpT, OtherOpF, - SI.getName() + ".v", &SI); + Value *NewSI = Builder.CreateSelect(SI.getCondition(), OtherOpT, OtherOpF, + SI.getName() + ".v", &SI); Value *Op0 = MatchIsOpZero ? MatchOp : NewSI; Value *Op1 = MatchIsOpZero ? NewSI : MatchOp; return BinaryOperator::Create(BO->getOpcode(), Op0, Op1); @@ -227,8 +227,8 @@ static bool isSelect01(Constant *C1, Constant *C2) { return false; if (!C1I->isZero() && !C2I->isZero()) // One side must be zero. return false; - return C1I->isOne() || C1I->isAllOnesValue() || - C2I->isOne() || C2I->isAllOnesValue(); + return C1I->isOne() || C1I->isMinusOne() || + C2I->isOne() || C2I->isMinusOne(); } /// Try to fold the select into one of the operands to allow further @@ -254,7 +254,7 @@ Instruction *InstCombiner::foldSelectIntoOp(SelectInst &SI, Value *TrueVal, // Avoid creating select between 2 constants unless it's selecting // between 0, 1 and -1. if (!isa<Constant>(OOp) || isSelect01(C, cast<Constant>(OOp))) { - Value *NewSel = Builder->CreateSelect(SI.getCondition(), OOp, C); + Value *NewSel = Builder.CreateSelect(SI.getCondition(), OOp, C); NewSel->takeName(TVI); BinaryOperator *TVI_BO = cast<BinaryOperator>(TVI); BinaryOperator *BO = BinaryOperator::Create(TVI_BO->getOpcode(), @@ -284,7 +284,7 @@ Instruction *InstCombiner::foldSelectIntoOp(SelectInst &SI, Value *TrueVal, // Avoid creating select between 2 constants unless it's selecting // between 0, 1 and -1. 
if (!isa<Constant>(OOp) || isSelect01(C, cast<Constant>(OOp))) { - Value *NewSel = Builder->CreateSelect(SI.getCondition(), C, OOp); + Value *NewSel = Builder.CreateSelect(SI.getCondition(), C, OOp); NewSel->takeName(FVI); BinaryOperator *FVI_BO = cast<BinaryOperator>(FVI); BinaryOperator *BO = BinaryOperator::Create(FVI_BO->getOpcode(), @@ -315,7 +315,7 @@ Instruction *InstCombiner::foldSelectIntoOp(SelectInst &SI, Value *TrueVal, /// 3. The magnitude of C2 and C1 are flipped static Value *foldSelectICmpAndOr(const SelectInst &SI, Value *TrueVal, Value *FalseVal, - InstCombiner::BuilderTy *Builder) { + InstCombiner::BuilderTy &Builder) { const ICmpInst *IC = dyn_cast<ICmpInst>(SI.getCondition()); if (!IC || !SI.getType()->isIntegerTy()) return nullptr; @@ -383,22 +383,22 @@ static Value *foldSelectICmpAndOr(const SelectInst &SI, Value *TrueVal, if (NeedAnd) { // Insert the AND instruction on the input to the truncate. APInt C1 = APInt::getOneBitSet(V->getType()->getScalarSizeInBits(), C1Log); - V = Builder->CreateAnd(V, ConstantInt::get(V->getType(), C1)); + V = Builder.CreateAnd(V, ConstantInt::get(V->getType(), C1)); } if (C2Log > C1Log) { - V = Builder->CreateZExtOrTrunc(V, Y->getType()); - V = Builder->CreateShl(V, C2Log - C1Log); + V = Builder.CreateZExtOrTrunc(V, Y->getType()); + V = Builder.CreateShl(V, C2Log - C1Log); } else if (C1Log > C2Log) { - V = Builder->CreateLShr(V, C1Log - C2Log); - V = Builder->CreateZExtOrTrunc(V, Y->getType()); + V = Builder.CreateLShr(V, C1Log - C2Log); + V = Builder.CreateZExtOrTrunc(V, Y->getType()); } else - V = Builder->CreateZExtOrTrunc(V, Y->getType()); + V = Builder.CreateZExtOrTrunc(V, Y->getType()); if (NeedXor) - V = Builder->CreateXor(V, *C2); + V = Builder.CreateXor(V, *C2); - return Builder->CreateOr(V, Y); + return Builder.CreateOr(V, Y); } /// Attempt to fold a cttz/ctlz followed by a icmp plus select into a single @@ -414,7 +414,7 @@ static Value *foldSelectICmpAndOr(const SelectInst &SI, Value *TrueVal, /// into: /// %0 = tail call i32 @llvm.cttz.i32(i32 %x, i1 false) static Value *foldSelectCttzCtlz(ICmpInst *ICI, Value *TrueVal, Value *FalseVal, - InstCombiner::BuilderTy *Builder) { + InstCombiner::BuilderTy &Builder) { ICmpInst::Predicate Pred = ICI->getPredicate(); Value *CmpLHS = ICI->getOperand(0); Value *CmpRHS = ICI->getOperand(1); @@ -449,8 +449,8 @@ static Value *foldSelectCttzCtlz(ICmpInst *ICI, Value *TrueVal, Value *FalseVal, IntrinsicInst *NewI = cast<IntrinsicInst>(II->clone()); Type *Ty = NewI->getArgOperand(1)->getType(); NewI->setArgOperand(1, Constant::getNullValue(Ty)); - Builder->Insert(NewI); - return Builder->CreateZExtOrTrunc(NewI, ValueOnZero->getType()); + Builder.Insert(NewI); + return Builder.CreateZExtOrTrunc(NewI, ValueOnZero->getType()); } return nullptr; @@ -597,7 +597,7 @@ canonicalizeMinMaxWithConstant(SelectInst &Sel, ICmpInst &Cmp, /// Visit a SelectInst that has an ICmpInst as its first operand. 
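// The bit-relocation idea behind foldSelectICmpAndOr above, sketched with
// hypothetical constants: "(X & 1) ? (Y | 64) : Y" tests bit 0 and sets
// bit 6, so the select collapses to shifting the tested bit into place and
// ORing it in -- no compare, no branch.
#include <cassert>
#include <cstdint>

static uint32_t selOr(uint32_t X, uint32_t Y) {
  return (X & 1u) ? (Y | 64u) : Y;
}

static uint32_t shiftOr(uint32_t X, uint32_t Y) {
  return ((X & 1u) << 6) | Y;
}

int main() {
  for (uint32_t X : {0u, 1u, 2u, 3u})
    for (uint32_t Y : {0u, 5u, 64u, 0xFFu})
      assert(selOr(X, Y) == shiftOr(X, Y));
  return 0;
}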
Instruction *InstCombiner::foldSelectInstWithICmp(SelectInst &SI, ICmpInst *ICI) { - if (Instruction *NewSel = canonicalizeMinMaxWithConstant(SI, *ICI, *Builder)) + if (Instruction *NewSel = canonicalizeMinMaxWithConstant(SI, *ICI, Builder)) return NewSel; bool Changed = adjustMinMax(SI, *ICI); @@ -617,23 +617,23 @@ Instruction *InstCombiner::foldSelectInstWithICmp(SelectInst &SI, if (TrueVal->getType() == Ty) { if (ConstantInt *Cmp = dyn_cast<ConstantInt>(CmpRHS)) { ConstantInt *C1 = nullptr, *C2 = nullptr; - if (Pred == ICmpInst::ICMP_SGT && Cmp->isAllOnesValue()) { + if (Pred == ICmpInst::ICMP_SGT && Cmp->isMinusOne()) { C1 = dyn_cast<ConstantInt>(TrueVal); C2 = dyn_cast<ConstantInt>(FalseVal); - } else if (Pred == ICmpInst::ICMP_SLT && Cmp->isNullValue()) { + } else if (Pred == ICmpInst::ICMP_SLT && Cmp->isZero()) { C1 = dyn_cast<ConstantInt>(FalseVal); C2 = dyn_cast<ConstantInt>(TrueVal); } if (C1 && C2) { // This shift results in either -1 or 0. - Value *AShr = Builder->CreateAShr(CmpLHS, Ty->getBitWidth()-1); + Value *AShr = Builder.CreateAShr(CmpLHS, Ty->getBitWidth() - 1); // Check if we can express the operation with a single or. - if (C2->isAllOnesValue()) - return replaceInstUsesWith(SI, Builder->CreateOr(AShr, C1)); + if (C2->isMinusOne()) + return replaceInstUsesWith(SI, Builder.CreateOr(AShr, C1)); - Value *And = Builder->CreateAnd(AShr, C2->getValue()-C1->getValue()); - return replaceInstUsesWith(SI, Builder->CreateAdd(And, C1)); + Value *And = Builder.CreateAnd(AShr, C2->getValue() - C1->getValue()); + return replaceInstUsesWith(SI, Builder.CreateAdd(And, C1)); } } } @@ -684,19 +684,19 @@ Instruction *InstCombiner::foldSelectInstWithICmp(SelectInst &SI, // (X & Y) == 0 ? X : X ^ Y --> X & ~Y if (TrueWhenUnset && TrueVal == X && match(FalseVal, m_Xor(m_Specific(X), m_APInt(C))) && *Y == *C) - V = Builder->CreateAnd(X, ~(*Y)); + V = Builder.CreateAnd(X, ~(*Y)); // (X & Y) != 0 ? X ^ Y : X --> X & ~Y else if (!TrueWhenUnset && FalseVal == X && match(TrueVal, m_Xor(m_Specific(X), m_APInt(C))) && *Y == *C) - V = Builder->CreateAnd(X, ~(*Y)); + V = Builder.CreateAnd(X, ~(*Y)); // (X & Y) == 0 ? X ^ Y : X --> X | Y else if (TrueWhenUnset && FalseVal == X && match(TrueVal, m_Xor(m_Specific(X), m_APInt(C))) && *Y == *C) - V = Builder->CreateOr(X, *Y); + V = Builder.CreateOr(X, *Y); // (X & Y) != 0 ? 
X : X ^ Y --> X | Y else if (!TrueWhenUnset && TrueVal == X && match(FalseVal, m_Xor(m_Specific(X), m_APInt(C))) && *Y == *C) - V = Builder->CreateOr(X, *Y); + V = Builder.CreateOr(X, *Y); if (V) return replaceInstUsesWith(SI, V); @@ -809,8 +809,8 @@ Instruction *InstCombiner::foldSPFofSPF(Instruction *Inner, (SPF1 == SPF_NABS && SPF2 == SPF_ABS)) { SelectInst *SI = cast<SelectInst>(Inner); Value *NewSI = - Builder->CreateSelect(SI->getCondition(), SI->getFalseValue(), - SI->getTrueValue(), SI->getName(), SI); + Builder.CreateSelect(SI->getCondition(), SI->getFalseValue(), + SI->getTrueValue(), SI->getName(), SI); return replaceInstUsesWith(Outer, NewSI); } @@ -848,15 +848,15 @@ Instruction *InstCombiner::foldSPFofSPF(Instruction *Inner, IsFreeOrProfitableToInvert(B, NotB, ElidesXor) && IsFreeOrProfitableToInvert(C, NotC, ElidesXor) && ElidesXor) { if (!NotA) - NotA = Builder->CreateNot(A); + NotA = Builder.CreateNot(A); if (!NotB) - NotB = Builder->CreateNot(B); + NotB = Builder.CreateNot(B); if (!NotC) - NotC = Builder->CreateNot(C); + NotC = Builder.CreateNot(C); Value *NewInner = generateMinMaxSelectPattern( Builder, getInverseMinMaxSelectPattern(SPF1), NotA, NotB); - Value *NewOuter = Builder->CreateNot(generateMinMaxSelectPattern( + Value *NewOuter = Builder.CreateNot(generateMinMaxSelectPattern( Builder, getInverseMinMaxSelectPattern(SPF2), NewInner, NotC)); return replaceInstUsesWith(Outer, NewOuter); } @@ -868,9 +868,9 @@ Instruction *InstCombiner::foldSPFofSPF(Instruction *Inner, /// icmp instruction with zero, and we have an 'and' with the non-constant value /// and a power of two we can turn the select into a shift on the result of the /// 'and'. -static Value *foldSelectICmpAnd(const SelectInst &SI, ConstantInt *TrueVal, - ConstantInt *FalseVal, - InstCombiner::BuilderTy *Builder) { +static Value *foldSelectICmpAnd(const SelectInst &SI, APInt TrueVal, + APInt FalseVal, + InstCombiner::BuilderTy &Builder) { const ICmpInst *IC = dyn_cast<ICmpInst>(SI.getCondition()); if (!IC || !IC->isEquality() || !SI.getType()->isIntegerTy()) return nullptr; @@ -886,56 +886,53 @@ static Value *foldSelectICmpAnd(const SelectInst &SI, ConstantInt *TrueVal, // If both select arms are non-zero see if we have a select of the form // 'x ? 2^n + C : C'. Then we can offset both arms by C, use the logic // for 'x ? 2^n : 0' and fix the thing up at the end. - ConstantInt *Offset = nullptr; - if (!TrueVal->isZero() && !FalseVal->isZero()) { - if ((TrueVal->getValue() - FalseVal->getValue()).isPowerOf2()) + APInt Offset(TrueVal.getBitWidth(), 0); + if (!TrueVal.isNullValue() && !FalseVal.isNullValue()) { + if ((TrueVal - FalseVal).isPowerOf2()) Offset = FalseVal; - else if ((FalseVal->getValue() - TrueVal->getValue()).isPowerOf2()) + else if ((FalseVal - TrueVal).isPowerOf2()) Offset = TrueVal; else return nullptr; // Adjust TrueVal and FalseVal to the offset. - TrueVal = ConstantInt::get(Builder->getContext(), - TrueVal->getValue() - Offset->getValue()); - FalseVal = ConstantInt::get(Builder->getContext(), - FalseVal->getValue() - Offset->getValue()); + TrueVal -= Offset; + FalseVal -= Offset; } // Make sure the mask in the 'and' and one of the select arms is a power of 2. if (!AndRHS->getValue().isPowerOf2() || - (!TrueVal->getValue().isPowerOf2() && - !FalseVal->getValue().isPowerOf2())) + (!TrueVal.isPowerOf2() && !FalseVal.isPowerOf2())) return nullptr; // Determine which shift is needed to transform result of the 'and' into the // desired result. - ConstantInt *ValC = !TrueVal->isZero() ? 
TrueVal : FalseVal; - unsigned ValZeros = ValC->getValue().logBase2(); + const APInt &ValC = !TrueVal.isNullValue() ? TrueVal : FalseVal; + unsigned ValZeros = ValC.logBase2(); unsigned AndZeros = AndRHS->getValue().logBase2(); // If types don't match we can still convert the select by introducing a zext // or a trunc of the 'and'. The trunc case requires that all of the truncated // bits are zero, we can figure that out by looking at the 'and' mask. - if (AndZeros >= ValC->getBitWidth()) + if (AndZeros >= ValC.getBitWidth()) return nullptr; - Value *V = Builder->CreateZExtOrTrunc(LHS, SI.getType()); + Value *V = Builder.CreateZExtOrTrunc(LHS, SI.getType()); if (ValZeros > AndZeros) - V = Builder->CreateShl(V, ValZeros - AndZeros); + V = Builder.CreateShl(V, ValZeros - AndZeros); else if (ValZeros < AndZeros) - V = Builder->CreateLShr(V, AndZeros - ValZeros); + V = Builder.CreateLShr(V, AndZeros - ValZeros); // Okay, now we know that everything is set up, we just don't know whether we // have a icmp_ne or icmp_eq and whether the true or false val is the zero. - bool ShouldNotVal = !TrueVal->isZero(); + bool ShouldNotVal = !TrueVal.isNullValue(); ShouldNotVal ^= IC->getPredicate() == ICmpInst::ICMP_NE; if (ShouldNotVal) - V = Builder->CreateXor(V, ValC); + V = Builder.CreateXor(V, ValC); // Apply an offset if needed. - if (Offset) - V = Builder->CreateAdd(V, Offset); + if (!Offset.isNullValue()) + V = Builder.CreateAdd(V, ConstantInt::get(V->getType(), Offset)); return V; } @@ -1024,7 +1021,7 @@ Instruction *InstCombiner::foldSelectExtConst(SelectInst &Sel) { // TODO: Handle larger types? That requires adjusting FoldOpIntoSelect too. Value *X = ExtInst->getOperand(0); Type *SmallType = X->getType(); - if (!SmallType->getScalarType()->isIntegerTy(1)) + if (!SmallType->isIntOrIntVectorTy(1)) return nullptr; Constant *C; @@ -1045,7 +1042,7 @@ Instruction *InstCombiner::foldSelectExtConst(SelectInst &Sel) { // select Cond, (ext X), C --> ext(select Cond, X, C') // select Cond, C, (ext X) --> ext(select Cond, C', X) - Value *NewSel = Builder->CreateSelect(Cond, X, TruncCVal, "narrow", &Sel); + Value *NewSel = Builder.CreateSelect(Cond, X, TruncCVal, "narrow", &Sel); return CastInst::Create(Instruction::CastOps(ExtOpcode), NewSel, SelType); } @@ -1184,7 +1181,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { return &SI; } - if (SelType->getScalarType()->isIntegerTy(1) && + if (SelType->isIntOrIntVectorTy(1) && TrueVal->getType() == CondVal->getType()) { if (match(TrueVal, m_One())) { // Change: A = select B, true, C --> A = or B, C @@ -1192,7 +1189,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { } if (match(TrueVal, m_Zero())) { // Change: A = select B, false, C --> A = and !B, C - Value *NotCond = Builder->CreateNot(CondVal, "not." + CondVal->getName()); + Value *NotCond = Builder.CreateNot(CondVal, "not." + CondVal->getName()); return BinaryOperator::CreateAnd(NotCond, FalseVal); } if (match(FalseVal, m_Zero())) { @@ -1201,7 +1198,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { } if (match(FalseVal, m_One())) { // Change: A = select B, C, true --> A = or !B, C - Value *NotCond = Builder->CreateNot(CondVal, "not." + CondVal->getName()); + Value *NotCond = Builder.CreateNot(CondVal, "not." 
+ CondVal->getName()); return BinaryOperator::CreateOr(NotCond, TrueVal); } @@ -1226,7 +1223,8 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { // select i1 %c, <2 x i8> <1, 1>, <2 x i8> <0, 0> // because that may need 3 instructions to splat the condition value: // extend, insertelement, shufflevector. - if (CondVal->getType()->isVectorTy() == SelType->isVectorTy()) { + if (SelType->isIntOrIntVectorTy() && + CondVal->getType()->isVectorTy() == SelType->isVectorTy()) { // select C, 1, 0 -> zext C to int if (match(TrueVal, m_One()) && match(FalseVal, m_Zero())) return new ZExtInst(CondVal, SelType); @@ -1237,20 +1235,21 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { // select C, 0, 1 -> zext !C to int if (match(TrueVal, m_Zero()) && match(FalseVal, m_One())) { - Value *NotCond = Builder->CreateNot(CondVal, "not." + CondVal->getName()); + Value *NotCond = Builder.CreateNot(CondVal, "not." + CondVal->getName()); return new ZExtInst(NotCond, SelType); } // select C, 0, -1 -> sext !C to int if (match(TrueVal, m_Zero()) && match(FalseVal, m_AllOnes())) { - Value *NotCond = Builder->CreateNot(CondVal, "not." + CondVal->getName()); + Value *NotCond = Builder.CreateNot(CondVal, "not." + CondVal->getName()); return new SExtInst(NotCond, SelType); } } if (ConstantInt *TrueValC = dyn_cast<ConstantInt>(TrueVal)) if (ConstantInt *FalseValC = dyn_cast<ConstantInt>(FalseVal)) - if (Value *V = foldSelectICmpAnd(SI, TrueValC, FalseValC, Builder)) + if (Value *V = foldSelectICmpAnd(SI, TrueValC->getValue(), + FalseValC->getValue(), Builder)) return replaceInstUsesWith(SI, V); // See if we are selecting two values based on a comparison of the two values. @@ -1288,10 +1287,10 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { // (X ugt Y) ? X : Y -> (X ole Y) ? Y : X if (FCI->hasOneUse() && FCmpInst::isUnordered(FCI->getPredicate())) { FCmpInst::Predicate InvPred = FCI->getInversePredicate(); - IRBuilder<>::FastMathFlagGuard FMFG(*Builder); - Builder->setFastMathFlags(FCI->getFastMathFlags()); - Value *NewCond = Builder->CreateFCmp(InvPred, TrueVal, FalseVal, - FCI->getName() + ".inv"); + IRBuilder<>::FastMathFlagGuard FMFG(Builder); + Builder.setFastMathFlags(FCI->getFastMathFlags()); + Value *NewCond = Builder.CreateFCmp(InvPred, TrueVal, FalseVal, + FCI->getName() + ".inv"); return SelectInst::Create(NewCond, FalseVal, TrueVal, SI.getName() + ".p"); @@ -1331,10 +1330,10 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { // (X ugt Y) ? X : Y -> (X ole Y) ? 
X : Y if (FCI->hasOneUse() && FCmpInst::isUnordered(FCI->getPredicate())) { FCmpInst::Predicate InvPred = FCI->getInversePredicate(); - IRBuilder<>::FastMathFlagGuard FMFG(*Builder); - Builder->setFastMathFlags(FCI->getFastMathFlags()); - Value *NewCond = Builder->CreateFCmp(InvPred, FalseVal, TrueVal, - FCI->getName() + ".inv"); + IRBuilder<>::FastMathFlagGuard FMFG(Builder); + Builder.setFastMathFlags(FCI->getFastMathFlags()); + Value *NewCond = Builder.CreateFCmp(InvPred, FalseVal, TrueVal, + FCI->getName() + ".inv"); return SelectInst::Create(NewCond, FalseVal, TrueVal, SI.getName() + ".p"); @@ -1350,7 +1349,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { if (Instruction *Result = foldSelectInstWithICmp(SI, ICI)) return Result; - if (Instruction *Add = foldAddSubSelect(SI, *Builder)) + if (Instruction *Add = foldAddSubSelect(SI, Builder)) return Add; // Turn (select C, (op X, Y), (op X, Z)) -> (op X, (select C, Y, Z)) @@ -1381,16 +1380,16 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { Value *Cmp; if (CmpInst::isIntPredicate(Pred)) { - Cmp = Builder->CreateICmp(Pred, LHS, RHS); + Cmp = Builder.CreateICmp(Pred, LHS, RHS); } else { - IRBuilder<>::FastMathFlagGuard FMFG(*Builder); + IRBuilder<>::FastMathFlagGuard FMFG(Builder); auto FMF = cast<FPMathOperator>(SI.getCondition())->getFastMathFlags(); - Builder->setFastMathFlags(FMF); - Cmp = Builder->CreateFCmp(Pred, LHS, RHS); + Builder.setFastMathFlags(FMF); + Cmp = Builder.CreateFCmp(Pred, LHS, RHS); } - Value *NewSI = Builder->CreateCast( - CastOp, Builder->CreateSelect(Cmp, LHS, RHS, SI.getName(), &SI), + Value *NewSI = Builder.CreateCast( + CastOp, Builder.CreateSelect(Cmp, LHS, RHS, SI.getName(), &SI), SelType); return replaceInstUsesWith(SI, NewSI); } @@ -1425,13 +1424,12 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { (SI.hasOneUse() && match(*SI.user_begin(), m_Not(m_Value()))); if (NumberOfNots >= 2) { - Value *NewLHS = Builder->CreateNot(LHS); - Value *NewRHS = Builder->CreateNot(RHS); - Value *NewCmp = SPF == SPF_SMAX - ? Builder->CreateICmpSLT(NewLHS, NewRHS) - : Builder->CreateICmpULT(NewLHS, NewRHS); + Value *NewLHS = Builder.CreateNot(LHS); + Value *NewRHS = Builder.CreateNot(RHS); + Value *NewCmp = SPF == SPF_SMAX ? Builder.CreateICmpSLT(NewLHS, NewRHS) + : Builder.CreateICmpULT(NewLHS, NewRHS); Value *NewSI = - Builder->CreateNot(Builder->CreateSelect(NewCmp, NewLHS, NewRHS)); + Builder.CreateNot(Builder.CreateSelect(NewCmp, NewLHS, NewRHS)); return replaceInstUsesWith(SI, NewSI); } } @@ -1461,7 +1459,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { // We choose this as normal form to enable folding on the And and shortening // paths for the values (this helps GetUnderlyingObjects() for example). 
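// The identity behind the NumberOfNots fold above, checked in C++
// (illustrative): since ~X = -X - 1 reverses order, smax(A, B) equals
// ~smin(~A, ~B), so when the operands' inversions already exist it is
// cheaper to take the min of the complements and invert once.
#include <algorithm>
#include <cassert>

int main() {
  for (int A : {-7, -1, 0, 3, 42})
    for (int B : {-9, 0, 5})
      assert(std::max(A, B) == ~std::min(~A, ~B));
  return 0;
}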
if (TrueSI->getFalseValue() == FalseVal && TrueSI->hasOneUse()) { - Value *And = Builder->CreateAnd(CondVal, TrueSI->getCondition()); + Value *And = Builder.CreateAnd(CondVal, TrueSI->getCondition()); SI.setOperand(0, And); SI.setOperand(1, TrueSI->getTrueValue()); return &SI; @@ -1479,7 +1477,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { } // select(C0, a, select(C1, a, b)) -> select(C0|C1, a, b) if (FalseSI->getTrueValue() == TrueVal && FalseSI->hasOneUse()) { - Value *Or = Builder->CreateOr(CondVal, FalseSI->getCondition()); + Value *Or = Builder.CreateOr(CondVal, FalseSI->getCondition()); SI.setOperand(0, Or); SI.setOperand(2, FalseSI->getFalseValue()); return &SI; @@ -1541,7 +1539,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { return replaceInstUsesWith(SI, FalseVal); } - if (Instruction *BitCastSel = foldSelectCmpBitcasts(SI, *Builder)) + if (Instruction *BitCastSel = foldSelectCmpBitcasts(SI, Builder)) return BitCastSel; return nullptr; diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp index 1bb1a85367d1b..7ed141c7fd79d 100644 --- a/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -47,7 +47,7 @@ Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) { if (isKnownNonNegative(A, DL, 0, &AC, &I, &DT) && isKnownNonNegative(C, DL, 0, &AC, &I, &DT)) return BinaryOperator::Create( - I.getOpcode(), Builder->CreateBinOp(I.getOpcode(), Op0, C), A); + I.getOpcode(), Builder.CreateBinOp(I.getOpcode(), Op0, C), A); // X shift (A srem B) -> X shift (A and B-1) iff B is a power of 2. // Because shifts by negative values (which could occur if A were negative) @@ -56,8 +56,8 @@ Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) { if (Op1->hasOneUse() && match(Op1, m_SRem(m_Value(A), m_Power2(B)))) { // FIXME: Should this get moved into SimplifyDemandedBits by saying we don't // demand the sign bit (and many others) here?? - Value *Rem = Builder->CreateAnd(A, ConstantInt::get(I.getType(), *B-1), - Op1->getName()); + Value *Rem = Builder.CreateAnd(A, ConstantInt::get(I.getType(), *B - 1), + Op1->getName()); I.setOperand(1, Rem); return &I; } @@ -260,9 +260,9 @@ static Value *getShiftedValue(Value *V, unsigned NumBits, bool isLeftShift, // We can always evaluate constants shifted. if (Constant *C = dyn_cast<Constant>(V)) { if (isLeftShift) - V = IC.Builder->CreateShl(C, NumBits); + V = IC.Builder.CreateShl(C, NumBits); else - V = IC.Builder->CreateLShr(C, NumBits); + V = IC.Builder.CreateLShr(C, NumBits); // If we got a constantexpr back, try to simplify it with TD info. if (auto *C = dyn_cast<Constant>(V)) if (auto *FoldedC = @@ -289,7 +289,7 @@ static Value *getShiftedValue(Value *V, unsigned NumBits, bool isLeftShift, case Instruction::Shl: case Instruction::LShr: return foldShiftedShift(cast<BinaryOperator>(I), NumBits, isLeftShift, - *(IC.Builder)); + IC.Builder); case Instruction::Select: I->setOperand( @@ -353,7 +353,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1, Constant *ShAmt = ConstantExpr::getZExt(cast<Constant>(Op1), TrOp->getType()); // (shift2 (shift1 & 0x00FF), c2) - Value *NSh = Builder->CreateBinOp(I.getOpcode(), TrOp, ShAmt,I.getName()); + Value *NSh = Builder.CreateBinOp(I.getOpcode(), TrOp, ShAmt, I.getName()); // For logical shifts, the truncation has the effect of making the high // part of the register be zeros. 
Emulate this by inserting an AND to @@ -375,9 +375,9 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1, } // shift1 & 0x00FF - Value *And = Builder->CreateAnd(NSh, - ConstantInt::get(I.getContext(), MaskV), - TI->getName()); + Value *And = Builder.CreateAnd(NSh, + ConstantInt::get(I.getContext(), MaskV), + TI->getName()); // Return the value truncated to the interesting size. return new TruncInst(And, I.getType()); @@ -401,10 +401,10 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1, match(Op0BO->getOperand(1), m_Shr(m_Value(V1), m_Specific(Op1)))) { Value *YS = // (Y << C) - Builder->CreateShl(Op0BO->getOperand(0), Op1, Op0BO->getName()); + Builder.CreateShl(Op0BO->getOperand(0), Op1, Op0BO->getName()); // (X + (Y << C)) - Value *X = Builder->CreateBinOp(Op0BO->getOpcode(), YS, V1, - Op0BO->getOperand(1)->getName()); + Value *X = Builder.CreateBinOp(Op0BO->getOpcode(), YS, V1, + Op0BO->getOperand(1)->getName()); unsigned Op1Val = Op1C->getLimitedValue(TypeBits); APInt Bits = APInt::getHighBitsSet(TypeBits, TypeBits - Op1Val); @@ -421,11 +421,10 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1, m_And(m_OneUse(m_Shr(m_Value(V1), m_Specific(Op1))), m_ConstantInt(CC)))) { Value *YS = // (Y << C) - Builder->CreateShl(Op0BO->getOperand(0), Op1, - Op0BO->getName()); + Builder.CreateShl(Op0BO->getOperand(0), Op1, Op0BO->getName()); // X & (CC << C) - Value *XM = Builder->CreateAnd(V1, ConstantExpr::getShl(CC, Op1), - V1->getName()+".mask"); + Value *XM = Builder.CreateAnd(V1, ConstantExpr::getShl(CC, Op1), + V1->getName()+".mask"); return BinaryOperator::Create(Op0BO->getOpcode(), YS, XM); } LLVM_FALLTHROUGH; @@ -437,10 +436,10 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1, match(Op0BO->getOperand(0), m_Shr(m_Value(V1), m_Specific(Op1)))) { Value *YS = // (Y << C) - Builder->CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName()); + Builder.CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName()); // (X + (Y << C)) - Value *X = Builder->CreateBinOp(Op0BO->getOpcode(), V1, YS, - Op0BO->getOperand(0)->getName()); + Value *X = Builder.CreateBinOp(Op0BO->getOpcode(), V1, YS, + Op0BO->getOperand(0)->getName()); unsigned Op1Val = Op1C->getLimitedValue(TypeBits); APInt Bits = APInt::getHighBitsSet(TypeBits, TypeBits - Op1Val); @@ -456,10 +455,10 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1, m_And(m_OneUse(m_Shr(m_Value(V1), m_Value(V2))), m_ConstantInt(CC))) && V2 == Op1) { Value *YS = // (Y << C) - Builder->CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName()); + Builder.CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName()); // X & (CC << C) - Value *XM = Builder->CreateAnd(V1, ConstantExpr::getShl(CC, Op1), - V1->getName()+".mask"); + Value *XM = Builder.CreateAnd(V1, ConstantExpr::getShl(CC, Op1), + V1->getName()+".mask"); return BinaryOperator::Create(Op0BO->getOpcode(), XM, YS); } @@ -502,7 +501,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1, Constant *NewRHS = ConstantExpr::get(I.getOpcode(), Op0C, Op1); Value *NewShift = - Builder->CreateBinOp(I.getOpcode(), Op0BO->getOperand(0), Op1); + Builder.CreateBinOp(I.getOpcode(), Op0BO->getOperand(0), Op1); NewShift->takeName(Op0BO); return BinaryOperator::Create(Op0BO->getOpcode(), NewShift, @@ -541,7 +540,7 @@ Instruction *InstCombiner::visitShl(BinaryOperator &I) { unsigned SrcWidth = X->getType()->getScalarSizeInBits(); if (ShAmt < SrcWidth && MaskedValueIsZero(X, 
APInt::getHighBitsSet(SrcWidth, ShAmt), 0, &I)) - return new ZExtInst(Builder->CreateShl(X, ShAmt), Ty); + return new ZExtInst(Builder.CreateShl(X, ShAmt), Ty); } // (X >>u C) << C --> X & (-1 << C) @@ -641,7 +640,7 @@ Instruction *InstCombiner::visitLShr(BinaryOperator &I) { // ctpop.i32(x)>>5 --> zext(x == -1) bool IsPop = II->getIntrinsicID() == Intrinsic::ctpop; Constant *RHS = ConstantInt::getSigned(Ty, IsPop ? -1 : 0); - Value *Cmp = Builder->CreateICmpEQ(II->getArgOperand(0), RHS); + Value *Cmp = Builder.CreateICmpEQ(II->getArgOperand(0), RHS); return new ZExtInst(Cmp, Ty); } @@ -658,7 +657,7 @@ Instruction *InstCombiner::visitLShr(BinaryOperator &I) { return NewLShr; } // (X << C1) >>u C2 --> (X >>u (C2 - C1)) & (-1 >> C2) - Value *NewLShr = Builder->CreateLShr(X, ShiftDiff, "", I.isExact()); + Value *NewLShr = Builder.CreateLShr(X, ShiftDiff, "", I.isExact()); APInt Mask(APInt::getLowBitsSet(BitWidth, BitWidth - ShAmt)); return BinaryOperator::CreateAnd(NewLShr, ConstantInt::get(Ty, Mask)); } @@ -671,7 +670,7 @@ Instruction *InstCombiner::visitLShr(BinaryOperator &I) { return NewShl; } // (X << C1) >>u C2 --> X << (C1 - C2) & (-1 >> C2) - Value *NewShl = Builder->CreateShl(X, ShiftDiff); + Value *NewShl = Builder.CreateShl(X, ShiftDiff); APInt Mask(APInt::getLowBitsSet(BitWidth, BitWidth - ShAmt)); return BinaryOperator::CreateAnd(NewShl, ConstantInt::get(Ty, Mask)); } @@ -692,7 +691,7 @@ Instruction *InstCombiner::visitLShr(BinaryOperator &I) { // lshr (sext iM X to iN), N-1 --> zext (lshr X, M-1) to iN if (Op0->hasOneUse()) { - Value *NewLShr = Builder->CreateLShr(X, SrcTyBitWidth - 1); + Value *NewLShr = Builder.CreateLShr(X, SrcTyBitWidth - 1); return new ZExtInst(NewLShr, Ty); } } @@ -701,7 +700,7 @@ Instruction *InstCombiner::visitLShr(BinaryOperator &I) { if (ShAmt == BitWidth - SrcTyBitWidth && Op0->hasOneUse()) { // The new shift amount can't be more than the narrow source type. unsigned NewShAmt = std::min(ShAmt, SrcTyBitWidth - 1); - Value *AShr = Builder->CreateAShr(X, NewShAmt); + Value *AShr = Builder.CreateAShr(X, NewShAmt); return new ZExtInst(AShr, Ty); } } diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index 03841164b58de..5689c06042391 100644 --- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -548,7 +548,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) { // X % -1 demands all the bits because we don't want to introduce // INT_MIN % -1 (== undef) by accident. 
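An aside on the srem special case above (illustration only, not from the patch): -1 is the one divisor where narrowing the demanded bits could manufacture undefined behavior, because the quotient paired with INT_MIN % -1 is unrepresentable.

// Computing in a wider type shows the out-of-range quotient without UB.
#include <climits>
#include <cstdio>

int main() {
  long long X = INT_MIN, D = -1;
  std::printf("INT_MIN / -1 = %lld, which does not fit in 32 bits,\n"
              "so INT_MIN %% -1 is undefined for i32 srem\n", X / D);
  return 0;
}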
- if (Rem->isAllOnesValue()) + if (Rem->isMinusOne()) break; APInt RA = Rem->getValue().abs(); if (RA.isPowerOf2()) { @@ -1627,10 +1627,10 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, for (unsigned I = 0, E = II->getNumArgOperands(); I != E; ++I) Args.push_back(II->getArgOperand(I)); - IRBuilderBase::InsertPointGuard Guard(*Builder); - Builder->SetInsertPoint(II); + IRBuilderBase::InsertPointGuard Guard(Builder); + Builder.SetInsertPoint(II); - CallInst *NewCall = Builder->CreateCall(NewIntrin, Args); + CallInst *NewCall = Builder.CreateCall(NewIntrin, Args); NewCall->takeName(II); NewCall->copyMetadata(*II); @@ -1657,15 +1657,15 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, if (NewNumElts == 1) { - return Builder->CreateInsertElement(UndefValue::get(V->getType()), - NewCall, static_cast<uint64_t>(0)); + return Builder.CreateInsertElement(UndefValue::get(V->getType()), + NewCall, static_cast<uint64_t>(0)); } SmallVector<uint32_t, 8> EltMask; for (unsigned I = 0; I < VWidth; ++I) EltMask.push_back(I); - Value *Shuffle = Builder->CreateShuffleVector( + Value *Shuffle = Builder.CreateShuffleVector( NewCall, UndefValue::get(NewTy), EltMask); MadeChange = true; diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index 926e46655eb86..dd71a31b644b3 100644 --- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -204,11 +204,11 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { if (I->hasOneUse() && cheapToScalarize(BO, isa<ConstantInt>(EI.getOperand(1)))) { Value *newEI0 = - Builder->CreateExtractElement(BO->getOperand(0), EI.getOperand(1), - EI.getName()+".lhs"); + Builder.CreateExtractElement(BO->getOperand(0), EI.getOperand(1), + EI.getName()+".lhs"); Value *newEI1 = - Builder->CreateExtractElement(BO->getOperand(1), EI.getOperand(1), - EI.getName()+".rhs"); + Builder.CreateExtractElement(BO->getOperand(1), EI.getOperand(1), + EI.getName()+".rhs"); return BinaryOperator::CreateWithCopiedFlags(BO->getOpcode(), newEI0, newEI1, BO); } @@ -250,8 +250,8 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { // Bitcasts can change the number of vector elements, and they cost // nothing. 
if (CI->hasOneUse() && (CI->getOpcode() != Instruction::BitCast)) { - Value *EE = Builder->CreateExtractElement(CI->getOperand(0), - EI.getIndexOperand()); + Value *EE = Builder.CreateExtractElement(CI->getOperand(0), + EI.getIndexOperand()); Worklist.AddValue(EE); return CastInst::Create(CI->getOpcode(), EE, EI.getType()); } @@ -269,20 +269,20 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { Value *Cond = SI->getCondition(); if (Cond->getType()->isVectorTy()) { - Cond = Builder->CreateExtractElement(Cond, - EI.getIndexOperand(), - Cond->getName() + ".elt"); + Cond = Builder.CreateExtractElement(Cond, + EI.getIndexOperand(), + Cond->getName() + ".elt"); } Value *V1Elem - = Builder->CreateExtractElement(TrueVal, - EI.getIndexOperand(), - TrueVal->getName() + ".elt"); + = Builder.CreateExtractElement(TrueVal, + EI.getIndexOperand(), + TrueVal->getName() + ".elt"); Value *V2Elem - = Builder->CreateExtractElement(FalseVal, - EI.getIndexOperand(), - FalseVal->getName() + ".elt"); + = Builder.CreateExtractElement(FalseVal, + EI.getIndexOperand(), + FalseVal->getName() + ".elt"); return SelectInst::Create(Cond, V1Elem, V2Elem, @@ -837,7 +837,7 @@ Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) { if (Instruction *Shuf = foldConstantInsEltIntoShuffle(IE)) return Shuf; - if (Instruction *NewInsElt = hoistInsEltConst(IE, *Builder)) + if (Instruction *NewInsElt = hoistInsEltConst(IE, Builder)) return NewInsElt; // Turn a sequence of inserts that broadcasts a scalar into a single @@ -1020,9 +1020,9 @@ InstCombiner::EvaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask) { SmallVector<Constant *, 16> MaskValues; for (int i = 0, e = Mask.size(); i != e; ++i) { if (Mask[i] == -1) - MaskValues.push_back(UndefValue::get(Builder->getInt32Ty())); + MaskValues.push_back(UndefValue::get(Builder.getInt32Ty())); else - MaskValues.push_back(Builder->getInt32(Mask[i])); + MaskValues.push_back(Builder.getInt32(Mask[i])); } return ConstantExpr::getShuffleVector(C, UndefValue::get(C->getType()), ConstantVector::get(MaskValues)); @@ -1095,7 +1095,7 @@ InstCombiner::EvaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask) { Value *V = EvaluateInDifferentElementOrder(I->getOperand(0), Mask); return InsertElementInst::Create(V, I->getOperand(1), - Builder->getInt32(Index), "", I); + Builder.getInt32(Index), "", I); } } llvm_unreachable("failed to reorder elements of vector instruction!"); @@ -1275,9 +1275,9 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { UndefValue::get(Int32Ty)); for (unsigned I = 0, E = MaskElems, Idx = BegIdx; I != E; ++Idx, ++I) ShuffleMask[I] = ConstantInt::get(Int32Ty, Idx); - V = Builder->CreateShuffleVector(V, UndefValue::get(V->getType()), - ConstantVector::get(ShuffleMask), - SVI.getName() + ".extract"); + V = Builder.CreateShuffleVector(V, UndefValue::get(V->getType()), + ConstantVector::get(ShuffleMask), + SVI.getName() + ".extract"); BegIdx = 0; } unsigned SrcElemsPerTgtElem = TgtElemBitWidth / SrcElemBitWidth; @@ -1287,10 +1287,10 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { auto *NewBC = BCAlreadyExists ? 
NewBCs[CastSrcTy] - : Builder->CreateBitCast(V, CastSrcTy, SVI.getName() + ".bc"); + : Builder.CreateBitCast(V, CastSrcTy, SVI.getName() + ".bc"); if (!BCAlreadyExists) NewBCs[CastSrcTy] = NewBC; - auto *Ext = Builder->CreateExtractElement( + auto *Ext = Builder.CreateExtractElement( NewBC, ConstantInt::get(Int32Ty, BegIdx), SVI.getName() + ".extract"); // The shufflevector isn't being replaced: the bitcast that used it // is. InstCombine will visit the newly-created instructions. diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index 723414635d6fb..90e2323991555 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -88,7 +88,7 @@ MaxArraySize("instcombine-maxarray-size", cl::init(1024), cl::desc("Maximum array size considered when doing a combine")); Value *InstCombiner::EmitGEPOffset(User *GEP) { - return llvm::EmitGEPOffset(Builder, DL, GEP); + return llvm::EmitGEPOffset(&Builder, DL, GEP); } /// Return true if it is desirable to convert an integer computation from a @@ -498,8 +498,7 @@ getBinOpsForFactorization(Instruction::BinaryOps TopLevelOpcode, /// This tries to simplify binary operations by factorizing out common terms /// (e. g. "(A*B)+(A*C)" -> "A*(B+C)"). -Value *InstCombiner::tryFactorization(InstCombiner::BuilderTy *Builder, - BinaryOperator &I, +Value *InstCombiner::tryFactorization(BinaryOperator &I, Instruction::BinaryOps InnerOpcode, Value *A, Value *B, Value *C, Value *D) { assert(A && B && C && D && "All values must be provided"); @@ -525,9 +524,9 @@ Value *InstCombiner::tryFactorization(InstCombiner::BuilderTy *Builder, // If "B op D" doesn't simplify then only go on if both of the existing // operations "A op' B" and "C op' D" will be zapped as no longer used. if (!V && LHS->hasOneUse() && RHS->hasOneUse()) - V = Builder->CreateBinOp(TopLevelOpcode, B, D, RHS->getName()); + V = Builder.CreateBinOp(TopLevelOpcode, B, D, RHS->getName()); if (V) { - SimplifiedInst = Builder->CreateBinOp(InnerOpcode, A, V); + SimplifiedInst = Builder.CreateBinOp(InnerOpcode, A, V); } } @@ -545,9 +544,9 @@ Value *InstCombiner::tryFactorization(InstCombiner::BuilderTy *Builder, // If "A op C" doesn't simplify then only go on if both of the existing // operations "A op' B" and "C op' D" will be zapped as no longer used. if (!V && LHS->hasOneUse() && RHS->hasOneUse()) - V = Builder->CreateBinOp(TopLevelOpcode, A, C, LHS->getName()); + V = Builder.CreateBinOp(TopLevelOpcode, A, C, LHS->getName()); if (V) { - SimplifiedInst = Builder->CreateBinOp(InnerOpcode, V, B); + SimplifiedInst = Builder.CreateBinOp(InnerOpcode, V, B); } } @@ -610,7 +609,7 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) { // The instruction has the form "(A op' B) op (C op' D)". Try to factorize // a common term. if (Op0 && Op1 && LHSOpcode == RHSOpcode) - if (Value *V = tryFactorization(Builder, I, LHSOpcode, A, B, C, D)) + if (Value *V = tryFactorization(I, LHSOpcode, A, B, C, D)) return V; // The instruction has the form "(A op' B) op (C)". Try to factorize common @@ -618,7 +617,7 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) { if (Op0) if (Value *Ident = getIdentityValue(LHSOpcode, RHS)) if (Value *V = - tryFactorization(Builder, I, LHSOpcode, A, B, RHS, Ident)) + tryFactorization(I, LHSOpcode, A, B, RHS, Ident)) return V; // The instruction has the form "(B) op (C op' D)". 
Try to factorize common @@ -626,7 +625,7 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) { if (Op1) if (Value *Ident = getIdentityValue(RHSOpcode, LHS)) if (Value *V = - tryFactorization(Builder, I, RHSOpcode, LHS, Ident, C, D)) + tryFactorization(I, RHSOpcode, LHS, Ident, C, D)) return V; } @@ -644,7 +643,7 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) { SimplifyBinOp(TopLevelOpcode, B, C, SQ.getWithInstruction(&I))) { // They do! Return "L op' R". ++NumExpand; - C = Builder->CreateBinOp(InnerOpcode, L, R); + C = Builder.CreateBinOp(InnerOpcode, L, R); C->takeName(&I); return C; } @@ -663,7 +662,7 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) { SimplifyBinOp(TopLevelOpcode, A, C, SQ.getWithInstruction(&I))) { // They do! Return "L op' R". ++NumExpand; - A = Builder->CreateBinOp(InnerOpcode, L, R); + A = Builder.CreateBinOp(InnerOpcode, L, R); A->takeName(&I); return A; } @@ -678,18 +677,18 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) { if (Value *V = SimplifyBinOp(TopLevelOpcode, SI0->getFalseValue(), SI1->getFalseValue(), SQ.getWithInstruction(&I))) - SI = Builder->CreateSelect(SI0->getCondition(), - Builder->CreateBinOp(TopLevelOpcode, - SI0->getTrueValue(), - SI1->getTrueValue()), - V); + SI = Builder.CreateSelect(SI0->getCondition(), + Builder.CreateBinOp(TopLevelOpcode, + SI0->getTrueValue(), + SI1->getTrueValue()), + V); if (Value *V = SimplifyBinOp(TopLevelOpcode, SI0->getTrueValue(), SI1->getTrueValue(), SQ.getWithInstruction(&I))) - SI = Builder->CreateSelect( + SI = Builder.CreateSelect( SI0->getCondition(), V, - Builder->CreateBinOp(TopLevelOpcode, SI0->getFalseValue(), - SI1->getFalseValue())); + Builder.CreateBinOp(TopLevelOpcode, SI0->getFalseValue(), + SI1->getFalseValue())); if (SI) { SI->takeName(&I); return SI; @@ -751,9 +750,9 @@ Value *InstCombiner::dyn_castFNegVal(Value *V, bool IgnoreZeroSign) const { } static Value *foldOperationIntoSelectOperand(Instruction &I, Value *SO, - InstCombiner *IC) { + InstCombiner::BuilderTy &Builder) { if (auto *Cast = dyn_cast<CastInst>(&I)) - return IC->Builder->CreateCast(Cast->getOpcode(), SO, I.getType()); + return Builder.CreateCast(Cast->getOpcode(), SO, I.getType()); assert(I.isBinaryOp() && "Unexpected opcode for select folding"); @@ -772,8 +771,8 @@ static Value *foldOperationIntoSelectOperand(Instruction &I, Value *SO, std::swap(Op0, Op1); auto *BO = cast<BinaryOperator>(&I); - Value *RI = IC->Builder->CreateBinOp(BO->getOpcode(), Op0, Op1, - SO->getName() + ".op"); + Value *RI = Builder.CreateBinOp(BO->getOpcode(), Op0, Op1, + SO->getName() + ".op"); auto *FPInst = dyn_cast<Instruction>(RI); if (FPInst && isa<FPMathOperator>(FPInst)) FPInst->copyFastMathFlags(BO); @@ -791,7 +790,7 @@ Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) { return nullptr; // Bool selects with constant operands can be folded to logical ops. 
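A truth-table check (illustration only) of the remark above, which is why FoldOpIntoSelect bails on i1 selects and leaves them to the logical-op folds:

#include <cassert>

int main() {
  for (int C = 0; C <= 1; ++C)
    for (int B = 0; B <= 1; ++B) {
      assert((C ? 1 : B) == (C | B)); // select C, true, B  ==  or C, B
      assert((C ? B : 0) == (C & B)); // select C, B, false ==  and C, B
    }
  return 0;
}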
- if (SI->getType()->getScalarType()->isIntegerTy(1)) + if (SI->getType()->isIntOrIntVectorTy(1)) return nullptr; // If it's a bitcast involving vectors, make sure it has the same number of @@ -825,13 +824,13 @@ Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) { } } - Value *NewTV = foldOperationIntoSelectOperand(Op, TV, this); - Value *NewFV = foldOperationIntoSelectOperand(Op, FV, this); + Value *NewTV = foldOperationIntoSelectOperand(Op, TV, Builder); + Value *NewFV = foldOperationIntoSelectOperand(Op, FV, Builder); return SelectInst::Create(SI->getCondition(), NewTV, NewFV, "", nullptr, SI); } static Value *foldOperationIntoPhiValue(BinaryOperator *I, Value *InV, - InstCombiner *IC) { + InstCombiner::BuilderTy &Builder) { bool ConstIsRHS = isa<Constant>(I->getOperand(1)); Constant *C = cast<Constant>(I->getOperand(ConstIsRHS)); @@ -845,7 +844,7 @@ static Value *foldOperationIntoPhiValue(BinaryOperator *I, Value *InV, if (!ConstIsRHS) std::swap(Op0, Op1); - Value *RI = IC->Builder->CreateBinOp(I->getOpcode(), Op0, Op1, "phitmp"); + Value *RI = Builder.CreateBinOp(I->getOpcode(), Op0, Op1, "phitmp"); auto *FPInst = dyn_cast<Instruction>(RI); if (FPInst && isa<FPMathOperator>(FPInst)) FPInst->copyFastMathFlags(I); @@ -916,7 +915,7 @@ Instruction *InstCombiner::foldOpIntoPhi(Instruction &I, PHINode *PN) { // If we are going to have to insert a new computation, do so right before the // predecessor's terminator. if (NonConstBB) - Builder->SetInsertPoint(NonConstBB->getTerminator()); + Builder.SetInsertPoint(NonConstBB->getTerminator()); // Next, add all of the operands to the PHI. if (SelectInst *SI = dyn_cast<SelectInst>(&I)) { @@ -948,9 +947,9 @@ Instruction *InstCombiner::foldOpIntoPhi(Instruction &I, PHINode *PN) { // folded to TrueVInPred or FalseVInPred as done for ConstantInt. For // non-vector phis, this transformation was always profitable because // the select would be generated exactly once in the NonConstBB. 
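A scalar sketch (illustration only) of what folding an operation into a phi means: a binop between a phi and a constant can instead be evaluated once per incoming value.

#include <cassert>

int main() {
  int A = 3, B = 7, K = 5;
  for (int FromFirstPred = 0; FromFirstPred <= 1; ++FromFirstPred) {
    int PhiThenOp = (FromFirstPred ? A : B) + K;       // add after the phi
    int OpIntoPhi = FromFirstPred ? (A + K) : (B + K); // phi of folded values
    assert(PhiThenOp == OpIntoPhi);
  }
  return 0;
}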
- Builder->SetInsertPoint(ThisBB->getTerminator()); - InV = Builder->CreateSelect(PN->getIncomingValue(i), - TrueVInPred, FalseVInPred, "phitmp"); + Builder.SetInsertPoint(ThisBB->getTerminator()); + InV = Builder.CreateSelect(PN->getIncomingValue(i), TrueVInPred, + FalseVInPred, "phitmp"); } NewPN->addIncoming(InV, ThisBB); } @@ -961,16 +960,17 @@ Instruction *InstCombiner::foldOpIntoPhi(Instruction &I, PHINode *PN) { if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) InV = ConstantExpr::getCompare(CI->getPredicate(), InC, C); else if (isa<ICmpInst>(CI)) - InV = Builder->CreateICmp(CI->getPredicate(), PN->getIncomingValue(i), - C, "phitmp"); + InV = Builder.CreateICmp(CI->getPredicate(), PN->getIncomingValue(i), + C, "phitmp"); else - InV = Builder->CreateFCmp(CI->getPredicate(), PN->getIncomingValue(i), - C, "phitmp"); + InV = Builder.CreateFCmp(CI->getPredicate(), PN->getIncomingValue(i), + C, "phitmp"); NewPN->addIncoming(InV, PN->getIncomingBlock(i)); } } else if (auto *BO = dyn_cast<BinaryOperator>(&I)) { for (unsigned i = 0; i != NumPHIValues; ++i) { - Value *InV = foldOperationIntoPhiValue(BO, PN->getIncomingValue(i), this); + Value *InV = foldOperationIntoPhiValue(BO, PN->getIncomingValue(i), + Builder); NewPN->addIncoming(InV, PN->getIncomingBlock(i)); } } else { @@ -981,8 +981,8 @@ Instruction *InstCombiner::foldOpIntoPhi(Instruction &I, PHINode *PN) { if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i))) InV = ConstantExpr::getCast(CI->getOpcode(), InC, RetTy); else - InV = Builder->CreateCast(CI->getOpcode(), - PN->getIncomingValue(i), I.getType(), "phitmp"); + InV = Builder.CreateCast(CI->getOpcode(), PN->getIncomingValue(i), + I.getType(), "phitmp"); NewPN->addIncoming(InV, PN->getIncomingBlock(i)); } } @@ -1328,8 +1328,8 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) { /// \brief Creates node of binary operation with the same attributes as the /// specified one but with other operands. static Value *CreateBinOpAsGiven(BinaryOperator &Inst, Value *LHS, Value *RHS, - InstCombiner::BuilderTy *B) { - Value *BO = B->CreateBinOp(Inst.getOpcode(), LHS, RHS); + InstCombiner::BuilderTy &B) { + Value *BO = B.CreateBinOp(Inst.getOpcode(), LHS, RHS); // If LHS and RHS are constant, BO won't be a binary operator. if (BinaryOperator *NewBO = dyn_cast<BinaryOperator>(BO)) NewBO->copyIRFlags(&Inst); @@ -1365,7 +1365,7 @@ Value *InstCombiner::SimplifyVectorOp(BinaryOperator &Inst) { LShuf->getOperand(0)->getType() == RShuf->getOperand(0)->getType()) { Value *NewBO = CreateBinOpAsGiven(Inst, LShuf->getOperand(0), RShuf->getOperand(0), Builder); - return Builder->CreateShuffleVector( + return Builder.CreateShuffleVector( NewBO, UndefValue::get(NewBO->getType()), LShuf->getMask()); } @@ -1404,7 +1404,7 @@ Value *InstCombiner::SimplifyVectorOp(BinaryOperator &Inst) { Value *NewLHS = isa<Constant>(LHS) ? C2 : Shuffle->getOperand(0); Value *NewRHS = isa<Constant>(LHS) ? Shuffle->getOperand(0) : C2; Value *NewBO = CreateBinOpAsGiven(Inst, NewLHS, NewRHS, Builder); - return Builder->CreateShuffleVector(NewBO, + return Builder.CreateShuffleVector(NewBO, UndefValue::get(Inst.getType()), Shuffle->getMask()); } } @@ -1452,7 +1452,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // If we are using a wider index than needed for this platform, shrink // it to what we need. If narrower, sign-extend it to what we need. // This explicit cast can make subsequent optimizations more obvious. 
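A minimal sketch of the canonicalization performed below (hedged: the helper name and its parameters are invented for illustration; CreateIntCast and getIntPtrType are the real APIs involved):

#include "llvm/IR/DataLayout.h"
#include "llvm/IR/IRBuilder.h"
using namespace llvm;

// Sign-extend or truncate a GEP index to the pointer-width integer type so
// that later passes see one canonical index width.
static Value *canonicalizeGEPIndex(IRBuilder<> &B, const DataLayout &DL,
                                   Type *PtrTy, Value *Idx) {
  Type *NewIndexType = DL.getIntPtrType(PtrTy);
  if (Idx->getType() == NewIndexType)
    return Idx; // already canonical, nothing to insert
  return B.CreateIntCast(Idx, NewIndexType, /*isSigned=*/true);
}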
- *I = Builder->CreateIntCast(*I, NewIndexType, true); + *I = Builder.CreateIntCast(*I, NewIndexType, true); MadeChange = true; } } @@ -1546,10 +1546,10 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // set that index. PHINode *NewPN; { - IRBuilderBase::InsertPointGuard Guard(*Builder); - Builder->SetInsertPoint(PN); - NewPN = Builder->CreatePHI(Op1->getOperand(DI)->getType(), - PN->getNumOperands()); + IRBuilderBase::InsertPointGuard Guard(Builder); + Builder.SetInsertPoint(PN); + NewPN = Builder.CreatePHI(Op1->getOperand(DI)->getType(), + PN->getNumOperands()); } for (auto &I : PN->operands()) @@ -1669,8 +1669,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // pointer arithmetic. if (match(V, m_Neg(m_PtrToInt(m_Value())))) { Operator *Index = cast<Operator>(V); - Value *PtrToInt = Builder->CreatePtrToInt(PtrOp, Index->getType()); - Value *NewSub = Builder->CreateSub(PtrToInt, Index->getOperand(1)); + Value *PtrToInt = Builder.CreatePtrToInt(PtrOp, Index->getType()); + Value *NewSub = Builder.CreateSub(PtrToInt, Index->getOperand(1)); return CastInst::Create(Instruction::IntToPtr, NewSub, GEP.getType()); } // Canonicalize (gep i8* X, (ptrtoint Y)-(ptrtoint X)) @@ -1723,7 +1723,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // -> // %0 = GEP i8 addrspace(1)* X, ... // addrspacecast i8 addrspace(1)* %0 to i8* - return new AddrSpaceCastInst(Builder->Insert(Res), GEP.getType()); + return new AddrSpaceCastInst(Builder.Insert(Res), GEP.getType()); } if (ArrayType *XATy = @@ -1751,10 +1751,10 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // addrspacecast i8 addrspace(1)* %0 to i8* SmallVector<Value*, 8> Idx(GEP.idx_begin(), GEP.idx_end()); Value *NewGEP = GEP.isInBounds() - ? Builder->CreateInBoundsGEP( + ? Builder.CreateInBoundsGEP( nullptr, StrippedPtr, Idx, GEP.getName()) - : Builder->CreateGEP(nullptr, StrippedPtr, Idx, - GEP.getName()); + : Builder.CreateGEP(nullptr, StrippedPtr, Idx, + GEP.getName()); return new AddrSpaceCastInst(NewGEP, GEP.getType()); } } @@ -1772,9 +1772,9 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { Value *Idx[2] = { Constant::getNullValue(IdxType), GEP.getOperand(1) }; Value *NewGEP = GEP.isInBounds() - ? Builder->CreateInBoundsGEP(nullptr, StrippedPtr, Idx, - GEP.getName()) - : Builder->CreateGEP(nullptr, StrippedPtr, Idx, GEP.getName()); + ? Builder.CreateInBoundsGEP(nullptr, StrippedPtr, Idx, + GEP.getName()) + : Builder.CreateGEP(nullptr, StrippedPtr, Idx, GEP.getName()); // V and GEP are both pointer types --> BitCast return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP, @@ -1807,10 +1807,10 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // GEP may not be "inbounds". Value *NewGEP = GEP.isInBounds() && NSW - ? Builder->CreateInBoundsGEP(nullptr, StrippedPtr, NewIdx, - GEP.getName()) - : Builder->CreateGEP(nullptr, StrippedPtr, NewIdx, - GEP.getName()); + ? Builder.CreateInBoundsGEP(nullptr, StrippedPtr, NewIdx, + GEP.getName()) + : Builder.CreateGEP(nullptr, StrippedPtr, NewIdx, + GEP.getName()); // The NewGEP must be pointer typed, so must the old one -> BitCast return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP, @@ -1849,10 +1849,10 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { NewIdx}; Value *NewGEP = GEP.isInBounds() && NSW - ? Builder->CreateInBoundsGEP( + ? 
Builder.CreateInBoundsGEP( SrcElTy, StrippedPtr, Off, GEP.getName()) - : Builder->CreateGEP(SrcElTy, StrippedPtr, Off, - GEP.getName()); + : Builder.CreateGEP(SrcElTy, StrippedPtr, Off, + GEP.getName()); // The NewGEP must be pointer typed, so must the old one -> BitCast return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP, GEP.getType()); @@ -1916,8 +1916,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { if (FindElementAtOffset(OpType, Offset.getSExtValue(), NewIndices)) { Value *NGEP = GEP.isInBounds() - ? Builder->CreateInBoundsGEP(nullptr, Operand, NewIndices) - : Builder->CreateGEP(nullptr, Operand, NewIndices); + ? Builder.CreateInBoundsGEP(nullptr, Operand, NewIndices) + : Builder.CreateGEP(nullptr, Operand, NewIndices); if (NGEP->getType() == GEP.getType()) return replaceInstUsesWith(GEP, NGEP); @@ -2166,8 +2166,8 @@ Instruction *InstCombiner::visitFree(CallInst &FI) { // free undef -> unreachable. if (isa<UndefValue>(Op)) { // Insert a new store to null because we cannot modify the CFG here. - Builder->CreateStore(ConstantInt::getTrue(FI.getContext()), - UndefValue::get(Type::getInt1PtrTy(FI.getContext()))); + Builder.CreateStore(ConstantInt::getTrue(FI.getContext()), + UndefValue::get(Type::getInt1PtrTy(FI.getContext()))); return eraseInstFromFunction(FI); } @@ -2281,8 +2281,8 @@ Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) { // the backend should extend back to a legal type for the target. if (NewWidth > 0 && NewWidth < Known.getBitWidth()) { IntegerType *Ty = IntegerType::get(SI.getContext(), NewWidth); - Builder->SetInsertPoint(&SI); - Value *NewCond = Builder->CreateTrunc(Cond, Ty, "trunc"); + Builder.SetInsertPoint(&SI); + Value *NewCond = Builder.CreateTrunc(Cond, Ty, "trunc"); SI.setCondition(NewCond); for (auto Case : SI.cases()) { @@ -2339,8 +2339,8 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) { // %E = insertvalue { i32 } %X, i32 42, 0 // by switching the order of the insert and extract (though the // insertvalue should be left in, since it may have other uses). - Value *NewEV = Builder->CreateExtractValue(IV->getAggregateOperand(), - EV.getIndices()); + Value *NewEV = Builder.CreateExtractValue(IV->getAggregateOperand(), + EV.getIndices()); return InsertValueInst::Create(NewEV, IV->getInsertedValueOperand(), makeArrayRef(insi, inse)); } @@ -2415,17 +2415,17 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) { // extractvalue has integer indices, getelementptr has Value*s. Convert. SmallVector<Value*, 4> Indices; // Prefix an i32 0 since we need the first element. - Indices.push_back(Builder->getInt32(0)); + Indices.push_back(Builder.getInt32(0)); for (ExtractValueInst::idx_iterator I = EV.idx_begin(), E = EV.idx_end(); I != E; ++I) - Indices.push_back(Builder->getInt32(*I)); + Indices.push_back(Builder.getInt32(*I)); // We need to insert these at the location of the old load, not at that of // the extractvalue. - Builder->SetInsertPoint(L); - Value *GEP = Builder->CreateInBoundsGEP(L->getType(), - L->getPointerOperand(), Indices); - Instruction *NL = Builder->CreateLoad(GEP); + Builder.SetInsertPoint(L); + Value *GEP = Builder.CreateInBoundsGEP(L->getType(), + L->getPointerOperand(), Indices); + Instruction *NL = Builder.CreateLoad(GEP); // Whatever aliasing information we had for the original load must also // hold for the smaller load, so propagate the annotations.
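A C-level picture (illustration only) of the extractvalue-of-load rewrite in this hunk: loading a whole aggregate and extracting one field reads the same memory as loading just that field through its address.

#include <cassert>

struct Agg { int A; long B; };

int main() {
  Agg P = {42, 7};
  Agg Whole = P;         // load of the full aggregate
  long ViaGEP = *(&P.B); // load (gep %P, 0, 1): just the field
  assert(Whole.B == ViaGEP);
  return 0;
}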
AAMDNodes Nodes; @@ -2922,8 +2922,8 @@ bool InstCombiner::run() { } // Now that we have an instruction, try combining it to simplify it. - Builder->SetInsertPoint(I); - Builder->SetCurrentDebugLocation(I->getDebugLoc()); + Builder.SetInsertPoint(I); + Builder.SetCurrentDebugLocation(I->getDebugLoc()); #ifndef NDEBUG std::string OrigI; @@ -3160,7 +3160,7 @@ combineInstructionsOverFunction(Function &F, InstCombineWorklist &Worklist, MadeIRChange |= prepareICWorklistFromFunction(F, DL, &TLI, Worklist); - InstCombiner IC(Worklist, &Builder, F.optForMinSize(), ExpensiveCombines, + InstCombiner IC(Worklist, Builder, F.optForMinSize(), ExpensiveCombines, AA, AC, TLI, DT, DL, LI); IC.MaxArraySizeForCombine = MaxArraySize; diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 7eea44d6aca03..184940b7ea583 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -1230,7 +1230,7 @@ static void instrumentMaskedLoadOrStore(AddressSanitizer *Pass, if (auto *Vector = dyn_cast<ConstantVector>(Mask)) { // dyn_cast as we might get UndefValue if (auto *Masked = dyn_cast<ConstantInt>(Vector->getOperand(Idx))) { - if (Masked->isNullValue()) + if (Masked->isZero()) // Mask is constant false, so no instrumentation needed. continue; // If we have a true or undef value, fall through to doInstrumentAddress diff --git a/lib/Transforms/Instrumentation/CFGMST.h b/lib/Transforms/Instrumentation/CFGMST.h index 3802f9fbf7dbe..16e2e6b4e7304 100644 --- a/lib/Transforms/Instrumentation/CFGMST.h +++ b/lib/Transforms/Instrumentation/CFGMST.h @@ -12,6 +12,9 @@ // //===----------------------------------------------------------------------===// +#ifndef LLVM_LIB_TRANSFORMS_INSTRUMENTATION_CFGMST_H +#define LLVM_LIB_TRANSFORMS_INSTRUMENTATION_CFGMST_H + #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/BlockFrequencyInfo.h" @@ -24,10 +27,10 @@ #include <utility> #include <vector> -namespace llvm { - #define DEBUG_TYPE "cfgmst" +namespace llvm { + /// \brief An union-find based Minimum Spanning Tree for CFG /// /// Implements a Union-find algorithm to compute Minimum Spanning Tree @@ -220,5 +223,8 @@ public: } }; -#undef DEBUG_TYPE // "cfgmst" } // end namespace llvm + +#undef DEBUG_TYPE // "cfgmst" + +#endif // LLVM_LIB_TRANSFORMS_INSTRUMENTATION_CFGMST_H diff --git a/lib/Transforms/Instrumentation/InstrProfiling.cpp b/lib/Transforms/Instrumentation/InstrProfiling.cpp index 9c14b0149fdc1..db8fa89779479 100644 --- a/lib/Transforms/Instrumentation/InstrProfiling.cpp +++ b/lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -112,7 +112,7 @@ cl::opt<bool> DoCounterPromotion("do-counter-promotion", cl::ZeroOrMore, cl::desc("Do counter register promotion"), cl::init(false)); cl::opt<unsigned> MaxNumOfPromotionsPerLoop( - cl::ZeroOrMore, "max-counter-promotions-per-loop", cl::init(10), + cl::ZeroOrMore, "max-counter-promotions-per-loop", cl::init(20), cl::desc("Max number counter promotions per loop to avoid" " increasing register pressure too much")); @@ -121,10 +121,21 @@ cl::opt<int> MaxNumOfPromotions(cl::ZeroOrMore, "max-counter-promotions", cl::init(-1), cl::desc("Max number of allowed counter promotions")); -cl::opt<bool> SpeculativeCounterPromotion( - cl::ZeroOrMore, "speculative-counter-promotion", cl::init(false), - cl::desc("Allow counter promotion for loops with multiple exiting blocks " - " or top-tested loops. 
")); +cl::opt<unsigned> SpeculativeCounterPromotionMaxExiting( + cl::ZeroOrMore, "speculative-counter-promotion-max-exiting", cl::init(3), + cl::desc("The max number of exiting blocks of a loop to allow " + " speculative counter promotion")); + +cl::opt<bool> SpeculativeCounterPromotionToLoop( + cl::ZeroOrMore, "speculative-counter-promotion-to-loop", cl::init(false), + cl::desc("When the option is false, if the target block is in a loop, " + "the promotion will be disallowed unless the promoted counter " + " update can be further/iteratively promoted into an acyclic " + " region.")); + +cl::opt<bool> IterativeCounterPromotion( + cl::ZeroOrMore, "iterative-counter-promotion", cl::init(true), + cl::desc("Allow counter promotion across the whole loop nest.")); class InstrProfilingLegacyPass : public ModulePass { InstrProfiling InstrProf; @@ -150,6 +161,7 @@ public: } }; +/// /// A helper class to promote one counter RMW operation in the loop /// into register update. /// @@ -158,16 +170,19 @@ public: /// class PGOCounterPromoterHelper : public LoadAndStorePromoter { public: - PGOCounterPromoterHelper(Instruction *L, Instruction *S, SSAUpdater &SSA, - Value *Init, BasicBlock *PH, - ArrayRef<BasicBlock *> ExitBlocks, - ArrayRef<Instruction *> InsertPts) + PGOCounterPromoterHelper( + Instruction *L, Instruction *S, SSAUpdater &SSA, Value *Init, + BasicBlock *PH, ArrayRef<BasicBlock *> ExitBlocks, + ArrayRef<Instruction *> InsertPts, + DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCands, + LoopInfo &LI) : LoadAndStorePromoter({L, S}, SSA), Store(S), ExitBlocks(ExitBlocks), - InsertPts(InsertPts) { + InsertPts(InsertPts), LoopToCandidates(LoopToCands), LI(LI) { assert(isa<LoadInst>(L)); assert(isa<StoreInst>(S)); SSA.AddAvailableValue(PH, Init); } + void doExtraRewritesBeforeFinalDeletion() const override { for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { BasicBlock *ExitBlock = ExitBlocks[i]; @@ -179,12 +194,21 @@ public: Value *Addr = cast<StoreInst>(Store)->getPointerOperand(); IRBuilder<> Builder(InsertPos); if (AtomicCounterUpdatePromoted) + // automic update currently can only be promoted across the current + // loop, not the whole loop nest. 
Builder.CreateAtomicRMW(AtomicRMWInst::Add, Addr, LiveInValue, AtomicOrdering::SequentiallyConsistent); else { LoadInst *OldVal = Builder.CreateLoad(Addr, "pgocount.promoted"); auto *NewVal = Builder.CreateAdd(OldVal, LiveInValue); - Builder.CreateStore(NewVal, Addr); + auto *NewStore = Builder.CreateStore(NewVal, Addr); + + // Now update the parent loop's candidate list: + if (IterativeCounterPromotion) { + auto *TargetLoop = LI.getLoopFor(ExitBlock); + if (TargetLoop) + LoopToCandidates[TargetLoop].emplace_back(OldVal, NewStore); + } } } } @@ -193,6 +217,8 @@ private: Instruction *Store; ArrayRef<BasicBlock *> ExitBlocks; ArrayRef<Instruction *> InsertPts; + DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCandidates; + LoopInfo &LI; }; /// A helper class to do register promotion for all profile counter /// class PGOCounterPromoter { public: - PGOCounterPromoter(ArrayRef<LoadStorePair> Cands, Loop &Loop) - : Candidates(Cands), ExitBlocks(), InsertPts(), ParentLoop(Loop) { + PGOCounterPromoter( + DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCands, + Loop &CurLoop, LoopInfo &LI) + : LoopToCandidates(LoopToCands), ExitBlocks(), InsertPts(), L(CurLoop), + LI(LI) { SmallVector<BasicBlock *, 8> LoopExitBlocks; SmallPtrSet<BasicBlock *, 8> BlockSet; - ParentLoop.getExitBlocks(LoopExitBlocks); + L.getExitBlocks(LoopExitBlocks); for (BasicBlock *ExitBlock : LoopExitBlocks) { if (BlockSet.insert(ExitBlock).second) { @@ -216,55 +245,97 @@ public: } bool run(int64_t *NumPromoted) { - // We can't insert into a catchswitch. - bool HasCatchSwitch = llvm::any_of(ExitBlocks, [](BasicBlock *Exit) { - return isa<CatchSwitchInst>(Exit->getTerminator()); - }); - - if (HasCatchSwitch) - return false; - - if (!ParentLoop.hasDedicatedExits()) - return false; - - BasicBlock *PH = ParentLoop.getLoopPreheader(); - if (!PH) - return false; - - BasicBlock *H = ParentLoop.getHeader(); - bool TopTested = - ((ParentLoop.getBlocks().size() > 1) && ParentLoop.isLoopExiting(H)); - if (!SpeculativeCounterPromotion && - (TopTested || ParentLoop.getExitingBlock() == nullptr)) + unsigned MaxProm = getMaxNumOfPromotionsInLoop(&L); + if (MaxProm == 0) return false; unsigned Promoted = 0; - for (auto &Cand : Candidates) { + for (auto &Cand : LoopToCandidates[&L]) { SmallVector<PHINode *, 4> NewPHIs; SSAUpdater SSA(&NewPHIs); Value *InitVal = ConstantInt::get(Cand.first->getType(), 0); + PGOCounterPromoterHelper Promoter(Cand.first, Cand.second, SSA, InitVal, - PH, ExitBlocks, InsertPts); + L.getLoopPreheader(), ExitBlocks, + InsertPts, LoopToCandidates, LI); Promoter.run(SmallVector<Instruction *, 2>({Cand.first, Cand.second})); Promoted++; - if (Promoted >= MaxNumOfPromotionsPerLoop) + if (Promoted >= MaxProm) break; + (*NumPromoted)++; if (MaxNumOfPromotions != -1 && *NumPromoted >= MaxNumOfPromotions) break; } DEBUG(dbgs() << Promoted << " counters promoted for loop (depth=" - << ParentLoop.getLoopDepth() << ")\n"); + << L.getLoopDepth() << ")\n"); return Promoted != 0; } private: - ArrayRef<LoadStorePair> Candidates; + bool allowSpeculativeCounterPromotion(Loop *LP) { + SmallVector<BasicBlock *, 8> ExitingBlocks; + L.getExitingBlocks(ExitingBlocks); + // Not considered speculative. + if (ExitingBlocks.size() == 1) + return true; + if (ExitingBlocks.size() > SpeculativeCounterPromotionMaxExiting) + return false; + return true; + } + + // Returns the max number of Counter Promotions for LP.
+ unsigned getMaxNumOfPromotionsInLoop(Loop *LP) { + // We can't insert into a catchswitch. + SmallVector<BasicBlock *, 8> LoopExitBlocks; + LP->getExitBlocks(LoopExitBlocks); + if (llvm::any_of(LoopExitBlocks, [](BasicBlock *Exit) { + return isa<CatchSwitchInst>(Exit->getTerminator()); + })) + return 0; + + if (!LP->hasDedicatedExits()) + return 0; + + BasicBlock *PH = LP->getLoopPreheader(); + if (!PH) + return 0; + + SmallVector<BasicBlock *, 8> ExitingBlocks; + LP->getExitingBlocks(ExitingBlocks); + // Not considered speculative. + if (ExitingBlocks.size() == 1) + return MaxNumOfPromotionsPerLoop; + + if (ExitingBlocks.size() > SpeculativeCounterPromotionMaxExiting) + return 0; + + // Whether the target block is in a loop does not matter: + if (SpeculativeCounterPromotionToLoop) + return MaxNumOfPromotionsPerLoop; + + // Now check the target block: + unsigned MaxProm = MaxNumOfPromotionsPerLoop; + for (auto *TargetBlock : LoopExitBlocks) { + auto *TargetLoop = LI.getLoopFor(TargetBlock); + if (!TargetLoop) + continue; + unsigned MaxPromForTarget = getMaxNumOfPromotionsInLoop(TargetLoop); + unsigned PendingCandsInTarget = LoopToCandidates[TargetLoop].size(); + MaxProm = + std::min(MaxProm, std::max(MaxPromForTarget, PendingCandsInTarget) - + PendingCandsInTarget); + } + return MaxProm; + } + + DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCandidates; SmallVector<BasicBlock *, 8> ExitBlocks; SmallVector<Instruction *, 8> InsertPts; - Loop &ParentLoop; + Loop &L; + LoopInfo &LI; }; } // end anonymous namespace @@ -349,8 +420,10 @@ void InstrProfiling::promoteCounterLoadStores(Function *F) { SmallVector<Loop *, 4> Loops = LI.getLoopsInPreorder(); - for (auto *Loop : Loops) { - PGOCounterPromoter Promoter(LoopPromotionCandidates[Loop], *Loop); + // Do a post-order traversal of the loops so that counter updates can be + // iteratively hoisted outside the loop nest.
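In miniature, what the post-order walk enables (illustration only): the flush a promoted inner-loop counter performs at its exit is itself a load/store candidate for the enclosing loop, so the update can keep migrating outward.

#include <cassert>

int main() {
  const int Outer = 4, Inner = 5;
  long Mem = 0;
  for (int i = 0; i < Outer; ++i) {
    long Reg = 0; // inner counter promoted to a register
    for (int j = 0; j < Inner; ++j)
      ++Reg;
    Mem += Reg;   // flush at the inner exit: promotable again in the outer loop
  }
  assert(Mem == (long)Outer * Inner); // matches the fully promoted form
  return 0;
}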
+ for (auto *Loop : llvm::reverse(Loops)) { + PGOCounterPromoter Promoter(LoopPromotionCandidates, *Loop, LI); Promoter.run(&TotalCountersPromoted); } } diff --git a/lib/Transforms/Instrumentation/MaximumSpanningTree.h b/lib/Transforms/Instrumentation/MaximumSpanningTree.h index 363539b2886f3..4eb758c69c581 100644 --- a/lib/Transforms/Instrumentation/MaximumSpanningTree.h +++ b/lib/Transforms/Instrumentation/MaximumSpanningTree.h @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_ANALYSIS_MAXIMUMSPANNINGTREE_H -#define LLVM_ANALYSIS_MAXIMUMSPANNINGTREE_H +#ifndef LLVM_LIB_TRANSFORMS_INSTRUMENTATION_MAXIMUMSPANNINGTREE_H +#define LLVM_LIB_TRANSFORMS_INSTRUMENTATION_MAXIMUMSPANNINGTREE_H #include "llvm/ADT/EquivalenceClasses.h" #include "llvm/IR/BasicBlock.h" @@ -108,4 +108,4 @@ namespace llvm { } // End llvm namespace -#endif +#endif // LLVM_LIB_TRANSFORMS_INSTRUMENTATION_MAXIMUMSPANNINGTREE_H diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp index df4ee9969c02f..1348e0ed0ed00 100644 --- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -2918,8 +2918,11 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { if (ClDumpStrictInstructions) dumpInst(I); DEBUG(dbgs() << "DEFAULT: " << I << "\n"); - for (size_t i = 0, n = I.getNumOperands(); i < n; i++) - insertShadowCheck(I.getOperand(i), &I); + for (size_t i = 0, n = I.getNumOperands(); i < n; i++) { + Value *Operand = I.getOperand(i); + if (Operand->getType()->isSized()) + insertShadowCheck(Operand, &I); + } setShadow(&I, getCleanShadow(&I)); setOrigin(&I, getCleanOrigin()); } diff --git a/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/lib/Transforms/Instrumentation/PGOInstrumentation.cpp index 0e7d11c553977..8e4bfc0b91bc5 100644 --- a/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ b/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -224,7 +224,7 @@ std::string getBranchCondString(Instruction *TI) { OS << "_Zero"; else if (CV->isOne()) OS << "_One"; - else if (CV->isAllOnesValue()) + else if (CV->isMinusOne()) OS << "_MinusOne"; else OS << "_Const"; diff --git a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp index a991792bf5a39..ec6904486e109 100644 --- a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp +++ b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp @@ -379,10 +379,11 @@ void ThreadSanitizer::chooseInstructionsToInstrument( } static bool isAtomic(Instruction *I) { + // TODO: Ask TTI whether synchronization scope is between threads. if (LoadInst *LI = dyn_cast<LoadInst>(I)) - return LI->isAtomic() && LI->getSynchScope() == CrossThread; + return LI->isAtomic() && LI->getSyncScopeID() != SyncScope::SingleThread; if (StoreInst *SI = dyn_cast<StoreInst>(I)) - return SI->isAtomic() && SI->getSynchScope() == CrossThread; + return SI->isAtomic() && SI->getSyncScopeID() != SyncScope::SingleThread; if (isa<AtomicRMWInst>(I)) return true; if (isa<AtomicCmpXchgInst>(I)) @@ -676,7 +677,7 @@ bool ThreadSanitizer::instrumentAtomic(Instruction *I, const DataLayout &DL) { I->eraseFromParent(); } else if (FenceInst *FI = dyn_cast<FenceInst>(I)) { Value *Args[] = {createOrdering(&IRB, FI->getOrdering())}; - Function *F = FI->getSynchScope() == SingleThread ? + Function *F = FI->getSyncScopeID() == SyncScope::SingleThread ? 
TsanAtomicSignalFence : TsanAtomicThreadFence; CallInst *C = CallInst::Create(F, Args); ReplaceInstWithInst(I, C); diff --git a/lib/Transforms/Scalar/ConstantHoisting.cpp b/lib/Transforms/Scalar/ConstantHoisting.cpp index a49c9b68c97d0..122c9314e022a 100644 --- a/lib/Transforms/Scalar/ConstantHoisting.cpp +++ b/lib/Transforms/Scalar/ConstantHoisting.cpp @@ -44,6 +44,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/Local.h" #include <tuple> using namespace llvm; @@ -55,7 +56,7 @@ STATISTIC(NumConstantsHoisted, "Number of constants hoisted"); STATISTIC(NumConstantsRebased, "Number of constants rebased"); static cl::opt<bool> ConstHoistWithBlockFrequency( - "consthoist-with-block-frequency", cl::init(false), cl::Hidden, + "consthoist-with-block-frequency", cl::init(true), cl::Hidden, cl::desc("Enable the use of the block frequency analysis to reduce the " "chance to execute const materialization more frequently than " "without hoisting.")); @@ -231,7 +232,8 @@ static void findBestInsertionSet(DominatorTree &DT, BlockFrequencyInfo &BFI, // Return the optimal insert points in BBs. if (Node == Entry) { BBs.clear(); - if (InsertPtsFreq > BFI.getBlockFreq(Node)) + if (InsertPtsFreq > BFI.getBlockFreq(Node) || + (InsertPtsFreq == BFI.getBlockFreq(Node) && InsertPts.size() > 1)) BBs.insert(Entry); else BBs.insert(InsertPts.begin(), InsertPts.end()); @@ -244,7 +246,15 @@ static void findBestInsertionSet(DominatorTree &DT, BlockFrequencyInfo &BFI, SmallPtrSet<BasicBlock *, 16> &ParentInsertPts = InsertPtsMap[Parent].first; BlockFrequency &ParentPtsFreq = InsertPtsMap[Parent].second; // Choose to insert in Node or in subtree of Node. - if (InsertPtsFreq > BFI.getBlockFreq(Node) || NodeInBBs) { + // Don't hoist to EHPad because we may not find a proper place to insert + // in EHPad. + // If the total frequency of InsertPts is the same as the frequency of the + // target Node, and InsertPts contains more than one nodes, choose hoisting + // to reduce code size. + if (NodeInBBs || + (!Node->isEHPad() && + (InsertPtsFreq > BFI.getBlockFreq(Node) || + (InsertPtsFreq == BFI.getBlockFreq(Node) && InsertPts.size() > 1)))) { ParentInsertPts.insert(Node); ParentPtsFreq += BFI.getBlockFreq(Node); } else { @@ -392,42 +402,15 @@ void ConstantHoistingPass::collectConstantCandidates( if (Inst->isCast()) return; - // Can't handle inline asm. Skip it. - if (auto Call = dyn_cast<CallInst>(Inst)) - if (isa<InlineAsm>(Call->getCalledValue())) - return; - - // Switch cases must remain constant, and if the value being tested is - // constant the entire thing should disappear. - if (isa<SwitchInst>(Inst)) - return; - - // Static allocas (constant size in the entry block) are handled by - // prologue/epilogue insertion so they're free anyway. We definitely don't - // want to make them non-constant. - auto AI = dyn_cast<AllocaInst>(Inst); - if (AI && AI->isStaticAlloca()) - return; - - // Constants in GEPs that index into a struct type should not be hoisted. - if (isa<GetElementPtrInst>(Inst)) { - gep_type_iterator GTI = gep_type_begin(Inst); - - // Collect constant for first operand. - collectConstantCandidates(ConstCandMap, Inst, 0); - // Scan rest operands. - for (unsigned Idx = 1, E = Inst->getNumOperands(); Idx != E; ++Idx, ++GTI) { - // Only collect constants that index into a non struct type. - if (!GTI.isStruct()) { - collectConstantCandidates(ConstCandMap, Inst, Idx); - } - } - return; - } - // Scan all operands. 
for (unsigned Idx = 0, E = Inst->getNumOperands(); Idx != E; ++Idx) { - collectConstantCandidates(ConstCandMap, Inst, Idx); + // The cost of materializing the constants (defined in + // `TargetTransformInfo::getIntImmCost`) for instructions which only take + // constant variables is lower than `TargetTransformInfo::TCC_Basic`. So + // it's safe for us to collect constant candidates from all IntrinsicInsts. + if (canReplaceOperandWithVariable(Inst, Idx) || isa<IntrinsicInst>(Inst)) { + collectConstantCandidates(ConstCandMap, Inst, Idx); + } } // end of for all operands } diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp index 0f92760a874b5..7fd77a082b822 100644 --- a/lib/Transforms/Scalar/EarlyCSE.cpp +++ b/lib/Transforms/Scalar/EarlyCSE.cpp @@ -670,7 +670,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { if (auto *KnownCond = AvailableValues.lookup(CondI)) { // Is the condition known to be true? if (isa<ConstantInt>(KnownCond) && - cast<ConstantInt>(KnownCond)->isOneValue()) { + cast<ConstantInt>(KnownCond)->isOne()) { DEBUG(dbgs() << "EarlyCSE removing guard: " << *Inst << '\n'); removeMSSA(Inst); Inst->eraseFromParent(); diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index c0f628eb61e61..0fe72f3f73318 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -80,10 +80,9 @@ MaxRecurseDepth("max-recurse-depth", cl::Hidden, cl::init(1000), cl::ZeroOrMore, struct llvm::GVN::Expression { uint32_t opcode; Type *type; - bool commutative; SmallVector<uint32_t, 4> varargs; - Expression(uint32_t o = ~2U) : opcode(o), commutative(false) {} + Expression(uint32_t o = ~2U) : opcode(o) {} bool operator==(const Expression &other) const { if (opcode != other.opcode) @@ -247,7 +246,6 @@ GVN::Expression GVN::ValueTable::createExpr(Instruction *I) { assert(I->getNumOperands() == 2 && "Unsupported commutative instruction!"); if (e.varargs[0] > e.varargs[1]) std::swap(e.varargs[0], e.varargs[1]); - e.commutative = true; } if (CmpInst *C = dyn_cast<CmpInst>(I)) { @@ -258,7 +256,6 @@ GVN::Expression GVN::ValueTable::createExpr(Instruction *I) { Predicate = CmpInst::getSwappedPredicate(Predicate); } e.opcode = (C->getOpcode() << 8) | Predicate; - e.commutative = true; } else if (InsertValueInst *E = dyn_cast<InsertValueInst>(I)) { for (InsertValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end(); II != IE; ++II) @@ -284,7 +281,6 @@ GVN::Expression GVN::ValueTable::createCmpExpr(unsigned Opcode, Predicate = CmpInst::getSwappedPredicate(Predicate); } e.opcode = (Opcode << 8) | Predicate; - e.commutative = true; return e; } @@ -352,25 +348,25 @@ GVN::ValueTable::~ValueTable() = default; /// add - Insert a value into the table with a specified value number. 
void GVN::ValueTable::add(Value *V, uint32_t num) { valueNumbering.insert(std::make_pair(V, num)); - if (PHINode *PN = dyn_cast<PHINode>(V)) - NumberingPhi[num] = PN; } uint32_t GVN::ValueTable::lookupOrAddCall(CallInst *C) { if (AA->doesNotAccessMemory(C)) { Expression exp = createExpr(C); - uint32_t e = assignExpNewValueNum(exp).first; + uint32_t &e = expressionNumbering[exp]; + if (!e) e = nextValueNumber++; valueNumbering[C] = e; return e; } else if (AA->onlyReadsMemory(C)) { Expression exp = createExpr(C); - auto ValNum = assignExpNewValueNum(exp); - if (ValNum.second) { - valueNumbering[C] = ValNum.first; - return ValNum.first; + uint32_t &e = expressionNumbering[exp]; + if (!e) { + e = nextValueNumber++; + valueNumbering[C] = e; + return e; } if (!MD) { - uint32_t e = assignExpNewValueNum(exp).first; + e = nextValueNumber++; valueNumbering[C] = e; return e; } @@ -526,29 +522,23 @@ uint32_t GVN::ValueTable::lookupOrAdd(Value *V) { case Instruction::ExtractValue: exp = createExtractvalueExpr(cast<ExtractValueInst>(I)); break; - case Instruction::PHI: - valueNumbering[V] = nextValueNumber; - NumberingPhi[nextValueNumber] = cast<PHINode>(V); - return nextValueNumber++; default: valueNumbering[V] = nextValueNumber; return nextValueNumber++; } - uint32_t e = assignExpNewValueNum(exp).first; + uint32_t& e = expressionNumbering[exp]; + if (!e) e = nextValueNumber++; valueNumbering[V] = e; return e; } /// Returns the value number of the specified value. Fails if /// the value has not yet been numbered. -uint32_t GVN::ValueTable::lookup(Value *V, bool Verify) const { +uint32_t GVN::ValueTable::lookup(Value *V) const { DenseMap<Value*, uint32_t>::const_iterator VI = valueNumbering.find(V); - if (Verify) { - assert(VI != valueNumbering.end() && "Value not numbered?"); - return VI->second; - } - return (VI != valueNumbering.end()) ? VI->second : 0; + assert(VI != valueNumbering.end() && "Value not numbered?"); + return VI->second; } /// Returns the value number of the given comparison, @@ -559,28 +549,21 @@ uint32_t GVN::ValueTable::lookupOrAddCmp(unsigned Opcode, CmpInst::Predicate Predicate, Value *LHS, Value *RHS) { Expression exp = createCmpExpr(Opcode, Predicate, LHS, RHS); - return assignExpNewValueNum(exp).first; + uint32_t& e = expressionNumbering[exp]; + if (!e) e = nextValueNumber++; + return e; } /// Remove all entries from the ValueTable. void GVN::ValueTable::clear() { valueNumbering.clear(); expressionNumbering.clear(); - NumberingPhi.clear(); - PhiTranslateTable.clear(); nextValueNumber = 1; - Expressions.clear(); - ExprIdx.clear(); - nextExprNumber = 0; } /// Remove a value from the value numbering. void GVN::ValueTable::erase(Value *V) { - uint32_t Num = valueNumbering.lookup(V); valueNumbering.erase(V); - // If V is PHINode, V <--> value number is an one-to-one mapping. - if (isa<PHINode>(V)) - NumberingPhi.erase(Num); } /// verifyRemoved - Verify that the value is removed from all internal data @@ -1183,7 +1166,7 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock, auto *NewLoad = new LoadInst(LoadPtr, LI->getName()+".pre", LI->isVolatile(), LI->getAlignment(), - LI->getOrdering(), LI->getSynchScope(), + LI->getOrdering(), LI->getSyncScopeID(), UnavailablePred->getTerminator()); // Transfer the old load's AA tags to the new load. 
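The map idiom this change restores in lookupOrAdd and friends, in standalone form (illustration only; std::map stands in for the DenseMap the pass really uses):

#include <cassert>
#include <cstdint>
#include <map>
#include <string>

int main() {
  std::map<std::string, uint32_t> ExpressionNumbering;
  uint32_t NextValueNumber = 1;
  auto LookupOrAdd = [&](const std::string &Exp) {
    uint32_t &E = ExpressionNumbering[Exp]; // value-initialized to 0
    if (!E)
      E = NextValueNumber++; // first sighting gets a fresh number
    return E;
  };
  assert(LookupOrAdd("add a, b") == 1);
  assert(LookupOrAdd("add a, b") == 1); // same expression, same number
  assert(LookupOrAdd("mul a, b") == 2);
  return 0;
}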
@@ -1219,7 +1202,7 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock, V->takeName(LI); if (Instruction *I = dyn_cast<Instruction>(V)) I->setDebugLoc(LI->getDebugLoc()); - if (V->getType()->getScalarType()->isPointerTy()) + if (V->getType()->isPtrOrPtrVectorTy()) MD->invalidateCachedPointerInfo(V); markInstructionForDeletion(LI); ORE->emit(OptimizationRemark(DEBUG_TYPE, "LoadPRE", LI) @@ -1306,7 +1289,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI) { // to propagate LI's DebugLoc because LI may not post-dominate I. if (LI->getDebugLoc() && LI->getParent() == I->getParent()) I->setDebugLoc(LI->getDebugLoc()); - if (V->getType()->getScalarType()->isPointerTy()) + if (V->getType()->isPtrOrPtrVectorTy()) MD->invalidateCachedPointerInfo(V); markInstructionForDeletion(LI); ++NumGVNLoad; @@ -1460,7 +1443,7 @@ bool GVN::processLoad(LoadInst *L) { reportLoadElim(L, AvailableValue, ORE); // Tell MDA to rexamine the reused pointer since we might have more // information after forwarding it. - if (MD && AvailableValue->getType()->getScalarType()->isPointerTy()) + if (MD && AvailableValue->getType()->isPtrOrPtrVectorTy()) MD->invalidateCachedPointerInfo(AvailableValue); return true; } @@ -1468,95 +1451,6 @@ bool GVN::processLoad(LoadInst *L) { return false; } -/// Return a pair the first field showing the value number of \p Exp and the -/// second field showing whether it is a value number newly created. -std::pair<uint32_t, bool> -GVN::ValueTable::assignExpNewValueNum(Expression &Exp) { - uint32_t &e = expressionNumbering[Exp]; - bool CreateNewValNum = !e; - if (CreateNewValNum) { - Expressions.push_back(Exp); - if (ExprIdx.size() < nextValueNumber + 1) - ExprIdx.resize(nextValueNumber * 2); - e = nextValueNumber; - ExprIdx[nextValueNumber++] = nextExprNumber++; - } - return {e, CreateNewValNum}; -} - -/// Return whether all the values related with the same \p num are -/// defined in \p BB. -bool GVN::ValueTable::areAllValsInBB(uint32_t Num, const BasicBlock *BB, - GVN &Gvn) { - LeaderTableEntry *Vals = &Gvn.LeaderTable[Num]; - while (Vals && Vals->BB == BB) - Vals = Vals->Next; - return !Vals; -} - -/// Wrap phiTranslateImpl to provide caching functionality. -uint32_t GVN::ValueTable::phiTranslate(const BasicBlock *Pred, - const BasicBlock *PhiBlock, uint32_t Num, - GVN &Gvn) { - auto FindRes = PhiTranslateTable.find({Num, Pred}); - if (FindRes != PhiTranslateTable.end()) - return FindRes->second; - uint32_t NewNum = phiTranslateImpl(Pred, PhiBlock, Num, Gvn); - PhiTranslateTable.insert({{Num, Pred}, NewNum}); - return NewNum; -} - -/// Translate value number \p Num using phis, so that it has the values of -/// the phis in BB. -uint32_t GVN::ValueTable::phiTranslateImpl(const BasicBlock *Pred, - const BasicBlock *PhiBlock, - uint32_t Num, GVN &Gvn) { - if (PHINode *PN = NumberingPhi[Num]) { - for (unsigned i = 0; i != PN->getNumIncomingValues(); ++i) { - if (PN->getParent() == PhiBlock && PN->getIncomingBlock(i) == Pred) - if (uint32_t TransVal = lookup(PN->getIncomingValue(i), false)) - return TransVal; - } - return Num; - } - - // If there is any value related with Num is defined in a BB other than - // PhiBlock, it cannot depend on a phi in PhiBlock without going through - // a backedge. We can do an early exit in that case to save compile time. 
- if (!areAllValsInBB(Num, PhiBlock, Gvn)) - return Num; - - if (Num >= ExprIdx.size() || ExprIdx[Num] == 0) - return Num; - Expression Exp = Expressions[ExprIdx[Num]]; - - for (unsigned i = 0; i < Exp.varargs.size(); i++) { - // For InsertValue and ExtractValue, some varargs are index numbers - // instead of value numbers. Those index numbers should not be - // translated. - if ((i > 1 && Exp.opcode == Instruction::InsertValue) || - (i > 0 && Exp.opcode == Instruction::ExtractValue)) - continue; - Exp.varargs[i] = phiTranslate(Pred, PhiBlock, Exp.varargs[i], Gvn); - } - - if (Exp.commutative) { - assert(Exp.varargs.size() == 2 && "Unsupported commutative expression!"); - if (Exp.varargs[0] > Exp.varargs[1]) { - std::swap(Exp.varargs[0], Exp.varargs[1]); - uint32_t Opcode = Exp.opcode >> 8; - if (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) - Exp.opcode = (Opcode << 8) | - CmpInst::getSwappedPredicate( - static_cast<CmpInst::Predicate>(Exp.opcode & 255)); - } - } - - if (uint32_t NewNum = expressionNumbering[Exp]) - return NewNum; - return Num; -} - // In order to find a leader for a given value number at a // specific basic block, we first obtain the list of all Values for that number, // and then scan the list to find one whose block dominates the block in @@ -1601,15 +1495,6 @@ static bool isOnlyReachableViaThisEdge(const BasicBlockEdge &E, return Pred != nullptr; } - -void GVN::assignBlockRPONumber(Function &F) { - uint32_t NextBlockNumber = 1; - ReversePostOrderTraversal<Function *> RPOT(&F); - for (BasicBlock *BB : RPOT) - BlockRPONumber[BB] = NextBlockNumber++; -} - - // Tries to replace instruction with const, using information from // ReplaceWithConstMap. bool GVN::replaceOperandsWithConsts(Instruction *Instr) const { @@ -1713,7 +1598,7 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS, const BasicBlockEdge &Root, // RHS neither 'true' nor 'false' - bail out. continue; // Whether RHS equals 'true'. Otherwise it equals 'false'. - bool isKnownTrue = CI->isAllOnesValue(); + bool isKnownTrue = CI->isMinusOne(); bool isKnownFalse = !isKnownTrue; // If "A && B" is known true then both A and B are known true. If "A || B" @@ -1813,7 +1698,7 @@ bool GVN::processInstruction(Instruction *I) { Changed = true; } if (Changed) { - if (MD && V->getType()->getScalarType()->isPointerTy()) + if (MD && V->getType()->isPtrOrPtrVectorTy()) MD->invalidateCachedPointerInfo(V); ++NumGVNSimpl; return true; @@ -1924,7 +1809,7 @@ bool GVN::processInstruction(Instruction *I) { // Remove it! patchAndReplaceAllUsesWith(I, Repl); - if (MD && Repl->getType()->getScalarType()->isPointerTy()) + if (MD && Repl->getType()->isPtrOrPtrVectorTy()) MD->invalidateCachedPointerInfo(Repl); markInstructionForDeletion(I); return true; @@ -1971,7 +1856,6 @@ bool GVN::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT, // Fabricate val-num for dead-code in order to suppress assertion in // performPRE(). assignValNumForDeadCode(); - assignBlockRPONumber(F); bool PREChanged = true; while (PREChanged) { PREChanged = performPRE(F); @@ -2043,7 +1927,7 @@ bool GVN::processBlock(BasicBlock *BB) { // Instantiate an expression in a predecessor that lacked it. bool GVN::performScalarPREInsertion(Instruction *Instr, BasicBlock *Pred, - BasicBlock *Curr, unsigned int ValNo) { + unsigned int ValNo) { // Because we are going top-down through the block, all value numbers // will be available in the predecessor by the time we need them. 
Any // that weren't originally present will have been instantiated earlier @@ -2061,9 +1945,7 @@ bool GVN::performScalarPREInsertion(Instruction *Instr, BasicBlock *Pred, success = false; break; } - uint32_t TValNo = - VN.phiTranslate(Pred, Curr, VN.lookup(Op), *this); - if (Value *V = findLeader(Pred, TValNo)) { + if (Value *V = findLeader(Pred, VN.lookup(Op))) { Instr->setOperand(i, V); } else { success = false; @@ -2080,12 +1962,10 @@ bool GVN::performScalarPREInsertion(Instruction *Instr, BasicBlock *Pred, Instr->insertBefore(Pred->getTerminator()); Instr->setName(Instr->getName() + ".pre"); Instr->setDebugLoc(Instr->getDebugLoc()); - - unsigned Num = VN.lookupOrAdd(Instr); - VN.add(Instr, Num); + VN.add(Instr, ValNo); // Update the availability map to include the new instruction. - addToLeaderTable(Num, Instr, Pred); + addToLeaderTable(ValNo, Instr, Pred); return true; } @@ -2123,27 +2003,18 @@ bool GVN::performScalarPRE(Instruction *CurInst) { SmallVector<std::pair<Value *, BasicBlock *>, 8> predMap; for (BasicBlock *P : predecessors(CurrentBlock)) { - // We're not interested in PRE where blocks with predecessors that are - // not reachable. - if (!DT->isReachableFromEntry(P)) { + // We're not interested in PRE where the block is its + // own predecessor, or in blocks with predecessors + // that are not reachable. + if (P == CurrentBlock) { NumWithout = 2; break; - } - // It is not safe to do PRE when P->CurrentBlock is a loop backedge, and - // when CurInst has operand defined in CurrentBlock (so it may be defined - // by phi in the loop header). - if (BlockRPONumber[P] >= BlockRPONumber[CurrentBlock] && - any_of(CurInst->operands(), [&](const Use &U) { - if (auto *Inst = dyn_cast<Instruction>(U.get())) - return Inst->getParent() == CurrentBlock; - return false; - })) { + } else if (!DT->isReachableFromEntry(P)) { NumWithout = 2; break; } - uint32_t TValNo = VN.phiTranslate(P, CurrentBlock, ValNo, *this); - Value *predV = findLeader(P, TValNo); + Value *predV = findLeader(P, ValNo); if (!predV) { predMap.push_back(std::make_pair(static_cast<Value *>(nullptr), P)); PREPred = P; @@ -2183,7 +2054,7 @@ bool GVN::performScalarPRE(Instruction *CurInst) { } // We need to insert somewhere, so let's give it a shot PREInstr = CurInst->clone(); - if (!performScalarPREInsertion(PREInstr, PREPred, CurrentBlock, ValNo)) { + if (!performScalarPREInsertion(PREInstr, PREPred, ValNo)) { // If we failed insertion, make sure we remove the instruction. 
DEBUG(verifyRemoved(PREInstr)); PREInstr->deleteValue(); @@ -2212,7 +2083,7 @@ bool GVN::performScalarPRE(Instruction *CurInst) { addToLeaderTable(ValNo, Phi, CurrentBlock); Phi->setDebugLoc(CurInst->getDebugLoc()); CurInst->replaceAllUsesWith(Phi); - if (MD && Phi->getType()->getScalarType()->isPointerTy()) + if (MD && Phi->getType()->isPtrOrPtrVectorTy()) MD->invalidateCachedPointerInfo(Phi); VN.erase(CurInst); removeFromLeaderTable(ValNo, CurInst, CurrentBlock); @@ -2297,7 +2168,6 @@ bool GVN::iterateOnFunction(Function &F) { void GVN::cleanupGlobalSets() { VN.clear(); LeaderTable.clear(); - BlockRPONumber.clear(); TableAllocator.Reset(); } diff --git a/lib/Transforms/Scalar/InferAddressSpaces.cpp b/lib/Transforms/Scalar/InferAddressSpaces.cpp index 3c8fbd35bf8c1..89b28f0aeee6b 100644 --- a/lib/Transforms/Scalar/InferAddressSpaces.cpp +++ b/lib/Transforms/Scalar/InferAddressSpaces.cpp @@ -232,7 +232,7 @@ bool InferAddressSpaces::rewriteIntrinsicOperands(IntrinsicInst *II, case Intrinsic::amdgcn_atomic_inc: case Intrinsic::amdgcn_atomic_dec:{ const ConstantInt *IsVolatile = dyn_cast<ConstantInt>(II->getArgOperand(4)); - if (!IsVolatile || !IsVolatile->isNullValue()) + if (!IsVolatile || !IsVolatile->isZero()) return false; LLVM_FALLTHROUGH; @@ -358,7 +358,8 @@ InferAddressSpaces::collectFlatAddressExpressions(Function &F) const { // If the operands of the expression on the top are already explored, // adds that expression to the resultant postorder. if (PostorderStack.back().second) { - Postorder.push_back(TopVal); + if (TopVal->getType()->getPointerAddressSpace() == FlatAddrSpace) + Postorder.push_back(TopVal); PostorderStack.pop_back(); continue; } diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp index 05293eb0079fc..ee3de51b13606 100644 --- a/lib/Transforms/Scalar/JumpThreading.cpp +++ b/lib/Transforms/Scalar/JumpThreading.cpp @@ -1212,7 +1212,7 @@ bool JumpThreadingPass::SimplifyPartiallyRedundantLoad(LoadInst *LI) { LoadInst *NewVal = new LoadInst( LoadedPtr->DoPHITranslation(LoadBB, UnavailablePred), LI->getName() + ".pr", false, LI->getAlignment(), LI->getOrdering(), - LI->getSynchScope(), UnavailablePred->getTerminator()); + LI->getSyncScopeID(), UnavailablePred->getTerminator()); NewVal->setDebugLoc(LI->getDebugLoc()); if (AATags) NewVal->setAAMetadata(AATags); diff --git a/lib/Transforms/Scalar/LoopDeletion.cpp b/lib/Transforms/Scalar/LoopDeletion.cpp index c41cc42db5e2c..ac4dd44a0e906 100644 --- a/lib/Transforms/Scalar/LoopDeletion.cpp +++ b/lib/Transforms/Scalar/LoopDeletion.cpp @@ -148,25 +148,27 @@ static bool deleteLoopIfDead(Loop *L, DominatorTree &DT, ScalarEvolution &SE, LoopInfo &LI, LPMUpdater *Updater = nullptr) { assert(L->isLCSSAForm(DT) && "Expected LCSSA!"); - // We can only remove the loop if there is a preheader that we can - // branch from after removing it. + // We can only remove the loop if there is a preheader that we can branch from + // after removing it. Also, if LoopSimplify form is not available, stay out + // of trouble. BasicBlock *Preheader = L->getLoopPreheader(); - if (!Preheader) + if (!Preheader || !L->hasDedicatedExits()) { + DEBUG(dbgs() + << "Deletion requires Loop with preheader and dedicated exits.\n"); return false; - - // If LoopSimplify form is not available, stay out of trouble. - if (!L->hasDedicatedExits()) - return false; - + } // We can't remove loops that contain subloops. If the subloops were dead, // they would already have been removed in earlier executions of this pass. 
- if (L->begin() != L->end()) + if (L->begin() != L->end()) { + DEBUG(dbgs() << "Loop contains subloops.\n"); return false; + } BasicBlock *ExitBlock = L->getUniqueExitBlock(); if (ExitBlock && isLoopNeverExecuted(L)) { + DEBUG(dbgs() << "Loop is proven to never execute, delete it!"); // Set incoming value to undef for phi nodes in the exit block. BasicBlock::iterator BI = ExitBlock->begin(); while (PHINode *P = dyn_cast<PHINode>(BI)) { @@ -188,20 +190,26 @@ static bool deleteLoopIfDead(Loop *L, DominatorTree &DT, ScalarEvolution &SE, // be in the situation of needing to be able to solve statically which exit // block will be branched to, or trying to preserve the branching logic in // a loop invariant manner. - if (!ExitBlock) + if (!ExitBlock) { + DEBUG(dbgs() << "Deletion requires single exit block\n"); return false; - + } // Finally, we have to check that the loop really is dead. bool Changed = false; - if (!isLoopDead(L, SE, ExitingBlocks, ExitBlock, Changed, Preheader)) + if (!isLoopDead(L, SE, ExitingBlocks, ExitBlock, Changed, Preheader)) { + DEBUG(dbgs() << "Loop is not invariant, cannot delete.\n"); return Changed; + } // Don't remove loops for which we can't solve the trip count. // They could be infinite, in which case we'd be changing program behavior. const SCEV *S = SE.getMaxBackedgeTakenCount(L); - if (isa<SCEVCouldNotCompute>(S)) + if (isa<SCEVCouldNotCompute>(S)) { + DEBUG(dbgs() << "Could not compute SCEV MaxBackedgeTakenCount.\n"); return Changed; + } + DEBUG(dbgs() << "Loop is invariant, delete it!"); deleteDeadLoop(L, DT, SE, LI, Updater); ++NumDeleted; @@ -311,6 +319,9 @@ static void deleteDeadLoop(Loop *L, DominatorTree &DT, ScalarEvolution &SE, PreservedAnalyses LoopDeletionPass::run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &Updater) { + + DEBUG(dbgs() << "Analyzing Loop for deletion: "); + DEBUG(L.dump()); if (!deleteLoopIfDead(&L, AR.DT, AR.SE, AR.LI, &Updater)) return PreservedAnalyses::all(); @@ -350,5 +361,7 @@ bool LoopDeletionLegacyPass::runOnLoop(Loop *L, LPPassManager &) { ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE(); LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); + DEBUG(dbgs() << "Analyzing Loop for deletion: "); + DEBUG(L->dump()); return deleteLoopIfDead(L, DT, SE, LI); } diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index 8b435050ac769..4a6a35c0ab1b9 100644 --- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -1160,7 +1160,7 @@ static bool detectPopcountIdiom(Loop *CurLoop, BasicBlock *PreCondBB, if (!Dec || !((SubInst->getOpcode() == Instruction::Sub && Dec->isOne()) || (SubInst->getOpcode() == Instruction::Add && - Dec->isAllOnesValue()))) { + Dec->isMinusOne()))) { return false; } } diff --git a/lib/Transforms/Scalar/LoopInterchange.cpp b/lib/Transforms/Scalar/LoopInterchange.cpp index 9f3875a3027f4..606136dc31a4b 100644 --- a/lib/Transforms/Scalar/LoopInterchange.cpp +++ b/lib/Transforms/Scalar/LoopInterchange.cpp @@ -757,8 +757,11 @@ bool LoopInterchangeLegality::currentLimitations() { PHINode *InnerInductionVar; SmallVector<PHINode *, 8> Inductions; SmallVector<PHINode *, 8> Reductions; - if (!findInductionAndReductions(InnerLoop, Inductions, Reductions)) + if (!findInductionAndReductions(InnerLoop, Inductions, Reductions)) { + DEBUG(dbgs() << "Only inner loops with induction or reduction PHI nodes " + << "are supported currently.\n"); return true; + } // 
TODO: Currently we handle only loops with 1 induction variable. if (Inductions.size() != 1) { @@ -771,16 +774,25 @@ InnerInductionVar = Inductions.pop_back_val(); Reductions.clear(); - if (!findInductionAndReductions(OuterLoop, Inductions, Reductions)) + if (!findInductionAndReductions(OuterLoop, Inductions, Reductions)) { + DEBUG(dbgs() << "Only outer loops with induction or reduction PHI nodes " + << "are supported currently.\n"); return true; + } // Outer loop cannot have reduction because then loops will not be tightly // nested. - if (!Reductions.empty()) + if (!Reductions.empty()) { + DEBUG(dbgs() << "Outer loops with reductions are not supported " + << "currently.\n"); return true; + } // TODO: Currently we handle only loops with 1 induction variable. - if (Inductions.size() != 1) + if (Inductions.size() != 1) { + DEBUG(dbgs() << "Loops with more than 1 induction variable are not " + << "supported currently.\n"); return true; + } // TODO: Triangular loops are not handled for now. if (!isLoopStructureUnderstood(InnerInductionVar)) { @@ -791,12 +803,16 @@ // TODO: We only handle LCSSA PHI's corresponding to reduction for now. BasicBlock *LoopExitBlock = getLoopLatchExitBlock(OuterLoopLatch, OuterLoopHeader); - if (!LoopExitBlock || !containsSafePHI(LoopExitBlock, true)) + if (!LoopExitBlock || !containsSafePHI(LoopExitBlock, true)) { + DEBUG(dbgs() << "Can only handle LCSSA PHIs in outer loops currently.\n"); return true; + } LoopExitBlock = getLoopLatchExitBlock(InnerLoopLatch, InnerLoopHeader); - if (!LoopExitBlock || !containsSafePHI(LoopExitBlock, false)) + if (!LoopExitBlock || !containsSafePHI(LoopExitBlock, false)) { + DEBUG(dbgs() << "Can only handle LCSSA PHIs in inner loops currently.\n"); return true; + } // TODO: Current limitation: Since we split the inner loop latch at the point // where the induction variable is incremented (induction.next), we cannot have @@ -816,8 +832,11 @@ InnerIndexVarInc = dyn_cast<Instruction>(InnerInductionVar->getIncomingValue(0)); - if (!InnerIndexVarInc) + if (!InnerIndexVarInc) { + DEBUG(dbgs() << "Did not find an instruction to increment the induction " + << "variable.\n"); return true; + } // Since we split the inner loop latch on this induction variable, make sure // we do not have any instruction between the induction variable and branch @@ -827,19 +846,24 @@ for (const Instruction &I : reverse(*InnerLoopLatch)) { if (isa<BranchInst>(I) || isa<CmpInst>(I) || isa<TruncInst>(I)) continue; + // We found an instruction. If this is not the induction variable then it is not // safe to split this loop latch. - if (!I.isIdenticalTo(InnerIndexVarInc)) + if (!I.isIdenticalTo(InnerIndexVarInc)) { + DEBUG(dbgs() << "Found unsupported instructions between induction " + << "variable increment and branch.\n"); return true; + } FoundInduction = true; break; } // The loop latch ended and we didn't find the induction variable; return as // current limitation.
- if (!FoundInduction) + if (!FoundInduction) { + DEBUG(dbgs() << "Did not find the induction variable.\n"); return true; - + } return false; } diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp index 7312d97f8efe1..3506ac343d594 100644 --- a/lib/Transforms/Scalar/LoopRotation.cpp +++ b/lib/Transforms/Scalar/LoopRotation.cpp @@ -485,10 +485,22 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { DomTreeNode *Node = HeaderChildren[I]; BasicBlock *BB = Node->getBlock(); - pred_iterator PI = pred_begin(BB); - BasicBlock *NearestDom = *PI; - for (pred_iterator PE = pred_end(BB); PI != PE; ++PI) - NearestDom = DT->findNearestCommonDominator(NearestDom, *PI); + BasicBlock *NearestDom = nullptr; + for (BasicBlock *Pred : predecessors(BB)) { + // Consider only reachable basic blocks. + if (!DT->getNode(Pred)) + continue; + + if (!NearestDom) { + NearestDom = Pred; + continue; + } + + NearestDom = DT->findNearestCommonDominator(NearestDom, Pred); + assert(NearestDom && "No NearestCommonDominator found"); + } + + assert(NearestDom && "Nearest dominator not found"); // Remember if this changes the DomTree. if (Node->getIDom()->getBlock() != NearestDom) { diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 73436f13c94e4..3638da118cb7e 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -140,6 +140,13 @@ static cl::opt<bool> LSRExpNarrow( cl::desc("Narrow LSR complex solution using" " expectation of registers number")); +// Flag to narrow search space by filtering non-optimal formulae with +// the same ScaledReg and Scale. +static cl::opt<bool> FilterSameScaledReg( + "lsr-filter-same-scaled-reg", cl::Hidden, cl::init(true), + cl::desc("Narrow LSR search space by filtering non-optimal formulae" + " with the same ScaledReg and Scale")); + #ifndef NDEBUG // Stress test IV chain generation. static cl::opt<bool> StressIVChain( @@ -1902,6 +1909,7 @@ class LSRInstance { void NarrowSearchSpaceByDetectingSupersets(); void NarrowSearchSpaceByCollapsingUnrolledCode(); void NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(); + void NarrowSearchSpaceByFilterFormulaWithSameScaledReg(); void NarrowSearchSpaceByDeletingCostlyFormulas(); void NarrowSearchSpaceByPickingWinnerRegs(); void NarrowSearchSpaceUsingHeuristics(); @@ -2318,7 +2326,7 @@ LSRInstance::OptimizeLoopTermCond() { dyn_cast_or_null<SCEVConstant>(getExactSDiv(B, A, SE))) { const ConstantInt *C = D->getValue(); // Stride of one or negative one can have reuse with non-addresses. - if (C->isOne() || C->isAllOnesValue()) + if (C->isOne() || C->isMinusOne()) goto decline_post_inc; // Avoid weird situations. if (C->getValue().getMinSignedBits() >= 64 || @@ -4306,6 +4314,104 @@ void LSRInstance::NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(){ } } +/// If an LSRUse has multiple formulae with the same ScaledReg and Scale, +/// pick the best one and delete the others. +/// This narrowing heuristic is to keep as many formulae with different +/// Scale and ScaledReg pairs as possible while narrowing the search space. +/// The benefit is that it is more likely to find a better solution +/// from a formulae set with more Scale and ScaledReg variations than +/// a formulae set with the same Scale and ScaledReg. The winner-picking +/// reg heuristic will often keep the formulae with the same Scale and +/// ScaledReg and filter out the others, and we want to avoid that if possible. 
+void LSRInstance::NarrowSearchSpaceByFilterFormulaWithSameScaledReg() { + if (EstimateSearchSpaceComplexity() < ComplexityLimit) + return; + + DEBUG(dbgs() << "The search space is too complex.\n" + "Narrowing the search space by choosing the best Formula " + "from the Formulae with the same Scale and ScaledReg.\n"); + + // Map the "Scale * ScaledReg" pair to the best formula of current LSRUse. + typedef DenseMap<std::pair<const SCEV *, int64_t>, size_t> BestFormulaeTy; + BestFormulaeTy BestFormulae; +#ifndef NDEBUG + bool ChangedFormulae = false; +#endif + DenseSet<const SCEV *> VisitedRegs; + SmallPtrSet<const SCEV *, 16> Regs; + + for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) { + LSRUse &LU = Uses[LUIdx]; + DEBUG(dbgs() << "Filtering for use "; LU.print(dbgs()); dbgs() << '\n'); + + // Return true if Formula FA is better than Formula FB. + auto IsBetterThan = [&](Formula &FA, Formula &FB) { + // First we will try to choose the Formula with fewer new registers. + // For a register used by the current Formula, the more the register is + // shared among LSRUses, the less we increase the register number + // counter of the formula. + size_t FARegNum = 0; + for (const SCEV *Reg : FA.BaseRegs) { + const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(Reg); + FARegNum += (NumUses - UsedByIndices.count() + 1); + } + size_t FBRegNum = 0; + for (const SCEV *Reg : FB.BaseRegs) { + const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(Reg); + FBRegNum += (NumUses - UsedByIndices.count() + 1); + } + if (FARegNum != FBRegNum) + return FARegNum < FBRegNum; + + // If the new register numbers are the same, choose the Formula with + // less Cost. + Cost CostFA, CostFB; + Regs.clear(); + CostFA.RateFormula(TTI, FA, Regs, VisitedRegs, L, SE, DT, LU); + Regs.clear(); + CostFB.RateFormula(TTI, FB, Regs, VisitedRegs, L, SE, DT, LU); + return CostFA.isLess(CostFB, TTI); + }; + + bool Any = false; + for (size_t FIdx = 0, NumForms = LU.Formulae.size(); FIdx != NumForms; + ++FIdx) { + Formula &F = LU.Formulae[FIdx]; + if (!F.ScaledReg) + continue; + auto P = BestFormulae.insert({{F.ScaledReg, F.Scale}, FIdx}); + if (P.second) + continue; + + Formula &Best = LU.Formulae[P.first->second]; + if (IsBetterThan(F, Best)) + std::swap(F, Best); + DEBUG(dbgs() << " Filtering out formula "; F.print(dbgs()); + dbgs() << "\n" + " in favor of formula "; + Best.print(dbgs()); dbgs() << '\n'); +#ifndef NDEBUG + ChangedFormulae = true; +#endif + LU.DeleteFormula(F); + --FIdx; + --NumForms; + Any = true; + } + if (Any) + LU.RecomputeRegs(LUIdx, RegUses); + + // Reset this to prepare for the next use. + BestFormulae.clear(); + } + + DEBUG(if (ChangedFormulae) { + dbgs() << "\n" + "After filtering out undesirable candidates:\n"; + print_uses(dbgs()); + }); +} + /// The function deletes formulas with a high expected number of registers. 
/// Assuming we don't know the value of each formula (having already deleted /// all inefficient ones), generate a probability of not selecting for each @@ -4516,6 +4622,8 @@ void LSRInstance::NarrowSearchSpaceUsingHeuristics() { NarrowSearchSpaceByDetectingSupersets(); NarrowSearchSpaceByCollapsingUnrolledCode(); NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(); + if (FilterSameScaledReg) + NarrowSearchSpaceByFilterFormulaWithSameScaledReg(); if (LSRExpNarrow) NarrowSearchSpaceByDeletingCostlyFormulas(); else diff --git a/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp b/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp index acd3ef6791bed..6727cf0179c18 100644 --- a/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp +++ b/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp @@ -238,7 +238,7 @@ PHINode *MergedLoadStoreMotion::getPHIOperand(BasicBlock *BB, StoreInst *S0, &BB->front()); NewPN->addIncoming(Opd1, S0->getParent()); NewPN->addIncoming(Opd2, S1->getParent()); - if (MD && NewPN->getType()->getScalarType()->isPointerTy()) + if (MD && NewPN->getType()->isPtrOrPtrVectorTy()) MD->invalidateCachedPointerInfo(NewPN); return NewPN; } diff --git a/lib/Transforms/Scalar/NewGVN.cpp b/lib/Transforms/Scalar/NewGVN.cpp index 9cf01c6582b58..9d018563618ea 100644 --- a/lib/Transforms/Scalar/NewGVN.cpp +++ b/lib/Transforms/Scalar/NewGVN.cpp @@ -866,9 +866,7 @@ PHIExpression *NewGVN::createPHIExpression(Instruction *I, bool &HasBackedge, // Things in TOPClass are equivalent to everything. if (ValueToClass.lookup(*U) == TOPClass) return false; - if (lookupOperandLeader(*U) == PN) - return false; - return true; + return lookupOperandLeader(*U) != PN; }); std::transform(Filtered.begin(), Filtered.end(), op_inserter(E), [&](const Use *U) -> Value * { @@ -2063,9 +2061,10 @@ Value *NewGVN::getNextValueLeader(CongruenceClass *CC) const { // // The invariants of this function are: // -// I must be moving to NewClass from OldClass The StoreCount of OldClass and -// NewClass is expected to have been updated for I already if it is is a store. -// The OldClass memory leader has not been updated yet if I was the leader. +// - I must be moving to NewClass from OldClass +// - The StoreCount of OldClass and NewClass is expected to have been updated +// for I already if it is a store. +// - The OldClass memory leader has not been updated yet if I was the leader. void NewGVN::moveMemoryToNewCongruenceClass(Instruction *I, MemoryAccess *InstMA, CongruenceClass *OldClass, @@ -2074,7 +2073,8 @@ void NewGVN::moveMemoryToNewCongruenceClass(Instruction *I, // be the MemoryAccess of OldClass. assert((!InstMA || !OldClass->getMemoryLeader() || OldClass->getLeader() != I || - OldClass->getMemoryLeader() == InstMA) && + MemoryAccessToClass.lookup(OldClass->getMemoryLeader()) == + MemoryAccessToClass.lookup(InstMA)) && "Representative MemoryAccess mismatch"); // First, see what happens to the new class if (!NewClass->getMemoryLeader()) { @@ -2136,7 +2136,7 @@ void NewGVN::moveValueToNewCongruenceClass(Instruction *I, const Expression *E, << NewClass->getID() << " from " << *NewClass->getLeader() << " to " << *SI << " because store joined class\n"); // If we changed the leader, we have to mark it changed because we don't - // know what it will do to symbolic evlauation. + // know what it will do to symbolic evaluation. NewClass->setLeader(SI); } // We rely on the code below handling the MemoryAccess change. 
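The formula-filtering loop added to LoopStrengthReduce.cpp above is an instance of a keep-one-winner-per-key pattern: try to insert the current index under its (ScaledReg, Scale) key; on collision, compare against the recorded winner, swap if the newcomer is better, and delete the loser. A condensed sketch with stand-in types (Formula, Cost, and isBetterThan here are placeholders, not LSR's real API):

#include <cstddef>
#include <map>
#include <utility>
#include <vector>

// Placeholder formula: the two key fields plus a scalar cost.
struct Formula { int ScaledReg; long Scale; int Cost; };

// Stand-in for LSR's two-stage comparison (register count, then RateFormula).
static bool isBetterThan(const Formula &A, const Formula &B) {
  return A.Cost < B.Cost;
}

static void filterBestPerKey(std::vector<Formula> &Formulae) {
  // Maps each (ScaledReg, Scale) key to the index of its best formula so far.
  std::map<std::pair<int, long>, std::size_t> Best;
  for (std::size_t I = 0; I != Formulae.size();) {
    auto Ins = Best.insert({{Formulae[I].ScaledReg, Formulae[I].Scale}, I});
    if (Ins.second) { // First formula with this key: keep it and move on.
      ++I;
      continue;
    }
    Formula &Winner = Formulae[Ins.first->second];
    if (isBetterThan(Formulae[I], Winner))
      std::swap(Formulae[I], Winner); // Swap the better candidate into place.
    // Erase the loser; all recorded winner indices precede I and stay valid.
    Formulae.erase(Formulae.begin() + I); // Note: I is not advanced here.
  }
}

LSR's version additionally calls RecomputeRegs after deleting formulae and resets the map per use; the sketch keeps only the winner-selection core.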
diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp index cdba0062953f1..29d1ba406ae49 100644 --- a/lib/Transforms/Scalar/Reassociate.cpp +++ b/lib/Transforms/Scalar/Reassociate.cpp @@ -2148,7 +2148,7 @@ void ReassociatePass::ReassociateExpression(BinaryOperator *I) { if (I->getOpcode() == Instruction::Mul && cast<Instruction>(I->user_back())->getOpcode() == Instruction::Add && isa<ConstantInt>(Ops.back().Op) && - cast<ConstantInt>(Ops.back().Op)->isAllOnesValue()) { + cast<ConstantInt>(Ops.back().Op)->isMinusOne()) { ValueEntry Tmp = Ops.pop_back_val(); Ops.insert(Ops.begin(), Tmp); } else if (I->getOpcode() == Instruction::FMul && diff --git a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp index a73e9aec06170..f19d45329d238 100644 --- a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp +++ b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp @@ -1994,7 +1994,7 @@ static void rematerializeLiveValues(CallSite CS, Instruction *LastClonedValue = nullptr; Instruction *LastValue = nullptr; for (Instruction *Instr: ChainToBase) { - // Only GEP's and casts are suported as we need to be careful to not + // Only GEP's and casts are supported as we need to be careful to not // introduce any new uses of pointers not in the liveset. // Note that it's fine to introduce new uses of pointers which were // otherwise not used after this statepoint. diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp index 7a6fa1711411d..a738ebb4607e4 100644 --- a/lib/Transforms/Scalar/SCCP.cpp +++ b/lib/Transforms/Scalar/SCCP.cpp @@ -963,7 +963,7 @@ void SCCPSolver::visitBinaryOperator(Instruction &I) { } else { // X or -1 = -1 if (ConstantInt *CI = NonOverdefVal->getConstantInt()) - if (CI->isAllOnesValue()) + if (CI->isMinusOne()) return markConstant(IV, &I, NonOverdefVal->getConstant()); } } diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp index 4729f4ef59567..b9cee5b2ba956 100644 --- a/lib/Transforms/Scalar/SROA.cpp +++ b/lib/Transforms/Scalar/SROA.cpp @@ -1673,8 +1673,7 @@ static Value *convertValue(const DataLayout &DL, IRBuilderTy &IRB, Value *V, // See if we need inttoptr for this type pair. A cast involving both scalars // and vectors requires an additional bitcast. - if (OldTy->getScalarType()->isIntegerTy() && - NewTy->getScalarType()->isPointerTy()) { + if (OldTy->isIntOrIntVectorTy() && NewTy->isPtrOrPtrVectorTy()) { // Expand <2 x i32> to i8* --> <2 x i32> to i64 to i8* if (OldTy->isVectorTy() && !NewTy->isVectorTy()) return IRB.CreateIntToPtr(IRB.CreateBitCast(V, DL.getIntPtrType(NewTy)), @@ -1690,8 +1689,7 @@ static Value *convertValue(const DataLayout &DL, IRBuilderTy &IRB, Value *V, // See if we need ptrtoint for this type pair. A cast involving both scalars // and vectors requires an additional bitcast. 
- if (OldTy->getScalarType()->isPointerTy() && - NewTy->getScalarType()->isIntegerTy()) { + if (OldTy->isPtrOrPtrVectorTy() && NewTy->isIntOrIntVectorTy()) { // Expand <2 x i8*> to i128 --> <2 x i8*> to <2 x i64> to i128 if (OldTy->isVectorTy() && !NewTy->isVectorTy()) return IRB.CreateBitCast(IRB.CreatePtrToInt(V, DL.getIntPtrType(OldTy)), @@ -2400,7 +2398,7 @@ private: LoadInst *NewLI = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), LI.isVolatile(), LI.getName()); if (LI.isVolatile()) - NewLI->setAtomic(LI.getOrdering(), LI.getSynchScope()); + NewLI->setAtomic(LI.getOrdering(), LI.getSyncScopeID()); // Any !nonnull metadata or !range metadata on the old load is also valid // on the new load. This is even true in some cases even when the loads @@ -2435,7 +2433,7 @@ private: getSliceAlign(TargetTy), LI.isVolatile(), LI.getName()); if (LI.isVolatile()) - NewLI->setAtomic(LI.getOrdering(), LI.getSynchScope()); + NewLI->setAtomic(LI.getOrdering(), LI.getSyncScopeID()); V = NewLI; IsPtrAdjusted = true; @@ -2578,7 +2576,7 @@ private: } NewSI->copyMetadata(SI, LLVMContext::MD_mem_parallel_loop_access); if (SI.isVolatile()) - NewSI->setAtomic(SI.getOrdering(), SI.getSynchScope()); + NewSI->setAtomic(SI.getOrdering(), SI.getSyncScopeID()); Pass.DeadInsts.insert(&SI); deleteIfTriviallyDead(OldOp); diff --git a/lib/Transforms/Scalar/StructurizeCFG.cpp b/lib/Transforms/Scalar/StructurizeCFG.cpp index 486f3e5a43d49..0cccb415efdb1 100644 --- a/lib/Transforms/Scalar/StructurizeCFG.cpp +++ b/lib/Transforms/Scalar/StructurizeCFG.cpp @@ -329,7 +329,7 @@ void StructurizeCFG::analyzeLoops(RegionNode *N) { Loops[Exit] = N->getEntry(); } else { - // Test for sucessors as back edge + // Test for successors as back edge BasicBlock *BB = N->getNodeAs<BasicBlock>(); BranchInst *Term = cast<BranchInst>(BB->getTerminator()); diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp index 314c990293cc5..7e75e88477852 100644 --- a/lib/Transforms/Utils/CloneFunction.cpp +++ b/lib/Transforms/Utils/CloneFunction.cpp @@ -46,13 +46,21 @@ BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, if (BB->hasName()) NewBB->setName(BB->getName()+NameSuffix); bool hasCalls = false, hasDynamicAllocas = false, hasStaticAllocas = false; - + Module *TheModule = F ? F->getParent() : nullptr; + // Loop over all instructions, and copy them over. for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end(); II != IE; ++II) { - if (DIFinder && F->getParent() && II->getDebugLoc()) - DIFinder->processLocation(*F->getParent(), II->getDebugLoc().get()); + if (DIFinder && TheModule) { + if (auto *DDI = dyn_cast<DbgDeclareInst>(II)) + DIFinder->processDeclare(*TheModule, DDI); + else if (auto *DVI = dyn_cast<DbgValueInst>(II)) + DIFinder->processValue(*TheModule, DVI); + + if (auto DbgLoc = II->getDebugLoc()) + DIFinder->processLocation(*TheModule, DbgLoc.get()); + } Instruction *NewInst = II->clone(); if (II->hasName()) @@ -153,6 +161,8 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, // When we remap instructions, we want to avoid duplicating inlined // DISubprograms, so record all subprograms we find as we duplicate // instructions and then freeze them in the MD map. + // We also record information about dbg.value and dbg.declare to avoid + // duplicating the types. 
DebugInfoFinder DIFinder; // Loop over all of the basic blocks in the function, cloning them as @@ -193,6 +203,10 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, } } + for (auto *Type : DIFinder.types()) { + VMap.MD()[Type].reset(Type); + } + // Loop over all of the instructions in the function, fixing up operand // references as we go. This uses VMap to do all the hard work. for (Function::iterator BB = diff --git a/lib/Transforms/Utils/CmpInstAnalysis.cpp b/lib/Transforms/Utils/CmpInstAnalysis.cpp index 9f4d9c7e39810..d9294c4993091 100644 --- a/lib/Transforms/Utils/CmpInstAnalysis.cpp +++ b/lib/Transforms/Utils/CmpInstAnalysis.cpp @@ -81,7 +81,7 @@ bool llvm::decomposeBitTestICmp(const ICmpInst *I, CmpInst::Predicate &Pred, break; case ICmpInst::ICMP_SGT: // X > -1 is equivalent to (X & SignMask) == 0. - if (!C->isAllOnesValue()) + if (!C->isMinusOne()) return false; Y = ConstantInt::get(I->getContext(), APInt::getSignMask(C->getBitWidth())); Pred = ICmpInst::ICMP_EQ; diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp index 30d8856cfbef1..1189714dfab10 100644 --- a/lib/Transforms/Utils/CodeExtractor.cpp +++ b/lib/Transforms/Utils/CodeExtractor.cpp @@ -1116,12 +1116,6 @@ Function *CodeExtractor::extractCodeRegion() { } } - //cerr << "NEW FUNCTION: " << *newFunction; - // verifyFunction(*newFunction); - - // cerr << "OLD FUNCTION: " << *oldFunction; - // verifyFunction(*oldFunction); - DEBUG(if (verifyFunction(*newFunction)) report_fatal_error("verifyFunction failed!")); return newFunction; diff --git a/lib/Transforms/Utils/Evaluator.cpp b/lib/Transforms/Utils/Evaluator.cpp index c97e544e620a9..1328f2f3ec012 100644 --- a/lib/Transforms/Utils/Evaluator.cpp +++ b/lib/Transforms/Utils/Evaluator.cpp @@ -402,7 +402,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, Value *Ptr = PtrArg->stripPointerCasts(); if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr)) { Type *ElemTy = GV->getValueType(); - if (!Size->isAllOnesValue() && + if (!Size->isMinusOne() && Size->getValue().getLimitedValue() >= DL.getTypeStoreSize(ElemTy)) { Invariants.insert(GV); diff --git a/lib/Transforms/Utils/FunctionComparator.cpp b/lib/Transforms/Utils/FunctionComparator.cpp index 0457294361b56..4a2be3a531767 100644 --- a/lib/Transforms/Utils/FunctionComparator.cpp +++ b/lib/Transforms/Utils/FunctionComparator.cpp @@ -513,8 +513,8 @@ int FunctionComparator::cmpOperations(const Instruction *L, if (int Res = cmpOrderings(LI->getOrdering(), cast<LoadInst>(R)->getOrdering())) return Res; - if (int Res = - cmpNumbers(LI->getSynchScope(), cast<LoadInst>(R)->getSynchScope())) + if (int Res = cmpNumbers(LI->getSyncScopeID(), + cast<LoadInst>(R)->getSyncScopeID())) return Res; return cmpRangeMetadata(LI->getMetadata(LLVMContext::MD_range), cast<LoadInst>(R)->getMetadata(LLVMContext::MD_range)); @@ -529,7 +529,8 @@ int FunctionComparator::cmpOperations(const Instruction *L, if (int Res = cmpOrderings(SI->getOrdering(), cast<StoreInst>(R)->getOrdering())) return Res; - return cmpNumbers(SI->getSynchScope(), cast<StoreInst>(R)->getSynchScope()); + return cmpNumbers(SI->getSyncScopeID(), + cast<StoreInst>(R)->getSyncScopeID()); } if (const CmpInst *CI = dyn_cast<CmpInst>(L)) return cmpNumbers(CI->getPredicate(), cast<CmpInst>(R)->getPredicate()); @@ -584,7 +585,8 @@ int FunctionComparator::cmpOperations(const Instruction *L, if (int Res = cmpOrderings(FI->getOrdering(), cast<FenceInst>(R)->getOrdering())) return Res; - return 
cmpNumbers(FI->getSynchScope(), cast<FenceInst>(R)->getSynchScope()); + return cmpNumbers(FI->getSyncScopeID(), + cast<FenceInst>(R)->getSyncScopeID()); } if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(L)) { if (int Res = cmpNumbers(CXI->isVolatile(), @@ -601,8 +603,8 @@ int FunctionComparator::cmpOperations(const Instruction *L, cmpOrderings(CXI->getFailureOrdering(), cast<AtomicCmpXchgInst>(R)->getFailureOrdering())) return Res; - return cmpNumbers(CXI->getSynchScope(), - cast<AtomicCmpXchgInst>(R)->getSynchScope()); + return cmpNumbers(CXI->getSyncScopeID(), + cast<AtomicCmpXchgInst>(R)->getSyncScopeID()); } if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(L)) { if (int Res = cmpNumbers(RMWI->getOperation(), @@ -614,8 +616,8 @@ int FunctionComparator::cmpOperations(const Instruction *L, if (int Res = cmpOrderings(RMWI->getOrdering(), cast<AtomicRMWInst>(R)->getOrdering())) return Res; - return cmpNumbers(RMWI->getSynchScope(), - cast<AtomicRMWInst>(R)->getSynchScope()); + return cmpNumbers(RMWI->getSyncScopeID(), + cast<AtomicRMWInst>(R)->getSyncScopeID()); } if (const PHINode *PNL = dyn_cast<PHINode>(L)) { const PHINode *PNR = cast<PHINode>(R); diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index 5127eba3f9aea..74610613001c6 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -1662,9 +1662,10 @@ void llvm::removeUnwindEdge(BasicBlock *BB) { TI->eraseFromParent(); } -/// removeUnreachableBlocksFromFn - Remove blocks that are not reachable, even +/// removeUnreachableBlocks - Remove blocks that are not reachable, even /// if they are in a dead cycle. Return true if a change was made, false -/// otherwise. +/// otherwise. If `LVI` is passed, this function preserves LazyValueInfo +/// after modifying the CFG. bool llvm::removeUnreachableBlocks(Function &F, LazyValueInfo *LVI) { SmallPtrSet<BasicBlock*, 16> Reachable; bool Changed = markAliveBlocks(F, Reachable); @@ -2168,6 +2169,9 @@ bool llvm::canReplaceOperandWithVariable(const Instruction *I, unsigned OpIdx) { return true; case Instruction::Call: case Instruction::Invoke: + // Can't handle inline asm. Skip it. + if (isa<InlineAsm>(ImmutableCallSite(I).getCalledValue())) + return false; // Many arithmetic intrinsics have no issue taking a // variable; however, it's hard to distinguish these from // specials such as @llvm.frameaddress that require a constant. @@ -2182,12 +2186,18 @@ bool llvm::canReplaceOperandWithVariable(const Instruction *I, unsigned OpIdx) { case Instruction::ShuffleVector: // Shufflevector masks are constant. return OpIdx != 2; + case Instruction::Switch: case Instruction::ExtractValue: - case Instruction::InsertValue: // All operands apart from the first are constant. return OpIdx == 0; + case Instruction::InsertValue: + // All operands apart from the first and the second are constant. + return OpIdx < 2; case Instruction::Alloca: - return false; + // Static allocas (constant size in the entry block) are handled by + // prologue/epilogue insertion so they're free anyway. We definitely don't + // want to make them non-constant. 
+ return !cast<AllocaInst>(I)->isStaticAlloca(); case Instruction::GetElementPtr: if (OpIdx == 0) return true; diff --git a/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/lib/Transforms/Utils/LoopUnrollRuntime.cpp index 9ad2b707e6b23..5170c68e2915a 100644 --- a/lib/Transforms/Utils/LoopUnrollRuntime.cpp +++ b/lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -65,9 +65,11 @@ static cl::opt<bool> UnrollRuntimeMultiExit( /// than the unroll factor. /// static void ConnectProlog(Loop *L, Value *BECount, unsigned Count, - BasicBlock *PrologExit, BasicBlock *PreHeader, - BasicBlock *NewPreHeader, ValueToValueMapTy &VMap, - DominatorTree *DT, LoopInfo *LI, bool PreserveLCSSA) { + BasicBlock *PrologExit, + BasicBlock *OriginalLoopLatchExit, + BasicBlock *PreHeader, BasicBlock *NewPreHeader, + ValueToValueMapTy &VMap, DominatorTree *DT, + LoopInfo *LI, bool PreserveLCSSA) { BasicBlock *Latch = L->getLoopLatch(); assert(Latch && "Loop must have a latch"); BasicBlock *PrologLatch = cast<BasicBlock>(VMap[Latch]); @@ -142,17 +144,15 @@ static void ConnectProlog(Loop *L, Value *BECount, unsigned Count, // then (BECount + 1) cannot unsigned-overflow. Value *BrLoopExit = B.CreateICmpULT(BECount, ConstantInt::get(BECount->getType(), Count - 1)); - BasicBlock *Exit = L->getUniqueExitBlock(); - assert(Exit && "Loop must have a single exit block only"); // Split the exit to maintain loop canonicalization guarantees - SmallVector<BasicBlock*, 4> Preds(predecessors(Exit)); - SplitBlockPredecessors(Exit, Preds, ".unr-lcssa", DT, LI, + SmallVector<BasicBlock *, 4> Preds(predecessors(OriginalLoopLatchExit)); + SplitBlockPredecessors(OriginalLoopLatchExit, Preds, ".unr-lcssa", DT, LI, PreserveLCSSA); // Add the branch to the exit block (around the unrolled loop) - B.CreateCondBr(BrLoopExit, Exit, NewPreHeader); + B.CreateCondBr(BrLoopExit, OriginalLoopLatchExit, NewPreHeader); InsertPt->eraseFromParent(); if (DT) - DT->changeImmediateDominator(Exit, PrologExit); + DT->changeImmediateDominator(OriginalLoopLatchExit, PrologExit); } /// Connect the unrolling epilog code to the original loop. @@ -427,6 +427,50 @@ CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop, return nullptr; } +/// Returns true if we can safely unroll a multi-exit/exiting loop. OtherExits +/// is populated with all the loop exit blocks other than the LatchExit block. +static bool +canSafelyUnrollMultiExitLoop(Loop *L, SmallVectorImpl<BasicBlock *> &OtherExits, + BasicBlock *LatchExit, bool PreserveLCSSA, + bool UseEpilogRemainder) { + + // Support runtime unrolling for multiple exit blocks and multiple exiting + // blocks. + if (!UnrollRuntimeMultiExit) + return false; + // Even if runtime multi exit is enabled, we currently have some correctness + // constraints in unrolling a multi-exit loop. + // We rely on LCSSA form being preserved when the exit blocks are transformed. + if (!PreserveLCSSA) + return false; + SmallVector<BasicBlock *, 4> Exits; + L->getUniqueExitBlocks(Exits); + for (auto *BB : Exits) + if (BB != LatchExit) + OtherExits.push_back(BB); + + // TODO: Support multiple exiting blocks jumping to the `LatchExit` when + // UnrollRuntimeMultiExit is true. This will require updating the logic in + // connectEpilog/connectProlog. + if (!LatchExit->getSinglePredecessor()) { + DEBUG(dbgs() << "Bailout for multi-exit handling when latch exit has >1 " + "predecessor.\n"); + return false; + } + // FIXME: We bail out of multi-exit unrolling when epilog loop is generated + // and L is an inner loop. 
This is because, in the presence of multiple exits, the + // outer loop is incorrect: we do not add the EpilogPreheader and exit to the + // outer loop. This is automatically handled in the prolog case, so we do not + // have that bug in prolog generation. + if (UseEpilogRemainder && L->getParentLoop()) + return false; + + // All constraints have been satisfied. + return true; +} + + + /// Insert code in the prolog/epilog code when unrolling a loop with a /// run-time trip-count. /// @@ -470,53 +514,40 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, bool UseEpilogRemainder, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, bool PreserveLCSSA) { - // for now, only unroll loops that contain a single exit - if (!UnrollRuntimeMultiExit && !L->getExitingBlock()) - return false; + DEBUG(dbgs() << "Trying runtime unrolling on Loop: \n"); + DEBUG(L->dump()); // Make sure the loop is in canonical form. - if (!L->isLoopSimplifyForm()) + if (!L->isLoopSimplifyForm()) { + DEBUG(dbgs() << "Not in simplify form!\n"); return false; + } // Guaranteed by LoopSimplifyForm. BasicBlock *Latch = L->getLoopLatch(); BasicBlock *Header = L->getHeader(); - BasicBlock *LatchExit = L->getUniqueExitBlock(); // successor out of loop - if (!LatchExit && !UnrollRuntimeMultiExit) - return false; - // These are exit blocks other than the target of the latch exiting block. - SmallVector<BasicBlock *, 4> OtherExits; BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator()); - unsigned int ExitIndex = LatchBR->getSuccessor(0) == Header ? 1 : 0; + unsigned ExitIndex = LatchBR->getSuccessor(0) == Header ? 1 : 0; + BasicBlock *LatchExit = LatchBR->getSuccessor(ExitIndex); // Cloning the loop basic blocks (`CloneLoopBlocks`) requires that one of the // targets of the Latch be an exit block out of the loop. This needs // to be guaranteed by the callers of UnrollRuntimeLoopRemainder. - assert(!L->contains(LatchBR->getSuccessor(ExitIndex)) && + assert(!L->contains(LatchExit) && "one of the loop latch successors should be the exit block!"); - // Support runtime unrolling for multiple exit blocks and multiple exiting - // blocks. - if (!LatchExit) { - assert(UseEpilogRemainder && "Multi exit unrolling is currently supported " - "unrolling with epilog remainder only!"); - LatchExit = LatchBR->getSuccessor(ExitIndex); - // We rely on LCSSA form being preserved when the exit blocks are - // transformed. - if (!PreserveLCSSA) - return false; - // TODO: Support multiple exiting blocks jumping to the `LatchExit`. This - // will need updating the logic in connectEpilog. - if (!LatchExit->getSinglePredecessor()) - return false; - SmallVector<BasicBlock *, 4> Exits; - L->getUniqueExitBlocks(Exits); - for (auto *BB : Exits) - if (BB != LatchExit) - OtherExits.push_back(BB); + // These are exit blocks other than the target of the latch exiting block. + SmallVector<BasicBlock *, 4> OtherExits; + bool isMultiExitUnrollingEnabled = canSafelyUnrollMultiExitLoop( + L, OtherExits, LatchExit, PreserveLCSSA, UseEpilogRemainder); + // Support only a single exit and exiting block unless multi-exit loop unrolling is enabled. + if (!isMultiExitUnrollingEnabled && + (!L->getExitingBlock() || OtherExits.size())) { + DEBUG( + dbgs() + << "Multiple exit/exiting blocks in loop and multi-exit unrolling not " "enabled!\n"); + return false; } - - assert(LatchExit && "Latch Exit should exist!"); - // Use Scalar Evolution to compute the trip count. This allows more loops to // be unrolled than relying on induction var simplification. 
if (!SE) @@ -530,29 +561,38 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, // exiting blocks). const SCEV *BECountSC = SE->getExitCount(L, Latch); if (isa<SCEVCouldNotCompute>(BECountSC) || - !BECountSC->getType()->isIntegerTy()) + !BECountSC->getType()->isIntegerTy()) { + DEBUG(dbgs() << "Could not compute exit block SCEV\n"); return false; + } unsigned BEWidth = cast<IntegerType>(BECountSC->getType())->getBitWidth(); // Add 1 since the backedge count doesn't include the first loop iteration. const SCEV *TripCountSC = SE->getAddExpr(BECountSC, SE->getConstant(BECountSC->getType(), 1)); - if (isa<SCEVCouldNotCompute>(TripCountSC)) + if (isa<SCEVCouldNotCompute>(TripCountSC)) { + DEBUG(dbgs() << "Could not compute trip count SCEV.\n"); return false; + } BasicBlock *PreHeader = L->getLoopPreheader(); BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator()); const DataLayout &DL = Header->getModule()->getDataLayout(); SCEVExpander Expander(*SE, DL, "loop-unroll"); if (!AllowExpensiveTripCount && - Expander.isHighCostExpansion(TripCountSC, L, PreHeaderBR)) + Expander.isHighCostExpansion(TripCountSC, L, PreHeaderBR)) { + DEBUG(dbgs() << "High cost for expanding trip count scev!\n"); return false; + } // This constraint lets us deal with an overflowing trip count easily; see the // comment on ModVal below. - if (Log2_32(Count) > BEWidth) + if (Log2_32(Count) > BEWidth) { + DEBUG(dbgs() + << "Count failed constraint on overflow trip count calculation.\n"); return false; + } // Loop structure is the following: // @@ -711,11 +751,10 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, // node. for (unsigned i =0; i < oldNumOperands; i++){ Value *newVal = VMap[Phi->getIncomingValue(i)]; - if (!newVal) { - assert(isa<Constant>(Phi->getIncomingValue(i)) && - "VMap should exist for all values except constants!"); + // newVal can be a constant or derived from values outside the loop, and + // hence need not have a VMap value. + if (!newVal) newVal = Phi->getIncomingValue(i); - } Phi->addIncoming(newVal, cast<BasicBlock>(VMap[Phi->getIncomingBlock(i)])); } @@ -781,8 +820,8 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, } else { // Connect the prolog code to the original loop and update the // PHI functions. 
- ConnectProlog(L, BECount, Count, PrologExit, PreHeader, NewPreHeader, - VMap, DT, LI, PreserveLCSSA); + ConnectProlog(L, BECount, Count, PrologExit, LatchExit, PreHeader, + NewPreHeader, VMap, DT, LI, PreserveLCSSA); } // If this loop is nested, then the loop unroller changes the code in the diff --git a/lib/Transforms/Utils/LowerMemIntrinsics.cpp b/lib/Transforms/Utils/LowerMemIntrinsics.cpp index 1c2a60a6b8b24..900450b400612 100644 --- a/lib/Transforms/Utils/LowerMemIntrinsics.cpp +++ b/lib/Transforms/Utils/LowerMemIntrinsics.cpp @@ -8,12 +8,256 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/LowerMemIntrinsics.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" using namespace llvm; +static unsigned getLoopOperandSizeInBytes(Type *Type) { + if (VectorType *VTy = dyn_cast<VectorType>(Type)) { + return VTy->getBitWidth() / 8; + } + + return Type->getPrimitiveSizeInBits() / 8; +} + +void llvm::createMemCpyLoopKnownSize(Instruction *InsertBefore, Value *SrcAddr, + Value *DstAddr, ConstantInt *CopyLen, + unsigned SrcAlign, unsigned DestAlign, + bool SrcIsVolatile, bool DstIsVolatile, + const TargetTransformInfo &TTI) { + // No need to expand zero length copies. + if (CopyLen->isZero()) + return; + + BasicBlock *PreLoopBB = InsertBefore->getParent(); + BasicBlock *PostLoopBB = nullptr; + Function *ParentFunc = PreLoopBB->getParent(); + LLVMContext &Ctx = PreLoopBB->getContext(); + + Type *TypeOfCopyLen = CopyLen->getType(); + Type *LoopOpType = + TTI.getMemcpyLoopLoweringType(Ctx, CopyLen, SrcAlign, DestAlign); + + unsigned LoopOpSize = getLoopOperandSizeInBytes(LoopOpType); + uint64_t LoopEndCount = CopyLen->getZExtValue() / LoopOpSize; + + unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace(); + unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace(); + + if (LoopEndCount != 0) { + // Split + PostLoopBB = PreLoopBB->splitBasicBlock(InsertBefore, "memcpy-split"); + BasicBlock *LoopBB = + BasicBlock::Create(Ctx, "load-store-loop", ParentFunc, PostLoopBB); + PreLoopBB->getTerminator()->setSuccessor(0, LoopBB); + + IRBuilder<> PLBuilder(PreLoopBB->getTerminator()); + + // Cast the Src and Dst pointers to pointers to the loop operand type (if + // needed). + PointerType *SrcOpType = PointerType::get(LoopOpType, SrcAS); + PointerType *DstOpType = PointerType::get(LoopOpType, DstAS); + if (SrcAddr->getType() != SrcOpType) { + SrcAddr = PLBuilder.CreateBitCast(SrcAddr, SrcOpType); + } + if (DstAddr->getType() != DstOpType) { + DstAddr = PLBuilder.CreateBitCast(DstAddr, DstOpType); + } + + IRBuilder<> LoopBuilder(LoopBB); + PHINode *LoopIndex = LoopBuilder.CreatePHI(TypeOfCopyLen, 2, "loop-index"); + LoopIndex->addIncoming(ConstantInt::get(TypeOfCopyLen, 0U), PreLoopBB); + // Loop Body + Value *SrcGEP = + LoopBuilder.CreateInBoundsGEP(LoopOpType, SrcAddr, LoopIndex); + Value *Load = LoopBuilder.CreateLoad(SrcGEP, SrcIsVolatile); + Value *DstGEP = + LoopBuilder.CreateInBoundsGEP(LoopOpType, DstAddr, LoopIndex); + LoopBuilder.CreateStore(Load, DstGEP, DstIsVolatile); + + Value *NewIndex = + LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(TypeOfCopyLen, 1U)); + LoopIndex->addIncoming(NewIndex, LoopBB); + + // Create the loop branch condition. 
+ Constant *LoopEndCI = ConstantInt::get(TypeOfCopyLen, LoopEndCount); + LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpULT(NewIndex, LoopEndCI), + LoopBB, PostLoopBB); + } + + uint64_t BytesCopied = LoopEndCount * LoopOpSize; + uint64_t RemainingBytes = CopyLen->getZExtValue() - BytesCopied; + if (RemainingBytes) { + IRBuilder<> RBuilder(PostLoopBB ? PostLoopBB->getFirstNonPHI() + : InsertBefore); + + // Update the alignment based on the copy size used in the loop body. + SrcAlign = std::min(SrcAlign, LoopOpSize); + DestAlign = std::min(DestAlign, LoopOpSize); + + SmallVector<Type *, 5> RemainingOps; + TTI.getMemcpyLoopResidualLoweringType(RemainingOps, Ctx, RemainingBytes, + SrcAlign, DestAlign); + + for (auto OpTy : RemainingOps) { + // Calculate the new index + unsigned OperandSize = getLoopOperandSizeInBytes(OpTy); + uint64_t GepIndex = BytesCopied / OperandSize; + assert(GepIndex * OperandSize == BytesCopied && + "Division should have no Remainder!"); + // Cast source to operand type and load + PointerType *SrcPtrType = PointerType::get(OpTy, SrcAS); + Value *CastedSrc = SrcAddr->getType() == SrcPtrType + ? SrcAddr + : RBuilder.CreateBitCast(SrcAddr, SrcPtrType); + Value *SrcGEP = RBuilder.CreateInBoundsGEP( + OpTy, CastedSrc, ConstantInt::get(TypeOfCopyLen, GepIndex)); + Value *Load = RBuilder.CreateLoad(SrcGEP, SrcIsVolatile); + + // Cast destination to operand type and store. + PointerType *DstPtrType = PointerType::get(OpTy, DstAS); + Value *CastedDst = DstAddr->getType() == DstPtrType + ? DstAddr + : RBuilder.CreateBitCast(DstAddr, DstPtrType); + Value *DstGEP = RBuilder.CreateInBoundsGEP( + OpTy, CastedDst, ConstantInt::get(TypeOfCopyLen, GepIndex)); + RBuilder.CreateStore(Load, DstGEP, DstIsVolatile); + + BytesCopied += OperandSize; + } + } + assert(BytesCopied == CopyLen->getZExtValue() && + "Bytes copied should match size in the call!"); +} + +void llvm::createMemCpyLoopUnknownSize(Instruction *InsertBefore, + Value *SrcAddr, Value *DstAddr, + Value *CopyLen, unsigned SrcAlign, + unsigned DestAlign, bool SrcIsVolatile, + bool DstIsVolatile, + const TargetTransformInfo &TTI) { + BasicBlock *PreLoopBB = InsertBefore->getParent(); + BasicBlock *PostLoopBB = + PreLoopBB->splitBasicBlock(InsertBefore, "post-loop-memcpy-expansion"); + + Function *ParentFunc = PreLoopBB->getParent(); + LLVMContext &Ctx = PreLoopBB->getContext(); + + Type *LoopOpType = + TTI.getMemcpyLoopLoweringType(Ctx, CopyLen, SrcAlign, DestAlign); + unsigned LoopOpSize = getLoopOperandSizeInBytes(LoopOpType); + + IRBuilder<> PLBuilder(PreLoopBB->getTerminator()); + + unsigned SrcAS = cast<PointerType>(SrcAddr->getType())->getAddressSpace(); + unsigned DstAS = cast<PointerType>(DstAddr->getType())->getAddressSpace(); + PointerType *SrcOpType = PointerType::get(LoopOpType, SrcAS); + PointerType *DstOpType = PointerType::get(LoopOpType, DstAS); + if (SrcAddr->getType() != SrcOpType) { + SrcAddr = PLBuilder.CreateBitCast(SrcAddr, SrcOpType); + } + if (DstAddr->getType() != DstOpType) { + DstAddr = PLBuilder.CreateBitCast(DstAddr, DstOpType); + } + + // Calculate the loop trip count, and remaining bytes to copy after the loop. 
+ Type *CopyLenType = CopyLen->getType(); + IntegerType *ILengthType = dyn_cast<IntegerType>(CopyLenType); + assert(ILengthType && + "expected size argument to memcpy to be an integer type!"); + ConstantInt *CILoopOpSize = ConstantInt::get(ILengthType, LoopOpSize); + Value *RuntimeLoopCount = PLBuilder.CreateUDiv(CopyLen, CILoopOpSize); + Value *RuntimeResidual = PLBuilder.CreateURem(CopyLen, CILoopOpSize); + Value *RuntimeBytesCopied = PLBuilder.CreateSub(CopyLen, RuntimeResidual); + + BasicBlock *LoopBB = + BasicBlock::Create(Ctx, "loop-memcpy-expansion", ParentFunc, nullptr); + IRBuilder<> LoopBuilder(LoopBB); + + PHINode *LoopIndex = LoopBuilder.CreatePHI(CopyLenType, 2, "loop-index"); + LoopIndex->addIncoming(ConstantInt::get(CopyLenType, 0U), PreLoopBB); + + Value *SrcGEP = LoopBuilder.CreateInBoundsGEP(LoopOpType, SrcAddr, LoopIndex); + Value *Load = LoopBuilder.CreateLoad(SrcGEP, SrcIsVolatile); + Value *DstGEP = LoopBuilder.CreateInBoundsGEP(LoopOpType, DstAddr, LoopIndex); + LoopBuilder.CreateStore(Load, DstGEP, DstIsVolatile); + + Value *NewIndex = + LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(CopyLenType, 1U)); + LoopIndex->addIncoming(NewIndex, LoopBB); + + Type *Int8Type = Type::getInt8Ty(Ctx); + if (LoopOpType != Int8Type) { + // Loop body for the residual copy. + BasicBlock *ResLoopBB = BasicBlock::Create(Ctx, "loop-memcpy-residual", + PreLoopBB->getParent(), nullptr); + // Residual loop header. + BasicBlock *ResHeaderBB = BasicBlock::Create( + Ctx, "loop-memcpy-residual-header", PreLoopBB->getParent(), nullptr); + + // Need to update the pre-loop basic block to branch to the correct place. + // Branch to the main loop if the count is non-zero, branch to the residual + // loop if the copy size is smaller than 1 iteration of the main loop but + // non-zero, and finally branch to after the residual loop if the memcpy + // size is zero. + ConstantInt *Zero = ConstantInt::get(ILengthType, 0U); + PLBuilder.CreateCondBr(PLBuilder.CreateICmpNE(RuntimeLoopCount, Zero), + LoopBB, ResHeaderBB); + PreLoopBB->getTerminator()->eraseFromParent(); + + LoopBuilder.CreateCondBr( + LoopBuilder.CreateICmpULT(NewIndex, RuntimeLoopCount), LoopBB, + ResHeaderBB); + + // Determine if we need to branch to the residual loop or bypass it. + IRBuilder<> RHBuilder(ResHeaderBB); + RHBuilder.CreateCondBr(RHBuilder.CreateICmpNE(RuntimeResidual, Zero), + ResLoopBB, PostLoopBB); + + // Copy the residual with a single-byte load/store loop. + IRBuilder<> ResBuilder(ResLoopBB); + PHINode *ResidualIndex = + ResBuilder.CreatePHI(CopyLenType, 2, "residual-loop-index"); + ResidualIndex->addIncoming(Zero, ResHeaderBB); + + Value *SrcAsInt8 = + ResBuilder.CreateBitCast(SrcAddr, PointerType::get(Int8Type, SrcAS)); + Value *DstAsInt8 = + ResBuilder.CreateBitCast(DstAddr, PointerType::get(Int8Type, DstAS)); + Value *FullOffset = ResBuilder.CreateAdd(RuntimeBytesCopied, ResidualIndex); + Value *SrcGEP = + ResBuilder.CreateInBoundsGEP(Int8Type, SrcAsInt8, FullOffset); + Value *Load = ResBuilder.CreateLoad(SrcGEP, SrcIsVolatile); + Value *DstGEP = + ResBuilder.CreateInBoundsGEP(Int8Type, DstAsInt8, FullOffset); + ResBuilder.CreateStore(Load, DstGEP, DstIsVolatile); + + Value *ResNewIndex = + ResBuilder.CreateAdd(ResidualIndex, ConstantInt::get(CopyLenType, 1U)); + ResidualIndex->addIncoming(ResNewIndex, ResLoopBB); + + // Create the loop branch condition. 
+    ResBuilder.CreateCondBr(
+        ResBuilder.CreateICmpULT(ResNewIndex, RuntimeResidual), ResLoopBB,
+        PostLoopBB);
+  } else {
+    // In this case the loop operand type was a byte, and there is no need for
+    // a residual loop to copy the remaining memory after the main loop.
+    // We do, however, need to patch up the control flow by creating the
+    // terminators for the pre-loop block and the memcpy loop.
+    ConstantInt *Zero = ConstantInt::get(ILengthType, 0U);
+    PLBuilder.CreateCondBr(PLBuilder.CreateICmpNE(RuntimeLoopCount, Zero),
+                           LoopBB, PostLoopBB);
+    PreLoopBB->getTerminator()->eraseFromParent();
+    LoopBuilder.CreateCondBr(
+        LoopBuilder.CreateICmpULT(NewIndex, RuntimeLoopCount), LoopBB,
+        PostLoopBB);
+  }
+}
+
 void llvm::createMemCpyLoop(Instruction *InsertBefore,
                             Value *SrcAddr, Value *DstAddr, Value *CopyLen,
                             unsigned SrcAlign, unsigned DestAlign,
@@ -208,15 +452,41 @@ static void createMemSetLoop(Instruction *InsertBefore,
                     NewBB);
 }
 
-void llvm::expandMemCpyAsLoop(MemCpyInst *Memcpy) {
-  createMemCpyLoop(/* InsertBefore */ Memcpy,
-                   /* SrcAddr */ Memcpy->getRawSource(),
-                   /* DstAddr */ Memcpy->getRawDest(),
-                   /* CopyLen */ Memcpy->getLength(),
-                   /* SrcAlign */ Memcpy->getAlignment(),
-                   /* DestAlign */ Memcpy->getAlignment(),
-                   /* SrcIsVolatile */ Memcpy->isVolatile(),
-                   /* DstIsVolatile */ Memcpy->isVolatile());
+void llvm::expandMemCpyAsLoop(MemCpyInst *Memcpy,
+                              const TargetTransformInfo &TTI) {
+  // Original implementation.
+  if (!TTI.useWideIRMemcpyLoopLowering()) {
+    createMemCpyLoop(/* InsertBefore */ Memcpy,
+                     /* SrcAddr */ Memcpy->getRawSource(),
+                     /* DstAddr */ Memcpy->getRawDest(),
+                     /* CopyLen */ Memcpy->getLength(),
+                     /* SrcAlign */ Memcpy->getAlignment(),
+                     /* DestAlign */ Memcpy->getAlignment(),
+                     /* SrcIsVolatile */ Memcpy->isVolatile(),
+                     /* DstIsVolatile */ Memcpy->isVolatile());
+  } else {
+    if (ConstantInt *CI = dyn_cast<ConstantInt>(Memcpy->getLength())) {
+      createMemCpyLoopKnownSize(/* InsertBefore */ Memcpy,
+                                /* SrcAddr */ Memcpy->getRawSource(),
+                                /* DstAddr */ Memcpy->getRawDest(),
+                                /* CopyLen */ CI,
+                                /* SrcAlign */ Memcpy->getAlignment(),
+                                /* DestAlign */ Memcpy->getAlignment(),
+                                /* SrcIsVolatile */ Memcpy->isVolatile(),
+                                /* DstIsVolatile */ Memcpy->isVolatile(),
+                                /* TargetTransformInfo */ TTI);
+    } else {
+      createMemCpyLoopUnknownSize(/* InsertBefore */ Memcpy,
+                                  /* SrcAddr */ Memcpy->getRawSource(),
+                                  /* DstAddr */ Memcpy->getRawDest(),
+                                  /* CopyLen */ Memcpy->getLength(),
+                                  /* SrcAlign */ Memcpy->getAlignment(),
+                                  /* DestAlign */ Memcpy->getAlignment(),
+                                  /* SrcIsVolatile */ Memcpy->isVolatile(),
+                                  /* DstIsVolatile */ Memcpy->isVolatile(),
+                                  /* TargetTransformInfo */ TTI);
+    }
+  }
 }
 
 void llvm::expandMemMoveAsLoop(MemMoveInst *Memmove) {
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index e724b0a28c322..dee658f983932 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -5754,8 +5754,8 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
   if (BasicBlock *Dom = BB->getSinglePredecessor()) {
     auto *PBI = dyn_cast_or_null<BranchInst>(Dom->getTerminator());
     if (PBI && PBI->isConditional() &&
-        PBI->getSuccessor(0) != PBI->getSuccessor(1) &&
-        (PBI->getSuccessor(0) == BB || PBI->getSuccessor(1) == BB)) {
+        PBI->getSuccessor(0) != PBI->getSuccessor(1)) {
+      assert(PBI->getSuccessor(0) == BB || PBI->getSuccessor(1) == BB);
       bool CondIsFalse = PBI->getSuccessor(1) == BB;
       Optional<bool> Implication = isImpliedCondition(
           PBI->getCondition(),
          BI->getCondition(), DL, CondIsFalse);
diff --git a/lib/Transforms/Utils/SimplifyIndVar.cpp b/lib/Transforms/Utils/SimplifyIndVar.cpp
index ec8b0d426265a..6d90e6b48358a 100644
--- a/lib/Transforms/Utils/SimplifyIndVar.cpp
+++ b/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -25,6 +25,7 @@
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/PatternMatch.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
@@ -80,6 +81,7 @@ namespace {
                  bool IsSigned);
     bool eliminateSDiv(BinaryOperator *SDiv);
     bool strengthenOverflowingOperation(BinaryOperator *OBO, Value *IVOperand);
+    bool strengthenRightShift(BinaryOperator *BO, Value *IVOperand);
   };
 }
@@ -154,6 +156,7 @@ Value *SimplifyIndvar::foldIVUser(Instruction *UseInst, Instruction *IVOperand)
 void SimplifyIndvar::eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand) {
   unsigned IVOperIdx = 0;
   ICmpInst::Predicate Pred = ICmp->getPredicate();
+  ICmpInst::Predicate OriginalPred = Pred;
   if (IVOperand != ICmp->getOperand(0)) {
     // Swapped
     assert(IVOperand == ICmp->getOperand(1) && "Can't find IVOperand");
@@ -262,6 +265,16 @@ void SimplifyIndvar::eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand) {
     ICmp->setPredicate(InvariantPredicate);
     ICmp->setOperand(0, NewLHS);
     ICmp->setOperand(1, NewRHS);
+  } else if (ICmpInst::isSigned(OriginalPred) &&
+             SE->isKnownNonNegative(S) && SE->isKnownNonNegative(X)) {
+    // If we were unable to do anything above, all we can do is canonicalize
+    // the comparison, hoping that it will open the door for other
+    // optimizations. If we find out that we compare two non-negative values,
+    // we turn the instruction's predicate into its unsigned version. Note that
+    // we cannot rely on Pred here unless we check if we have swapped it.
+    assert(ICmp->getPredicate() == OriginalPred && "Predicate changed?");
+    DEBUG(dbgs() << "INDVARS: Turn to unsigned comparison: " << *ICmp << '\n');
+    ICmp->setPredicate(ICmpInst::getUnsignedPredicate(OriginalPred));
   } else
     return;
@@ -583,6 +596,35 @@ bool SimplifyIndvar::strengthenOverflowingOperation(BinaryOperator *BO,
   return Changed;
 }
 
+/// Annotate the Shr in (X << IVOperand) >> C as exact using the
+/// information from the IV's range. Returns true if anything changed, false
+/// otherwise.
+bool SimplifyIndvar::strengthenRightShift(BinaryOperator *BO,
+                                          Value *IVOperand) {
+  using namespace llvm::PatternMatch;
+
+  if (BO->getOpcode() == Instruction::Shl) {
+    bool Changed = false;
+    ConstantRange IVRange = SE->getUnsignedRange(SE->getSCEV(IVOperand));
+    for (auto *U : BO->users()) {
+      const APInt *C;
+      if (match(U,
+                m_AShr(m_Shl(m_Value(), m_Specific(IVOperand)), m_APInt(C))) ||
+          match(U,
+                m_LShr(m_Shl(m_Value(), m_Specific(IVOperand)), m_APInt(C)))) {
+        BinaryOperator *Shr = cast<BinaryOperator>(U);
+        if (!Shr->isExact() && IVRange.getUnsignedMin().uge(*C)) {
+          Shr->setIsExact(true);
+          Changed = true;
+        }
+      }
+    }
+    return Changed;
+  }
+
+  return false;
+}
+
 /// Add all uses of Def to the current IV's worklist.
static void pushIVUsers( Instruction *Def, @@ -675,8 +717,9 @@ void SimplifyIndvar::simplifyUsers(PHINode *CurrIV, IVVisitor *V) { } if (BinaryOperator *BO = dyn_cast<BinaryOperator>(UseOper.first)) { - if (isa<OverflowingBinaryOperator>(BO) && - strengthenOverflowingOperation(BO, IVOperand)) { + if ((isa<OverflowingBinaryOperator>(BO) && + strengthenOverflowingOperation(BO, IVOperand)) || + (isa<ShlOperator>(BO) && strengthenRightShift(BO, IVOperand))) { // re-queue uses of the now modified binary operator and fall // through to the checks that remain. pushIVUsers(IVOperand, Simplified, SimpleIVUsers); diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp index b723b65f35e59..77c0a41929ac7 100644 --- a/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -656,7 +656,7 @@ Value *LibCallSimplifier::optimizeMemChr(CallInst *CI, IRBuilder<> &B) { ConstantInt *LenC = dyn_cast<ConstantInt>(CI->getArgOperand(2)); // memchr(x, y, 0) -> null - if (LenC && LenC->isNullValue()) + if (LenC && LenC->isZero()) return Constant::getNullValue(CI->getType()); // From now on we need at least constant length and string. @@ -2280,7 +2280,7 @@ bool FortifiedLibCallSimplifier::isFortifiedCallFoldable(CallInst *CI, return true; if (ConstantInt *ObjSizeCI = dyn_cast<ConstantInt>(CI->getArgOperand(ObjSizeOp))) { - if (ObjSizeCI->isAllOnesValue()) + if (ObjSizeCI->isMinusOne()) return true; // If the object size wasn't -1 (unknown), bail out if we were asked to. if (OnlyLowerUnknownSize) diff --git a/lib/Transforms/Utils/VNCoercion.cpp b/lib/Transforms/Utils/VNCoercion.cpp index 60d9ede2c4871..c3feea6a0a414 100644 --- a/lib/Transforms/Utils/VNCoercion.cpp +++ b/lib/Transforms/Utils/VNCoercion.cpp @@ -51,25 +51,24 @@ static T *coerceAvailableValueToLoadTypeHelper(T *StoredVal, Type *LoadedTy, // If the store and reload are the same size, we can always reuse it. if (StoredValSize == LoadedValSize) { // Pointer to Pointer -> use bitcast. - if (StoredValTy->getScalarType()->isPointerTy() && - LoadedTy->getScalarType()->isPointerTy()) { + if (StoredValTy->isPtrOrPtrVectorTy() && LoadedTy->isPtrOrPtrVectorTy()) { StoredVal = Helper.CreateBitCast(StoredVal, LoadedTy); } else { // Convert source pointers to integers, which can be bitcast. - if (StoredValTy->getScalarType()->isPointerTy()) { + if (StoredValTy->isPtrOrPtrVectorTy()) { StoredValTy = DL.getIntPtrType(StoredValTy); StoredVal = Helper.CreatePtrToInt(StoredVal, StoredValTy); } Type *TypeToCastTo = LoadedTy; - if (TypeToCastTo->getScalarType()->isPointerTy()) + if (TypeToCastTo->isPtrOrPtrVectorTy()) TypeToCastTo = DL.getIntPtrType(TypeToCastTo); if (StoredValTy != TypeToCastTo) StoredVal = Helper.CreateBitCast(StoredVal, TypeToCastTo); // Cast to pointer if the load needs a pointer type. - if (LoadedTy->getScalarType()->isPointerTy()) + if (LoadedTy->isPtrOrPtrVectorTy()) StoredVal = Helper.CreateIntToPtr(StoredVal, LoadedTy); } @@ -86,7 +85,7 @@ static T *coerceAvailableValueToLoadTypeHelper(T *StoredVal, Type *LoadedTy, "canCoerceMustAliasedValueToLoad fail"); // Convert source pointers to integers, which can be manipulated. 
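   // (For example, with 64-bit pointers an i8* value becomes an i64 via
   // ptrtoint, and a <2 x i8*> vector becomes <2 x i64>, after which the
   // stored bits can be truncated or bitcast to the loaded type.)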
- if (StoredValTy->getScalarType()->isPointerTy()) { + if (StoredValTy->isPtrOrPtrVectorTy()) { StoredValTy = DL.getIntPtrType(StoredValTy); StoredVal = Helper.CreatePtrToInt(StoredVal, StoredValTy); } @@ -112,7 +111,7 @@ static T *coerceAvailableValueToLoadTypeHelper(T *StoredVal, Type *LoadedTy, if (LoadedTy != NewIntTy) { // If the result is a pointer, inttoptr. - if (LoadedTy->getScalarType()->isPointerTy()) + if (LoadedTy->isPtrOrPtrVectorTy()) StoredVal = Helper.CreateIntToPtr(StoredVal, LoadedTy); else // Otherwise, bitcast. @@ -316,7 +315,7 @@ static T *getStoreValueForLoadHelper(T *SrcVal, unsigned Offset, Type *LoadTy, uint64_t LoadSize = (DL.getTypeSizeInBits(LoadTy) + 7) / 8; // Compute which bits of the stored value are being used by the load. Convert // to an integer type to start with. - if (SrcVal->getType()->getScalarType()->isPointerTy()) + if (SrcVal->getType()->isPtrOrPtrVectorTy()) SrcVal = Helper.CreatePtrToInt(SrcVal, DL.getIntPtrType(SrcVal->getType())); if (!SrcVal->getType()->isIntegerTy()) SrcVal = Helper.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize * 8)); diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 193cc4d137870..eb82ee283d449 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -5315,8 +5315,13 @@ void LoopVectorizationLegality::addInductionPhi( // Both the PHI node itself, and the "post-increment" value feeding // back into the PHI node may have external users. - AllowedExit.insert(Phi); - AllowedExit.insert(Phi->getIncomingValueForBlock(TheLoop->getLoopLatch())); + // We can allow those uses, except if the SCEVs we have for them rely + // on predicates that only hold within the loop, since allowing the exit + // currently means re-using this SCEV outside the loop. + if (PSE.getUnionPredicate().isAlwaysTrue()) { + AllowedExit.insert(Phi); + AllowedExit.insert(Phi->getIncomingValueForBlock(TheLoop->getLoopLatch())); + } DEBUG(dbgs() << "LV: Found an induction variable.\n"); return; diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index b494526369d6a..4425043ad39a0 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -860,7 +860,7 @@ private: bool tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP); /// Un-bundles a group of instructions. - void cancelScheduling(ArrayRef<Value *> VL); + void cancelScheduling(ArrayRef<Value *> VL, Value *OpValue); /// Extends the scheduling region so that V is inside the region. /// \returns true if the region size is within the limit. 
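The hunks that follow update every buildTree_rec bail-out to name the bundle's representative value explicitly. A minimal sketch of the new call shape (VL0 is buildTree_rec's existing name for cast<Instruction>(VL[0]); the surrounding legality checks are elided):

    // A bundle VL that fails a legality check is un-bundled via its
    // representative value and then recorded as a gather (non-vectorized)
    // tree entry.
    BS.cancelScheduling(VL, VL0);
    newTreeEntry(VL, false, UserTreeIdx);

Keying the getScheduleData lookup off an explicit OpValue parameter, rather than hard-coding VL[0], presumably prepares for bundles whose representative is not the first element of VL.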
@@ -1258,7 +1258,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
               cast<PHINode>(VL[j])->getIncomingValueForBlock(
                   PH->getIncomingBlock(i)));
           if (Term) {
             DEBUG(dbgs() << "SLP: Need to swizzle PHINodes (TerminatorInst use).\n");
-            BS.cancelScheduling(VL);
+            BS.cancelScheduling(VL, VL0);
             newTreeEntry(VL, false, UserTreeIdx);
             return;
           }
@@ -1284,7 +1284,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
       if (Reuse) {
         DEBUG(dbgs() << "SLP: Reusing extract sequence.\n");
       } else {
-        BS.cancelScheduling(VL);
+        BS.cancelScheduling(VL, VL0);
       }
       newTreeEntry(VL, Reuse, UserTreeIdx);
       return;
@@ -1301,7 +1301,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
 
       if (DL->getTypeSizeInBits(ScalarTy) !=
           DL->getTypeAllocSizeInBits(ScalarTy)) {
-        BS.cancelScheduling(VL);
+        BS.cancelScheduling(VL, VL0);
         newTreeEntry(VL, false, UserTreeIdx);
         DEBUG(dbgs() << "SLP: Gathering loads of non-packed type.\n");
         return;
@@ -1312,7 +1312,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
       for (unsigned i = 0, e = VL.size() - 1; i < e; ++i) {
         LoadInst *L = cast<LoadInst>(VL[i]);
         if (!L->isSimple()) {
-          BS.cancelScheduling(VL);
+          BS.cancelScheduling(VL, VL0);
           newTreeEntry(VL, false, UserTreeIdx);
           DEBUG(dbgs() << "SLP: Gathering non-simple loads.\n");
           return;
@@ -1349,7 +1349,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
           break;
         }
 
-      BS.cancelScheduling(VL);
+      BS.cancelScheduling(VL, VL0);
       newTreeEntry(VL, false, UserTreeIdx);
 
       if (ReverseConsecutive) {
@@ -1376,7 +1376,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
       for (unsigned i = 0; i < VL.size(); ++i) {
         Type *Ty = cast<Instruction>(VL[i])->getOperand(0)->getType();
         if (Ty != SrcTy || !isValidElementType(Ty)) {
-          BS.cancelScheduling(VL);
+          BS.cancelScheduling(VL, VL0);
           newTreeEntry(VL, false, UserTreeIdx);
           DEBUG(dbgs() << "SLP: Gathering casts with different src types.\n");
           return;
@@ -1404,7 +1404,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
         CmpInst *Cmp = cast<CmpInst>(VL[i]);
         if (Cmp->getPredicate() != P0 ||
             Cmp->getOperand(0)->getType() != ComparedTy) {
-          BS.cancelScheduling(VL);
+          BS.cancelScheduling(VL, VL0);
           newTreeEntry(VL, false, UserTreeIdx);
           DEBUG(dbgs() << "SLP: Gathering cmp with different predicate.\n");
           return;
@@ -1471,7 +1471,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
       for (unsigned j = 0; j < VL.size(); ++j) {
         if (cast<Instruction>(VL[j])->getNumOperands() != 2) {
           DEBUG(dbgs() << "SLP: not-vectorizable GEP (nested indexes).\n");
-          BS.cancelScheduling(VL);
+          BS.cancelScheduling(VL, VL0);
           newTreeEntry(VL, false, UserTreeIdx);
           return;
         }
@@ -1484,7 +1484,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
         Type *CurTy = cast<Instruction>(VL[j])->getOperand(0)->getType();
         if (Ty0 != CurTy) {
           DEBUG(dbgs() << "SLP: not-vectorizable GEP (different types).\n");
-          BS.cancelScheduling(VL);
+          BS.cancelScheduling(VL, VL0);
           newTreeEntry(VL, false, UserTreeIdx);
           return;
         }
@@ -1496,7 +1496,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
         if (!isa<ConstantInt>(Op)) {
           DEBUG(
               dbgs() << "SLP: not-vectorizable GEP (non-constant indexes).\n");
-          BS.cancelScheduling(VL);
+          BS.cancelScheduling(VL, VL0);
           newTreeEntry(VL, false, UserTreeIdx);
           return;
         }
@@ -1518,7 +1518,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
       // Check if the stores are consecutive or if we need to swizzle them.
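      // (Here isConsecutiveAccess(A, B) means that B accesses the memory
      // immediately following A's access; a single out-of-order pair forces
      // the whole bundle to be gathered.)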
       for (unsigned i = 0, e = VL.size() - 1; i < e; ++i)
         if (!isConsecutiveAccess(VL[i], VL[i + 1], *DL, *SE)) {
-          BS.cancelScheduling(VL);
+          BS.cancelScheduling(VL, VL0);
           newTreeEntry(VL, false, UserTreeIdx);
           DEBUG(dbgs() << "SLP: Non-consecutive store.\n");
           return;
@@ -1541,7 +1541,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
       // represented by an intrinsic call
       Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
       if (!isTriviallyVectorizable(ID)) {
-        BS.cancelScheduling(VL);
+        BS.cancelScheduling(VL, VL0);
         newTreeEntry(VL, false, UserTreeIdx);
         DEBUG(dbgs() << "SLP: Non-vectorizable call.\n");
         return;
@@ -1555,7 +1555,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
         if (!CI2 || CI2->getCalledFunction() != Int ||
             getVectorIntrinsicIDForCall(CI2, TLI) != ID ||
             !CI->hasIdenticalOperandBundleSchema(*CI2)) {
-          BS.cancelScheduling(VL);
+          BS.cancelScheduling(VL, VL0);
           newTreeEntry(VL, false, UserTreeIdx);
           DEBUG(dbgs() << "SLP: mismatched calls:" << *CI << "!=" << *VL[i]
                        << "\n");
@@ -1566,7 +1566,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
         if (hasVectorInstrinsicScalarOpd(ID, 1)) {
           Value *A1J = CI2->getArgOperand(1);
           if (A1I != A1J) {
-            BS.cancelScheduling(VL);
+            BS.cancelScheduling(VL, VL0);
             newTreeEntry(VL, false, UserTreeIdx);
             DEBUG(dbgs() << "SLP: mismatched arguments in call:" << *CI
                          << " argument " << A1I << "!=" << A1J
@@ -1579,7 +1579,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
             !std::equal(CI->op_begin() + CI->getBundleOperandsStartIndex(),
                         CI->op_begin() + CI->getBundleOperandsEndIndex(),
                         CI2->op_begin() + CI2->getBundleOperandsStartIndex())) {
-          BS.cancelScheduling(VL);
+          BS.cancelScheduling(VL, VL0);
           newTreeEntry(VL, false, UserTreeIdx);
           DEBUG(dbgs() << "SLP: mismatched bundle operands in calls:" << *CI
                        << "!=" << *VL[i] << '\n');
@@ -1603,7 +1603,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
       // If this is not an alternate sequence of opcode like add-sub
       // then do not vectorize this instruction.
       if (!isAltShuffle) {
-        BS.cancelScheduling(VL);
+        BS.cancelScheduling(VL, VL0);
         newTreeEntry(VL, false, UserTreeIdx);
         DEBUG(dbgs() << "SLP: ShuffleVector are not vectorized.\n");
         return;
@@ -1631,7 +1631,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
       return;
     }
     default:
-      BS.cancelScheduling(VL);
+      BS.cancelScheduling(VL, VL0);
       newTreeEntry(VL, false, UserTreeIdx);
       DEBUG(dbgs() << "SLP: Gathering unknown instruction.\n");
       return;
@@ -3177,17 +3177,18 @@ bool BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL,
     }
   }
   if (!Bundle->isReady()) {
-    cancelScheduling(VL);
+    cancelScheduling(VL, VL[0]);
     return false;
   }
   return true;
 }
 
-void BoUpSLP::BlockScheduling::cancelScheduling(ArrayRef<Value *> VL) {
-  if (isa<PHINode>(VL[0]))
+void BoUpSLP::BlockScheduling::cancelScheduling(ArrayRef<Value *> VL,
+                                                Value *OpValue) {
+  if (isa<PHINode>(OpValue))
     return;
-  ScheduleData *Bundle = getScheduleData(VL[0]);
+  ScheduleData *Bundle = getScheduleData(OpValue);
   DEBUG(dbgs() << "SLP: cancel scheduling of " << *Bundle << "\n");
   assert(!Bundle->IsScheduled &&
          "Can't cancel bundle which is already scheduled");