diff options
Diffstat (limited to 'lib/CodeGen/SelectionDAG')
-rw-r--r-- | lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 198 | ||||
-rw-r--r-- | lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp | 27 | ||||
-rw-r--r-- | lib/CodeGen/SelectionDAG/InstrEmitter.h | 2 | ||||
-rw-r--r-- | lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 2 | ||||
-rw-r--r-- | lib/CodeGen/SelectionDAG/LegalizeTypes.cpp | 2 | ||||
-rw-r--r-- | lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 2 | ||||
-rw-r--r-- | lib/CodeGen/SelectionDAG/SDNodeDbgValue.h | 2 | ||||
-rw-r--r-- | lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h | 2 | ||||
-rw-r--r-- | lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 95 | ||||
-rw-r--r-- | lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 156 | ||||
-rw-r--r-- | lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h | 5 | ||||
-rw-r--r-- | lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 10 | ||||
-rw-r--r-- | lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp | 2 | ||||
-rw-r--r-- | lib/CodeGen/SelectionDAG/StatepointLowering.cpp | 187 | ||||
-rw-r--r-- | lib/CodeGen/SelectionDAG/TargetLowering.cpp | 5 |
15 files changed, 444 insertions, 253 deletions
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index a71c6761c75f..5fea52c97496 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -255,6 +255,7 @@ namespace { SDValue visitSRA(SDNode *N); SDValue visitSRL(SDNode *N); SDValue visitRotate(SDNode *N); + SDValue visitBSWAP(SDNode *N); SDValue visitCTLZ(SDNode *N); SDValue visitCTLZ_ZERO_UNDEF(SDNode *N); SDValue visitCTTZ(SDNode *N); @@ -387,6 +388,13 @@ namespace { unsigned SequenceNum; }; + /// This is a helper function for MergeStoresOfConstantsOrVecElts. Returns a + /// constant build_vector of the stored constant values in Stores. + SDValue getMergedConstantVectorStore(SelectionDAG &DAG, + SDLoc SL, + ArrayRef<MemOpLink> Stores, + EVT Ty) const; + /// This is a helper function for MergeConsecutiveStores. When the source /// elements of the consecutive stores are all constants or all extracted /// vector elements, try to merge them into one larger store. @@ -395,6 +403,13 @@ namespace { EVT MemVT, unsigned NumElem, bool IsConstantSrc, bool UseVector); + /// This is a helper function for MergeConsecutiveStores. + /// Stores that may be merged are placed in StoreNodes. + /// Loads that may alias with those stores are placed in AliasLoadNodes. + void getStoreMergeAndAliasCandidates( + StoreSDNode* St, SmallVectorImpl<MemOpLink> &StoreNodes, + SmallVectorImpl<LSBaseSDNode*> &AliasLoadNodes); + /// Merge consecutive store operations into a wide store. /// This optimization uses wide integers or vectors when possible. /// \return True if some memory operations were changed. @@ -444,7 +459,7 @@ namespace { return TLI.getSetCCResultType(*DAG.getContext(), VT); } }; -} +} // namespace namespace { @@ -460,7 +475,7 @@ public: DC.removeFromWorklist(N); } }; -} +} // namespace //===----------------------------------------------------------------------===// // TargetLowering::DAGCombinerInfo implementation @@ -1335,6 +1350,7 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::SRL: return visitSRL(N); case ISD::ROTR: case ISD::ROTL: return visitRotate(N); + case ISD::BSWAP: return visitBSWAP(N); case ISD::CTLZ: return visitCTLZ(N); case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N); case ISD::CTTZ: return visitCTTZ(N); @@ -1454,12 +1470,9 @@ SDValue DAGCombiner::combine(SDNode *N) { if (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1)) { SDValue Ops[] = {N1, N0}; SDNode *CSENode; - if (const BinaryWithFlagsSDNode *BinNode = - dyn_cast<BinaryWithFlagsSDNode>(N)) { + if (const auto *BinNode = dyn_cast<BinaryWithFlagsSDNode>(N)) { CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops, - BinNode->Flags.hasNoUnsignedWrap(), - BinNode->Flags.hasNoSignedWrap(), - BinNode->Flags.hasExact()); + &BinNode->Flags); } else { CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops); } @@ -4764,6 +4777,19 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitBSWAP(SDNode *N) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + + // fold (bswap c1) -> c2 + if (isConstantIntBuildVectorOrConstantInt(N0)) + return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N0); + // fold (bswap (bswap x)) -> x + if (N0.getOpcode() == ISD::BSWAP) + return N0->getOperand(0); + return SDValue(); +} + SDValue DAGCombiner::visitCTLZ(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); @@ -5141,7 +5167,7 @@ SDValue DAGCombiner::visitMSCATTER(SDNode *N) { std::tie(IndexLo, IndexHi) = DAG.SplitVector(MSC->getIndex(), DL); MachineMemOperand *MMO = DAG.getMachineFunction(). - getMachineMemOperand(MSC->getPointerInfo(), + getMachineMemOperand(MSC->getPointerInfo(), MachineMemOperand::MOStore, LoMemVT.getStoreSize(), Alignment, MSC->getAAInfo(), MSC->getRanges()); @@ -5280,7 +5306,7 @@ SDValue DAGCombiner::visitMGATHER(SDNode *N) { std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL); MachineMemOperand *MMO = DAG.getMachineFunction(). - getMachineMemOperand(MGT->getPointerInfo(), + getMachineMemOperand(MGT->getPointerInfo(), MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), Alignment, MGT->getAAInfo(), MGT->getRanges()); @@ -8078,7 +8104,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { auto *BV1 = dyn_cast<BuildVectorSDNode>(N1); auto *BV00 = dyn_cast<BuildVectorSDNode>(N00); auto *BV01 = dyn_cast<BuildVectorSDNode>(N01); - + // Check 1: Make sure that the first operand of the inner multiply is NOT // a constant. Otherwise, we may induce infinite looping. if (!(isConstOrConstSplatFP(N00) || (BV00 && BV00->isConstant()))) { @@ -9928,7 +9954,7 @@ struct LoadedSlice { return true; } }; -} +} // namespace /// \brief Check that all bits set in \p UsedBits form a dense region, i.e., /// \p UsedBits looks like 0..0 1..1 0..0. @@ -10576,6 +10602,18 @@ struct BaseIndexOffset { }; } // namespace +SDValue DAGCombiner::getMergedConstantVectorStore(SelectionDAG &DAG, + SDLoc SL, + ArrayRef<MemOpLink> Stores, + EVT Ty) const { + SmallVector<SDValue, 8> BuildVector; + + for (unsigned I = 0, E = Ty.getVectorNumElements(); I != E; ++I) + BuildVector.push_back(cast<StoreSDNode>(Stores[I].MemNode)->getValue()); + + return DAG.getNode(ISD::BUILD_VECTOR, SL, Ty, BuildVector); +} + bool DAGCombiner::MergeStoresOfConstantsOrVecElts( SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumElem, bool IsConstantSrc, bool UseVector) { @@ -10606,12 +10644,7 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem); assert(TLI.isTypeLegal(Ty) && "Illegal vector store"); if (IsConstantSrc) { - // A vector store with a constant source implies that the constant is - // zero; we only handle merging stores of constant zeros because the zero - // can be materialized without a load. - // It may be beneficial to loosen this restriction to allow non-zero - // store merging. - StoredVal = DAG.getConstant(0, DL, Ty); + StoredVal = getMergedConstantVectorStore(DAG, DL, StoreNodes, Ty); } else { SmallVector<SDValue, 8> Ops; for (unsigned i = 0; i < NumElem ; ++i) { @@ -10631,8 +10664,8 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( // elements, so this path implies a store of constants. assert(IsConstantSrc && "Merged vector elements should use vector store"); - unsigned StoreBW = NumElem * ElementSizeBytes * 8; - APInt StoreInt(StoreBW, 0); + unsigned SizeInBits = NumElem * ElementSizeBytes * 8; + APInt StoreInt(SizeInBits, 0); // Construct a single integer constant which is made of the smaller // constant inputs. @@ -10641,18 +10674,18 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( unsigned Idx = IsLE ? (NumElem - 1 - i) : i; StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode); SDValue Val = St->getValue(); - StoreInt <<= ElementSizeBytes*8; + StoreInt <<= ElementSizeBytes * 8; if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) { - StoreInt |= C->getAPIntValue().zext(StoreBW); + StoreInt |= C->getAPIntValue().zext(SizeInBits); } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) { - StoreInt |= C->getValueAPF().bitcastToAPInt().zext(StoreBW); + StoreInt |= C->getValueAPF().bitcastToAPInt().zext(SizeInBits); } else { llvm_unreachable("Invalid constant element type"); } } // Create the new Load and Store operations. - EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW); + EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits); StoredVal = DAG.getConstant(StoreInt, DL, StoreTy); } @@ -10698,62 +10731,25 @@ static bool allowableAlignment(const SelectionDAG &DAG, return (Align >= ABIAlignment); } -bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { - if (OptLevel == CodeGenOpt::None) - return false; - - EVT MemVT = St->getMemoryVT(); - int64_t ElementSizeBytes = MemVT.getSizeInBits()/8; - bool NoVectors = DAG.getMachineFunction().getFunction()->hasFnAttribute( - Attribute::NoImplicitFloat); - - // This function cannot currently deal with non-byte-sized memory sizes. - if (ElementSizeBytes * 8 != MemVT.getSizeInBits()) - return false; - - // Don't merge vectors into wider inputs. - if (MemVT.isVector() || !MemVT.isSimple()) - return false; - - // Perform an early exit check. Do not bother looking at stored values that - // are not constants, loads, or extracted vector elements. - SDValue StoredVal = St->getValue(); - bool IsLoadSrc = isa<LoadSDNode>(StoredVal); - bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) || - isa<ConstantFPSDNode>(StoredVal); - bool IsExtractVecEltSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT); - - if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecEltSrc) - return false; - - // Only look at ends of store sequences. - SDValue Chain = SDValue(St, 0); - if (Chain->hasOneUse() && Chain->use_begin()->getOpcode() == ISD::STORE) - return false; - +void DAGCombiner::getStoreMergeAndAliasCandidates( + StoreSDNode* St, SmallVectorImpl<MemOpLink> &StoreNodes, + SmallVectorImpl<LSBaseSDNode*> &AliasLoadNodes) { // This holds the base pointer, index, and the offset in bytes from the base // pointer. BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr()); // We must have a base and an offset. if (!BasePtr.Base.getNode()) - return false; + return; // Do not handle stores to undef base pointers. if (BasePtr.Base.getOpcode() == ISD::UNDEF) - return false; - - // Save the LoadSDNodes that we find in the chain. - // We need to make sure that these nodes do not interfere with - // any of the store nodes. - SmallVector<LSBaseSDNode*, 8> AliasLoadNodes; - - // Save the StoreSDNodes that we find in the chain. - SmallVector<MemOpLink, 8> StoreNodes; + return; // Walk up the chain and look for nodes with offsets from the same // base pointer. Stop when reaching an instruction with a different kind // or instruction which has a different base pointer. + EVT MemVT = St->getMemoryVT(); unsigned Seq = 0; StoreSDNode *Index = St; while (Index) { @@ -10810,7 +10806,51 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { } } } +} + +bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { + if (OptLevel == CodeGenOpt::None) + return false; + + EVT MemVT = St->getMemoryVT(); + int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8; + bool NoVectors = DAG.getMachineFunction().getFunction()->hasFnAttribute( + Attribute::NoImplicitFloat); + + // This function cannot currently deal with non-byte-sized memory sizes. + if (ElementSizeBytes * 8 != MemVT.getSizeInBits()) + return false; + + // Don't merge vectors into wider inputs. + if (MemVT.isVector() || !MemVT.isSimple()) + return false; + + // Perform an early exit check. Do not bother looking at stored values that + // are not constants, loads, or extracted vector elements. + SDValue StoredVal = St->getValue(); + bool IsLoadSrc = isa<LoadSDNode>(StoredVal); + bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) || + isa<ConstantFPSDNode>(StoredVal); + bool IsExtractVecEltSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT); + + if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecEltSrc) + return false; + + // Only look at ends of store sequences. + SDValue Chain = SDValue(St, 0); + if (Chain->hasOneUse() && Chain->use_begin()->getOpcode() == ISD::STORE) + return false; + + // Save the LoadSDNodes that we find in the chain. + // We need to make sure that these nodes do not interfere with + // any of the store nodes. + SmallVector<LSBaseSDNode*, 8> AliasLoadNodes; + + // Save the StoreSDNodes that we find in the chain. + SmallVector<MemOpLink, 8> StoreNodes; + getStoreMergeAndAliasCandidates(St, StoreNodes, AliasLoadNodes); + // Check if there is anything to merge. if (StoreNodes.size() < 2) return false; @@ -10876,8 +10916,8 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { } // Find a legal type for the constant store. - unsigned StoreBW = (i+1) * ElementSizeBytes * 8; - EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW); + unsigned SizeInBits = (i+1) * ElementSizeBytes * 8; + EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits); if (TLI.isTypeLegal(StoreTy) && allowableAlignment(DAG, TLI, StoreTy, FirstStoreAS, FirstStoreAlign)) { @@ -11039,8 +11079,8 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { } // Find a legal type for the integer store. - unsigned StoreBW = (i+1) * ElementSizeBytes * 8; - StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW); + unsigned SizeInBits = (i+1) * ElementSizeBytes * 8; + StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits); if (TLI.isTypeLegal(StoreTy) && allowableAlignment(DAG, TLI, StoreTy, FirstStoreAS, FirstStoreAlign) && allowableAlignment(DAG, TLI, StoreTy, FirstLoadAS, FirstLoadAlign)) @@ -11094,8 +11134,8 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { if (UseVectorTy) { JointMemOpVT = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem); } else { - unsigned StoreBW = NumElem * ElementSizeBytes * 8; - JointMemOpVT = EVT::getIntegerVT(*DAG.getContext(), StoreBW); + unsigned SizeInBits = NumElem * ElementSizeBytes * 8; + JointMemOpVT = EVT::getIntegerVT(*DAG.getContext(), SizeInBits); } SDLoc LoadDL(LoadNodes[0].MemNode); @@ -12093,7 +12133,7 @@ static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) { } // If any of the operands is a floating point scalar bitcast to a vector, - // use floating point types throughout, and bitcast everything. + // use floating point types throughout, and bitcast everything. // Replace UNDEFs by another scalar UNDEF node, of the final desired type. if (AnyFP) { SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits()); @@ -12924,7 +12964,7 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { SDValue RHS = N->getOperand(1); SDLoc dl(N); - // Make sure we're not running after operation legalization where it + // Make sure we're not running after operation legalization where it // may have custom lowered the vector shuffles. if (LegalOperations) return SDValue(); @@ -13845,12 +13885,10 @@ bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const { int64_t Overlap2 = (Op1->getMemoryVT().getSizeInBits() >> 3) + Op1->getSrcValueOffset() - MinOffset; AliasAnalysis::AliasResult AAResult = - AA.alias(AliasAnalysis::Location(Op0->getMemOperand()->getValue(), - Overlap1, - UseTBAA ? Op0->getAAInfo() : AAMDNodes()), - AliasAnalysis::Location(Op1->getMemOperand()->getValue(), - Overlap2, - UseTBAA ? Op1->getAAInfo() : AAMDNodes())); + AA.alias(MemoryLocation(Op0->getMemOperand()->getValue(), Overlap1, + UseTBAA ? Op0->getAAInfo() : AAMDNodes()), + MemoryLocation(Op1->getMemOperand()->getValue(), Overlap2, + UseTBAA ? Op1->getAAInfo() : AAMDNodes())); if (AAResult == AliasAnalysis::NoAlias) return false; } diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index f3d75cb32a7d..ecaa2c972719 100644 --- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -259,20 +259,27 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, // If this is an MSVC EH personality, we need to do a bit more work. EHPersonality Personality = EHPersonality::Unknown; - if (!LPads.empty()) - Personality = classifyEHPersonality(LPads.back()->getPersonalityFn()); + if (Fn->hasPersonalityFn()) + Personality = classifyEHPersonality(Fn->getPersonalityFn()); if (!isMSVCEHPersonality(Personality)) return; - if (Personality == EHPersonality::MSVC_Win64SEH) { + if (Personality == EHPersonality::MSVC_Win64SEH || + Personality == EHPersonality::MSVC_X86SEH) { addSEHHandlersForLPads(LPads); - } else if (Personality == EHPersonality::MSVC_CXX) { + } + + WinEHFuncInfo &EHInfo = MMI.getWinEHFuncInfo(&fn); + if (Personality == EHPersonality::MSVC_CXX) { const Function *WinEHParentFn = MMI.getWinEHParent(&fn); - WinEHFuncInfo &EHInfo = MMI.getWinEHFuncInfo(WinEHParentFn); calculateWinCXXEHStateNumbers(WinEHParentFn, EHInfo); + } - // Copy the state numbers to LandingPadInfo for the current function, which - // could be a handler or the parent. + // Copy the state numbers to LandingPadInfo for the current function, which + // could be a handler or the parent. This should happen for 32-bit SEH and + // C++ EH. + if (Personality == EHPersonality::MSVC_CXX || + Personality == EHPersonality::MSVC_X86SEH) { for (const LandingPadInst *LP : LPads) { MachineBasicBlock *LPadMBB = MBBMap[LP->getParent()]; MMI.addWinEHState(LPadMBB, EHInfo.LandingPadStateMap[LP]); @@ -539,8 +546,10 @@ void llvm::ComputeUsesVAFloatArgument(const CallInst &I, /// landingpad instruction and add them to the specified machine module info. void llvm::AddLandingPadInfo(const LandingPadInst &I, MachineModuleInfo &MMI, MachineBasicBlock *MBB) { - MMI.addPersonality(MBB, - cast<Function>(I.getPersonalityFn()->stripPointerCasts())); + MMI.addPersonality( + MBB, + cast<Function>( + I.getParent()->getParent()->getPersonalityFn()->stripPointerCasts())); if (I.isCleanup()) MMI.addCleanup(MBB); diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.h b/lib/CodeGen/SelectionDAG/InstrEmitter.h index 7b86f7dd8de0..2a61914eecd3 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.h +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.h @@ -140,6 +140,6 @@ private: DenseMap<SDValue, unsigned> &VRBaseMap); }; -} +} // namespace llvm #endif diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 7d98872f8af1..37f95e5a22b9 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -198,7 +198,7 @@ public: ReplacedNode(Old); } }; -} +} // namespace /// Return a vector shuffle operation which /// performs the same shuffe in terms of order or result bytes, but on a type diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index 9c297698c1db..c3e3b7c525b9 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -676,7 +676,7 @@ namespace { NodesToAnalyze.insert(N); } }; -} +} // namespace /// ReplaceValueWith - The specified value was legalized to the specified other diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index c06227bd9701..50ad2391d15b 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -1010,7 +1010,7 @@ SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) { return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); } -} +} // namespace bool SelectionDAG::LegalizeVectors() { return VectorLegalizer(*this).Run(); diff --git a/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h b/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h index c27f8de601f2..949353256938 100644 --- a/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h +++ b/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h @@ -119,6 +119,6 @@ public: bool isInvalidated() const { return Invalid; } }; -} // end llvm namespace +} // namespace llvm #endif diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h index 6351fa2c4a2f..4c74182014a0 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h @@ -180,6 +180,6 @@ namespace llvm { void EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, unsigned> &VRBaseMap, MachineBasicBlock::iterator InsertPos); }; -} +} // namespace llvm #endif diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index cf51e756d847..0eff930ceddd 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -400,19 +400,24 @@ static void AddNodeIDOperands(FoldingSetNodeID &ID, ID.AddInteger(Op.getResNo()); } } +/// Add logical or fast math flag values to FoldingSetNodeID value. +static void AddNodeIDFlags(FoldingSetNodeID &ID, unsigned Opcode, + const SDNodeFlags *Flags) { + if (!Flags || !isBinOpWithFlags(Opcode)) + return; -static void AddBinaryNodeIDCustom(FoldingSetNodeID &ID, bool nuw, bool nsw, - bool exact) { - ID.AddBoolean(nuw); - ID.AddBoolean(nsw); - ID.AddBoolean(exact); + unsigned RawFlags = Flags->getRawFlags(); + // If no flags are set, do not alter the ID. We must match the ID of nodes + // that were created without explicitly specifying flags. This also saves time + // and allows a gradual increase in API usage of the optional optimization + // flags. + if (RawFlags != 0) + ID.AddInteger(RawFlags); } -/// AddBinaryNodeIDCustom - Add BinarySDNodes special infos -static void AddBinaryNodeIDCustom(FoldingSetNodeID &ID, unsigned Opcode, - bool nuw, bool nsw, bool exact) { - if (isBinOpWithFlags(Opcode)) - AddBinaryNodeIDCustom(ID, nuw, nsw, exact); +static void AddNodeIDFlags(FoldingSetNodeID &ID, const SDNode *N) { + if (auto *Node = dyn_cast<BinaryWithFlagsSDNode>(N)) + AddNodeIDFlags(ID, Node->getOpcode(), &Node->Flags); } static void AddNodeIDNode(FoldingSetNodeID &ID, unsigned short OpC, @@ -507,20 +512,6 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { ID.AddInteger(ST->getPointerInfo().getAddrSpace()); break; } - case ISD::SDIV: - case ISD::UDIV: - case ISD::SRA: - case ISD::SRL: - case ISD::MUL: - case ISD::ADD: - case ISD::SUB: - case ISD::SHL: { - const BinaryWithFlagsSDNode *BinNode = cast<BinaryWithFlagsSDNode>(N); - AddBinaryNodeIDCustom( - ID, N->getOpcode(), BinNode->Flags.hasNoUnsignedWrap(), - BinNode->Flags.hasNoSignedWrap(), BinNode->Flags.hasExact()); - break; - } case ISD::ATOMIC_CMP_SWAP: case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: case ISD::ATOMIC_SWAP: @@ -564,6 +555,8 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { } } // end switch (N->getOpcode()) + AddNodeIDFlags(ID, N); + // Target specific memory nodes could also have address spaces to check. if (N->isTargetMemoryOpcode()) ID.AddInteger(cast<MemSDNode>(N)->getPointerInfo().getAddrSpace()); @@ -960,14 +953,16 @@ void SelectionDAG::allnodes_clear() { BinarySDNode *SelectionDAG::GetBinarySDNode(unsigned Opcode, SDLoc DL, SDVTList VTs, SDValue N1, - SDValue N2, bool nuw, bool nsw, - bool exact) { + SDValue N2, + const SDNodeFlags *Flags) { if (isBinOpWithFlags(Opcode)) { + // If no flags were passed in, use a default flags object. + SDNodeFlags F; + if (Flags == nullptr) + Flags = &F; + BinaryWithFlagsSDNode *FN = new (NodeAllocator) BinaryWithFlagsSDNode( - Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs, N1, N2); - FN->Flags.setNoUnsignedWrap(nuw); - FN->Flags.setNoSignedWrap(nsw); - FN->Flags.setExact(exact); + Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs, N1, N2, *Flags); return FN; } @@ -2932,6 +2927,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, case ISD::TRUNCATE: case ISD::UINT_TO_FP: case ISD::SINT_TO_FP: + case ISD::BSWAP: case ISD::CTLZ: case ISD::CTLZ_ZERO_UNDEF: case ISD::CTTZ: @@ -3081,6 +3077,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, if (OpOpcode == ISD::UNDEF) return getUNDEF(VT); break; + case ISD::BSWAP: + assert(VT.isInteger() && VT == Operand.getValueType() && + "Invalid BSWAP!"); + assert((VT.getScalarSizeInBits() % 16 == 0) && + "BSWAP types must be a multiple of 16 bits!"); + if (OpOpcode == ISD::UNDEF) + return getUNDEF(VT); + break; case ISD::BITCAST: // Basic sanity checking. assert(VT.getSizeInBits() == Operand.getValueType().getSizeInBits() @@ -3260,7 +3264,7 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, SDLoc DL, EVT VT, } SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, - SDValue N2, bool nuw, bool nsw, bool exact) { + SDValue N2, const SDNodeFlags *Flags) { ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode()); ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode()); switch (Opcode) { @@ -3747,22 +3751,20 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, // Memoize this node if possible. BinarySDNode *N; SDVTList VTs = getVTList(VT); - const bool BinOpHasFlags = isBinOpWithFlags(Opcode); if (VT != MVT::Glue) { SDValue Ops[] = {N1, N2}; FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, VTs, Ops); - if (BinOpHasFlags) - AddBinaryNodeIDCustom(ID, Opcode, nuw, nsw, exact); + AddNodeIDFlags(ID, Opcode, Flags); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP)) return SDValue(E, 0); - N = GetBinarySDNode(Opcode, DL, VTs, N1, N2, nuw, nsw, exact); + N = GetBinarySDNode(Opcode, DL, VTs, N1, N2, Flags); CSEMap.InsertNode(N, IP); } else { - N = GetBinarySDNode(Opcode, DL, VTs, N1, N2, nuw, nsw, exact); + N = GetBinarySDNode(Opcode, DL, VTs, N1, N2, Flags); } InsertNode(N); @@ -4023,10 +4025,10 @@ static bool isMemSrcFromString(SDValue Src, StringRef &Str) { return getConstantStringInfo(G->getGlobal(), Str, SrcDelta, false); } -/// FindOptimalMemOpLowering - Determines the optimial series memory ops -/// to replace the memset / memcpy. Return true if the number of memory ops -/// is below the threshold. It returns the types of the sequence of -/// memory ops to perform memset / memcpy by reference. +/// Determines the optimal series of memory ops to replace the memset / memcpy. +/// Return true if the number of memory ops is below the threshold (Limit). +/// It returns the types of the sequence of memory ops to perform +/// memset / memcpy by reference. static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps, unsigned Limit, uint64_t Size, unsigned DstAlign, unsigned SrcAlign, @@ -6066,13 +6068,12 @@ SelectionDAG::getTargetInsertSubreg(int SRIdx, SDLoc DL, EVT VT, /// getNodeIfExists - Get the specified node if it's already available, or /// else return NULL. SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList, - ArrayRef<SDValue> Ops, bool nuw, bool nsw, - bool exact) { + ArrayRef<SDValue> Ops, + const SDNodeFlags *Flags) { if (VTList.VTs[VTList.NumVTs - 1] != MVT::Glue) { FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, VTList, Ops); - if (isBinOpWithFlags(Opcode)) - AddBinaryNodeIDCustom(ID, nuw, nsw, exact); + AddNodeIDFlags(ID, Opcode, Flags); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, DebugLoc(), IP)) return E; @@ -6133,7 +6134,7 @@ public: : SelectionDAG::DAGUpdateListener(d), UI(ui), UE(ue) {} }; -} +} // namespace /// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead. /// This can cause recursive merging of nodes in the DAG. @@ -6343,7 +6344,7 @@ namespace { bool operator<(const UseMemo &L, const UseMemo &R) { return (intptr_t)L.User < (intptr_t)R.User; } -} +} // namespace /// ReplaceAllUsesOfValuesWith - Replace any uses of From with To, leaving /// uses of other values produced by From.getNode() alone. The same value @@ -6588,7 +6589,7 @@ namespace { VTs.push_back(MVT((MVT::SimpleValueType)i)); } }; -} +} // namespace static ManagedStatic<std::set<EVT, EVT::compareRawBits> > EVTs; static ManagedStatic<EVTArray> SimpleVTArray; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 8ba957d62870..8313a48c3467 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -78,12 +78,16 @@ LimitFPPrecision("limit-float-precision", cl::location(LimitFloatPrecision), cl::init(0)); +static cl::opt<bool> +EnableFMFInDAG("enable-fmf-dag", cl::init(false), cl::Hidden, + cl::desc("Enable fast-math-flags for DAG nodes")); + // Limit the width of DAG chains. This is important in general to prevent -// prevent DAG-based analysis from blowing up. For example, alias analysis and +// DAG-based analysis from blowing up. For example, alias analysis and // load clustering may not complete in reasonable time. It is difficult to // recognize and avoid this situation within each individual analysis, and // future analyses are likely to have the same behavior. Limiting DAG width is -// the safe approach, and will be especially important with global DAGs. +// the safe approach and will be especially important with global DAGs. // // MaxParallelChains default is arbitrarily high to avoid affecting // optimization, but could be lowered to improve compile time. Any ld-ld-st-st @@ -2148,6 +2152,8 @@ void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) { bool nuw = false; bool nsw = false; bool exact = false; + FastMathFlags FMF; + if (const OverflowingBinaryOperator *OFBinOp = dyn_cast<const OverflowingBinaryOperator>(&I)) { nuw = OFBinOp->hasNoUnsignedWrap(); @@ -2156,9 +2162,22 @@ void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) { if (const PossiblyExactOperator *ExactOp = dyn_cast<const PossiblyExactOperator>(&I)) exact = ExactOp->isExact(); - + if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(&I)) + FMF = FPOp->getFastMathFlags(); + + SDNodeFlags Flags; + Flags.setExact(exact); + Flags.setNoSignedWrap(nsw); + Flags.setNoUnsignedWrap(nuw); + if (EnableFMFInDAG) { + Flags.setAllowReciprocal(FMF.allowReciprocal()); + Flags.setNoInfs(FMF.noInfs()); + Flags.setNoNaNs(FMF.noNaNs()); + Flags.setNoSignedZeros(FMF.noSignedZeros()); + Flags.setUnsafeAlgebra(FMF.unsafeAlgebra()); + } SDValue BinNodeValue = DAG.getNode(OpCode, getCurSDLoc(), Op1.getValueType(), - Op1, Op2, nuw, nsw, exact); + Op1, Op2, &Flags); setValue(&I, BinNodeValue); } @@ -2206,9 +2225,12 @@ void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) { dyn_cast<const PossiblyExactOperator>(&I)) exact = ExactOp->isExact(); } - + SDNodeFlags Flags; + Flags.setExact(exact); + Flags.setNoSignedWrap(nsw); + Flags.setNoUnsignedWrap(nuw); SDValue Res = DAG.getNode(Opcode, getCurSDLoc(), Op1.getValueType(), Op1, Op2, - nuw, nsw, exact); + &Flags); setValue(&I, Res); } @@ -2892,7 +2914,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { // Serialize volatile loads with other side effects. Root = getRoot(); else if (AA->pointsToConstantMemory( - AliasAnalysis::Location(SV, AA->getTypeStoreSize(Ty), AAInfo))) { + MemoryLocation(SV, AA->getTypeStoreSize(Ty), AAInfo))) { // Do not serialize (non-volatile) loads of constant memory with anything. Root = DAG.getEntryNode(); ConstantMemory = true; @@ -2907,8 +2929,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { Root = TLI.prepareVolatileOrAtomicLoad(Root, dl, DAG); SmallVector<SDValue, 4> Values(NumValues); - SmallVector<SDValue, 4> Chains(std::min(unsigned(MaxParallelChains), - NumValues)); + SmallVector<SDValue, 4> Chains(std::min(MaxParallelChains, NumValues)); EVT PtrVT = Ptr.getValueType(); unsigned ChainI = 0; for (unsigned i = 0; i != NumValues; ++i, ++ChainI) { @@ -2972,8 +2993,7 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { SDValue Ptr = getValue(PtrV); SDValue Root = getRoot(); - SmallVector<SDValue, 4> Chains(std::min(unsigned(MaxParallelChains), - NumValues)); + SmallVector<SDValue, 4> Chains(std::min(MaxParallelChains, NumValues)); EVT PtrVT = Ptr.getValueType(); bool isVolatile = I.isVolatile(); bool isNonTemporal = I.getMetadata(LLVMContext::MD_nontemporal) != nullptr; @@ -3141,10 +3161,8 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I) { const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range); SDValue InChain = DAG.getRoot(); - if (AA->pointsToConstantMemory( - AliasAnalysis::Location(PtrOperand, - AA->getTypeStoreSize(I.getType()), - AAInfo))) { + if (AA->pointsToConstantMemory(MemoryLocation( + PtrOperand, AA->getTypeStoreSize(I.getType()), AAInfo))) { // Do not serialize (non-volatile) loads of constant memory with anything. InChain = DAG.getEntryNode(); } @@ -3186,10 +3204,9 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) { Value *BasePtr = Ptr; bool UniformBase = getUniformBase(BasePtr, Base, Index, this); bool ConstantMemory = false; - if (UniformBase && AA->pointsToConstantMemory( - AliasAnalysis::Location(BasePtr, - AA->getTypeStoreSize(I.getType()), - AAInfo))) { + if (UniformBase && + AA->pointsToConstantMemory( + MemoryLocation(BasePtr, AA->getTypeStoreSize(I.getType()), AAInfo))) { // Do not serialize (non-volatile) loads of constant memory with anything. Root = DAG.getEntryNode(); ConstantMemory = true; @@ -4983,6 +5000,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { assert(Reg && "cannot get exception code on this platform"); MVT PtrVT = TLI.getPointerTy(); const TargetRegisterClass *PtrRC = TLI.getRegClassFor(PtrVT); + assert(FuncInfo.MBB->isLandingPad() && "eh.exceptioncode in non-lpad"); unsigned VReg = FuncInfo.MBB->addLiveIn(Reg, PtrRC); SDValue N = DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), VReg, PtrVT); @@ -7486,6 +7504,31 @@ void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters, const int64_t N = Clusters.size(); const unsigned MinJumpTableSize = TLI.getMinimumJumpTableEntries(); + // TotalCases[i]: Total nbr of cases in Clusters[0..i]. + SmallVector<unsigned, 8> TotalCases(N); + + for (unsigned i = 0; i < N; ++i) { + APInt Hi = Clusters[i].High->getValue(); + APInt Lo = Clusters[i].Low->getValue(); + TotalCases[i] = (Hi - Lo).getLimitedValue() + 1; + if (i != 0) + TotalCases[i] += TotalCases[i - 1]; + } + + if (N >= MinJumpTableSize && isDense(Clusters, &TotalCases[0], 0, N - 1)) { + // Cheap case: the whole range might be suitable for jump table. + CaseCluster JTCluster; + if (buildJumpTable(Clusters, 0, N - 1, SI, DefaultMBB, JTCluster)) { + Clusters[0] = JTCluster; + Clusters.resize(1); + return; + } + } + + // The algorithm below is not suitable for -O0. + if (TM.getOptLevel() == CodeGenOpt::None) + return; + // Split Clusters into minimum number of dense partitions. The algorithm uses // the same idea as Kannan & Proebsting "Correction to 'Producing Good Code // for the Case Statement'" (1994), but builds the MinPartitions array in @@ -7499,16 +7542,6 @@ void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters, SmallVector<unsigned, 8> LastElement(N); // NumTables[i]: nbr of >= MinJumpTableSize partitions from Clusters[i..N-1]. SmallVector<unsigned, 8> NumTables(N); - // TotalCases[i]: Total nbr of cases in Clusters[0..i]. - SmallVector<unsigned, 8> TotalCases(N); - - for (unsigned i = 0; i < N; ++i) { - APInt Hi = Clusters[i].High->getValue(); - APInt Lo = Clusters[i].Low->getValue(); - TotalCases[i] = (Hi - Lo).getLimitedValue() + 1; - if (i != 0) - TotalCases[i] += TotalCases[i - 1]; - } // Base case: There is only one way to partition Clusters[N-1]. MinPartitions[N - 1] = 1; @@ -7696,6 +7729,10 @@ void SelectionDAGBuilder::findBitTestClusters(CaseClusterVector &Clusters, assert(Clusters[i-1].High->getValue().slt(Clusters[i].Low->getValue())); #endif + // The algorithm below is not suitable for -O0. + if (TM.getOptLevel() == CodeGenOpt::None) + return; + // If target does not have legal shift left, do not emit bit tests at all. const TargetLowering &TLI = DAG.getTargetLoweringInfo(); EVT PTy = TLI.getPointerTy(); @@ -7959,6 +7996,18 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, } } +unsigned SelectionDAGBuilder::caseClusterRank(const CaseCluster &CC, + CaseClusterIt First, + CaseClusterIt Last) { + return std::count_if(First, Last + 1, [&](const CaseCluster &X) { + if (X.Weight != CC.Weight) + return X.Weight > CC.Weight; + + // Ties are broken by comparing the case value. + return X.Low->getValue().slt(CC.Low->getValue()); + }); +} + void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList, const SwitchWorkListItem &W, Value *Cond, @@ -7988,6 +8037,48 @@ void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList, RightWeight += (--FirstRight)->Weight; I++; } + + for (;;) { + // Our binary search tree differs from a typical BST in that ours can have up + // to three values in each leaf. The pivot selection above doesn't take that + // into account, which means the tree might require more nodes and be less + // efficient. We compensate for this here. + + unsigned NumLeft = LastLeft - W.FirstCluster + 1; + unsigned NumRight = W.LastCluster - FirstRight + 1; + + if (std::min(NumLeft, NumRight) < 3 && std::max(NumLeft, NumRight) > 3) { + // If one side has less than 3 clusters, and the other has more than 3, + // consider taking a cluster from the other side. + + if (NumLeft < NumRight) { + // Consider moving the first cluster on the right to the left side. + CaseCluster &CC = *FirstRight; + unsigned RightSideRank = caseClusterRank(CC, FirstRight, W.LastCluster); + unsigned LeftSideRank = caseClusterRank(CC, W.FirstCluster, LastLeft); + if (LeftSideRank <= RightSideRank) { + // Moving the cluster to the left does not demote it. + ++LastLeft; + ++FirstRight; + continue; + } + } else { + assert(NumRight < NumLeft); + // Consider moving the last element on the left to the right side. + CaseCluster &CC = *LastLeft; + unsigned LeftSideRank = caseClusterRank(CC, W.FirstCluster, LastLeft); + unsigned RightSideRank = caseClusterRank(CC, FirstRight, W.LastCluster); + if (RightSideRank <= LeftSideRank) { + // Moving the cluster to the right does not demot it. + --LastLeft; + --FirstRight; + continue; + } + } + } + break; + } + assert(LastLeft + 1 == FirstRight); assert(LastLeft >= W.FirstCluster); assert(FirstRight <= W.LastCluster); @@ -8111,11 +8202,8 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { return; } - if (TM.getOptLevel() != CodeGenOpt::None) { - findJumpTables(Clusters, &SI, DefaultMBB); - findBitTestClusters(Clusters, &SI); - } - + findJumpTables(Clusters, &SI, DefaultMBB); + findBitTestClusters(Clusters, &SI); DEBUG({ dbgs() << "Case clusters: "; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index f0c03af3f64b..f225d54d189d 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -342,6 +342,11 @@ private: }; typedef SmallVector<SwitchWorkListItem, 4> SwitchWorkList; + /// Determine the rank by weight of CC in [First,Last]. If CC has more weight + /// than each cluster in the range, its rank is 0. + static unsigned caseClusterRank(const CaseCluster &CC, CaseClusterIt First, + CaseClusterIt Last); + /// Emit comparison and split W into two subtrees. void splitWorkItem(SwitchWorkList &WorkList, const SwitchWorkListItem &W, Value *Cond, MachineBasicBlock *SwitchMBB); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 22f592afae71..c5562cd31067 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -307,7 +307,7 @@ namespace llvm { "Unknown sched type!"); return createILPListDAGScheduler(IS, OptLevel); } -} +} // namespace llvm // EmitInstrWithCustomInserter - This method should be implemented by targets // that mark instructions with the 'usesCustomInserter' flag. These @@ -938,8 +938,10 @@ bool SelectionDAGISel::PrepareEHLandingPad() { // pad into several BBs. const BasicBlock *LLVMBB = MBB->getBasicBlock(); const LandingPadInst *LPadInst = LLVMBB->getLandingPadInst(); - MF->getMMI().addPersonality( - MBB, cast<Function>(LPadInst->getPersonalityFn()->stripPointerCasts())); + MF->getMMI().addPersonality(MBB, cast<Function>(LPadInst->getParent() + ->getParent() + ->getPersonalityFn() + ->stripPointerCasts())); EHPersonality Personality = MF->getMMI().getPersonalityType(); if (isMSVCEHPersonality(Personality)) { @@ -2540,7 +2542,7 @@ public: J.setNode(E); } }; -} +} // namespace SDNode *SelectionDAGISel:: SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp index 4df5ede388fc..19b5d160c8a9 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp @@ -132,7 +132,7 @@ namespace llvm { "color=blue,style=dashed"); } }; -} +} // namespace llvm std::string DOTGraphTraits<SelectionDAG*>::getNodeLabel(const SDNode *Node, const SelectionDAG *G) { diff --git a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp index 8bbfa01e7594..a6b3fc6c4d4a 100644 --- a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp +++ b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp @@ -113,84 +113,137 @@ StatepointLoweringState::allocateStackSlot(EVT ValueType, llvm_unreachable("infinite loop?"); } +/// Utility function for reservePreviousStackSlotForValue. Tries to find +/// stack slot index to which we have spilled value for previous statepoints. +/// LookUpDepth specifies maximum DFS depth this function is allowed to look. +static Optional<int> findPreviousSpillSlot(const Value *Val, + SelectionDAGBuilder &Builder, + int LookUpDepth) { + // Can not look any futher - give up now + if (LookUpDepth <= 0) + return Optional<int>(); + + // Spill location is known for gc relocates + if (isGCRelocate(Val)) { + GCRelocateOperands RelocOps(cast<Instruction>(Val)); + + FunctionLoweringInfo::StatepointSpilledValueMapTy &SpillMap = + Builder.FuncInfo.StatepointRelocatedValues[RelocOps.getStatepoint()]; + + auto It = SpillMap.find(RelocOps.getDerivedPtr()); + if (It == SpillMap.end()) + return Optional<int>(); + + return It->second; + } + + // Look through bitcast instructions. + if (const BitCastInst *Cast = dyn_cast<BitCastInst>(Val)) { + return findPreviousSpillSlot(Cast->getOperand(0), Builder, LookUpDepth - 1); + } + + // Look through phi nodes + // All incoming values should have same known stack slot, otherwise result + // is unknown. + if (const PHINode *Phi = dyn_cast<PHINode>(Val)) { + Optional<int> MergedResult = None; + + for (auto &IncomingValue : Phi->incoming_values()) { + Optional<int> SpillSlot = + findPreviousSpillSlot(IncomingValue, Builder, LookUpDepth - 1); + if (!SpillSlot.hasValue()) + return Optional<int>(); + + if (MergedResult.hasValue() && *MergedResult != *SpillSlot) + return Optional<int>(); + + MergedResult = SpillSlot; + } + return MergedResult; + } + + // TODO: We can do better for PHI nodes. In cases like this: + // ptr = phi(relocated_pointer, not_relocated_pointer) + // statepoint(ptr) + // We will return that stack slot for ptr is unknown. And later we might + // assign different stack slots for ptr and relocated_pointer. This limits + // llvm's ability to remove redundant stores. + // Unfortunately it's hard to accomplish in current infrastructure. + // We use this function to eliminate spill store completely, while + // in example we still need to emit store, but instead of any location + // we need to use special "preferred" location. + + // TODO: handle simple updates. If a value is modified and the original + // value is no longer live, it would be nice to put the modified value in the + // same slot. This allows folding of the memory accesses for some + // instructions types (like an increment). + // statepoint (i) + // i1 = i+1 + // statepoint (i1) + // However we need to be careful for cases like this: + // statepoint(i) + // i1 = i+1 + // statepoint(i, i1) + // Here we want to reserve spill slot for 'i', but not for 'i+1'. If we just + // put handling of simple modifications in this function like it's done + // for bitcasts we might end up reserving i's slot for 'i+1' because order in + // which we visit values is unspecified. + + // Don't know any information about this instruction + return Optional<int>(); +} + /// Try to find existing copies of the incoming values in stack slots used for /// statepoint spilling. If we can find a spill slot for the incoming value, /// mark that slot as allocated, and reuse the same slot for this safepoint. /// This helps to avoid series of loads and stores that only serve to resuffle /// values on the stack between calls. -static void reservePreviousStackSlotForValue(SDValue Incoming, +static void reservePreviousStackSlotForValue(const Value *IncomingValue, SelectionDAGBuilder &Builder) { + SDValue Incoming = Builder.getValue(IncomingValue); + if (isa<ConstantSDNode>(Incoming) || isa<FrameIndexSDNode>(Incoming)) { // We won't need to spill this, so no need to check for previously // allocated stack slots return; } - SDValue Loc = Builder.StatepointLowering.getLocation(Incoming); - if (Loc.getNode()) { + SDValue OldLocation = Builder.StatepointLowering.getLocation(Incoming); + if (OldLocation.getNode()) // duplicates in input return; - } - - // Search back for the load from a stack slot pattern to find the original - // slot we allocated for this value. We could extend this to deal with - // simple modification patterns, but simple dealing with trivial load/store - // sequences helps a lot already. - if (LoadSDNode *Load = dyn_cast<LoadSDNode>(Incoming)) { - if (auto *FI = dyn_cast<FrameIndexSDNode>(Load->getBasePtr())) { - const int Index = FI->getIndex(); - auto Itr = std::find(Builder.FuncInfo.StatepointStackSlots.begin(), - Builder.FuncInfo.StatepointStackSlots.end(), Index); - if (Itr == Builder.FuncInfo.StatepointStackSlots.end()) { - // not one of the lowering stack slots, can't reuse! - // TODO: Actually, we probably could reuse the stack slot if the value - // hasn't changed at all, but we'd need to look for intervening writes - return; - } else { - // This is one of our dedicated lowering slots - const int Offset = - std::distance(Builder.FuncInfo.StatepointStackSlots.begin(), Itr); - if (Builder.StatepointLowering.isStackSlotAllocated(Offset)) { - // stack slot already assigned to someone else, can't use it! - // TODO: currently we reserve space for gc arguments after doing - // normal allocation for deopt arguments. We should reserve for - // _all_ deopt and gc arguments, then start allocating. This - // will prevent some moves being inserted when vm state changes, - // but gc state doesn't between two calls. - return; - } - // Reserve this stack slot - Builder.StatepointLowering.reserveStackSlot(Offset); - } - // Cache this slot so we find it when going through the normal - // assignment loop. - SDValue Loc = - Builder.DAG.getTargetFrameIndex(Index, Incoming.getValueType()); + const int LookUpDepth = 6; + Optional<int> Index = + findPreviousSpillSlot(IncomingValue, Builder, LookUpDepth); + if (!Index.hasValue()) + return; - Builder.StatepointLowering.setLocation(Incoming, Loc); - } + auto Itr = std::find(Builder.FuncInfo.StatepointStackSlots.begin(), + Builder.FuncInfo.StatepointStackSlots.end(), *Index); + assert(Itr != Builder.FuncInfo.StatepointStackSlots.end() && + "value spilled to the unknown stack slot"); + + // This is one of our dedicated lowering slots + const int Offset = + std::distance(Builder.FuncInfo.StatepointStackSlots.begin(), Itr); + if (Builder.StatepointLowering.isStackSlotAllocated(Offset)) { + // stack slot already assigned to someone else, can't use it! + // TODO: currently we reserve space for gc arguments after doing + // normal allocation for deopt arguments. We should reserve for + // _all_ deopt and gc arguments, then start allocating. This + // will prevent some moves being inserted when vm state changes, + // but gc state doesn't between two calls. + return; } + // Reserve this stack slot + Builder.StatepointLowering.reserveStackSlot(Offset); - // TODO: handle case where a reloaded value flows through a phi to - // another safepoint. e.g. - // bb1: - // a' = relocated... - // bb2: % pred: bb1, bb3, bb4, etc. - // a_phi = phi(a', ...) - // statepoint ... a_phi - // NOTE: This will require reasoning about cross basic block values. This is - // decidedly non trivial and this might not be the right place to do it. We - // don't really have the information we need here... - - // TODO: handle simple updates. If a value is modified and the original - // value is no longer live, it would be nice to put the modified value in the - // same slot. This allows folding of the memory accesses for some - // instructions types (like an increment). - // statepoint (i) - // i1 = i+1 - // statepoint (i1) + // Cache this slot so we find it when going through the normal + // assignment loop. + SDValue Loc = Builder.DAG.getTargetFrameIndex(*Index, Incoming.getValueType()); + Builder.StatepointLowering.setLocation(Incoming, Loc); } /// Remove any duplicate (as SDValues) from the derived pointer pairs. This @@ -319,8 +372,7 @@ static void getIncomingStatepointGCValues( SmallVectorImpl<const Value *> &Bases, SmallVectorImpl<const Value *> &Ptrs, SmallVectorImpl<const Value *> &Relocs, ImmutableStatepoint StatepointSite, SelectionDAGBuilder &Builder) { - for (GCRelocateOperands relocateOpers : - StatepointSite.getRelocates(StatepointSite)) { + for (GCRelocateOperands relocateOpers : StatepointSite.getRelocates()) { Relocs.push_back(relocateOpers.getUnderlyingCallSite().getInstruction()); Bases.push_back(relocateOpers.getBasePtr()); Ptrs.push_back(relocateOpers.getDerivedPtr()); @@ -458,15 +510,11 @@ static void lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops, // doesn't change semantics at all. It is important for performance that we // reserve slots for both deopt and gc values before lowering either. for (const Value *V : StatepointSite.vm_state_args()) { - SDValue Incoming = Builder.getValue(V); - reservePreviousStackSlotForValue(Incoming, Builder); + reservePreviousStackSlotForValue(V, Builder); } for (unsigned i = 0; i < Bases.size(); ++i) { - const Value *Base = Bases[i]; - reservePreviousStackSlotForValue(Builder.getValue(Base), Builder); - - const Value *Ptr = Ptrs[i]; - reservePreviousStackSlotForValue(Builder.getValue(Ptr), Builder); + reservePreviousStackSlotForValue(Bases[i], Builder); + reservePreviousStackSlotForValue(Ptrs[i], Builder); } // First, prefix the list with the number of unique values to be @@ -524,8 +572,7 @@ static void lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops, FunctionLoweringInfo::StatepointSpilledValueMapTy &SpillMap = Builder.FuncInfo.StatepointRelocatedValues[StatepointInstr]; - for (GCRelocateOperands RelocateOpers : - StatepointSite.getRelocates(StatepointSite)) { + for (GCRelocateOperands RelocateOpers : StatepointSite.getRelocates()) { const Value *V = RelocateOpers.getDerivedPtr(); SDValue SDV = Builder.getValue(V); SDValue Loc = Builder.StatepointLowering.getLocation(SDV); diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 9daf2a50ad8f..c70c3a270403 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -2671,8 +2671,9 @@ SDValue TargetLowering::BuildExactSDIV(SDValue Op1, SDValue Op2, SDLoc dl, // TODO: For UDIV use SRL instead of SRA. SDValue Amt = DAG.getConstant(ShAmt, dl, getShiftAmountTy(Op1.getValueType())); - Op1 = DAG.getNode(ISD::SRA, dl, Op1.getValueType(), Op1, Amt, false, false, - true); + SDNodeFlags Flags; + Flags.setExact(true); + Op1 = DAG.getNode(ISD::SRA, dl, Op1.getValueType(), Op1, Amt, &Flags); d = d.ashr(ShAmt); } |