author | Dimitry Andric <dim@FreeBSD.org> | 2017-06-16 21:03:24 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2017-06-16 21:03:24 +0000 |
commit | 7c7aba6e5fef47a01a136be655b0a92cfd7090f6 (patch) | |
tree | 99ec531924f6078534b100ab9d7696abce848099 /lib/CodeGen | |
parent | 7ab83427af0f77b59941ceba41d509d7d097b065 (diff) | |
download | src-test2-7c7aba6e5fef47a01a136be655b0a92cfd7090f6.tar.gz src-test2-7c7aba6e5fef47a01a136be655b0a92cfd7090f6.zip |
Diffstat (limited to 'lib/CodeGen')
23 files changed, 533 insertions, 247 deletions
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 407d5623d670..ad348d723bae 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -1286,11 +1286,7 @@ bool AsmPrinter::doFinalization(Module &M) { const TargetLoweringObjectFile &TLOF = getObjFileLowering(); - // Emit module flags. - SmallVector<Module::ModuleFlagEntry, 8> ModuleFlags; - M.getModuleFlagsMetadata(ModuleFlags); - if (!ModuleFlags.empty()) - TLOF.emitModuleFlags(*OutStreamer, ModuleFlags, TM); + TLOF.emitModuleMetadata(*OutStreamer, M, TM); if (TM.getTargetTriple().isOSBinFormatELF()) { MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>(); diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index 04073b3aed68..dc39d1e6cb52 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -552,7 +552,7 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV, int Offset = TFI->getFrameIndexReference(*Asm->MF, Fragment.FI, FrameReg); DwarfExpr.addFragmentOffset(Expr); SmallVector<uint64_t, 8> Ops; - Ops.push_back(dwarf::DW_OP_plus); + Ops.push_back(dwarf::DW_OP_plus_uconst); Ops.push_back(Offset); Ops.append(Expr->elements_begin(), Expr->elements_end()); DIExpressionCursor Cursor(Ops); @@ -821,7 +821,7 @@ void DwarfCompileUnit::addAddress(DIE &Die, dwarf::Attribute Attribute, SmallVector<uint64_t, 8> Ops; if (Location.isIndirect() && Location.getOffset()) { - Ops.push_back(dwarf::DW_OP_plus); + Ops.push_back(dwarf::DW_OP_plus_uconst); Ops.push_back(Location.getOffset()); } DIExpressionCursor Cursor(Ops); @@ -850,7 +850,7 @@ void DwarfCompileUnit::addComplexAddress(const DbgVariable &DV, DIE &Die, SmallVector<uint64_t, 8> Ops; if (Location.isIndirect() && Location.getOffset()) { - Ops.push_back(dwarf::DW_OP_plus); + Ops.push_back(dwarf::DW_OP_plus_uconst); Ops.push_back(Location.getOffset()); } Ops.append(DIExpr->elements_begin(), DIExpr->elements_end()); diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index e3fd21a1fd70..75eb355bfb54 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -1511,7 +1511,7 @@ static void emitDebugLocValue(const AsmPrinter &AP, const DIBasicType *BT, DwarfExpr.setMemoryLocationKind(); SmallVector<uint64_t, 8> Ops; if (Location.isIndirect() && Location.getOffset()) { - Ops.push_back(dwarf::DW_OP_plus); + Ops.push_back(dwarf::DW_OP_plus_uconst); Ops.push_back(Location.getOffset()); } Ops.append(DIExpr->elements_begin(), DIExpr->elements_end()); diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h index ebfba4cfc275..5dfe06c64ec2 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -134,6 +134,13 @@ public: assert(!FrameIndexExprs.empty() && "Expected an MMI entry"); assert(!V.FrameIndexExprs.empty() && "Expected an MMI entry"); + if (FrameIndexExprs.size()) { + auto *Expr = FrameIndexExprs.back().Expr; + // Get rid of duplicate non-fragment entries. More than one non-fragment + // dbg.declare makes no sense so ignore all but the first. 
+ if (!Expr || !Expr->isFragment()) + return; + } FrameIndexExprs.append(V.FrameIndexExprs.begin(), V.FrameIndexExprs.end()); assert(all_of(FrameIndexExprs, [](FrameIndexExpr &FIE) { diff --git a/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/lib/CodeGen/AsmPrinter/DwarfExpression.cpp index d96479f43433..fe38ee805682 100644 --- a/lib/CodeGen/AsmPrinter/DwarfExpression.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfExpression.cpp @@ -248,15 +248,25 @@ bool DwarfExpression::addMachineRegExpression(const TargetRegisterInfo &TRI, assert(Reg.Size == 0 && "subregister has same size as superregister"); // Pattern-match combinations for which more efficient representations exist. - // [Reg, Offset, DW_OP_plus] --> [DW_OP_breg, Offset]. - // [Reg, Offset, DW_OP_minus] --> [DW_OP_breg, -Offset]. - // If Reg is a subregister we need to mask it out before subtracting. - if (Op && ((Op->getOp() == dwarf::DW_OP_plus) || - (Op->getOp() == dwarf::DW_OP_minus && !SubRegisterSizeInBits))) { - int Offset = Op->getArg(0); - SignedOffset = (Op->getOp() == dwarf::DW_OP_plus) ? Offset : -Offset; + // [Reg, DW_OP_plus_uconst, Offset] --> [DW_OP_breg, Offset]. + if (Op && (Op->getOp() == dwarf::DW_OP_plus_uconst)) { + SignedOffset = Op->getArg(0); ExprCursor.take(); } + + // [Reg, DW_OP_constu, Offset, DW_OP_plus] --> [DW_OP_breg, Offset] + // [Reg, DW_OP_constu, Offset, DW_OP_minus] --> [DW_OP_breg,-Offset] + // If Reg is a subregister we need to mask it out before subtracting. + if (Op && Op->getOp() == dwarf::DW_OP_constu) { + auto N = ExprCursor.peekNext(); + if (N && (N->getOp() == dwarf::DW_OP_plus || + (N->getOp() == dwarf::DW_OP_minus && !SubRegisterSizeInBits))) { + int Offset = Op->getArg(0); + SignedOffset = (N->getOp() == dwarf::DW_OP_minus) ? -Offset : Offset; + ExprCursor.consume(2); + } + } + if (FBReg) addFBReg(SignedOffset); else @@ -320,17 +330,14 @@ void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor, LocationKind = Unknown; return; } - case dwarf::DW_OP_plus: + case dwarf::DW_OP_plus_uconst: assert(LocationKind != Register); emitOp(dwarf::DW_OP_plus_uconst); emitUnsigned(Op->getArg(0)); break; + case dwarf::DW_OP_plus: case dwarf::DW_OP_minus: - assert(LocationKind != Register); - // There is no DW_OP_minus_uconst. - emitOp(dwarf::DW_OP_constu); - emitUnsigned(Op->getArg(0)); - emitOp(dwarf::DW_OP_minus); + emitOp(Op->getOp()); break; case dwarf::DW_OP_deref: { assert(LocationKind != Register); diff --git a/lib/CodeGen/AsmPrinter/DwarfExpression.h b/lib/CodeGen/AsmPrinter/DwarfExpression.h index de8613200067..728f8ad9225b 100644 --- a/lib/CodeGen/AsmPrinter/DwarfExpression.h +++ b/lib/CodeGen/AsmPrinter/DwarfExpression.h @@ -42,6 +42,9 @@ public: DIExpressionCursor(ArrayRef<uint64_t> Expr) : Start(Expr.begin()), End(Expr.end()) {} + DIExpressionCursor(const DIExpressionCursor &C) + : Start(C.Start), End(C.End) {} + /// Consume one operation. Optional<DIExpression::ExprOperand> take() { if (Start == End) diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp index 7f7d3e650e02..708f5f7536ff 100644 --- a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -475,7 +475,7 @@ void DwarfUnit::addBlockByrefAddress(const DbgVariable &DV, DIE &Die, SmallVector<uint64_t, 9> Ops; if (Location.isIndirect() && Location.getOffset()) { - Ops.push_back(dwarf::DW_OP_plus); + Ops.push_back(dwarf::DW_OP_plus_uconst); Ops.push_back(Location.getOffset()); } // If we started with a pointer to the __Block_byref... 
struct, then @@ -487,7 +487,7 @@ void DwarfUnit::addBlockByrefAddress(const DbgVariable &DV, DIE &Die, // DW_OP_plus_uconst ForwardingFieldOffset. Note there's no point in // adding the offset if it's 0. if (forwardingFieldOffset > 0) { - Ops.push_back(dwarf::DW_OP_plus); + Ops.push_back(dwarf::DW_OP_plus_uconst); Ops.push_back(forwardingFieldOffset); } @@ -499,7 +499,7 @@ void DwarfUnit::addBlockByrefAddress(const DbgVariable &DV, DIE &Die, // for the variable's field to get to the location of the actual variable: // DW_OP_plus_uconst varFieldOffset. Again, don't add if it's 0. if (varFieldOffset > 0) { - Ops.push_back(dwarf::DW_OP_plus); + Ops.push_back(dwarf::DW_OP_plus_uconst); Ops.push_back(varFieldOffset); } diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp index c2037cb7f1ae..37e176099ea7 100644 --- a/lib/CodeGen/CodeGenPrepare.cpp +++ b/lib/CodeGen/CodeGenPrepare.cpp @@ -134,7 +134,7 @@ static cl::opt<bool> DisablePreheaderProtect( cl::desc("Disable protection against removing loop preheaders")); static cl::opt<bool> ProfileGuidedSectionPrefix( - "profile-guided-section-prefix", cl::Hidden, cl::init(true), + "profile-guided-section-prefix", cl::Hidden, cl::init(true), cl::ZeroOrMore, cl::desc("Use profile info to add section prefix for hot/cold functions")); static cl::opt<unsigned> FreqRatioToSkipMerge( diff --git a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index ef5818dabe23..1d0d3dffa4c5 100644 --- a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -82,6 +82,12 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) { case TargetOpcode::G_UDIV: assert(Size == 32 && "Unsupported size"); return RTLIB::UDIV_I32; + case TargetOpcode::G_SREM: + assert(Size == 32 && "Unsupported size"); + return RTLIB::SREM_I32; + case TargetOpcode::G_UREM: + assert(Size == 32 && "Unsupported size"); + return RTLIB::UREM_I32; case TargetOpcode::G_FADD: assert((Size == 32 || Size == 64) && "Unsupported size"); return Size == 64 ? RTLIB::ADD_F64 : RTLIB::ADD_F32; @@ -93,43 +99,57 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) { llvm_unreachable("Unknown libcall function"); } -static LegalizerHelper::LegalizeResult -simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, - Type *OpType) { +LegalizerHelper::LegalizeResult llvm::replaceWithLibcall( + MachineInstr &MI, MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall, + const CallLowering::ArgInfo &Result, ArrayRef<CallLowering::ArgInfo> Args) { auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering(); auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering(); - auto Libcall = getRTLibDesc(MI.getOpcode(), Size); const char *Name = TLI.getLibcallName(Libcall); MIRBuilder.getMF().getFrameInfo().setHasCalls(true); - CLI.lowerCall(MIRBuilder, TLI.getLibcallCallingConv(Libcall), - MachineOperand::CreateES(Name), - {MI.getOperand(0).getReg(), OpType}, - {{MI.getOperand(1).getReg(), OpType}, - {MI.getOperand(2).getReg(), OpType}}); + MIRBuilder.setInstr(MI); + if (!CLI.lowerCall(MIRBuilder, TLI.getLibcallCallingConv(Libcall), + MachineOperand::CreateES(Name), Result, Args)) + return LegalizerHelper::UnableToLegalize; + + // We're about to remove MI, so move the insert point after it. 
+ MIRBuilder.setInsertPt(MIRBuilder.getMBB(), + std::next(MIRBuilder.getInsertPt())); + MI.eraseFromParent(); return LegalizerHelper::Legalized; } +static LegalizerHelper::LegalizeResult +simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, + Type *OpType) { + auto Libcall = getRTLibDesc(MI.getOpcode(), Size); + return replaceWithLibcall(MI, MIRBuilder, Libcall, + {MI.getOperand(0).getReg(), OpType}, + {{MI.getOperand(1).getReg(), OpType}, + {MI.getOperand(2).getReg(), OpType}}); +} + LegalizerHelper::LegalizeResult LegalizerHelper::libcall(MachineInstr &MI) { - LLT Ty = MRI.getType(MI.getOperand(0).getReg()); - unsigned Size = Ty.getSizeInBits(); + LLT LLTy = MRI.getType(MI.getOperand(0).getReg()); + unsigned Size = LLTy.getSizeInBits(); auto &Ctx = MIRBuilder.getMF().getFunction()->getContext(); - MIRBuilder.setInstr(MI); switch (MI.getOpcode()) { default: return UnableToLegalize; case TargetOpcode::G_SDIV: - case TargetOpcode::G_UDIV: { - Type *Ty = Type::getInt32Ty(Ctx); - return simpleLibcall(MI, MIRBuilder, Size, Ty); + case TargetOpcode::G_UDIV: + case TargetOpcode::G_SREM: + case TargetOpcode::G_UREM: { + Type *HLTy = Type::getInt32Ty(Ctx); + return simpleLibcall(MI, MIRBuilder, Size, HLTy); } case TargetOpcode::G_FADD: case TargetOpcode::G_FPOW: case TargetOpcode::G_FREM: { - Type *Ty = Size == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx); - return simpleLibcall(MI, MIRBuilder, Size, Ty); + Type *HLTy = Size == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx); + return simpleLibcall(MI, MIRBuilder, Size, HLTy); } } } @@ -237,17 +257,18 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, unsigned NarrowSize = NarrowTy.getSizeInBits(); int NumParts = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() / NarrowSize; - LLT NarrowPtrTy = LLT::pointer( - MRI.getType(MI.getOperand(1).getReg()).getAddressSpace(), NarrowSize); + LLT OffsetTy = LLT::scalar( + MRI.getType(MI.getOperand(1).getReg()).getScalarSizeInBits()); SmallVector<unsigned, 2> DstRegs; for (int i = 0; i < NumParts; ++i) { unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy); - unsigned SrcReg = MRI.createGenericVirtualRegister(NarrowPtrTy); - unsigned Offset = MRI.createGenericVirtualRegister(LLT::scalar(64)); + unsigned SrcReg = 0; + unsigned Adjustment = i * NarrowSize / 8; + + MIRBuilder.materializeGEP(SrcReg, MI.getOperand(1).getReg(), OffsetTy, + Adjustment); - MIRBuilder.buildConstant(Offset, i * NarrowSize / 8); - MIRBuilder.buildGEP(SrcReg, MI.getOperand(1).getReg(), Offset); // TODO: This is conservatively correct, but we probably want to split the // memory operands in the future. 
MIRBuilder.buildLoad(DstReg, SrcReg, **MI.memoperands_begin()); @@ -263,17 +284,19 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, unsigned NarrowSize = NarrowTy.getSizeInBits(); int NumParts = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() / NarrowSize; - LLT NarrowPtrTy = LLT::pointer( - MRI.getType(MI.getOperand(1).getReg()).getAddressSpace(), NarrowSize); + LLT OffsetTy = LLT::scalar( + MRI.getType(MI.getOperand(1).getReg()).getScalarSizeInBits()); SmallVector<unsigned, 2> SrcRegs; extractParts(MI.getOperand(0).getReg(), NarrowTy, NumParts, SrcRegs); for (int i = 0; i < NumParts; ++i) { - unsigned DstReg = MRI.createGenericVirtualRegister(NarrowPtrTy); - unsigned Offset = MRI.createGenericVirtualRegister(LLT::scalar(64)); - MIRBuilder.buildConstant(Offset, i * NarrowSize / 8); - MIRBuilder.buildGEP(DstReg, MI.getOperand(1).getReg(), Offset); + unsigned DstReg = 0; + unsigned Adjustment = i * NarrowSize / 8; + + MIRBuilder.materializeGEP(DstReg, MI.getOperand(1).getReg(), OffsetTy, + Adjustment); + // TODO: This is conservatively correct, but we probably want to split the // memory operands in the future. MIRBuilder.buildStore(SrcRegs[i], DstReg, **MI.memoperands_begin()); diff --git a/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp index 54ef7e5c5a1b..79d312fb52ca 100644 --- a/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -191,6 +191,24 @@ MachineInstrBuilder MachineIRBuilder::buildGEP(unsigned Res, unsigned Op0, .addUse(Op1); } +Optional<MachineInstrBuilder> +MachineIRBuilder::materializeGEP(unsigned &Res, unsigned Op0, + const LLT &ValueTy, uint64_t Value) { + assert(Res == 0 && "Res is a result argument"); + assert(ValueTy.isScalar() && "invalid offset type"); + + if (Value == 0) { + Res = Op0; + return None; + } + + Res = MRI->createGenericVirtualRegister(MRI->getType(Op0)); + unsigned TmpReg = MRI->createGenericVirtualRegister(ValueTy); + + buildConstant(TmpReg, Value); + return buildGEP(Res, Op0, TmpReg); +} + MachineInstrBuilder MachineIRBuilder::buildPtrMask(unsigned Res, unsigned Op0, uint32_t NumBits) { assert(MRI->getType(Res).isPointer() && diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp index fc52b0da0d61..2d4b95974cc6 100644 --- a/lib/CodeGen/MachineBlockPlacement.cpp +++ b/lib/CodeGen/MachineBlockPlacement.cpp @@ -594,8 +594,8 @@ BranchProbability MachineBlockPlacement::collectViableSuccessors( // Assume A->C is very hot (>90%), and C->D has a 50% probability, then after // A->C is chosen as a fall-through, D won't be selected as a successor of C // due to CFG constraint (the probability of C->D is not greater than - // HotProb to break top-order). If we exclude E that is not in BlockFilter - // when calculating the probability of C->D, D will be selected and we + // HotProb to break topo-order). If we exclude E that is not in BlockFilter + // when calculating the probability of C->D, D will be selected and we // will get A C D B as the layout of this loop. auto AdjustedSumProb = BranchProbability::getOne(); for (MachineBasicBlock *Succ : BB->successors()) { @@ -1156,7 +1156,7 @@ void MachineBlockPlacement::precomputeTriangleChains() { continue; // Now we have an interesting triangle. Insert it if it's not part of an - // existing chain + // existing chain. // Note: This cannot be replaced with a call insert() or emplace() because // the find key is BB, but the insert/emplace key is PDom. 
auto Found = TriangleChainMap.find(&BB); @@ -1298,9 +1298,9 @@ bool MachineBlockPlacement::hasBetterLayoutPredecessor( // | | | | // ---BB | | BB // | | | | - // | pred-- | Succ-- + // | Pred-- | Succ-- // | | | | - // ---succ ---pred-- + // ---Succ ---Pred-- // // cost = freq(S->Pred) + freq(BB->Succ) cost = 2 * freq (S->Pred) // = freq(S->Pred) + freq(S->BB) diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp index 52d5819f8dbc..c7113f1fdc47 100644 --- a/lib/CodeGen/MachineLICM.cpp +++ b/lib/CodeGen/MachineLICM.cpp @@ -895,8 +895,11 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) { // If the physreg has no defs anywhere, it's just an ambient register // and we can freely move its uses. Alternatively, if it's allocatable, // it could get allocated to something with a def during allocation. - if (!MRI->isConstantPhysReg(Reg)) - return false; + // However, if the physreg is known to always be caller saved/restored + // then this use is safe to hoist. + if (!MRI->isConstantPhysReg(Reg) && + !(TRI->isCallerPreservedPhysReg(Reg, *I.getParent()->getParent()))) + return false; // Otherwise it's safe to move. continue; } else if (!MO.isDead()) { diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index a0967f574006..2d4422d94a17 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -2217,7 +2217,8 @@ SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N) { // Iff the flag result is dead: // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry) - if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::UADDO) && + if ((N0.getOpcode() == ISD::ADD || + (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0)) && isNullConstant(N1) && !N->hasAnyUseOfValue(1)) return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0.getOperand(0), N0.getOperand(1), CarryIn); @@ -12460,10 +12461,27 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores); - SDValue NewStore = DAG.getStore(NewChain, DL, StoredVal, - FirstInChain->getBasePtr(), - FirstInChain->getPointerInfo(), - FirstInChain->getAlignment()); + + // make sure we use trunc store if it's necessary to be legal. + SDValue NewStore; + if (TLI.isTypeLegal(StoredVal.getValueType())) { + NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(), + FirstInChain->getPointerInfo(), + FirstInChain->getAlignment()); + } else { // Must be realized as a trunc store + EVT LegalizedStoredValueTy = + TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType()); + unsigned LegalizedStoreSize = LegalizedStoredValueTy.getSizeInBits(); + ConstantSDNode *C = cast<ConstantSDNode>(StoredVal); + SDValue ExtendedStoreVal = + DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL, + LegalizedStoredValueTy); + NewStore = DAG.getTruncStore( + NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(), + FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/, + FirstInChain->getAlignment(), + FirstInChain->getMemOperand()->getFlags()); + } // Replace all merged stores with the new store. for (unsigned i = 0; i < NumStores; ++i) @@ -12731,8 +12749,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { IsFast) { LastLegalType = i + 1; // Or check whether a truncstore is legal. 
- } else if (!LegalTypes && - TLI.getTypeAction(Context, StoreTy) == + } else if (TLI.getTypeAction(Context, StoreTy) == TargetLowering::TypePromoteInteger) { EVT LegalizedStoredValueTy = TLI.getTypeToTransformTo(Context, StoredVal.getValueType()); @@ -12947,8 +12964,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { else if (TLI.getTypeAction(Context, StoreTy) == TargetLowering::TypePromoteInteger) { EVT LegalizedStoredValueTy = TLI.getTypeToTransformTo(Context, StoreTy); - if (!LegalTypes && - TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) && + if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) && TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValueTy) && TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValueTy, StoreTy) && @@ -12958,8 +12974,8 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy, FirstStoreAS, FirstStoreAlign, &IsFastSt) && IsFastSt && - TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy, - FirstLoadAS, FirstLoadAlign, &IsFastLd) && + TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS, + FirstLoadAlign, &IsFastLd) && IsFastLd) LastLegalIntegerType = i + 1; } @@ -13189,10 +13205,6 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { Chain = ST->getChain(); } - // Try transforming N to an indexed store. - if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N)) - return SDValue(N, 0); - // FIXME: is there such a thing as a truncating indexed store? if (ST->isTruncatingStore() && ST->isUnindexed() && Value.getValueType().isInteger()) { @@ -13287,6 +13299,10 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { } } + // Try transforming N to an indexed store. + if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N)) + return SDValue(N, 0); + // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr' // // Make sure to do this only after attempting to merge stores in order to @@ -14692,21 +14708,7 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) { MachineMemOperand *MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset, VT.getStoreSize()); SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO); - - // The new load must have the same position as the old load in terms of memory - // dependency. Create a TokenFactor for Ld and NewLd and update uses of Ld's - // output chain to use that TokenFactor. - // TODO: This code is based on a similar sequence in x86 lowering. It should - // be moved to a helper function, so it can be shared and reused. 
- if (Ld->hasAnyUseOfValue(1)) { - SDValue OldChain = SDValue(Ld, 1); - SDValue NewChain = SDValue(NewLd.getNode(), 1); - SDValue TokenFactor = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, - OldChain, NewChain); - DAG.ReplaceAllUsesOfValueWith(OldChain, TokenFactor); - DAG.UpdateNodeOperands(TokenFactor.getNode(), OldChain, NewChain); - } - + DAG.makeEquivalentMemoryOrdering(Ld, NewLd); return NewLd; } diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index 606b8952f3c1..b736037d71dd 100644 --- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -523,3 +523,29 @@ void FunctionLoweringInfo::setCurrentSwiftErrorVReg( const MachineBasicBlock *MBB, const Value *Val, unsigned VReg) { SwiftErrorVRegDefMap[std::make_pair(MBB, Val)] = VReg; } + +std::pair<unsigned, bool> +FunctionLoweringInfo::getOrCreateSwiftErrorVRegDefAt(const Instruction *I) { + auto Key = PointerIntPair<const Instruction *, 1, bool>(I, true); + auto It = SwiftErrorVRegDefUses.find(Key); + if (It == SwiftErrorVRegDefUses.end()) { + auto &DL = MF->getDataLayout(); + const TargetRegisterClass *RC = TLI->getRegClassFor(TLI->getPointerTy(DL)); + unsigned VReg = MF->getRegInfo().createVirtualRegister(RC); + SwiftErrorVRegDefUses[Key] = VReg; + return std::make_pair(VReg, true); + } + return std::make_pair(It->second, false); +} + +std::pair<unsigned, bool> +FunctionLoweringInfo::getOrCreateSwiftErrorVRegUseAt(const Instruction *I, const MachineBasicBlock *MBB, const Value *Val) { + auto Key = PointerIntPair<const Instruction *, 1, bool>(I, false); + auto It = SwiftErrorVRegDefUses.find(Key); + if (It == SwiftErrorVRegDefUses.end()) { + unsigned VReg = getOrCreateSwiftErrorVReg(MBB, Val); + SwiftErrorVRegDefUses[Key] = VReg; + return std::make_pair(VReg, true); + } + return std::make_pair(It->second, false); +} diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index e54eaa3b81be..15e87b7af18d 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -2192,19 +2192,6 @@ static bool isSinCosLibcallAvailable(SDNode *Node, const TargetLowering &TLI) { return TLI.getLibcallName(LC) != nullptr; } -/// Return true if sincos libcall is available and can be used to combine sin -/// and cos. -static bool canCombineSinCosLibcall(SDNode *Node, const TargetLowering &TLI, - const TargetMachine &TM) { - if (!isSinCosLibcallAvailable(Node, TLI)) - return false; - // GNU sin/cos functions set errno while sincos does not. Therefore - // combining sin and cos is only safe if unsafe-fpmath is enabled. - if (TM.getTargetTriple().isGNUEnvironment() && !TM.Options.UnsafeFPMath) - return false; - return true; -} - /// Only issue sincos libcall if both sin and cos are needed. static bool useSinCos(SDNode *Node) { unsigned OtherOpcode = Node->getOpcode() == ISD::FSIN @@ -3247,7 +3234,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { // Turn fsin / fcos into ISD::FSINCOS node if there are a pair of fsin / // fcos which share the same operand and both are used. 
if ((TLI.isOperationLegalOrCustom(ISD::FSINCOS, VT) || - canCombineSinCosLibcall(Node, TLI, TM)) + isSinCosLibcallAvailable(Node, TLI)) && useSinCos(Node)) { SDVTList VTs = DAG.getVTList(VT, VT); Tmp1 = DAG.getNode(ISD::FSINCOS, dl, VTs, Node->getOperand(0)); diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 0d5e07ded25c..a3ba52a148ee 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -1828,10 +1828,11 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, TLI.isOperationLegalOrCustom(N->getOpcode() == ISD::ADD ? ISD::UADDO : ISD::USUBO, TLI.getTypeToExpandTo(*DAG.getContext(), NVT)); + TargetLoweringBase::BooleanContent BoolType = TLI.getBooleanContents(NVT); + if (hasOVF) { EVT OvfVT = getSetCCResultType(NVT); SDVTList VTList = DAG.getVTList(NVT, OvfVT); - TargetLoweringBase::BooleanContent BoolType = TLI.getBooleanContents(NVT); int RevOpc; if (N->getOpcode() == ISD::ADD) { RevOpc = ISD::SUB; @@ -1864,6 +1865,13 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, Hi = DAG.getNode(ISD::ADD, dl, NVT, makeArrayRef(HiOps, 2)); SDValue Cmp1 = DAG.getSetCC(dl, getSetCCResultType(NVT), Lo, LoOps[0], ISD::SETULT); + + if (BoolType == TargetLoweringBase::ZeroOrOneBooleanContent) { + SDValue Carry = DAG.getZExtOrTrunc(Cmp1, dl, NVT); + Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, Carry); + return; + } + SDValue Carry1 = DAG.getSelect(dl, NVT, Cmp1, DAG.getConstant(1, dl, NVT), DAG.getConstant(0, dl, NVT)); @@ -1878,9 +1886,14 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, SDValue Cmp = DAG.getSetCC(dl, getSetCCResultType(LoOps[0].getValueType()), LoOps[0], LoOps[1], ISD::SETULT); - SDValue Borrow = DAG.getSelect(dl, NVT, Cmp, - DAG.getConstant(1, dl, NVT), - DAG.getConstant(0, dl, NVT)); + + SDValue Borrow; + if (BoolType == TargetLoweringBase::ZeroOrOneBooleanContent) + Borrow = DAG.getZExtOrTrunc(Cmp, dl, NVT); + else + Borrow = DAG.getSelect(dl, NVT, Cmp, DAG.getConstant(1, dl, NVT), + DAG.getConstant(0, dl, NVT)); + Hi = DAG.getNode(ISD::SUB, dl, NVT, Hi, Borrow); } } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index dff8bd2ad37d..7abdc76cb004 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -7244,6 +7244,24 @@ void SelectionDAG::TransferDbgValues(SDValue From, SDValue To) { AddDbgValue(I, ToNode, false); } +void SelectionDAG::makeEquivalentMemoryOrdering(LoadSDNode *OldLoad, + SDValue NewMemOp) { + assert(isa<MemSDNode>(NewMemOp.getNode()) && "Expected a memop node"); + if (!OldLoad->hasAnyUseOfValue(1)) + return; + + // The new memory operation must have the same position as the old load in + // terms of memory dependency. Create a TokenFactor for the old load and new + // memory operation and update uses of the old load's output chain to use that + // TokenFactor. 
+ SDValue OldChain = SDValue(OldLoad, 1); + SDValue NewChain = SDValue(NewMemOp.getNode(), 1); + SDValue TokenFactor = + getNode(ISD::TokenFactor, SDLoc(OldLoad), MVT::Other, OldChain, NewChain); + ReplaceAllUsesOfValueWith(OldChain, TokenFactor); + UpdateNodeOperands(TokenFactor.getNode(), OldChain, NewChain); +} + //===----------------------------------------------------------------------===// // SDNode Class //===----------------------------------------------------------------------===// diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index d34ac40b9496..f9f431db55be 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -1496,9 +1496,10 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { true /*isfixed*/, 1 /*origidx*/, 0 /*partOffs*/)); // Create SDNode for the swifterror virtual register. - OutVals.push_back(DAG.getRegister(FuncInfo.getOrCreateSwiftErrorVReg( - FuncInfo.MBB, FuncInfo.SwiftErrorArg), - EVT(TLI.getPointerTy(DL)))); + OutVals.push_back( + DAG.getRegister(FuncInfo.getOrCreateSwiftErrorVRegUseAt( + &I, FuncInfo.MBB, FuncInfo.SwiftErrorArg).first, + EVT(TLI.getPointerTy(DL)))); } bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg(); @@ -3581,8 +3582,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { } void SelectionDAGBuilder::visitStoreToSwiftError(const StoreInst &I) { - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - assert(TLI.supportSwiftError() && + assert(DAG.getTargetLoweringInfo().supportSwiftError() && "call visitStoreToSwiftError when backend supports swifterror"); SmallVector<EVT, 4> ValueVTs; @@ -3595,15 +3595,15 @@ void SelectionDAGBuilder::visitStoreToSwiftError(const StoreInst &I) { SDValue Src = getValue(SrcV); // Create a virtual register, then update the virtual register. - auto &DL = DAG.getDataLayout(); - const TargetRegisterClass *RC = TLI.getRegClassFor(TLI.getPointerTy(DL)); - unsigned VReg = FuncInfo.MF->getRegInfo().createVirtualRegister(RC); + unsigned VReg; bool CreatedVReg; + std::tie(VReg, CreatedVReg) = FuncInfo.getOrCreateSwiftErrorVRegDefAt(&I); // Chain, DL, Reg, N or Chain, DL, Reg, N, Glue // Chain can be getRoot or getControlRoot. 
SDValue CopyNode = DAG.getCopyToReg(getRoot(), getCurSDLoc(), VReg, SDValue(Src.getNode(), Src.getResNo())); DAG.setRoot(CopyNode); - FuncInfo.setCurrentSwiftErrorVReg(FuncInfo.MBB, I.getOperand(1), VReg); + if (CreatedVReg) + FuncInfo.setCurrentSwiftErrorVReg(FuncInfo.MBB, I.getOperand(1), VReg); } void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) { @@ -3633,7 +3633,8 @@ void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) { // Chain, DL, Reg, VT, Glue or Chain, DL, Reg, VT SDValue L = DAG.getCopyFromReg( getRoot(), getCurSDLoc(), - FuncInfo.getOrCreateSwiftErrorVReg(FuncInfo.MBB, SV), ValueVTs[0]); + FuncInfo.getOrCreateSwiftErrorVRegUseAt(&I, FuncInfo.MBB, SV).first, + ValueVTs[0]); setValue(&I, L); } @@ -4942,11 +4943,12 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { updateDAGForMaybeTailCall(MM); return nullptr; } - case Intrinsic::memcpy_element_atomic: { - SDValue Dst = getValue(I.getArgOperand(0)); - SDValue Src = getValue(I.getArgOperand(1)); - SDValue NumElements = getValue(I.getArgOperand(2)); - SDValue ElementSize = getValue(I.getArgOperand(3)); + case Intrinsic::memcpy_element_unordered_atomic: { + const ElementUnorderedAtomicMemCpyInst &MI = + cast<ElementUnorderedAtomicMemCpyInst>(I); + SDValue Dst = getValue(MI.getRawDest()); + SDValue Src = getValue(MI.getRawSource()); + SDValue Length = getValue(MI.getLength()); // Emit a library call. TargetLowering::ArgListTy Args; @@ -4958,18 +4960,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { Entry.Node = Src; Args.push_back(Entry); - Entry.Ty = I.getArgOperand(2)->getType(); - Entry.Node = NumElements; - Args.push_back(Entry); - - Entry.Ty = Type::getInt32Ty(*DAG.getContext()); - Entry.Node = ElementSize; + Entry.Ty = MI.getLength()->getType(); + Entry.Node = Length; Args.push_back(Entry); - uint64_t ElementSizeConstant = - cast<ConstantInt>(I.getArgOperand(3))->getZExtValue(); + uint64_t ElementSizeConstant = MI.getElementSizeInBytes(); RTLIB::Libcall LibraryCall = - RTLIB::getMEMCPY_ELEMENT_ATOMIC(ElementSizeConstant); + RTLIB::getMEMCPY_ELEMENT_UNORDERED_ATOMIC(ElementSizeConstant); if (LibraryCall == RTLIB::UNKNOWN_LIBCALL) report_fatal_error("Unsupported element size"); @@ -6030,9 +6027,11 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, SwiftErrorVal = V; // We find the virtual register for the actual swifterror argument. // Instead of using the Value, we use the virtual register instead. - Entry.Node = - DAG.getRegister(FuncInfo.getOrCreateSwiftErrorVReg(FuncInfo.MBB, V), - EVT(TLI.getPointerTy(DL))); + Entry.Node = DAG.getRegister(FuncInfo + .getOrCreateSwiftErrorVRegUseAt( + CS.getInstruction(), FuncInfo.MBB, V) + .first, + EVT(TLI.getPointerTy(DL))); } Args.push_back(Entry); @@ -6073,11 +6072,13 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, if (SwiftErrorVal && TLI.supportSwiftError()) { // Get the last element of InVals. SDValue Src = CLI.InVals.back(); - const TargetRegisterClass *RC = TLI.getRegClassFor(TLI.getPointerTy(DL)); - unsigned VReg = FuncInfo.MF->getRegInfo().createVirtualRegister(RC); + unsigned VReg; bool CreatedVReg; + std::tie(VReg, CreatedVReg) = + FuncInfo.getOrCreateSwiftErrorVRegDefAt(CS.getInstruction()); SDValue CopyNode = CLI.DAG.getCopyToReg(Result.second, CLI.DL, VReg, Src); // We update the virtual register for the actual swifterror argument. 
- FuncInfo.setCurrentSwiftErrorVReg(FuncInfo.MBB, SwiftErrorVal, VReg); + if (CreatedVReg) + FuncInfo.setCurrentSwiftErrorVReg(FuncInfo.MBB, SwiftErrorVal, VReg); DAG.setRoot(CopyNode); } } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index b67f11f85b70..dcccd17bb98e 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -1055,6 +1055,7 @@ static void setupSwiftErrorVals(const Function &Fn, const TargetLowering *TLI, FuncInfo->SwiftErrorVals.clear(); FuncInfo->SwiftErrorVRegDefMap.clear(); FuncInfo->SwiftErrorVRegUpwardsUse.clear(); + FuncInfo->SwiftErrorVRegDefUses.clear(); FuncInfo->SwiftErrorArg = nullptr; // Check if function has a swifterror argument. @@ -1278,6 +1279,80 @@ static void propagateSwiftErrorVRegs(FunctionLoweringInfo *FuncInfo) { } } +void preassignSwiftErrorRegs(const TargetLowering *TLI, + FunctionLoweringInfo *FuncInfo, + BasicBlock::const_iterator Begin, + BasicBlock::const_iterator End) { + if (!TLI->supportSwiftError() || FuncInfo->SwiftErrorVals.empty()) + return; + + // Iterator over instructions and assign vregs to swifterror defs and uses. + for (auto It = Begin; It != End; ++It) { + ImmutableCallSite CS(&*It); + if (CS) { + // A call-site with a swifterror argument is both use and def. + const Value *SwiftErrorAddr = nullptr; + for (auto &Arg : CS.args()) { + if (!Arg->isSwiftError()) + continue; + // Use of swifterror. + assert(!SwiftErrorAddr && "Cannot have multiple swifterror arguments"); + SwiftErrorAddr = &*Arg; + assert(SwiftErrorAddr->isSwiftError() && + "Must have a swifterror value argument"); + unsigned VReg; bool CreatedReg; + std::tie(VReg, CreatedReg) = FuncInfo->getOrCreateSwiftErrorVRegUseAt( + &*It, FuncInfo->MBB, SwiftErrorAddr); + assert(CreatedReg); + } + if (!SwiftErrorAddr) + continue; + + // Def of swifterror. + unsigned VReg; bool CreatedReg; + std::tie(VReg, CreatedReg) = + FuncInfo->getOrCreateSwiftErrorVRegDefAt(&*It); + assert(CreatedReg); + FuncInfo->setCurrentSwiftErrorVReg(FuncInfo->MBB, SwiftErrorAddr, VReg); + + // A load is a use. + } else if (const LoadInst *LI = dyn_cast<const LoadInst>(&*It)) { + const Value *V = LI->getOperand(0); + if (!V->isSwiftError()) + continue; + + unsigned VReg; bool CreatedReg; + std::tie(VReg, CreatedReg) = + FuncInfo->getOrCreateSwiftErrorVRegUseAt(LI, FuncInfo->MBB, V); + assert(CreatedReg); + + // A store is a def. + } else if (const StoreInst *SI = dyn_cast<const StoreInst>(&*It)) { + const Value *SwiftErrorAddr = SI->getOperand(1); + if (!SwiftErrorAddr->isSwiftError()) + continue; + + // Def of swifterror. + unsigned VReg; bool CreatedReg; + std::tie(VReg, CreatedReg) = + FuncInfo->getOrCreateSwiftErrorVRegDefAt(&*It); + assert(CreatedReg); + FuncInfo->setCurrentSwiftErrorVReg(FuncInfo->MBB, SwiftErrorAddr, VReg); + + // A return in a swiferror returning function is a use. + } else if (const ReturnInst *R = dyn_cast<const ReturnInst>(&*It)) { + const Function *F = R->getParent()->getParent(); + if(!F->getAttributes().hasAttrSomewhere(Attribute::SwiftError)) + continue; + + unsigned VReg; bool CreatedReg; + std::tie(VReg, CreatedReg) = FuncInfo->getOrCreateSwiftErrorVRegUseAt( + R, FuncInfo->MBB, FuncInfo->SwiftErrorArg); + assert(CreatedReg); + } + } +} + void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { FastISelFailed = false; // Initialize the Fast-ISel state, if needed. 
@@ -1384,6 +1459,10 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { FastIS->startNewBlock(); unsigned NumFastIselRemaining = std::distance(Begin, End); + + // Pre-assign swifterror vregs. + preassignSwiftErrorRegs(TLI, FuncInfo, Begin, End); + // Do FastISel on as many instructions as possible. for (; BI != Begin; --BI) { const Instruction *Inst = &*std::prev(BI); diff --git a/lib/CodeGen/SplitKit.cpp b/lib/CodeGen/SplitKit.cpp index 3a50aaa69985..008b984dd961 100644 --- a/lib/CodeGen/SplitKit.cpp +++ b/lib/CodeGen/SplitKit.cpp @@ -569,8 +569,7 @@ SlotIndex SplitEditor::buildCopy(unsigned FromReg, unsigned ToReg, // Greedy heuristic: Keep iterating keeping the best covering subreg index // each time. - LaneBitmask LanesLeft = - LaneMask & ~(TRI.getSubRegIndexLaneMask(BestCover)); + LaneBitmask LanesLeft = LaneMask & ~(TRI.getSubRegIndexLaneMask(BestIdx)); while (LanesLeft.any()) { unsigned BestIdx = 0; int BestCover = INT_MIN; diff --git a/lib/CodeGen/StackColoring.cpp b/lib/CodeGen/StackColoring.cpp index acb3676fdd71..6bac39c7ee77 100644 --- a/lib/CodeGen/StackColoring.cpp +++ b/lib/CodeGen/StackColoring.cpp @@ -86,10 +86,134 @@ STATISTIC(StackSpaceSaved, "Number of bytes saved due to merging slots."); STATISTIC(StackSlotMerged, "Number of stack slot merged."); STATISTIC(EscapedAllocas, "Number of allocas that escaped the lifetime region"); +//===----------------------------------------------------------------------===// +// StackColoring Pass +//===----------------------------------------------------------------------===// +// +// Stack Coloring reduces stack usage by merging stack slots when they +// can't be used together. For example, consider the following C program: +// +// void bar(char *, int); +// void foo(bool var) { +// A: { +// char z[4096]; +// bar(z, 0); +// } +// +// char *p; +// char x[4096]; +// char y[4096]; +// if (var) { +// p = x; +// } else { +// bar(y, 1); +// p = y + 1024; +// } +// B: +// bar(p, 2); +// } +// +// Naively-compiled, this program would use 12k of stack space. However, the +// stack slot corresponding to `z` is always destroyed before either of the +// stack slots for `x` or `y` are used, and then `x` is only used if `var` +// is true, while `y` is only used if `var` is false. So in no time are 2 +// of the stack slots used together, and therefore we can merge them, +// compiling the function using only a single 4k alloca: +// +// void foo(bool var) { // equivalent +// char x[4096]; +// char *p; +// bar(x, 0); +// if (var) { +// p = x; +// } else { +// bar(x, 1); +// p = x + 1024; +// } +// bar(p, 2); +// } +// +// This is an important optimization if we want stack space to be under +// control in large functions, both open-coded ones and ones created by +// inlining. // // Implementation Notes: // --------------------- // +// An important part of the above reasoning is that `z` can't be accessed +// while the latter 2 calls to `bar` are running. This is justified because +// `z`'s lifetime is over after we exit from block `A:`, so any further +// accesses to it would be UB. The way we represent this information +// in LLVM is by having frontends delimit blocks with `lifetime.start` +// and `lifetime.end` intrinsics. +// +// The effect of these intrinsics seems to be as follows (maybe I should +// specify this in the reference?): +// +// L1) at start, each stack-slot is marked as *out-of-scope*, unless no +// lifetime intrinsic refers to that stack slot, in which case +// it is marked as *in-scope*. 
+// L2) on a `lifetime.start`, a stack slot is marked as *in-scope* and +// the stack slot is overwritten with `undef`. +// L3) on a `lifetime.end`, a stack slot is marked as *out-of-scope*. +// L4) on function exit, all stack slots are marked as *out-of-scope*. +// L5) `lifetime.end` is a no-op when called on a slot that is already +// *out-of-scope*. +// L6) memory accesses to *out-of-scope* stack slots are UB. +// L7) when a stack-slot is marked as *out-of-scope*, all pointers to it +// are invalidated, unless the slot is "degenerate". This is used to +// justify not marking slots as in-use until the pointer to them is +// used, but feels a bit hacky in the presence of things like LICM. See +// the "Degenerate Slots" section for more details. +// +// Now, let's ground stack coloring on these rules. We'll define a slot +// as *in-use* at a (dynamic) point in execution if it either can be +// written to at that point, or if it has a live and non-undef content +// at that point. +// +// Obviously, slots that are never *in-use* together can be merged, and +// in our example `foo`, the slots for `x`, `y` and `z` are never +// in-use together (of course, sometimes slots that *are* in-use together +// might still be mergable, but we don't care about that here). +// +// In this implementation, we successively merge pairs of slots that are +// not *in-use* together. We could be smarter - for example, we could merge +// a single large slot with 2 small slots, or we could construct the +// interference graph and run a "smart" graph coloring algorithm, but with +// that aside, how do we find out whether a pair of slots might be *in-use* +// together? +// +// From our rules, we see that *out-of-scope* slots are never *in-use*, +// and from (L7) we see that "non-degenerate" slots remain non-*in-use* +// until their address is taken. Therefore, we can approximate slot activity +// using dataflow. +// +// A subtle point: naively, we might try to figure out which pairs of +// stack-slots interfere by propagating `S in-use` through the CFG for every +// stack-slot `S`, and having `S` and `T` interfere if there is a CFG point in +// which they are both *in-use*. +// +// That is sound, but overly conservative in some cases: in our (artificial) +// example `foo`, either `x` or `y` might be in use at the label `B:`, but +// as `x` is only in use if we came in from the `var` edge and `y` only +// if we came from the `!var` edge, they still can't be in use together. +// See PR32488 for an important real-life case. +// +// If we wanted to find all points of interference precisely, we could +// propagate `S in-use` and `S&T in-use` predicates through the CFG. That +// would be precise, but requires propagating `O(n^2)` dataflow facts. +// +// However, we aren't interested in the *set* of points of interference +// between 2 stack slots, only *whether* there *is* such a point. So we +// can rely on a little trick: for `S` and `T` to be in-use together, +// one of them needs to become in-use while the other is in-use (or +// they might both become in use simultaneously). We can check this +// by also keeping track of the points at which a stack slot might *start* +// being in-use. +// +// Exact first use: +// ---------------- +// // Consider the following motivating example: // // int foo() { @@ -158,6 +282,9 @@ STATISTIC(EscapedAllocas, "Number of allocas that escaped the lifetime region"); // lifetime, we can additionally overlap b1 and b5, giving us a 3*1024 // byte stack (better). 
// +// Degenerate Slots: +// ----------------- +// // Relying entirely on first-use of stack slots is problematic, // however, due to the fact that optimizations can sometimes migrate // uses of a variable outside of its lifetime start/end region. Here @@ -237,10 +364,6 @@ STATISTIC(EscapedAllocas, "Number of allocas that escaped the lifetime region"); // for "b" then it will appear that 'b' has a degenerate lifetime. // -//===----------------------------------------------------------------------===// -// StackColoring Pass -//===----------------------------------------------------------------------===// - namespace { /// StackColoring - A machine pass for merging disjoint stack allocations, /// marked by the LIFETIME_START and LIFETIME_END pseudo instructions. @@ -271,8 +394,11 @@ class StackColoring : public MachineFunctionPass { /// Maps basic blocks to a serial number. SmallVector<const MachineBasicBlock*, 8> BasicBlockNumbering; - /// Maps liveness intervals for each slot. + /// Maps slots to their use interval. Outside of this interval, slots + /// values are either dead or `undef` and they will not be written to. SmallVector<std::unique_ptr<LiveInterval>, 16> Intervals; + /// Maps slots to the points where they can become in-use. + SmallVector<SmallVector<SlotIndex, 4>, 16> LiveStarts; /// VNInfo is used for the construction of LiveIntervals. VNInfo::Allocator VNInfoAllocator; /// SlotIndex analysis object. @@ -672,15 +798,22 @@ void StackColoring::calculateLocalLiveness() void StackColoring::calculateLiveIntervals(unsigned NumSlots) { SmallVector<SlotIndex, 16> Starts; - SmallVector<SlotIndex, 16> Finishes; + SmallVector<bool, 16> DefinitelyInUse; // For each block, find which slots are active within this block // and update the live intervals. for (const MachineBasicBlock &MBB : *MF) { Starts.clear(); Starts.resize(NumSlots); - Finishes.clear(); - Finishes.resize(NumSlots); + DefinitelyInUse.clear(); + DefinitelyInUse.resize(NumSlots); + + // Start the interval of the slots that we previously found to be 'in-use'. + BlockLifetimeInfo &MBBLiveness = BlockLiveness[&MBB]; + for (int pos = MBBLiveness.LiveIn.find_first(); pos != -1; + pos = MBBLiveness.LiveIn.find_next(pos)) { + Starts[pos] = Indexes->getMBBStartIdx(&MBB); + } // Create the interval for the basic blocks containing lifetime begin/end. for (const MachineInstr &MI : MBB) { @@ -692,66 +825,35 @@ void StackColoring::calculateLiveIntervals(unsigned NumSlots) { SlotIndex ThisIndex = Indexes->getInstructionIndex(MI); for (auto Slot : slots) { if (IsStart) { - if (!Starts[Slot].isValid() || Starts[Slot] > ThisIndex) + // If a slot is already definitely in use, we don't have to emit + // a new start marker because there is already a pre-existing + // one. + if (!DefinitelyInUse[Slot]) { + LiveStarts[Slot].push_back(ThisIndex); + DefinitelyInUse[Slot] = true; + } + if (!Starts[Slot].isValid()) Starts[Slot] = ThisIndex; } else { - if (!Finishes[Slot].isValid() || Finishes[Slot] < ThisIndex) - Finishes[Slot] = ThisIndex; + if (Starts[Slot].isValid()) { + VNInfo *VNI = Intervals[Slot]->getValNumInfo(0); + Intervals[Slot]->addSegment( + LiveInterval::Segment(Starts[Slot], ThisIndex, VNI)); + Starts[Slot] = SlotIndex(); // Invalidate the start index + DefinitelyInUse[Slot] = false; + } } } } - // Create the interval of the blocks that we previously found to be 'alive'. 
- BlockLifetimeInfo &MBBLiveness = BlockLiveness[&MBB]; - for (unsigned pos : MBBLiveness.LiveIn.set_bits()) { - Starts[pos] = Indexes->getMBBStartIdx(&MBB); - } - for (unsigned pos : MBBLiveness.LiveOut.set_bits()) { - Finishes[pos] = Indexes->getMBBEndIdx(&MBB); - } - + // Finish up started segments for (unsigned i = 0; i < NumSlots; ++i) { - // - // When LifetimeStartOnFirstUse is turned on, data flow analysis - // is forward (from starts to ends), not bidirectional. A - // consequence of this is that we can wind up in situations - // where Starts[i] is invalid but Finishes[i] is valid and vice - // versa. Example: - // - // LIFETIME_START x - // if (...) { - // <use of x> - // throw ...; - // } - // LIFETIME_END x - // return 2; - // - // - // Here the slot for "x" will not be live into the block - // containing the "return 2" (since lifetimes start with first - // use, not at the dominating LIFETIME_START marker). - // - if (Starts[i].isValid() && !Finishes[i].isValid()) { - Finishes[i] = Indexes->getMBBEndIdx(&MBB); - } if (!Starts[i].isValid()) continue; - assert(Starts[i] && Finishes[i] && "Invalid interval"); - VNInfo *ValNum = Intervals[i]->getValNumInfo(0); - SlotIndex S = Starts[i]; - SlotIndex F = Finishes[i]; - if (S < F) { - // We have a single consecutive region. - Intervals[i]->addSegment(LiveInterval::Segment(S, F, ValNum)); - } else { - // We have two non-consecutive regions. This happens when - // LIFETIME_START appears after the LIFETIME_END marker. - SlotIndex NewStart = Indexes->getMBBStartIdx(&MBB); - SlotIndex NewFin = Indexes->getMBBEndIdx(&MBB); - Intervals[i]->addSegment(LiveInterval::Segment(NewStart, F, ValNum)); - Intervals[i]->addSegment(LiveInterval::Segment(S, NewFin, ValNum)); - } + SlotIndex EndIdx = Indexes->getMBBEndIdx(&MBB); + VNInfo *VNI = Intervals[i]->getValNumInfo(0); + Intervals[i]->addSegment(LiveInterval::Segment(Starts[i], EndIdx, VNI)); } } } @@ -981,6 +1083,7 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) { BasicBlockNumbering.clear(); Markers.clear(); Intervals.clear(); + LiveStarts.clear(); VNInfoAllocator.Reset(); unsigned NumSlots = MFI->getObjectIndexEnd(); @@ -992,6 +1095,7 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) { SmallVector<int, 8> SortedSlots; SortedSlots.reserve(NumSlots); Intervals.reserve(NumSlots); + LiveStarts.resize(NumSlots); unsigned NumMarkers = collectMarkers(NumSlots); @@ -1063,6 +1167,9 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) { return MFI->getObjectSize(LHS) > MFI->getObjectSize(RHS); }); + for (auto &s : LiveStarts) + std::sort(s.begin(), s.end()); + bool Changed = true; while (Changed) { Changed = false; @@ -1078,12 +1185,22 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) { int SecondSlot = SortedSlots[J]; LiveInterval *First = &*Intervals[FirstSlot]; LiveInterval *Second = &*Intervals[SecondSlot]; + auto &FirstS = LiveStarts[FirstSlot]; + auto &SecondS = LiveStarts[SecondSlot]; assert (!First->empty() && !Second->empty() && "Found an empty range"); - // Merge disjoint slots. - if (!First->overlaps(*Second)) { + // Merge disjoint slots. This is a little bit tricky - see the + // Implementation Notes section for an explanation. 
+ if (!First->isLiveAtIndexes(SecondS) && + !Second->isLiveAtIndexes(FirstS)) { Changed = true; First->MergeSegmentsInAsValue(*Second, First->getValNumInfo(0)); + + int OldSize = FirstS.size(); + FirstS.append(SecondS.begin(), SecondS.end()); + auto Mid = FirstS.begin() + OldSize; + std::inplace_merge(FirstS.begin(), Mid, FirstS.end()); + SlotRemap[SecondSlot] = FirstSlot; SortedSlots[J] = -1; DEBUG(dbgs()<<"Merging #"<<FirstSlot<<" and slots #"<< diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp index 581cfaf60755..e9d38c10c860 100644 --- a/lib/CodeGen/TargetLoweringBase.cpp +++ b/lib/CodeGen/TargetLoweringBase.cpp @@ -374,11 +374,16 @@ static void InitLibcallNames(const char **Names, const Triple &TT) { Names[RTLIB::MEMCPY] = "memcpy"; Names[RTLIB::MEMMOVE] = "memmove"; Names[RTLIB::MEMSET] = "memset"; - Names[RTLIB::MEMCPY_ELEMENT_ATOMIC_1] = "__llvm_memcpy_element_atomic_1"; - Names[RTLIB::MEMCPY_ELEMENT_ATOMIC_2] = "__llvm_memcpy_element_atomic_2"; - Names[RTLIB::MEMCPY_ELEMENT_ATOMIC_4] = "__llvm_memcpy_element_atomic_4"; - Names[RTLIB::MEMCPY_ELEMENT_ATOMIC_8] = "__llvm_memcpy_element_atomic_8"; - Names[RTLIB::MEMCPY_ELEMENT_ATOMIC_16] = "__llvm_memcpy_element_atomic_16"; + Names[RTLIB::MEMCPY_ELEMENT_UNORDERED_ATOMIC_1] = + "__llvm_memcpy_element_unordered_atomic_1"; + Names[RTLIB::MEMCPY_ELEMENT_UNORDERED_ATOMIC_2] = + "__llvm_memcpy_element_unordered_atomic_2"; + Names[RTLIB::MEMCPY_ELEMENT_UNORDERED_ATOMIC_4] = + "__llvm_memcpy_element_unordered_atomic_4"; + Names[RTLIB::MEMCPY_ELEMENT_UNORDERED_ATOMIC_8] = + "__llvm_memcpy_element_unordered_atomic_8"; + Names[RTLIB::MEMCPY_ELEMENT_UNORDERED_ATOMIC_16] = + "__llvm_memcpy_element_unordered_atomic_16"; Names[RTLIB::UNWIND_RESUME] = "_Unwind_Resume"; Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1] = "__sync_val_compare_and_swap_1"; Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2] = "__sync_val_compare_and_swap_2"; @@ -781,22 +786,21 @@ RTLIB::Libcall RTLIB::getSYNC(unsigned Opc, MVT VT) { return UNKNOWN_LIBCALL; } -RTLIB::Libcall RTLIB::getMEMCPY_ELEMENT_ATOMIC(uint64_t ElementSize) { +RTLIB::Libcall RTLIB::getMEMCPY_ELEMENT_UNORDERED_ATOMIC(uint64_t ElementSize) { switch (ElementSize) { case 1: - return MEMCPY_ELEMENT_ATOMIC_1; + return MEMCPY_ELEMENT_UNORDERED_ATOMIC_1; case 2: - return MEMCPY_ELEMENT_ATOMIC_2; + return MEMCPY_ELEMENT_UNORDERED_ATOMIC_2; case 4: - return MEMCPY_ELEMENT_ATOMIC_4; + return MEMCPY_ELEMENT_UNORDERED_ATOMIC_4; case 8: - return MEMCPY_ELEMENT_ATOMIC_8; + return MEMCPY_ELEMENT_UNORDERED_ATOMIC_8; case 16: - return MEMCPY_ELEMENT_ATOMIC_16; + return MEMCPY_ELEMENT_UNORDERED_ATOMIC_16; default: return UNKNOWN_LIBCALL; } - } /// InitCmpLibcallCCs - Set default comparison libcall CC. diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index a0c68e1dcce8..6922e33c8d6c 100644 --- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -61,9 +61,11 @@ using namespace llvm; using namespace dwarf; -static void GetObjCImageInfo(ArrayRef<Module::ModuleFlagEntry> ModuleFlags, - unsigned &Version, unsigned &Flags, +static void GetObjCImageInfo(Module &M, unsigned &Version, unsigned &Flags, StringRef &Section) { + SmallVector<Module::ModuleFlagEntry, 8> ModuleFlags; + M.getModuleFlagsMetadata(ModuleFlags); + for (const auto &MFE: ModuleFlags) { // Ignore flags with 'Require' behaviour. 
if (MFE.Behavior == Module::Require) @@ -88,14 +90,13 @@ static void GetObjCImageInfo(ArrayRef<Module::ModuleFlagEntry> ModuleFlags, // ELF //===----------------------------------------------------------------------===// -void TargetLoweringObjectFileELF::emitModuleFlags( - MCStreamer &Streamer, ArrayRef<Module::ModuleFlagEntry> ModuleFlags, - const TargetMachine &TM) const { +void TargetLoweringObjectFileELF::emitModuleMetadata( + MCStreamer &Streamer, Module &M, const TargetMachine &TM) const { unsigned Version = 0; unsigned Flags = 0; StringRef Section; - GetObjCImageInfo(ModuleFlags, Version, Flags, Section); + GetObjCImageInfo(M, Version, Flags, Section); if (Section.empty()) return; @@ -618,20 +619,10 @@ void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx, } } -/// emitModuleFlags - Perform code emission for module flags. -void TargetLoweringObjectFileMachO::emitModuleFlags( - MCStreamer &Streamer, ArrayRef<Module::ModuleFlagEntry> ModuleFlags, - const TargetMachine &TM) const { - MDNode *LinkerOptions = nullptr; - - for (const auto &MFE : ModuleFlags) { - StringRef Key = MFE.Key->getString(); - if (Key == "Linker Options") - LinkerOptions = cast<MDNode>(MFE.Val); - } - +void TargetLoweringObjectFileMachO::emitModuleMetadata( + MCStreamer &Streamer, Module &M, const TargetMachine &TM) const { // Emit the linker options if present. - if (LinkerOptions) { + if (auto *LinkerOptions = M.getNamedMetadata("llvm.linker.options")) { for (const auto &Option : LinkerOptions->operands()) { SmallVector<std::string, 4> StrOptions; for (const auto &Piece : cast<MDNode>(Option)->operands()) @@ -643,7 +634,8 @@ void TargetLoweringObjectFileMachO::emitModuleFlags( unsigned VersionVal = 0; unsigned ImageInfoFlags = 0; StringRef SectionVal; - GetObjCImageInfo(ModuleFlags, VersionVal, ImageInfoFlags, SectionVal); + + GetObjCImageInfo(M, VersionVal, ImageInfoFlags, SectionVal); // The section is mandatory. If we don't have it, then we don't have GC info. if (SectionVal.empty()) @@ -1159,18 +1151,9 @@ MCSection *TargetLoweringObjectFileCOFF::getSectionForJumpTable( COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE, UniqueID); } -void TargetLoweringObjectFileCOFF::emitModuleFlags( - MCStreamer &Streamer, ArrayRef<Module::ModuleFlagEntry> ModuleFlags, - const TargetMachine &TM) const { - MDNode *LinkerOptions = nullptr; - - for (const auto &MFE : ModuleFlags) { - StringRef Key = MFE.Key->getString(); - if (Key == "Linker Options") - LinkerOptions = cast<MDNode>(MFE.Val); - } - - if (LinkerOptions) { +void TargetLoweringObjectFileCOFF::emitModuleMetadata( + MCStreamer &Streamer, Module &M, const TargetMachine &TM) const { + if (NamedMDNode *LinkerOptions = M.getNamedMetadata("llvm.linker.options")) { // Emit the linker options to the linker .drectve section. According to the // spec, this section is a space-separated string containing flags for // linker. @@ -1190,7 +1173,7 @@ void TargetLoweringObjectFileCOFF::emitModuleFlags( unsigned Flags = 0; StringRef Section; - GetObjCImageInfo(ModuleFlags, Version, Flags, Section); + GetObjCImageInfo(M, Version, Flags, Section); if (Section.empty()) return; |
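Notes on a few of the changes above, each with a small self-contained C++ sketch written for this page (illustrations, not LLVM code).

The DWARF expression work replaces `DW_OP_plus` with `DW_OP_plus_uconst` wherever a register-relative offset is pushed as an `[opcode, offset]` element pair. In DWARF, `DW_OP_plus` is a binary stack operation that pops two values, while `DW_OP_plus_uconst` carries its addend inline as a ULEB128 operand, so the old pairs were describing the latter all along. A toy evaluator built from the DWARF definitions (not from LLVM's `DwarfExpression` API) makes the difference concrete:

```cpp
#include <cstdint>
#include <iostream>
#include <vector>

// Toy DWARF expression evaluator, only enough to contrast DW_OP_plus
// (binary, pops two stack values) with DW_OP_plus_uconst (unary, the
// addend is an inline operand). The opcode values are the real DWARF ones.
enum : uint64_t { DW_OP_plus = 0x22, DW_OP_plus_uconst = 0x23 };

uint64_t evaluate(const std::vector<uint64_t> &Expr, uint64_t Base) {
  std::vector<uint64_t> Stack{Base};
  for (size_t I = 0; I < Expr.size(); ++I) {
    switch (Expr[I]) {
    case DW_OP_plus: { // pops two values, pushes their sum
      uint64_t A = Stack.back(); Stack.pop_back();
      uint64_t B = Stack.back(); Stack.pop_back();
      Stack.push_back(A + B);
      break;
    }
    case DW_OP_plus_uconst: // the addend travels with the opcode
      Stack.back() += Expr[++I];
      break;
    default: // anything else is treated as a literal push here
      Stack.push_back(Expr[I]);
    }
  }
  return Stack.back();
}

int main() {
  // New form used throughout this commit: [DW_OP_plus_uconst, 16].
  std::cout << evaluate({DW_OP_plus_uconst, 16}, 1000) << "\n"; // 1016
  // DW_OP_plus needs a second stack value, e.g. pushed as a literal.
  std::cout << evaluate({16, DW_OP_plus}, 1000) << "\n";        // 1016
}
```

This is also why `addMachineRegExpression` now folds `[Reg, DW_OP_plus_uconst, Offset]` directly into `DW_OP_breg`, and needs a `DW_OP_constu` lookahead to handle the genuinely binary `DW_OP_plus`/`DW_OP_minus` forms.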
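The GlobalISel legalizer gains libcall support for `G_SREM` and `G_UREM`, handled like the existing 32-bit division opcodes. The dispatch reduces to an opcode-and-size table, sketched below; the concrete runtime symbol names follow the usual libgcc conventions and are an assumption here, since the diff itself only selects `RTLIB::SREM_I32`/`RTLIB::UREM_I32` enum values:

```cpp
#include <cassert>
#include <string>

// Shape of getRTLibDesc() from the diff: opcode plus bit width pick a
// runtime routine. Symbol names are the conventional libgcc ones and
// are assumptions for illustration.
enum class Opcode { SDiv, UDiv, SRem, URem };

std::string getRTLibName(Opcode Opc, unsigned Size) {
  assert(Size == 32 && "Unsupported size");
  switch (Opc) {
  case Opcode::SDiv: return "__divsi3";
  case Opcode::UDiv: return "__udivsi3";
  case Opcode::SRem: return "__modsi3";  // new in this commit (G_SREM)
  case Opcode::URem: return "__umodsi3"; // new in this commit (G_UREM)
  }
  return "";
}
```

The extracted `replaceWithLibcall` helper also fixes the builder bookkeeping: it sets the insert point to the instruction being replaced, lowers the call, then advances the insert point past the instruction before calling `eraseFromParent()`, so legalization can keep appending at a valid position.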
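`MachineIRBuilder::materializeGEP` lets `narrowScalar` split wide loads and stores without emitting a `G_CONSTANT 0` plus `G_GEP` for the first part. A shape-only sketch with hypothetical stand-in types (`Reg`, `InstrHandle`, `Builder` are inventions for this example, not the real MachineIRBuilder API):

```cpp
#include <cstdint>
#include <optional>

// Hypothetical stand-ins, for shape only; not the real GlobalISel types.
struct Reg { unsigned Id = 0; };
struct InstrHandle { unsigned InstrId = 0; };

struct Builder {
  unsigned NextReg = 1, NextInstr = 1;
  Reg createVirtualRegister() { return Reg{NextReg++}; }
  InstrHandle buildConstant(Reg, uint64_t) { return {NextInstr++}; }
  InstrHandle buildGEP(Reg, Reg, Reg) { return {NextInstr++}; }

  // Like MachineIRBuilder::materializeGEP in this commit: returns the
  // GEP it emitted, or nothing when Offset == 0 and the base register
  // can be reused unchanged.
  std::optional<InstrHandle> materializeGEP(Reg &Res, Reg Base,
                                            uint64_t Offset) {
    if (Offset == 0) {
      Res = Base; // no instruction needed; alias the base pointer
      return std::nullopt;
    }
    Res = createVirtualRegister();
    Reg Tmp = createVirtualRegister(); // holds the offset constant
    buildConstant(Tmp, Offset);
    return buildGEP(Res, Base, Tmp);
  }
};
```

`Res` is purely an output: for part 0 the byte adjustment is zero and the result simply aliases the base pointer, which is why the real method asserts `Res == 0` on entry and can return `None` to signal that no instruction was created.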
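In `ExpandIntRes_ADDSUB`, targets whose boolean content is `ZeroOrOneBooleanContent` now feed the `SETULT` compare straight into the high half through a zero-extend, instead of routing it through `select(cmp, 1, 0)`. The same trick at the C level, expanding a 64-bit add into 32-bit halves (a sketch of the idea, not the DAG code):

```cpp
#include <cstdint>
#include <cstdio>

// Expand a 64-bit add into two 32-bit halves the way
// ExpandIntRes_ADDSUB does after this commit when booleans are
// ZeroOrOne: the unsigned-overflow compare *is* the carry, so it is
// zero-extended and added rather than routed through a select.
void add64(uint32_t ALo, uint32_t AHi, uint32_t BLo, uint32_t BHi,
           uint32_t &Lo, uint32_t &Hi) {
  Lo = ALo + BLo;
  uint32_t Carry = (Lo < ALo); // setcc ult, already 0 or 1
  Hi = AHi + BHi + Carry;      // no select(cmp, 1, 0) needed
}

int main() {
  uint32_t Lo, Hi;
  add64(0xFFFFFFFFu, 1, 2, 3, Lo, Hi);
  std::printf("hi=%u lo=%u\n", Hi, Lo); // hi=5 lo=1 (carry propagated)
}
```

The subtraction path gets the matching change: the borrow becomes `zext(setcc ult)` when booleans are zero-or-one, and keeps the old select otherwise.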
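The StackColoring rewrite replaces the plain interval-overlap test with the question the new Implementation Notes motivate: can one slot *start* being in use at a point where the other is live? A self-contained sketch, with half-open integer segments standing in for `LiveInterval` and sorted indices standing in for `LiveStarts`:

```cpp
#include <algorithm>
#include <utility>
#include <vector>

// Half-open [Begin, End) segments stand in for a LiveInterval; a sorted
// vector of indices stands in for the new LiveStarts[] side table.
using Segment = std::pair<int, int>;

bool liveAt(const std::vector<Segment> &LI, int Idx) {
  for (const Segment &S : LI)
    if (S.first <= Idx && Idx < S.second)
      return true;
  return false;
}

// Counterpart of the new merge test in StackColoring: two slots
// interfere only if one becomes in-use at a point where the other is
// live. Plain segment overlap was too conservative (see PR32488).
bool interfere(const std::vector<Segment> &First,
               const std::vector<int> &FirstStarts,
               const std::vector<Segment> &Second,
               const std::vector<int> &SecondStarts) {
  return std::any_of(SecondStarts.begin(), SecondStarts.end(),
                     [&](int S) { return liveAt(First, S); }) ||
         std::any_of(FirstStarts.begin(), FirstStarts.end(),
                     [&](int S) { return liveAt(Second, S); });
}
```

Under this test, two slots whose intervals both reach a join block but whose first uses lie on disjoint CFG edges (the `x`/`y` case in the new `foo` example; see PR32488) no longer interfere, even though their intervals overlap at the join.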
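The swifterror rework pre-assigns one virtual register per swifterror def and per use, cached by instruction plus a def/use bit (a `PointerIntPair<const Instruction *, 1, bool>` key in the diff). A minimal sketch of that cache using a `std::map` pair key as a stand-in:

```cpp
#include <map>
#include <utility>

// Sketch of the SwiftErrorVRegDefUses cache: at most one vreg per
// swifterror def and one per use of each instruction, keyed by
// (instruction, is-def). The real code packs the bool into a
// PointerIntPair; a std::map pair key models the same idea.
struct SwiftErrorTable {
  std::map<std::pair<const void *, bool>, unsigned> DefUses;
  unsigned NextVReg = 1;

  // Returns {vreg, created}, mirroring the pair returned by the new
  // getOrCreateSwiftErrorVRegDefAt/UseAt helpers.
  std::pair<unsigned, bool> getOrCreate(const void *Inst, bool IsDef) {
    auto Key = std::make_pair(Inst, IsDef);
    auto It = DefUses.find(Key);
    if (It != DefUses.end())
      return {It->second, false};
    unsigned VReg = NextVReg++;
    DefUses.emplace(Key, VReg);
    return {VReg, true};
  }
};
```

`preassignSwiftErrorRegs` walks each block's instructions before FastISel starts so that FastISel and SelectionDAG agree on which vreg a given def or use receives; the `created` flag tells callers whether `setCurrentSwiftErrorVReg` still needs to record the new current value.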