Diffstat (limited to 'lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp')
-rw-r--r-- | lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 996
1 file changed, 590 insertions, 406 deletions
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 996c95bd5f07..8708f58f1e63 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -84,10 +84,6 @@
 LimitFPPrecision("limit-float-precision",
                  cl::location(LimitFloatPrecision),
                  cl::init(0));
 
-static cl::opt<bool>
-EnableFMFInDAG("enable-fmf-dag", cl::init(true), cl::Hidden,
-               cl::desc("Enable fast-math-flags for DAG nodes"));
-
 /// Minimum jump table density for normal functions.
 static cl::opt<unsigned>
 JumpTableDensity("jump-table-density", cl::init(10), cl::Hidden,
@@ -634,10 +630,6 @@ RegsForValue::RegsForValue(LLVMContext &Context, const TargetLowering &TLI,
   }
 }
 
-/// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from
-/// this value and returns the result as a ValueVT value. This uses
-/// Chain/Flag as the input and updates them for the output Chain/Flag.
-/// If the Flag pointer is NULL, no flag is used.
 SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
                                       FunctionLoweringInfo &FuncInfo,
                                       const SDLoc &dl, SDValue &Chain,
@@ -739,10 +731,6 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
   return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(ValueVTs), Values);
 }
 
-/// getCopyToRegs - Emit a series of CopyToReg nodes that copies the
-/// specified value into the registers specified by this object. This uses
-/// Chain/Flag as the input and updates them for the output Chain/Flag.
-/// If the Flag pointer is NULL, no flag is used.
 void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG,
                                  const SDLoc &dl, SDValue &Chain, SDValue *Flag,
                                  const Value *V,
@@ -796,9 +784,6 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG,
   Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
 }
 
-/// AddInlineAsmOperands - Add this value to the specified inlineasm node
-/// operand list. This adds the code marker and includes the number of
-/// values added into it.
 void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching,
                                         unsigned MatchingIdx, const SDLoc &dl,
                                         SelectionDAG &DAG,
@@ -850,12 +835,6 @@ void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa,
   LPadToCallSiteMap.clear();
 }
 
-/// clear - Clear out the current SelectionDAG and the associated
-/// state and prepare this SelectionDAGBuilder object to be used
-/// for a new block. This doesn't clear out information about
-/// additional blocks that are needed to complete switch lowering
-/// or PHI node updating; that information is cleared out as it is
-/// consumed.
 void SelectionDAGBuilder::clear() {
   NodeMap.clear();
   UnusedArgNodeMap.clear();
@@ -867,21 +846,10 @@ void SelectionDAGBuilder::clear() {
   StatepointLowering.clear();
 }
 
-/// clearDanglingDebugInfo - Clear the dangling debug information
-/// map. This function is separated from the clear so that debug
-/// information that is dangling in a basic block can be properly
-/// resolved in a different basic block. This allows the
-/// SelectionDAG to resolve dangling debug information attached
-/// to PHI nodes.
 void SelectionDAGBuilder::clearDanglingDebugInfo() {
   DanglingDebugInfoMap.clear();
 }
 
-/// getRoot - Return the current virtual root of the Selection DAG,
-/// flushing any PendingLoad items. This must be done before emitting
-/// a store or any other node that may need to be ordered after any
-/// prior load instructions.
-///
 SDValue SelectionDAGBuilder::getRoot() {
   if (PendingLoads.empty())
     return DAG.getRoot();
@@ -901,10 +869,6 @@ SDValue SelectionDAGBuilder::getRoot() {
   return Root;
 }
 
-/// getControlRoot - Similar to getRoot, but instead of flushing all the
-/// PendingLoad items, flush all the PendingExports items. It is necessary
-/// to do this before emitting a terminator instruction.
-///
 SDValue SelectionDAGBuilder::getControlRoot() {
   SDValue Root = DAG.getRoot();
 
@@ -937,7 +901,9 @@ void SelectionDAGBuilder::visit(const Instruction &I) {
     HandlePHINodesInSuccessorBlocks(I.getParent());
   }
 
-  ++SDNodeOrder;
+  // Increase the SDNodeOrder if dealing with a non-debug instruction.
+  if (!isa<DbgInfoIntrinsic>(I))
+    ++SDNodeOrder;
 
   CurInst = &I;
 
@@ -1403,16 +1369,16 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
     const Function *F = I.getParent()->getParent();
 
     ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
-    if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex,
+    if (F->getAttributes().hasAttribute(AttributeList::ReturnIndex,
                                         Attribute::SExt))
       ExtendKind = ISD::SIGN_EXTEND;
-    else if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex,
+    else if (F->getAttributes().hasAttribute(AttributeList::ReturnIndex,
                                              Attribute::ZExt))
       ExtendKind = ISD::ZERO_EXTEND;
 
     LLVMContext &Context = F->getContext();
-    bool RetInReg = F->getAttributes().hasAttribute(AttributeSet::ReturnIndex,
-                                                    Attribute::InReg);
+    bool RetInReg = F->getAttributes().hasAttribute(
+        AttributeList::ReturnIndex, Attribute::InReg);
 
     for (unsigned j = 0; j != NumValues; ++j) {
       EVT VT = ValueVTs[j];
@@ -1582,7 +1548,8 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
                                                   MachineBasicBlock *CurBB,
                                                   MachineBasicBlock *SwitchBB,
                                                   BranchProbability TProb,
-                                                  BranchProbability FProb) {
+                                                  BranchProbability FProb,
+                                                  bool InvertCond) {
   const BasicBlock *BB = CurBB->getBasicBlock();
 
   // If the leaf of the tree is a comparison, merge the condition into
@@ -1596,10 +1563,14 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
          isExportableFromCurrentBlock(BOp->getOperand(1), BB))) {
       ISD::CondCode Condition;
       if (const ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) {
-        Condition = getICmpCondCode(IC->getPredicate());
+        ICmpInst::Predicate Pred =
+            InvertCond ? IC->getInversePredicate() : IC->getPredicate();
+        Condition = getICmpCondCode(Pred);
       } else {
         const FCmpInst *FC = cast<FCmpInst>(Cond);
-        Condition = getFCmpCondCode(FC->getPredicate());
+        FCmpInst::Predicate Pred =
+            InvertCond ? FC->getInversePredicate() : FC->getPredicate();
+        Condition = getFCmpCondCode(Pred);
         if (TM.Options.NoNaNsFPMath)
           Condition = getFCmpCodeWithoutNaN(Condition);
       }
@@ -1612,7 +1583,8 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
   }
 
   // Create a CaseBlock record representing this branch.
-  CaseBlock CB(ISD::SETEQ, Cond, ConstantInt::getTrue(*DAG.getContext()),
+  ISD::CondCode Opc = InvertCond ? ISD::SETNE : ISD::SETEQ;
+  CaseBlock CB(Opc, Cond, ConstantInt::getTrue(*DAG.getContext()),
                nullptr, TBB, FBB, CurBB, TProb, FProb);
   SwitchCases.push_back(CB);
 }
@@ -1625,16 +1597,44 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
                                                MachineBasicBlock *SwitchBB,
                                                Instruction::BinaryOps Opc,
                                                BranchProbability TProb,
-                                               BranchProbability FProb) {
-  // If this node is not part of the or/and tree, emit it as a branch.
+                                               BranchProbability FProb,
+                                               bool InvertCond) {
+  // Skip over a 'not' node that is part of the tree, and remember to invert
+  // the opcode and operands at the next level.
+  if (BinaryOperator::isNot(Cond) && Cond->hasOneUse()) {
+    const Value *CondOp = BinaryOperator::getNotArgument(Cond);
+    if (InBlock(CondOp, CurBB->getBasicBlock())) {
+      FindMergedConditions(CondOp, TBB, FBB, CurBB, SwitchBB, Opc, TProb,
+                           FProb, !InvertCond);
+      return;
+    }
+  }
+
   const Instruction *BOp = dyn_cast<Instruction>(Cond);
+  // Compute the effective opcode for Cond, taking into account whether it
+  // needs to be inverted, e.g.
+  //   and (not (or A, B)), C
+  // gets lowered as
+  //   and (and (not A, not B), C)
+  unsigned BOpc = 0;
+  if (BOp) {
+    BOpc = BOp->getOpcode();
+    if (InvertCond) {
+      if (BOpc == Instruction::And)
+        BOpc = Instruction::Or;
+      else if (BOpc == Instruction::Or)
+        BOpc = Instruction::And;
+    }
+  }
+
+  // If this node is not part of the or/and tree, emit it as a branch.
   if (!BOp || !(isa<BinaryOperator>(BOp) || isa<CmpInst>(BOp)) ||
-      (unsigned)BOp->getOpcode() != Opc || !BOp->hasOneUse() ||
+      BOpc != Opc || !BOp->hasOneUse() ||
       BOp->getParent() != CurBB->getBasicBlock() ||
       !InBlock(BOp->getOperand(0), CurBB->getBasicBlock()) ||
      !InBlock(BOp->getOperand(1), CurBB->getBasicBlock())) {
     EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB,
-                                 TProb, FProb);
+                                 TProb, FProb, InvertCond);
     return;
   }
 
@@ -1669,14 +1669,14 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
     auto NewFalseProb = TProb / 2 + FProb;
     // Emit the LHS condition.
     FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, SwitchBB, Opc,
-                         NewTrueProb, NewFalseProb);
+                         NewTrueProb, NewFalseProb, InvertCond);
 
     // Normalize A/2 and B to get A/(1+B) and 2B/(1+B).
     SmallVector<BranchProbability, 2> Probs{TProb / 2, FProb};
     BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end());
     // Emit the RHS condition into TmpBB.
     FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc,
-                         Probs[0], Probs[1]);
+                         Probs[0], Probs[1], InvertCond);
   } else {
     assert(Opc == Instruction::And && "Unknown merge op!");
     // Codegen X & Y as:
@@ -1702,14 +1702,14 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
     auto NewFalseProb = FProb / 2;
     // Emit the LHS condition.
     FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, SwitchBB, Opc,
-                         NewTrueProb, NewFalseProb);
+                         NewTrueProb, NewFalseProb, InvertCond);
 
     // Normalize A and B/2 to get 2A/(1+A) and B/(1+A).
     SmallVector<BranchProbability, 2> Probs{TProb, FProb / 2};
     BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end());
     // Emit the RHS condition into TmpBB.
     FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc,
-                         Probs[0], Probs[1]);
+                         Probs[0], Probs[1], InvertCond);
   }
 }
 
@@ -1793,7 +1793,8 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) {
       FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB,
                           Opcode,
                           getEdgeProbability(BrMBB, Succ0MBB),
-                          getEdgeProbability(BrMBB, Succ1MBB));
+                          getEdgeProbability(BrMBB, Succ1MBB),
+                          /*InvertCond=*/false);
      // If the compares in later blocks need to use values not currently
      // exported from this block, export them now. This block should always
      // be the first entry.
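The effect of the new InvertCond parameter is easiest to see on a small standalone model. The sketch below (plain C++ with illustrative enums, not LLVM's actual types) mirrors the BOpc computation in the hunk above: under an inverted condition, De Morgan's laws swap the and/or merge opcode, while the leaf comparisons (handled in EmitBranchForMergedCondition) switch to their inverse predicates.

    #include <cassert>

    // Illustrative stand-ins for Instruction::And / Instruction::Or.
    enum BinOp { And, Or, Other };

    // Mirrors the BOpc flip above: (not (or A, B)) merges as (and !A, !B),
    // so under InvertCond the effective tree opcode swaps And <-> Or.
    BinOp effectiveOpcode(BinOp Opc, bool InvertCond) {
      if (!InvertCond)
        return Opc;
      if (Opc == And)
        return Or;
      if (Opc == Or)
        return And;
      return Opc;
    }

    int main() {
      assert(effectiveOpcode(Or, /*InvertCond=*/true) == And);
      assert(effectiveOpcode(And, /*InvertCond=*/true) == Or);
      assert(effectiveOpcode(And, /*InvertCond=*/false) == And);
    }
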
@@ -2027,7 +2028,7 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
   Entry.Node = StackSlot;
   Entry.Ty = FnTy->getParamType(0);
   if (Fn->hasAttribute(1, Attribute::AttrKind::InReg))
-    Entry.isInReg = true;
+    Entry.IsInReg = true;
   Args.push_back(Entry);
 
   TargetLowering::CallLoweringInfo CLI(DAG);
@@ -2581,13 +2582,13 @@ void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) {
   Flags.setNoSignedWrap(nsw);
   Flags.setNoUnsignedWrap(nuw);
   Flags.setVectorReduction(vec_redux);
-  if (EnableFMFInDAG) {
-    Flags.setAllowReciprocal(FMF.allowReciprocal());
-    Flags.setNoInfs(FMF.noInfs());
-    Flags.setNoNaNs(FMF.noNaNs());
-    Flags.setNoSignedZeros(FMF.noSignedZeros());
-    Flags.setUnsafeAlgebra(FMF.unsafeAlgebra());
-  }
+  Flags.setAllowReciprocal(FMF.allowReciprocal());
+  Flags.setAllowContract(FMF.allowContract());
+  Flags.setNoInfs(FMF.noInfs());
+  Flags.setNoNaNs(FMF.noNaNs());
+  Flags.setNoSignedZeros(FMF.noSignedZeros());
+  Flags.setUnsafeAlgebra(FMF.unsafeAlgebra());
+
   SDValue BinNodeValue = DAG.getNode(OpCode, getCurSDLoc(), Op1.getValueType(),
                                      Op1, Op2, &Flags);
   setValue(&I, BinNodeValue);
@@ -2914,7 +2915,7 @@ void SelectionDAGBuilder::visitBitCast(const User &I) {
                              DestVT, N)); // convert types.
   // Check if the original LLVM IR Operand was a ConstantInt, because getValue()
   // might fold any kind of constant expression to an integer constant and that
-  // is not what we are looking for. Only regcognize a bitcast of a genuine
+  // is not what we are looking for. Only recognize a bitcast of a genuine
   // constant integer as an opaque constant.
   else if(ConstantInt *C = dyn_cast<ConstantInt>(I.getOperand(0)))
     setValue(&I, DAG.getConstant(C->getValue(), dl, DestVT, /*isTarget=*/false,
@@ -3067,14 +3068,10 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
 
   if (SrcNumElts > MaskNumElts) {
     // Analyze the access pattern of the vector to see if we can extract
-    // two subvectors and do the shuffle. The analysis is done by calculating
-    // the range of elements the mask access on both vectors.
-    int MinRange[2] = { static_cast<int>(SrcNumElts),
-                        static_cast<int>(SrcNumElts)};
-    int MaxRange[2] = {-1, -1};
-
-    for (unsigned i = 0; i != MaskNumElts; ++i) {
-      int Idx = Mask[i];
+    // two subvectors and do the shuffle.
+    int StartIdx[2] = { -1, -1 };  // StartIdx to extract from
+    bool CanExtract = true;
+    for (int Idx : Mask) {
       unsigned Input = 0;
       if (Idx < 0)
         continue;
@@ -3083,41 +3080,28 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
         Input = 1;
         Idx -= SrcNumElts;
       }
-      if (Idx > MaxRange[Input])
-        MaxRange[Input] = Idx;
-      if (Idx < MinRange[Input])
-        MinRange[Input] = Idx;
-    }
-
-    // Check if the access is smaller than the vector size and can we find
-    // a reasonable extract index.
-    int RangeUse[2] = { -1, -1 };  // 0 = Unused, 1 = Extract, -1 = Can not
-                                   // Extract.
-    int StartIdx[2];  // StartIdx to extract from
-    for (unsigned Input = 0; Input < 2; ++Input) {
-      if (MinRange[Input] >= (int)SrcNumElts && MaxRange[Input] < 0) {
-        RangeUse[Input] = 0; // Unused
-        StartIdx[Input] = 0;
-        continue;
-      }
-
-      // Find a good start index that is a multiple of the mask length. Then
-      // see if the rest of the elements are in range.
-      StartIdx[Input] = (MinRange[Input]/MaskNumElts)*MaskNumElts;
-      if (MaxRange[Input] - StartIdx[Input] < (int)MaskNumElts &&
-          StartIdx[Input] + MaskNumElts <= SrcNumElts)
-        RangeUse[Input] = 1; // Extract from a multiple of the mask length.
+      // If all the indices come from the same MaskNumElts sized portion of
+      // the sources we can use extract. Also make sure the extract wouldn't
+      // extract past the end of the source.
+      int NewStartIdx = alignDown(Idx, MaskNumElts);
+      if (NewStartIdx + MaskNumElts > SrcNumElts ||
+          (StartIdx[Input] >= 0 && StartIdx[Input] != NewStartIdx))
+        CanExtract = false;
+      // Make sure we always update StartIdx as we use it to track if all
+      // elements are undef.
+      StartIdx[Input] = NewStartIdx;
     }
 
-    if (RangeUse[0] == 0 && RangeUse[1] == 0) {
+    if (StartIdx[0] < 0 && StartIdx[1] < 0) {
       setValue(&I, DAG.getUNDEF(VT));  // Vectors are not used.
       return;
     }
 
-    if (RangeUse[0] >= 0 && RangeUse[1] >= 0) {
+    if (CanExtract) {
       // Extract appropriate subvector and generate a vector shuffle
       for (unsigned Input = 0; Input < 2; ++Input) {
         SDValue &Src = Input == 0 ? Src1 : Src2;
-        if (RangeUse[Input] == 0)
+        if (StartIdx[Input] < 0)
           Src = DAG.getUNDEF(VT);
         else {
           Src = DAG.getNode(
@@ -3128,16 +3112,12 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
       }
 
       // Calculate new mask.
-      SmallVector<int, 8> MappedOps;
-      for (unsigned i = 0; i != MaskNumElts; ++i) {
-        int Idx = Mask[i];
-        if (Idx >= 0) {
-          if (Idx < (int)SrcNumElts)
-            Idx -= StartIdx[0];
-          else
-            Idx -= SrcNumElts + StartIdx[1] - MaskNumElts;
-        }
-        MappedOps.push_back(Idx);
+      SmallVector<int, 8> MappedOps(Mask.begin(), Mask.end());
+      for (int &Idx : MappedOps) {
+        if (Idx >= (int)SrcNumElts)
+          Idx -= SrcNumElts + StartIdx[1] - MaskNumElts;
+        else if (Idx >= 0)
+          Idx -= StartIdx[0];
       }
 
       setValue(&I, DAG.getVectorShuffle(VT, DL, Src1, Src2, MappedOps));
@@ -3151,8 +3131,7 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
   EVT EltVT = VT.getVectorElementType();
   EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
   SmallVector<SDValue,8> Ops;
-  for (unsigned i = 0; i != MaskNumElts; ++i) {
-    int Idx = Mask[i];
+  for (int Idx : Mask) {
     SDValue Res;
 
     if (Idx < 0) {
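A standalone model of the new extraction test may help. The sketch below (illustrative C++, not LLVM code; this alignDown mirrors llvm::alignDown for nonnegative values) reproduces the StartIdx/CanExtract logic from the hunk above: a single extract_subvector per source is possible only when every defined mask index falls in one MaskNumElts-wide, MaskNumElts-aligned, in-bounds window of its source.

    #include <cassert>
    #include <vector>

    // alignDown here mirrors llvm::alignDown for nonnegative values.
    int alignDown(int Value, int Align) { return Value - (Value % Align); }

    // Standalone model of the new extraction test: every defined mask index
    // must land in one MaskNumElts-wide, MaskNumElts-aligned window of its
    // source, and the window must not run past the end of the source.
    bool canExtract(const std::vector<int> &Mask, int SrcNumElts) {
      int MaskNumElts = static_cast<int>(Mask.size());
      int StartIdx[2] = {-1, -1};
      for (int Idx : Mask) {
        if (Idx < 0)
          continue; // undef lanes do not constrain the window
        int Input = 0;
        if (Idx >= SrcNumElts) { // lane reads from the second source vector
          Input = 1;
          Idx -= SrcNumElts;
        }
        int NewStartIdx = alignDown(Idx, MaskNumElts);
        if (NewStartIdx + MaskNumElts > SrcNumElts ||
            (StartIdx[Input] >= 0 && StartIdx[Input] != NewStartIdx))
          return false;
        StartIdx[Input] = NewStartIdx;
      }
      return true;
    }

    int main() {
      // <4 x i32> shuffle of <8 x i32> sources: lanes 4..7 are one window.
      assert(canExtract({4, 5, 6, 7}, 8));
      // Lanes 3 and 4 straddle two aligned windows; no single extract works.
      assert(!canExtract({3, 4, -1, -1}, 8));
    }
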
@@ -3281,7 +3260,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
 
         // N = N + Offset
         uint64_t Offset = DL->getStructLayout(StTy)->getElementOffset(Field);
-        // In an inbouds GEP with an offset that is nonnegative even when
+        // In an inbounds GEP with an offset that is nonnegative even when
         // interpreted as signed, assume there is no unsigned overflow.
         SDNodeFlags Flags;
         if (int64_t(Offset) >= 0 && cast<GEPOperator>(I).isInBounds())
@@ -4752,7 +4731,7 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
   else
     FuncInfo.ArgDbgValues.push_back(
         BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE))
-            .addOperand(*Op)
+            .add(*Op)
            .addImm(Offset)
            .addMetadata(Variable)
            .addMetadata(Expr));
@@ -4764,7 +4743,7 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
 SDDbgValue *SelectionDAGBuilder::getDbgValue(SDValue N,
                                              DILocalVariable *Variable,
                                              DIExpression *Expr, int64_t Offset,
-                                             DebugLoc dl,
+                                             const DebugLoc &dl,
                                              unsigned DbgSDNodeOrder) {
   SDDbgValue *SDV;
   auto *FISDN = dyn_cast<FrameIndexSDNode>(N.getNode());
@@ -4794,9 +4773,9 @@ SDDbgValue *SelectionDAGBuilder::getDbgValue(SDValue N,
 #  define setjmp_undefined_for_msvc
 #endif
 
-/// visitIntrinsicCall - Lower the call to the specified intrinsic function. If
-/// we want to emit this as a call to a named external function, return the name
-/// otherwise lower it and return null.
+/// Lower the call to the specified intrinsic function. If we want to emit this
+/// as a call to a named external function, return the name. Otherwise, lower it
+/// and return null.
 const char *
 SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -4929,14 +4908,12 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
       report_fatal_error("Unsupported element size");
 
     TargetLowering::CallLoweringInfo CLI(DAG);
-    CLI.setDebugLoc(sdl)
-        .setChain(getRoot())
-        .setCallee(TLI.getLibcallCallingConv(LibraryCall),
-                   Type::getVoidTy(*DAG.getContext()),
-                   DAG.getExternalSymbol(
-                       TLI.getLibcallName(LibraryCall),
-                       TLI.getPointerTy(DAG.getDataLayout())),
-                   std::move(Args));
+    CLI.setDebugLoc(sdl).setChain(getRoot()).setLibCallee(
+        TLI.getLibcallCallingConv(LibraryCall),
+        Type::getVoidTy(*DAG.getContext()),
+        DAG.getExternalSymbol(TLI.getLibcallName(LibraryCall),
+                              TLI.getPointerTy(DAG.getDataLayout())),
+        std::move(Args));
 
     std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
     DAG.setRoot(CallResult.second);
@@ -5301,6 +5278,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
                              getValue(I.getArgOperand(1)),
                              getValue(I.getArgOperand(2))));
     return nullptr;
+  case Intrinsic::experimental_constrained_fadd:
+  case Intrinsic::experimental_constrained_fsub:
+  case Intrinsic::experimental_constrained_fmul:
+  case Intrinsic::experimental_constrained_fdiv:
+  case Intrinsic::experimental_constrained_frem:
+    visitConstrainedFPIntrinsic(I, Intrinsic);
+    return nullptr;
   case Intrinsic::fmuladd: {
     EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
     if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict &&
@@ -5537,7 +5521,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
   case Intrinsic::trap: {
     StringRef TrapFuncName =
         I.getAttributes()
-            .getAttribute(AttributeSet::FunctionIndex, "trap-func-name")
+            .getAttribute(AttributeList::FunctionIndex, "trap-func-name")
            .getValueAsString();
     if (TrapFuncName.empty()) {
       ISD::NodeType Op = (Intrinsic == Intrinsic::trap) ?
@@ -5548,7 +5532,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
 
     TargetLowering::ArgListTy Args;
     TargetLowering::CallLoweringInfo CLI(DAG);
-    CLI.setDebugLoc(sdl).setChain(getRoot()).setCallee(
+    CLI.setDebugLoc(sdl).setChain(getRoot()).setLibCallee(
         CallingConv::C, I.getType(),
         DAG.getExternalSymbol(TrapFuncName.data(),
                               TLI.getPointerTy(DAG.getDataLayout())),
@@ -5749,6 +5733,46 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
   }
 }
 
+void SelectionDAGBuilder::visitConstrainedFPIntrinsic(const CallInst &I,
+                                                      unsigned Intrinsic) {
+  SDLoc sdl = getCurSDLoc();
+  unsigned Opcode;
+  switch (Intrinsic) {
+  default: llvm_unreachable("Impossible intrinsic");  // Can't reach here.
+  case Intrinsic::experimental_constrained_fadd:
+    Opcode = ISD::STRICT_FADD;
+    break;
+  case Intrinsic::experimental_constrained_fsub:
+    Opcode = ISD::STRICT_FSUB;
+    break;
+  case Intrinsic::experimental_constrained_fmul:
+    Opcode = ISD::STRICT_FMUL;
+    break;
+  case Intrinsic::experimental_constrained_fdiv:
+    Opcode = ISD::STRICT_FDIV;
+    break;
+  case Intrinsic::experimental_constrained_frem:
+    Opcode = ISD::STRICT_FREM;
+    break;
+  }
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  SDValue Chain = getRoot();
+  SDValue Ops[3] = { Chain, getValue(I.getArgOperand(0)),
+                     getValue(I.getArgOperand(1)) };
+  SmallVector<EVT, 4> ValueVTs;
+  ComputeValueVTs(TLI, DAG.getDataLayout(), I.getType(), ValueVTs);
+  ValueVTs.push_back(MVT::Other); // Out chain
+
+  SDVTList VTs = DAG.getVTList(ValueVTs);
+  SDValue Result = DAG.getNode(Opcode, sdl, VTs, Ops);
+
+  assert(Result.getNode()->getNumValues() == 2);
+  SDValue OutChain = Result.getValue(1);
+  DAG.setRoot(OutChain);
+  SDValue FPResult = Result.getValue(0);
+  setValue(&I, FPResult);
+}
+
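For intuition, here is a rough standalone model (illustrative C++17, not LLVM's API) of the node shape visitConstrainedFPIntrinsic builds: a strict FP operation yields both the FP value and an output chain, and threading the returned chain back as the new root (the analogue of DAG.setRoot(Result.getValue(1)) above) is what keeps the operation ordered with respect to other side-effecting nodes.

    #include <cassert>
    #include <utility>

    // Illustrative model: a "strict" FP op produces a value plus a new chain
    // token; the caller must thread the returned chain onward so the op
    // cannot be reordered across other side-effecting operations.
    struct Chain { unsigned Token; };

    std::pair<double, Chain> strictFAdd(Chain In, double A, double B) {
      // Consume the incoming token, produce a fresh one.
      return {A + B, Chain{In.Token + 1}};
    }

    int main() {
      Chain Root{0};
      auto [Val, NewRoot] = strictFAdd(Root, 1.5, 2.25);
      assert(Val == 3.75);
      assert(NewRoot.Token == 1); // the new root replaces the old one
    }
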
 std::pair<SDValue, SDValue>
 SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI,
                                     const BasicBlock *EHPadBB) {
@@ -5827,7 +5851,6 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
   Type *RetTy = CS.getType();
 
   TargetLowering::ArgListTy Args;
-  TargetLowering::ArgListEntry Entry;
   Args.reserve(CS.arg_size());
 
   const Value *SwiftErrorVal = nullptr;
@@ -5843,6 +5866,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
 
   for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
        i != e; ++i) {
+    TargetLowering::ArgListEntry Entry;
     const Value *V = *i;
 
     // Skip empty types
@@ -5852,11 +5876,10 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
     SDValue ArgNode = getValue(V);
     Entry.Node = ArgNode; Entry.Ty = V->getType();
 
-    // Skip the first return-type Attribute to get to params.
-    Entry.setAttributes(&CS, i - CS.arg_begin() + 1);
+    Entry.setAttributes(&CS, i - CS.arg_begin());
 
     // Use swifterror virtual register as input to the call.
-    if (Entry.isSwiftError && TLI.supportSwiftError()) {
+    if (Entry.IsSwiftError && TLI.supportSwiftError()) {
       SwiftErrorVal = V;
       // We find the virtual register for the actual swifterror argument.
       // Instead of using the Value, we use the virtual register instead.
@@ -5869,7 +5892,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
 
     // If we have an explicit sret argument that is an Instruction, (i.e., it
     // might point to function-local memory), we can't meaningfully tail-call.
-    if (Entry.isSRet && isa<Instruction>(V))
+    if (Entry.IsSRet && isa<Instruction>(V))
       isTailCall = false;
   }
 
@@ -5912,8 +5935,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
   }
 }
 
-/// IsOnlyUsedInZeroEqualityComparison - Return true if it only matters that the
-/// value is equal or not-equal to zero.
+/// Return true if it only matters that the value is equal or not-equal to zero.
 static bool IsOnlyUsedInZeroEqualityComparison(const Value *V) {
   for (const User *U : V->users()) {
     if (const ICmpInst *IC = dyn_cast<ICmpInst>(U))
@@ -5928,13 +5950,17 @@ static bool IsOnlyUsedInZeroEqualityComparison(const Value *V) {
 }
 
 static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT,
-                             Type *LoadTy,
                              SelectionDAGBuilder &Builder) {
-
   // Check to see if this load can be trivially constant folded, e.g. if the
   // input is from a string literal.
   if (const Constant *LoadInput = dyn_cast<Constant>(PtrVal)) {
     // Cast pointer to the type we really want to load.
+    Type *LoadTy =
+        Type::getIntNTy(PtrVal->getContext(), LoadVT.getScalarSizeInBits());
+    if (LoadVT.isVector())
+      LoadTy = VectorType::get(LoadTy, LoadVT.getVectorNumElements());
+
     LoadInput = ConstantExpr::getBitCast(const_cast<Constant *>(LoadInput),
                                          PointerType::getUnqual(LoadTy));
 
@@ -5967,8 +5993,8 @@ static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT,
   return LoadVal;
 }
 
-/// processIntegerCallValue - Record the value for an instruction that
-/// produces an integer result, converting the type where necessary.
+/// Record the value for an instruction that produces an integer result,
+/// converting the type where necessary.
 void SelectionDAGBuilder::processIntegerCallValue(const Instruction &I,
                                                   SDValue Value,
                                                   bool IsSigned) {
@@ -5981,20 +6007,13 @@ void SelectionDAGBuilder::processIntegerCallValue(const Instruction &I,
   setValue(&I, Value);
 }
 
-/// visitMemCmpCall - See if we can lower a call to memcmp in an optimized form.
-/// If so, return true and lower it, otherwise return false and it will be
-/// lowered like a normal call.
+/// See if we can lower a memcmp call into an optimized form. If so, return
+/// true and lower it. Otherwise return false, and it will be lowered like a
+/// normal call.
+/// The caller already checked that \p I calls the appropriate LibFunc with a
+/// correct prototype.
 bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) {
-  // Verify that the prototype makes sense.  int memcmp(void*,void*,size_t)
-  if (I.getNumArgOperands() != 3)
-    return false;
-
   const Value *LHS = I.getArgOperand(0), *RHS = I.getArgOperand(1);
-  if (!LHS->getType()->isPointerTy() || !RHS->getType()->isPointerTy() ||
-      !I.getArgOperand(2)->getType()->isIntegerTy() ||
-      !I.getType()->isIntegerTy())
-    return false;
-
   const Value *Size = I.getArgOperand(2);
   const ConstantInt *CSize = dyn_cast<ConstantInt>(Size);
   if (CSize && CSize->getZExtValue() == 0) {
@@ -6005,11 +6024,9 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) {
   }
 
   const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
-  std::pair<SDValue, SDValue> Res =
-    TSI.EmitTargetCodeForMemcmp(DAG, getCurSDLoc(), DAG.getRoot(),
-                                getValue(LHS), getValue(RHS), getValue(Size),
-                                MachinePointerInfo(LHS),
-                                MachinePointerInfo(RHS));
+  std::pair<SDValue, SDValue> Res = TSI.EmitTargetCodeForMemcmp(
+      DAG, getCurSDLoc(), DAG.getRoot(), getValue(LHS), getValue(RHS),
+      getValue(Size), MachinePointerInfo(LHS), MachinePointerInfo(RHS));
   if (Res.first.getNode()) {
     processIntegerCallValue(I, Res.first, true);
     PendingLoads.push_back(Res.second);
@@ -6018,88 +6035,79 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) {
 
   // memcmp(S1,S2,2) != 0 -> (*(short*)LHS != *(short*)RHS)  != 0
   // memcmp(S1,S2,4) != 0 -> (*(int*)LHS != *(int*)RHS)  != 0
-  if (CSize && IsOnlyUsedInZeroEqualityComparison(&I)) {
-    bool ActuallyDoIt = true;
-    MVT LoadVT;
-    Type *LoadTy;
-    switch (CSize->getZExtValue()) {
-    default:
-      LoadVT = MVT::Other;
-      LoadTy = nullptr;
-      ActuallyDoIt = false;
-      break;
-    case 2:
-      LoadVT = MVT::i16;
-      LoadTy = Type::getInt16Ty(CSize->getContext());
-      break;
-    case 4:
-      LoadVT = MVT::i32;
-      LoadTy = Type::getInt32Ty(CSize->getContext());
-      break;
-    case 8:
-      LoadVT = MVT::i64;
-      LoadTy = Type::getInt64Ty(CSize->getContext());
-      break;
-    /*
-    case 16:
-      LoadVT = MVT::v4i32;
-      LoadTy = Type::getInt32Ty(CSize->getContext());
-      LoadTy = VectorType::get(LoadTy, 4);
-      break;
-    */
-    }
-
-    // This turns into unaligned loads. We only do this if the target natively
-    // supports the MVT we'll be loading or if it is small enough (<= 4) that
-    // we'll only produce a small number of byte loads.
+  if (!CSize || !IsOnlyUsedInZeroEqualityComparison(&I))
+    return false;
 
-    // Require that we can find a legal MVT, and only do this if the target
-    // supports unaligned loads of that type. Expanding into byte loads would
-    // bloat the code.
+  // If the target has a fast compare for the given size, it will return a
+  // preferred load type for that size. Require that the load VT is legal and
+  // that the target supports unaligned loads of that type. Otherwise, return
+  // INVALID.
+  auto hasFastLoadsAndCompare = [&](unsigned NumBits) {
     const TargetLowering &TLI = DAG.getTargetLoweringInfo();
-    if (ActuallyDoIt && CSize->getZExtValue() > 4) {
-      unsigned DstAS = LHS->getType()->getPointerAddressSpace();
-      unsigned SrcAS = RHS->getType()->getPointerAddressSpace();
+    MVT LVT = TLI.hasFastEqualityCompare(NumBits);
+    if (LVT != MVT::INVALID_SIMPLE_VALUE_TYPE) {
       // TODO: Handle 5 byte compare as 4-byte + 1 byte.
       // TODO: Handle 8 byte compare on x86-32 as two 32-bit loads.
       // TODO: Check alignment of src and dest ptrs.
-      if (!TLI.isTypeLegal(LoadVT) ||
-          !TLI.allowsMisalignedMemoryAccesses(LoadVT, SrcAS) ||
-          !TLI.allowsMisalignedMemoryAccesses(LoadVT, DstAS))
-        ActuallyDoIt = false;
+      unsigned DstAS = LHS->getType()->getPointerAddressSpace();
+      unsigned SrcAS = RHS->getType()->getPointerAddressSpace();
+      if (!TLI.isTypeLegal(LVT) ||
+          !TLI.allowsMisalignedMemoryAccesses(LVT, SrcAS) ||
+          !TLI.allowsMisalignedMemoryAccesses(LVT, DstAS))
+        LVT = MVT::INVALID_SIMPLE_VALUE_TYPE;
     }
 
-    if (ActuallyDoIt) {
-      SDValue LHSVal = getMemCmpLoad(LHS, LoadVT, LoadTy, *this);
-      SDValue RHSVal = getMemCmpLoad(RHS, LoadVT, LoadTy, *this);
+    return LVT;
+  };
 
-      SDValue Res = DAG.getSetCC(getCurSDLoc(), MVT::i1, LHSVal, RHSVal,
-                                 ISD::SETNE);
-      processIntegerCallValue(I, Res, false);
-      return true;
-    }
+  // This turns into unaligned loads. We only do this if the target natively
+  // supports the MVT we'll be loading or if it is small enough (<= 4) that
+  // we'll only produce a small number of byte loads.
+  MVT LoadVT;
+  unsigned NumBitsToCompare = CSize->getZExtValue() * 8;
+  switch (NumBitsToCompare) {
+  default:
+    return false;
+  case 16:
+    LoadVT = MVT::i16;
+    break;
+  case 32:
+    LoadVT = MVT::i32;
+    break;
+  case 64:
+  case 128:
+  case 256:
+    LoadVT = hasFastLoadsAndCompare(NumBitsToCompare);
+    break;
   }
+  if (LoadVT == MVT::INVALID_SIMPLE_VALUE_TYPE)
+    return false;
 
-  return false;
+  SDValue LoadL = getMemCmpLoad(LHS, LoadVT, *this);
+  SDValue LoadR = getMemCmpLoad(RHS, LoadVT, *this);
+
+  // Bitcast to a wide integer type if the loads are vectors.
+  if (LoadVT.isVector()) {
+    EVT CmpVT = EVT::getIntegerVT(LHS->getContext(), LoadVT.getSizeInBits());
+    LoadL = DAG.getBitcast(CmpVT, LoadL);
+    LoadR = DAG.getBitcast(CmpVT, LoadR);
+  }
+
+  SDValue Cmp = DAG.getSetCC(getCurSDLoc(), MVT::i1, LoadL, LoadR, ISD::SETNE);
+  processIntegerCallValue(I, Cmp, false);
+  return true;
 }
 
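In source terms, the transformation the rewritten visitMemCmpCall performs is roughly the following (illustrative C++, not LLVM code): a memcmp of a small known size whose result is only compared against zero becomes a pair of possibly-unaligned wide loads and a single integer equality test instead of a libc call.

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    // Illustrative equivalent of the 4-byte case above:
    // memcmp(L, R, 4) == 0 becomes one 32-bit load per side plus a compare.
    bool equal4(const void *L, const void *R) {
      uint32_t A, B;
      std::memcpy(&A, L, sizeof(A)); // stands in for an unaligned wide load
      std::memcpy(&B, R, sizeof(B));
      return A == B;
    }

    int main() {
      char X[4] = {'a', 'b', 'c', 'd'};
      char Y[4] = {'a', 'b', 'c', 'e'};
      assert(equal4(X, X) == (std::memcmp(X, X, 4) == 0));
      assert(equal4(X, Y) == (std::memcmp(X, Y, 4) == 0));
    }
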
-/// visitMemChrCall -- See if we can lower a memchr call into an optimized
-/// form. If so, return true and lower it, otherwise return false and it
-/// will be lowered like a normal call.
+/// See if we can lower a memchr call into an optimized form. If so, return
+/// true and lower it. Otherwise return false, and it will be lowered like a
+/// normal call.
+/// The caller already checked that \p I calls the appropriate LibFunc with a
+/// correct prototype.
 bool SelectionDAGBuilder::visitMemChrCall(const CallInst &I) {
-  // Verify that the prototype makes sense.  void *memchr(void *, int, size_t)
-  if (I.getNumArgOperands() != 3)
-    return false;
-
   const Value *Src = I.getArgOperand(0);
   const Value *Char = I.getArgOperand(1);
   const Value *Length = I.getArgOperand(2);
-  if (!Src->getType()->isPointerTy() ||
-      !Char->getType()->isIntegerTy() ||
-      !Length->getType()->isIntegerTy() ||
-      !I.getType()->isPointerTy())
-    return false;
 
   const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
   std::pair<SDValue, SDValue> Res =
@@ -6115,15 +6123,12 @@ bool SelectionDAGBuilder::visitMemChrCall(const CallInst &I) {
   return false;
 }
 
-///
-/// visitMemPCpyCall -- lower a mempcpy call as a memcpy followed by code
-/// to adjust the dst pointer by the size of the copied memory.
+/// See if we can lower a mempcpy call into an optimized form. If so, return
+/// true and lower it. Otherwise return false, and it will be lowered like a
+/// normal call.
+/// The caller already checked that \p I calls the appropriate LibFunc with a
+/// correct prototype.
 bool SelectionDAGBuilder::visitMemPCpyCall(const CallInst &I) {
-
-  // Verify argument count: void *mempcpy(void *, const void *, size_t)
-  if (I.getNumArgOperands() != 3)
-    return false;
-
   SDValue Dst = getValue(I.getArgOperand(0));
   SDValue Src = getValue(I.getArgOperand(1));
   SDValue Size = getValue(I.getArgOperand(2));
@@ -6158,19 +6163,13 @@ bool SelectionDAGBuilder::visitMemPCpyCall(const CallInst &I) {
   return true;
 }
 
-/// visitStrCpyCall -- See if we can lower a strcpy or stpcpy call into an
-/// optimized form. If so, return true and lower it, otherwise return false
-/// and it will be lowered like a normal call.
+/// See if we can lower a strcpy call into an optimized form. If so, return
+/// true and lower it, otherwise return false and it will be lowered like a
+/// normal call.
+/// The caller already checked that \p I calls the appropriate LibFunc with a
+/// correct prototype.
 bool SelectionDAGBuilder::visitStrCpyCall(const CallInst &I, bool isStpcpy) {
-  // Verify that the prototype makes sense.  char *strcpy(char *, char *)
-  if (I.getNumArgOperands() != 2)
-    return false;
-
   const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1);
-  if (!Arg0->getType()->isPointerTy() ||
-      !Arg1->getType()->isPointerTy() ||
-      !I.getType()->isPointerTy())
-    return false;
 
   const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
   std::pair<SDValue, SDValue> Res =
@@ -6187,19 +6186,13 @@ bool SelectionDAGBuilder::visitStrCpyCall(const CallInst &I, bool isStpcpy) {
   return false;
 }
 
-/// visitStrCmpCall - See if we can lower a call to strcmp in an optimized form.
-/// If so, return true and lower it, otherwise return false and it will be
-/// lowered like a normal call.
+/// See if we can lower a strcmp call into an optimized form. If so, return
+/// true and lower it, otherwise return false and it will be lowered like a
+/// normal call.
+/// The caller already checked that \p I calls the appropriate LibFunc with a
+/// correct prototype.
 bool SelectionDAGBuilder::visitStrCmpCall(const CallInst &I) {
-  // Verify that the prototype makes sense.  int strcmp(void*,void*)
-  if (I.getNumArgOperands() != 2)
-    return false;
-
   const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1);
-  if (!Arg0->getType()->isPointerTy() ||
-      !Arg1->getType()->isPointerTy() ||
-      !I.getType()->isIntegerTy())
-    return false;
 
   const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
   std::pair<SDValue, SDValue> Res =
@@ -6216,17 +6209,13 @@ bool SelectionDAGBuilder::visitStrCmpCall(const CallInst &I) {
   return false;
 }
 
-/// visitStrLenCall -- See if we can lower a strlen call into an optimized
-/// form. If so, return true and lower it, otherwise return false and it
-/// will be lowered like a normal call.
+/// See if we can lower a strlen call into an optimized form. If so, return
+/// true and lower it, otherwise return false and it will be lowered like a
+/// normal call.
+/// The caller already checked that \p I calls the appropriate LibFunc with a
+/// correct prototype.
 bool SelectionDAGBuilder::visitStrLenCall(const CallInst &I) {
-  // Verify that the prototype makes sense.  size_t strlen(char *)
-  if (I.getNumArgOperands() != 1)
-    return false;
-
   const Value *Arg0 = I.getArgOperand(0);
-  if (!Arg0->getType()->isPointerTy() || !I.getType()->isIntegerTy())
-    return false;
 
   const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
   std::pair<SDValue, SDValue> Res =
@@ -6241,19 +6230,13 @@ bool SelectionDAGBuilder::visitStrLenCall(const CallInst &I) {
   return false;
 }
 
-/// visitStrNLenCall -- See if we can lower a strnlen call into an optimized
-/// form. If so, return true and lower it, otherwise return false and it
-/// will be lowered like a normal call.
+/// See if we can lower a strnlen call into an optimized form. If so, return
+/// true and lower it, otherwise return false and it will be lowered like a
+/// normal call.
+/// The caller already checked that \p I calls the appropriate LibFunc with a
+/// correct prototype.
 bool SelectionDAGBuilder::visitStrNLenCall(const CallInst &I) {
-  // Verify that the prototype makes sense.  size_t strnlen(char *, size_t)
-  if (I.getNumArgOperands() != 2)
-    return false;
-
   const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1);
-  if (!Arg0->getType()->isPointerTy() ||
-      !Arg1->getType()->isIntegerTy() ||
-      !I.getType()->isIntegerTy())
-    return false;
 
   const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
   std::pair<SDValue, SDValue> Res =
@@ -6269,16 +6252,15 @@ bool SelectionDAGBuilder::visitStrNLenCall(const CallInst &I) {
   return false;
 }
 
-/// visitUnaryFloatCall - If a call instruction is a unary floating-point
-/// operation (as expected), translate it to an SDNode with the specified opcode
-/// and return true.
+/// See if we can lower a unary floating-point operation into an SDNode with
+/// the specified Opcode. If so, return true and lower it, otherwise return
+/// false and it will be lowered like a normal call.
+/// The caller already checked that \p I calls the appropriate LibFunc with a
+/// correct prototype.
 bool SelectionDAGBuilder::visitUnaryFloatCall(const CallInst &I,
                                               unsigned Opcode) {
-  // Sanity check that it really is a unary floating-point call.
-  if (I.getNumArgOperands() != 1 ||
-      !I.getArgOperand(0)->getType()->isFloatingPointTy() ||
-      I.getType() != I.getArgOperand(0)->getType() ||
-      !I.onlyReadsMemory())
+  // We already checked this call's prototype; verify it doesn't modify errno.
+  if (!I.onlyReadsMemory())
     return false;
 
   SDValue Tmp = getValue(I.getArgOperand(0));
@@ -6286,17 +6268,15 @@ bool SelectionDAGBuilder::visitUnaryFloatCall(const CallInst &I,
   return true;
 }
 
-/// visitBinaryFloatCall - If a call instruction is a binary floating-point
-/// operation (as expected), translate it to an SDNode with the specified opcode
-/// and return true.
+/// See if we can lower a binary floating-point operation into an SDNode with
+/// the specified Opcode. If so, return true and lower it. Otherwise return
+/// false, and it will be lowered like a normal call.
+/// The caller already checked that \p I calls the appropriate LibFunc with a
+/// correct prototype.
 bool SelectionDAGBuilder::visitBinaryFloatCall(const CallInst &I,
                                                unsigned Opcode) {
-  // Sanity check that it really is a binary floating-point call.
-  if (I.getNumArgOperands() != 2 ||
-      !I.getArgOperand(0)->getType()->isFloatingPointTy() ||
-      I.getType() != I.getArgOperand(0)->getType() ||
-      I.getType() != I.getArgOperand(1)->getType() ||
-      !I.onlyReadsMemory())
+  // We already checked this call's prototype; verify it doesn't modify errno.
+  if (!I.onlyReadsMemory())
     return false;
 
   SDValue Tmp0 = getValue(I.getArgOperand(0));
@@ -6336,20 +6316,18 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
   // Check for well-known libc/libm calls. If the function is internal, it
   // can't be a library call. Don't do the check if marked as nobuiltin for
   // some reason.
-  LibFunc::Func Func;
+  LibFunc Func;
   if (!I.isNoBuiltin() && !F->hasLocalLinkage() && F->hasName() &&
-      LibInfo->getLibFunc(F->getName(), Func) &&
+      LibInfo->getLibFunc(*F, Func) &&
       LibInfo->hasOptimizedCodeGen(Func)) {
     switch (Func) {
     default: break;
-    case LibFunc::copysign:
-    case LibFunc::copysignf:
-    case LibFunc::copysignl:
-      if (I.getNumArgOperands() == 2 &&   // Basic sanity checks.
-          I.getArgOperand(0)->getType()->isFloatingPointTy() &&
-          I.getType() == I.getArgOperand(0)->getType() &&
-          I.getType() == I.getArgOperand(1)->getType() &&
-          I.onlyReadsMemory()) {
+    case LibFunc_copysign:
+    case LibFunc_copysignf:
+    case LibFunc_copysignl:
+      // We already checked this call's prototype; verify it doesn't modify
+      // errno.
+      if (I.onlyReadsMemory()) {
         SDValue LHS = getValue(I.getArgOperand(0));
         SDValue RHS = getValue(I.getArgOperand(1));
         setValue(&I, DAG.getNode(ISD::FCOPYSIGN, getCurSDLoc(),
                                  LHS.getValueType(), LHS, RHS));
         return;
       }
       break;
-    case LibFunc::fabs:
-    case LibFunc::fabsf:
-    case LibFunc::fabsl:
+    case LibFunc_fabs:
+    case LibFunc_fabsf:
+    case LibFunc_fabsl:
       if (visitUnaryFloatCall(I, ISD::FABS))
         return;
       break;
-    case LibFunc::fmin:
-    case LibFunc::fminf:
-    case LibFunc::fminl:
+    case LibFunc_fmin:
+    case LibFunc_fminf:
+    case LibFunc_fminl:
       if (visitBinaryFloatCall(I, ISD::FMINNUM))
         return;
       break;
-    case LibFunc::fmax:
-    case LibFunc::fmaxf:
-    case LibFunc::fmaxl:
+    case LibFunc_fmax:
+    case LibFunc_fmaxf:
+    case LibFunc_fmaxl:
      if (visitBinaryFloatCall(I, ISD::FMAXNUM))
        return;
      break;
-    case LibFunc::sin:
-    case LibFunc::sinf:
-    case LibFunc::sinl:
+    case LibFunc_sin:
+    case LibFunc_sinf:
+    case LibFunc_sinl:
       if (visitUnaryFloatCall(I, ISD::FSIN))
         return;
       break;
-    case LibFunc::cos:
-    case LibFunc::cosf:
-    case LibFunc::cosl:
+    case LibFunc_cos:
+    case LibFunc_cosf:
+    case LibFunc_cosl:
       if (visitUnaryFloatCall(I, ISD::FCOS))
         return;
       break;
-    case LibFunc::sqrt:
-    case LibFunc::sqrtf:
-    case LibFunc::sqrtl:
-    case LibFunc::sqrt_finite:
-    case LibFunc::sqrtf_finite:
-    case LibFunc::sqrtl_finite:
+    case LibFunc_sqrt:
+    case LibFunc_sqrtf:
+    case LibFunc_sqrtl:
+    case LibFunc_sqrt_finite:
+    case LibFunc_sqrtf_finite:
+    case LibFunc_sqrtl_finite:
       if (visitUnaryFloatCall(I, ISD::FSQRT))
         return;
       break;
-    case LibFunc::floor:
-    case LibFunc::floorf:
-    case LibFunc::floorl:
+    case LibFunc_floor:
+    case LibFunc_floorf:
+    case LibFunc_floorl:
       if (visitUnaryFloatCall(I, ISD::FFLOOR))
         return;
       break;
-    case LibFunc::nearbyint:
-    case LibFunc::nearbyintf:
-    case LibFunc::nearbyintl:
+    case LibFunc_nearbyint:
+    case LibFunc_nearbyintf:
+    case LibFunc_nearbyintl:
       if (visitUnaryFloatCall(I, ISD::FNEARBYINT))
         return;
       break;
-    case LibFunc::ceil:
-    case LibFunc::ceilf:
-    case LibFunc::ceill:
+    case LibFunc_ceil:
+    case LibFunc_ceilf:
+    case LibFunc_ceill:
       if (visitUnaryFloatCall(I, ISD::FCEIL))
         return;
       break;
-    case LibFunc::rint:
-    case LibFunc::rintf:
-    case LibFunc::rintl:
+    case LibFunc_rint:
+    case LibFunc_rintf:
+    case LibFunc_rintl:
       if (visitUnaryFloatCall(I, ISD::FRINT))
         return;
       break;
-    case LibFunc::round:
-    case LibFunc::roundf:
-    case LibFunc::roundl:
+    case LibFunc_round:
+    case LibFunc_roundf:
+    case LibFunc_roundl:
       if (visitUnaryFloatCall(I, ISD::FROUND))
         return;
       break;
-    case LibFunc::trunc:
-    case LibFunc::truncf:
-    case LibFunc::truncl:
+    case LibFunc_trunc:
+    case LibFunc_truncf:
+    case LibFunc_truncl:
       if (visitUnaryFloatCall(I, ISD::FTRUNC))
         return;
       break;
-    case LibFunc::log2:
-    case LibFunc::log2f:
-    case LibFunc::log2l:
+    case LibFunc_log2:
+    case LibFunc_log2f:
+    case LibFunc_log2l:
       if (visitUnaryFloatCall(I, ISD::FLOG2))
         return;
       break;
-    case LibFunc::exp2:
-    case LibFunc::exp2f:
-    case LibFunc::exp2l:
+    case LibFunc_exp2:
+    case LibFunc_exp2f:
+    case LibFunc_exp2l:
       if (visitUnaryFloatCall(I, ISD::FEXP2))
         return;
       break;
-    case LibFunc::memcmp:
+    case LibFunc_memcmp:
       if (visitMemCmpCall(I))
         return;
       break;
-    case LibFunc::mempcpy:
+    case LibFunc_mempcpy:
       if (visitMemPCpyCall(I))
         return;
       break;
-    case LibFunc::memchr:
+    case LibFunc_memchr:
       if (visitMemChrCall(I))
         return;
       break;
-    case LibFunc::strcpy:
+    case LibFunc_strcpy:
       if (visitStrCpyCall(I, false))
         return;
       break;
-    case LibFunc::stpcpy:
+    case LibFunc_stpcpy:
      if (visitStrCpyCall(I, true))
        return;
      break;
-    case LibFunc::strcmp:
+    case LibFunc_strcmp:
      if (visitStrCmpCall(I))
        return;
      break;
-    case LibFunc::strlen:
+    case LibFunc_strlen:
      if (visitStrLenCall(I))
        return;
      break;
-    case LibFunc::strnlen:
+    case LibFunc_strnlen:
      if (visitStrNLenCall(I))
        return;
      break;
@@ -7361,7 +7339,7 @@ void SelectionDAGBuilder::populateCallLoweringInfo(
 
   // Populate the argument list.
   // Attributes for args start at offset 1, after the return attribute.
-  for (unsigned ArgI = ArgIdx, ArgE = ArgIdx + NumArgs, AttrI = ArgIdx + 1;
+  for (unsigned ArgI = ArgIdx, ArgE = ArgIdx + NumArgs;
        ArgI != ArgE; ++ArgI) {
     const Value *V = CS->getOperand(ArgI);
 
@@ -7370,7 +7348,7 @@ void SelectionDAGBuilder::populateCallLoweringInfo(
     TargetLowering::ArgListEntry Entry;
     Entry.Node = getValue(V);
     Entry.Ty = V->getType();
-    Entry.setAttributes(&CS, AttrI);
+    Entry.setAttributes(&CS, ArgIdx);
     Args.push_back(Entry);
   }
 
@@ -7631,9 +7609,9 @@ void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS,
     FuncInfo.MF->getFrameInfo().setHasPatchPoint();
 }
 
-/// Returns an AttributeSet representing the attributes applied to the return
+/// Returns an AttributeList representing the attributes applied to the return
 /// value of the given call.
-static AttributeSet getReturnAttrs(TargetLowering::CallLoweringInfo &CLI) {
+static AttributeList getReturnAttrs(TargetLowering::CallLoweringInfo &CLI) {
   SmallVector<Attribute::AttrKind, 2> Attrs;
   if (CLI.RetSExt)
     Attrs.push_back(Attribute::SExt);
@@ -7642,8 +7620,8 @@ static AttributeList getReturnAttrs(TargetLowering::CallLoweringInfo &CLI) {
   if (CLI.IsInReg)
     Attrs.push_back(Attribute::InReg);
 
-  return AttributeSet::get(CLI.RetTy->getContext(), AttributeSet::ReturnIndex,
-                           Attrs);
+  return AttributeList::get(CLI.RetTy->getContext(), AttributeList::ReturnIndex,
+                            Attrs);
 }
 
 /// TargetLowering::LowerCallTo - This is the default LowerCallTo
@@ -7683,15 +7661,15 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
     ArgListEntry Entry;
     Entry.Node = DemoteStackSlot;
     Entry.Ty = StackSlotPtrType;
-    Entry.isSExt = false;
-    Entry.isZExt = false;
-    Entry.isInReg = false;
-    Entry.isSRet = true;
-    Entry.isNest = false;
-    Entry.isByVal = false;
-    Entry.isReturned = false;
-    Entry.isSwiftSelf = false;
-    Entry.isSwiftError = false;
+    Entry.IsSExt = false;
+    Entry.IsZExt = false;
+    Entry.IsInReg = false;
+    Entry.IsSRet = true;
+    Entry.IsNest = false;
+    Entry.IsByVal = false;
+    Entry.IsReturned = false;
+    Entry.IsSwiftSelf = false;
+    Entry.IsSwiftError = false;
     Entry.Alignment = Align;
     CLI.getArgs().insert(CLI.getArgs().begin(), Entry);
     CLI.RetTy = Type::getVoidTy(CLI.RetTy->getContext());
@@ -7724,7 +7702,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
   ArgListTy &Args = CLI.getArgs();
   if (supportSwiftError()) {
     for (unsigned i = 0, e = Args.size(); i != e; ++i) {
-      if (Args[i].isSwiftError) {
+      if (Args[i].IsSwiftError) {
         ISD::InputArg MyFlags;
         MyFlags.VT = getPointerTy(DL);
         MyFlags.ArgVT = EVT(getPointerTy(DL));
@@ -7741,7 +7719,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
     SmallVector<EVT, 4> ValueVTs;
     ComputeValueVTs(*this, DL, Args[i].Ty, ValueVTs);
     Type *FinalType = Args[i].Ty;
-    if (Args[i].isByVal)
+    if (Args[i].IsByVal)
       FinalType = cast<PointerType>(Args[i].Ty)->getElementType();
     bool NeedsRegBlock = functionArgumentNeedsConsecutiveRegisters(
         FinalType, CLI.CallConv, CLI.IsVarArg);
@@ -7754,11 +7732,11 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
       ISD::ArgFlagsTy Flags;
       unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy);
 
-      if (Args[i].isZExt)
+      if (Args[i].IsZExt)
         Flags.setZExt();
-      if (Args[i].isSExt)
+      if (Args[i].IsSExt)
         Flags.setSExt();
-      if (Args[i].isInReg) {
+      if (Args[i].IsInReg) {
         // If we are using vectorcall calling convention, a structure that is
         // passed InReg - is surely an HVA
         if (CLI.CallConv == CallingConv::X86_VectorCall &&
@@ -7771,15 +7749,15 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
         // Set InReg Flag
         Flags.setInReg();
       }
-      if (Args[i].isSRet)
+      if (Args[i].IsSRet)
         Flags.setSRet();
-      if (Args[i].isSwiftSelf)
+      if (Args[i].IsSwiftSelf)
         Flags.setSwiftSelf();
-      if (Args[i].isSwiftError)
+      if (Args[i].IsSwiftError)
         Flags.setSwiftError();
-      if (Args[i].isByVal)
+      if (Args[i].IsByVal)
         Flags.setByVal();
-      if (Args[i].isInAlloca) {
+      if (Args[i].IsInAlloca) {
         Flags.setInAlloca();
         // Set the byval flag for CCAssignFn callbacks that don't know about
         // inalloca. This way we can know how many bytes we should've allocated
         // in the various CC lowering callbacks.
         Flags.setByVal();
       }
-      if (Args[i].isByVal || Args[i].isInAlloca) {
+      if (Args[i].IsByVal || Args[i].IsInAlloca) {
         PointerType *Ty = cast<PointerType>(Args[i].Ty);
         Type *ElementTy = Ty->getElementType();
         Flags.setByValSize(DL.getTypeAllocSize(ElementTy));
@@ -7801,7 +7779,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
           FrameAlign = getByValTypeAlignment(ElementTy, DL);
         Flags.setByValAlign(FrameAlign);
       }
-      if (Args[i].isNest)
+      if (Args[i].IsNest)
         Flags.setNest();
       if (NeedsRegBlock)
         Flags.setInConsecutiveRegs();
@@ -7812,13 +7790,13 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
       SmallVector<SDValue, 4> Parts(NumParts);
       ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
 
-      if (Args[i].isSExt)
+      if (Args[i].IsSExt)
         ExtendKind = ISD::SIGN_EXTEND;
-      else if (Args[i].isZExt)
+      else if (Args[i].IsZExt)
         ExtendKind = ISD::ZERO_EXTEND;
 
       // Conservatively only handle 'returned' on non-vectors for now
-      if (Args[i].isReturned && !Op.getValueType().isVector()) {
+      if (Args[i].IsReturned && !Op.getValueType().isVector()) {
         assert(CLI.RetTy == Args[i].Ty && RetTys.size() == NumValues &&
                "unexpected use of 'returned'");
         // Before passing 'returned' to the target lowering code, ensure that
@@ -7832,9 +7810,9 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
         // parameter extension method is not compatible with the return
         // extension method
         if ((NumParts * PartVT.getSizeInBits() == VT.getSizeInBits()) ||
-            (ExtendKind != ISD::ANY_EXTEND &&
-             CLI.RetSExt == Args[i].isSExt && CLI.RetZExt == Args[i].isZExt))
-          Flags.setReturned();
+            (ExtendKind != ISD::ANY_EXTEND && CLI.RetSExt == Args[i].IsSExt &&
+             CLI.RetZExt == Args[i].IsZExt))
+          Flags.setReturned();
       }
 
       getCopyToParts(CLI.DAG, CLI.DL, Op, &Parts[0], NumParts, PartVT,
@@ -8010,6 +7988,173 @@ static bool isOnlyUsedInEntryBlock(const Argument *A, bool FastISel) {
   return true;
 }
 
+typedef DenseMap<const Argument *,
+                 std::pair<const AllocaInst *, const StoreInst *>>
+    ArgCopyElisionMapTy;
+
+/// Scan the entry block of the function in FuncInfo for arguments that look
+/// like copies into a local alloca. Record any copied arguments in
+/// ArgCopyElisionCandidates.
+static void
+findArgumentCopyElisionCandidates(const DataLayout &DL,
+                                  FunctionLoweringInfo *FuncInfo,
+                                  ArgCopyElisionMapTy &ArgCopyElisionCandidates) {
+  // Record the state of every static alloca used in the entry block. Argument
+  // allocas are all used in the entry block, so we need approximately as many
+  // entries as we have arguments.
+  enum StaticAllocaInfo { Unknown, Clobbered, Elidable };
+  SmallDenseMap<const AllocaInst *, StaticAllocaInfo, 8> StaticAllocas;
+  unsigned NumArgs = FuncInfo->Fn->arg_size();
+  StaticAllocas.reserve(NumArgs * 2);
+
+  auto GetInfoIfStaticAlloca = [&](const Value *V) -> StaticAllocaInfo * {
+    if (!V)
+      return nullptr;
+    V = V->stripPointerCasts();
+    const auto *AI = dyn_cast<AllocaInst>(V);
+    if (!AI || !AI->isStaticAlloca() || !FuncInfo->StaticAllocaMap.count(AI))
+      return nullptr;
+    auto Iter = StaticAllocas.insert({AI, Unknown});
+    return &Iter.first->second;
+  };
+
+  // Look for stores of arguments to static allocas. Look through bitcasts and
+  // GEPs to handle type coercions, as long as the alloca is fully initialized
+  // by the store. Any non-store use of an alloca escapes it and any subsequent
+  // unanalyzed store might write it.
+  // FIXME: Handle structs initialized with multiple stores.
+  for (const Instruction &I : FuncInfo->Fn->getEntryBlock()) {
+    // Look for stores, and handle non-store uses conservatively.
+    const auto *SI = dyn_cast<StoreInst>(&I);
+    if (!SI) {
+      // We will look through cast uses, so ignore them completely.
+      if (I.isCast())
+        continue;
+      // Ignore debug info intrinsics, they don't escape or store to allocas.
+      if (isa<DbgInfoIntrinsic>(I))
+        continue;
+      // This is an unknown instruction. Assume it escapes or writes to all
+      // static alloca operands.
+      for (const Use &U : I.operands()) {
+        if (StaticAllocaInfo *Info = GetInfoIfStaticAlloca(U))
+          *Info = StaticAllocaInfo::Clobbered;
+      }
+      continue;
+    }
+
+    // If the stored value is a static alloca, mark it as escaped.
+    if (StaticAllocaInfo *Info = GetInfoIfStaticAlloca(SI->getValueOperand()))
+      *Info = StaticAllocaInfo::Clobbered;
+
+    // Check if the destination is a static alloca.
+    const Value *Dst = SI->getPointerOperand()->stripPointerCasts();
+    StaticAllocaInfo *Info = GetInfoIfStaticAlloca(Dst);
+    if (!Info)
+      continue;
+    const AllocaInst *AI = cast<AllocaInst>(Dst);
+
+    // Skip allocas that have been initialized or clobbered.
+    if (*Info != StaticAllocaInfo::Unknown)
+      continue;
+
+    // Check if the stored value is an argument, and that this store fully
+    // initializes the alloca. Don't elide copies from the same argument twice.
+    const Value *Val = SI->getValueOperand()->stripPointerCasts();
+    const auto *Arg = dyn_cast<Argument>(Val);
+    if (!Arg || Arg->hasInAllocaAttr() || Arg->hasByValAttr() ||
+        Arg->getType()->isEmptyTy() ||
+        DL.getTypeStoreSize(Arg->getType()) !=
+            DL.getTypeAllocSize(AI->getAllocatedType()) ||
+        ArgCopyElisionCandidates.count(Arg)) {
+      *Info = StaticAllocaInfo::Clobbered;
+      continue;
+    }
+
+    DEBUG(dbgs() << "Found argument copy elision candidate: " << *AI << '\n');
+
+    // Mark this alloca and store for argument copy elision.
+    *Info = StaticAllocaInfo::Elidable;
+    ArgCopyElisionCandidates.insert({Arg, {AI, SI}});
+
+    // Stop scanning if we've seen all arguments. This will happen early in -O0
+    // builds, which is useful, because -O0 builds have large entry blocks and
+    // many allocas.
+    if (ArgCopyElisionCandidates.size() == NumArgs)
+      break;
+  }
+}
+
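The pattern this scan recognizes comes straight from unoptimized argument lowering. An illustrative example (not LLVM code; the IR in the comments is approximate):

    // At -O0, Clang emits an entry-block alloca for each parameter plus a
    // store of the incoming argument into it, roughly:
    //   %X.addr = alloca i64         ; static alloca in the entry block
    //   store i64 %X, i64* %X.addr   ; the copy that becomes elidable
    //   %0 = load i64, i64* %X.addr
    // When the argument already lives in a fixed stack slot of matching size
    // and alignment, the alloca can be retargeted at that slot and the store
    // dropped.
    long useArg(long X) {
      long Local = X; // the in-memory copy of the argument at -O0
      return Local + 1;
    }

    int main() { return useArg(41) == 42 ? 0 : 1; }
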
+/// Try to elide argument copies from memory into a local alloca. Succeeds if
+/// ArgVal is a load from a suitable fixed stack object.
+static void tryToElideArgumentCopy(
+    FunctionLoweringInfo *FuncInfo, SmallVectorImpl<SDValue> &Chains,
+    DenseMap<int, int> &ArgCopyElisionFrameIndexMap,
+    SmallPtrSetImpl<const Instruction *> &ElidedArgCopyInstrs,
+    ArgCopyElisionMapTy &ArgCopyElisionCandidates, const Argument &Arg,
+    SDValue ArgVal, bool &ArgHasUses) {
+  // Check if this is a load from a fixed stack object.
+  auto *LNode = dyn_cast<LoadSDNode>(ArgVal);
+  if (!LNode)
+    return;
+  auto *FINode = dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode());
+  if (!FINode)
+    return;
+
+  // Check that the fixed stack object is the right size and alignment.
+  // Look at the alignment that the user wrote on the alloca instead of looking
+  // at the stack object.
+  auto ArgCopyIter = ArgCopyElisionCandidates.find(&Arg);
+  assert(ArgCopyIter != ArgCopyElisionCandidates.end());
+  const AllocaInst *AI = ArgCopyIter->second.first;
+  int FixedIndex = FINode->getIndex();
+  int &AllocaIndex = FuncInfo->StaticAllocaMap[AI];
+  int OldIndex = AllocaIndex;
+  MachineFrameInfo &MFI = FuncInfo->MF->getFrameInfo();
+  if (MFI.getObjectSize(FixedIndex) != MFI.getObjectSize(OldIndex)) {
+    DEBUG(dbgs() << "  argument copy elision failed due to bad fixed stack "
                    "object size\n");
+    return;
+  }
+  unsigned RequiredAlignment = AI->getAlignment();
+  if (!RequiredAlignment) {
+    RequiredAlignment = FuncInfo->MF->getDataLayout().getABITypeAlignment(
+        AI->getAllocatedType());
+  }
+  if (MFI.getObjectAlignment(FixedIndex) < RequiredAlignment) {
+    DEBUG(dbgs() << "  argument copy elision failed: alignment of alloca "
                    "greater than stack argument alignment ("
+                 << RequiredAlignment << " vs "
+                 << MFI.getObjectAlignment(FixedIndex) << ")\n");
+    return;
+  }
+
+  // Perform the elision. Delete the old stack object and replace its only use
+  // in the variable info map. Mark the stack object as mutable.
+  DEBUG({
+    dbgs() << "Eliding argument copy from " << Arg << " to " << *AI << '\n'
+           << "  Replacing frame index " << OldIndex << " with " << FixedIndex
+           << '\n';
+  });
+  MFI.RemoveStackObject(OldIndex);
+  MFI.setIsImmutableObjectIndex(FixedIndex, false);
+  AllocaIndex = FixedIndex;
+  ArgCopyElisionFrameIndexMap.insert({OldIndex, FixedIndex});
+  Chains.push_back(ArgVal.getValue(1));
+
+  // Avoid emitting code for the store implementing the copy.
+  const StoreInst *SI = ArgCopyIter->second.second;
+  ElidedArgCopyInstrs.insert(SI);
+
+  // Check for uses of the argument again so that we can avoid exporting ArgVal
+  // if it isn't used by anything other than the store.
+  for (const Value *U : Arg.users()) {
+    if (U != SI) {
+      ArgHasUses = true;
+      break;
+    }
+  }
+}
+
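The two bail-out paths above amount to a size and alignment compatibility test between the incoming fixed stack slot and the alloca it would replace. A minimal sketch (illustrative C++ with made-up parameter names, not LLVM code):

    #include <cassert>

    // The slot must be exactly the alloca's size and at least as aligned as
    // the alloca requires; otherwise the elision is rejected, mirroring the
    // checks in tryToElideArgumentCopy above.
    bool canElide(unsigned FixedSlotSize, unsigned FixedSlotAlign,
                  unsigned AllocaSize, unsigned AllocaAlign) {
      if (FixedSlotSize != AllocaSize)
        return false; // bad fixed stack object size
      if (FixedSlotAlign < AllocaAlign)
        return false; // alloca alignment exceeds stack argument alignment
      return true;
    }

    int main() {
      assert(canElide(8, 8, 8, 8));
      assert(!canElide(8, 4, 8, 8)); // under-aligned incoming slot
      assert(!canElide(4, 8, 8, 8)); // size mismatch
    }
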
 void SelectionDAGISel::LowerArguments(const Function &F) {
   SelectionDAG &DAG = SDB->DAG;
   SDLoc dl = SDB->getCurSDLoc();
@@ -8032,15 +8177,21 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
     Ins.push_back(RetArg);
   }
 
+  // Look for stores of arguments to static allocas. Mark such arguments with a
+  // flag to ask the target to give us the memory location of that argument if
+  // available.
+  ArgCopyElisionMapTy ArgCopyElisionCandidates;
+  findArgumentCopyElisionCandidates(DL, FuncInfo, ArgCopyElisionCandidates);
+
   // Set up the incoming argument description vector.
-  unsigned Idx = 1;
-  for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end();
-       I != E; ++I, ++Idx) {
+  unsigned Idx = 0;
+  for (const Argument &Arg : F.args()) {
+    ++Idx;
     SmallVector<EVT, 4> ValueVTs;
-    ComputeValueVTs(*TLI, DAG.getDataLayout(), I->getType(), ValueVTs);
-    bool isArgValueUsed = !I->use_empty();
+    ComputeValueVTs(*TLI, DAG.getDataLayout(), Arg.getType(), ValueVTs);
+    bool isArgValueUsed = !Arg.use_empty();
     unsigned PartBase = 0;
-    Type *FinalType = I->getType();
+    Type *FinalType = Arg.getType();
     if (F.getAttributes().hasAttribute(Idx, Attribute::ByVal))
       FinalType = cast<PointerType>(FinalType)->getElementType();
     bool NeedsRegBlock = TLI->functionArgumentNeedsConsecutiveRegisters(
@@ -8060,7 +8211,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
       // If we are using vectorcall calling convention, a structure that is
       // passed InReg - is surely an HVA
       if (F.getCallingConv() == CallingConv::X86_VectorCall &&
-          isa<StructType>(I->getType())) {
+          isa<StructType>(Arg.getType())) {
         // The first value of a structure is marked
         if (0 == Value)
           Flags.setHvaStart();
@@ -8092,7 +8243,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
         Flags.setByVal();
       }
       if (Flags.isByVal() || Flags.isInAlloca()) {
-        PointerType *Ty = cast<PointerType>(I->getType());
+        PointerType *Ty = cast<PointerType>(Arg.getType());
         Type *ElementTy = Ty->getElementType();
         Flags.setByValSize(DL.getTypeAllocSize(ElementTy));
         // For ByVal, alignment should be passed from FE. BE will guess if
@@ -8109,6 +8260,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
       if (NeedsRegBlock)
         Flags.setInConsecutiveRegs();
       Flags.setOrigAlign(OriginalAlignment);
+      if (ArgCopyElisionCandidates.count(&Arg))
+        Flags.setCopyElisionCandidate();
 
       MVT RegisterVT = TLI->getRegisterType(*CurDAG->getContext(), VT);
       unsigned NumRegs = TLI->getNumRegisters(*CurDAG->getContext(), VT);
@@ -8155,7 +8308,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
 
   // Set up the argument values.
   unsigned i = 0;
-  Idx = 1;
+  Idx = 0;
   if (!FuncInfo->CanLowerReturn) {
     // Create a virtual register for the sret pointer, and put in a copy
     // from the sret argument into it.
@@ -8181,25 +8334,39 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
     ++i;
   }
 
-  for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E;
-       ++I, ++Idx) {
+  SmallVector<SDValue, 4> Chains;
+  DenseMap<int, int> ArgCopyElisionFrameIndexMap;
+  for (const Argument &Arg : F.args()) {
+    ++Idx;
     SmallVector<SDValue, 4> ArgValues;
     SmallVector<EVT, 4> ValueVTs;
-    ComputeValueVTs(*TLI, DAG.getDataLayout(), I->getType(), ValueVTs);
+    ComputeValueVTs(*TLI, DAG.getDataLayout(), Arg.getType(), ValueVTs);
     unsigned NumValues = ValueVTs.size();
+    if (NumValues == 0)
+      continue;
+
+    bool ArgHasUses = !Arg.use_empty();
+
+    // Elide the copying store if the target loaded this argument from a
+    // suitable fixed stack object.
+    if (Ins[i].Flags.isCopyElisionCandidate()) {
+      tryToElideArgumentCopy(FuncInfo, Chains, ArgCopyElisionFrameIndexMap,
+                             ElidedArgCopyInstrs, ArgCopyElisionCandidates, Arg,
+                             InVals[i], ArgHasUses);
+    }
 
     // If this argument is unused then remember its value. It is used to
     // generate debugging information.
     bool isSwiftErrorArg =
         TLI->supportSwiftError() &&
         F.getAttributes().hasAttribute(Idx, Attribute::SwiftError);
-    if (I->use_empty() && NumValues && !isSwiftErrorArg) {
-      SDB->setUnusedArgValue(&*I, InVals[i]);
+    if (!ArgHasUses && !isSwiftErrorArg) {
+      SDB->setUnusedArgValue(&Arg, InVals[i]);
 
       // Also remember any frame index for use in FastISel.
       if (FrameIndexSDNode *FI =
           dyn_cast<FrameIndexSDNode>(InVals[i].getNode()))
-        FuncInfo->setArgumentFrameIndex(&*I, FI->getIndex());
+        FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex());
     }
 
     for (unsigned Val = 0; Val != NumValues; ++Val) {
@@ -8210,16 +8377,15 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
       // Even an apparent 'unused' swifterror argument needs to be returned. So
       // we do generate a copy for it that can be used on return from the
       // function.
-      if (!I->use_empty() || isSwiftErrorArg) {
+      if (ArgHasUses || isSwiftErrorArg) {
         Optional<ISD::NodeType> AssertOp;
         if (F.getAttributes().hasAttribute(Idx, Attribute::SExt))
           AssertOp = ISD::AssertSext;
         else if (F.getAttributes().hasAttribute(Idx, Attribute::ZExt))
           AssertOp = ISD::AssertZext;
-        ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i],
-                                             NumParts, PartVT, VT,
-                                             nullptr, AssertOp));
+        ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i], NumParts,
+                                             PartVT, VT, nullptr, AssertOp));
       }
 
       i += NumParts;
@@ -8232,18 +8398,18 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
     // Note down frame index.
     if (FrameIndexSDNode *FI =
         dyn_cast<FrameIndexSDNode>(ArgValues[0].getNode()))
-      FuncInfo->setArgumentFrameIndex(&*I, FI->getIndex());
+      FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex());
 
     SDValue Res = DAG.getMergeValues(makeArrayRef(ArgValues.data(),
                                                   NumValues),
                                      SDB->getCurSDLoc());
 
-    SDB->setValue(&*I, Res);
+    SDB->setValue(&Arg, Res);
     if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::BUILD_PAIR) {
       if (LoadSDNode *LNode =
           dyn_cast<LoadSDNode>(Res.getOperand(0).getNode()))
         if (FrameIndexSDNode *FI =
            dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode()))
-          FuncInfo->setArgumentFrameIndex(&*I, FI->getIndex());
+          FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex());
     }
 
     // Update the SwiftErrorVRegDefMap.
@@ -8263,18 +8429,36 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
       // uses with vregs.
       unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg();
       if (TargetRegisterInfo::isVirtualRegister(Reg)) {
-        FuncInfo->ValueMap[&*I] = Reg;
+        FuncInfo->ValueMap[&Arg] = Reg;
         continue;
       }
     }
-    if (!isOnlyUsedInEntryBlock(&*I, TM.Options.EnableFastISel)) {
-      FuncInfo->InitializeRegForValue(&*I);
-      SDB->CopyToExportRegsIfNeeded(&*I);
+    if (!isOnlyUsedInEntryBlock(&Arg, TM.Options.EnableFastISel)) {
+      FuncInfo->InitializeRegForValue(&Arg);
+      SDB->CopyToExportRegsIfNeeded(&Arg);
     }
   }
 
+  if (!Chains.empty()) {
+    Chains.push_back(NewRoot);
+    NewRoot = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
+  }
+
+  DAG.setRoot(NewRoot);
+
   assert(i == InVals.size() && "Argument register count mismatch!");
 
+  // If any argument copy elisions occurred and we have debug info, update the
+  // stale frame indices used in the dbg.declare variable info table.
+  MachineFunction::VariableDbgInfoMapTy &DbgDeclareInfo =
+      MF->getVariableDbgInfo();
+  if (!DbgDeclareInfo.empty() && !ArgCopyElisionFrameIndexMap.empty()) {
+    for (MachineFunction::VariableDbgInfo &VI : DbgDeclareInfo) {
+      auto I = ArgCopyElisionFrameIndexMap.find(VI.Slot);
+      if (I != ArgCopyElisionFrameIndexMap.end())
+        VI.Slot = I->second;
+    }
+  }
+
   // Finally, if the target has anything special to do, allow it to do so.
   EmitFunctionEntryCode();
 }