Diffstat (limited to 'lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp')
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 996
1 file changed, 590 insertions(+), 406 deletions(-)
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 996c95bd5f07..8708f58f1e63 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -84,10 +84,6 @@ LimitFPPrecision("limit-float-precision",
cl::location(LimitFloatPrecision),
cl::init(0));
-static cl::opt<bool>
-EnableFMFInDAG("enable-fmf-dag", cl::init(true), cl::Hidden,
- cl::desc("Enable fast-math-flags for DAG nodes"));
-
/// Minimum jump table density for normal functions.
static cl::opt<unsigned>
JumpTableDensity("jump-table-density", cl::init(10), cl::Hidden,
@@ -634,10 +630,6 @@ RegsForValue::RegsForValue(LLVMContext &Context, const TargetLowering &TLI,
}
}
-/// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from
-/// this value and returns the result as a ValueVT value. This uses
-/// Chain/Flag as the input and updates them for the output Chain/Flag.
-/// If the Flag pointer is NULL, no flag is used.
SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
FunctionLoweringInfo &FuncInfo,
const SDLoc &dl, SDValue &Chain,
@@ -739,10 +731,6 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(ValueVTs), Values);
}
-/// getCopyToRegs - Emit a series of CopyToReg nodes that copies the
-/// specified value into the registers specified by this object. This uses
-/// Chain/Flag as the input and updates them for the output Chain/Flag.
-/// If the Flag pointer is NULL, no flag is used.
void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG,
const SDLoc &dl, SDValue &Chain, SDValue *Flag,
const Value *V,
@@ -796,9 +784,6 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG,
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
}
-/// AddInlineAsmOperands - Add this value to the specified inlineasm node
-/// operand list. This adds the code marker and includes the number of
-/// values added into it.
void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching,
unsigned MatchingIdx, const SDLoc &dl,
SelectionDAG &DAG,
@@ -850,12 +835,6 @@ void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa,
LPadToCallSiteMap.clear();
}
-/// clear - Clear out the current SelectionDAG and the associated
-/// state and prepare this SelectionDAGBuilder object to be used
-/// for a new block. This doesn't clear out information about
-/// additional blocks that are needed to complete switch lowering
-/// or PHI node updating; that information is cleared out as it is
-/// consumed.
void SelectionDAGBuilder::clear() {
NodeMap.clear();
UnusedArgNodeMap.clear();
@@ -867,21 +846,10 @@ void SelectionDAGBuilder::clear() {
StatepointLowering.clear();
}
-/// clearDanglingDebugInfo - Clear the dangling debug information
-/// map. This function is separated from the clear so that debug
-/// information that is dangling in a basic block can be properly
-/// resolved in a different basic block. This allows the
-/// SelectionDAG to resolve dangling debug information attached
-/// to PHI nodes.
void SelectionDAGBuilder::clearDanglingDebugInfo() {
DanglingDebugInfoMap.clear();
}
-/// getRoot - Return the current virtual root of the Selection DAG,
-/// flushing any PendingLoad items. This must be done before emitting
-/// a store or any other node that may need to be ordered after any
-/// prior load instructions.
-///
SDValue SelectionDAGBuilder::getRoot() {
if (PendingLoads.empty())
return DAG.getRoot();
@@ -901,10 +869,6 @@ SDValue SelectionDAGBuilder::getRoot() {
return Root;
}
-/// getControlRoot - Similar to getRoot, but instead of flushing all the
-/// PendingLoad items, flush all the PendingExports items. It is necessary
-/// to do this before emitting a terminator instruction.
-///
SDValue SelectionDAGBuilder::getControlRoot() {
SDValue Root = DAG.getRoot();
@@ -937,7 +901,9 @@ void SelectionDAGBuilder::visit(const Instruction &I) {
HandlePHINodesInSuccessorBlocks(I.getParent());
}
- ++SDNodeOrder;
+ // Increase the SDNodeOrder if dealing with a non-debug instruction.
+ if (!isa<DbgInfoIntrinsic>(I))
+ ++SDNodeOrder;
CurInst = &I;
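
The new guard keeps debug intrinsics from advancing the ordering counter, so the DAG node order (and therefore codegen) is identical with and without debug info. A minimal sketch of the rule, written as a hypothetical free function rather than the member code above:

    #include "llvm/IR/IntrinsicInst.h"
    using namespace llvm;

    // dbg.value/dbg.declare reuse the order of the preceding real
    // instruction; only non-debug instructions advance SDNodeOrder.
    unsigned advanceOrder(const Instruction &I, unsigned SDNodeOrder) {
      if (!isa<DbgInfoIntrinsic>(I))
        ++SDNodeOrder;
      return SDNodeOrder;
    }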
@@ -1403,16 +1369,16 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
const Function *F = I.getParent()->getParent();
ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
- if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex,
+ if (F->getAttributes().hasAttribute(AttributeList::ReturnIndex,
Attribute::SExt))
ExtendKind = ISD::SIGN_EXTEND;
- else if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex,
+ else if (F->getAttributes().hasAttribute(AttributeList::ReturnIndex,
Attribute::ZExt))
ExtendKind = ISD::ZERO_EXTEND;
LLVMContext &Context = F->getContext();
- bool RetInReg = F->getAttributes().hasAttribute(AttributeSet::ReturnIndex,
- Attribute::InReg);
+ bool RetInReg = F->getAttributes().hasAttribute(
+ AttributeList::ReturnIndex, Attribute::InReg);
for (unsigned j = 0; j != NumValues; ++j) {
EVT VT = ValueVTs[j];
@@ -1582,7 +1548,8 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
MachineBasicBlock *CurBB,
MachineBasicBlock *SwitchBB,
BranchProbability TProb,
- BranchProbability FProb) {
+ BranchProbability FProb,
+ bool InvertCond) {
const BasicBlock *BB = CurBB->getBasicBlock();
// If the leaf of the tree is a comparison, merge the condition into
@@ -1596,10 +1563,14 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
isExportableFromCurrentBlock(BOp->getOperand(1), BB))) {
ISD::CondCode Condition;
if (const ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) {
- Condition = getICmpCondCode(IC->getPredicate());
+ ICmpInst::Predicate Pred =
+ InvertCond ? IC->getInversePredicate() : IC->getPredicate();
+ Condition = getICmpCondCode(Pred);
} else {
const FCmpInst *FC = cast<FCmpInst>(Cond);
- Condition = getFCmpCondCode(FC->getPredicate());
+ FCmpInst::Predicate Pred =
+ InvertCond ? FC->getInversePredicate() : FC->getPredicate();
+ Condition = getFCmpCondCode(Pred);
if (TM.Options.NoNaNsFPMath)
Condition = getFCmpCodeWithoutNaN(Condition);
}
@@ -1612,7 +1583,8 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
}
// Create a CaseBlock record representing this branch.
- CaseBlock CB(ISD::SETEQ, Cond, ConstantInt::getTrue(*DAG.getContext()),
+ ISD::CondCode Opc = InvertCond ? ISD::SETNE : ISD::SETEQ;
+ CaseBlock CB(Opc, Cond, ConstantInt::getTrue(*DAG.getContext()),
nullptr, TBB, FBB, CurBB, TProb, FProb);
SwitchCases.push_back(CB);
}
@@ -1625,16 +1597,44 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
MachineBasicBlock *SwitchBB,
Instruction::BinaryOps Opc,
BranchProbability TProb,
- BranchProbability FProb) {
- // If this node is not part of the or/and tree, emit it as a branch.
+ BranchProbability FProb,
+ bool InvertCond) {
+  // Skip over a 'not' that is not itself part of the tree, and remember to
+  // invert the opcode and operands at the next level.
+ if (BinaryOperator::isNot(Cond) && Cond->hasOneUse()) {
+ const Value *CondOp = BinaryOperator::getNotArgument(Cond);
+ if (InBlock(CondOp, CurBB->getBasicBlock())) {
+ FindMergedConditions(CondOp, TBB, FBB, CurBB, SwitchBB, Opc, TProb, FProb,
+ !InvertCond);
+ return;
+ }
+ }
+
const Instruction *BOp = dyn_cast<Instruction>(Cond);
+ // Compute the effective opcode for Cond, taking into account whether it needs
+ // to be inverted, e.g.
+ // and (not (or A, B)), C
+ // gets lowered as
+ // and (and (not A, not B), C)
+ unsigned BOpc = 0;
+ if (BOp) {
+ BOpc = BOp->getOpcode();
+ if (InvertCond) {
+ if (BOpc == Instruction::And)
+ BOpc = Instruction::Or;
+ else if (BOpc == Instruction::Or)
+ BOpc = Instruction::And;
+ }
+ }
+
+ // If this node is not part of the or/and tree, emit it as a branch.
if (!BOp || !(isa<BinaryOperator>(BOp) || isa<CmpInst>(BOp)) ||
- (unsigned)BOp->getOpcode() != Opc || !BOp->hasOneUse() ||
+ BOpc != Opc || !BOp->hasOneUse() ||
BOp->getParent() != CurBB->getBasicBlock() ||
!InBlock(BOp->getOperand(0), CurBB->getBasicBlock()) ||
!InBlock(BOp->getOperand(1), CurBB->getBasicBlock())) {
EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB,
- TProb, FProb);
+ TProb, FProb, InvertCond);
return;
}
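
At the leaves, condition inversion reduces to predicate inversion, and the synthetic compare-against-true case flips SETEQ to SETNE. A small sketch of the leaf rule, assuming only the standard CmpInst API:

    #include "llvm/IR/InstrTypes.h"
    using namespace llvm;

    // Branching on !(a == b) is emitted directly as a branch on (a != b):
    // getInversePredicate maps ICMP_EQ to ICMP_NE, FCMP_OLT to FCMP_UGE, etc.
    CmpInst::Predicate leafPredicate(CmpInst::Predicate P, bool InvertCond) {
      return InvertCond ? CmpInst::getInversePredicate(P) : P;
    }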
@@ -1669,14 +1669,14 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
auto NewFalseProb = TProb / 2 + FProb;
// Emit the LHS condition.
FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, SwitchBB, Opc,
- NewTrueProb, NewFalseProb);
+ NewTrueProb, NewFalseProb, InvertCond);
// Normalize A/2 and B to get A/(1+B) and 2B/(1+B).
SmallVector<BranchProbability, 2> Probs{TProb / 2, FProb};
BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end());
// Emit the RHS condition into TmpBB.
FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc,
- Probs[0], Probs[1]);
+ Probs[0], Probs[1], InvertCond);
} else {
assert(Opc == Instruction::And && "Unknown merge op!");
// Codegen X & Y as:
@@ -1702,14 +1702,14 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
auto NewFalseProb = FProb / 2;
// Emit the LHS condition.
FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, SwitchBB, Opc,
- NewTrueProb, NewFalseProb);
+ NewTrueProb, NewFalseProb, InvertCond);
// Normalize A and B/2 to get 2A/(1+A) and B/(1+A).
SmallVector<BranchProbability, 2> Probs{TProb, FProb / 2};
BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end());
// Emit the RHS condition into TmpBB.
FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc,
- Probs[0], Probs[1]);
+ Probs[0], Probs[1], InvertCond);
}
}
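
The normalization comments can be checked numerically; for example, with an even split at the top of an "X || Y" branch, the RHS probabilities come out as 1/3 : 2/3. A hedged check against the BranchProbability API:

    #include "llvm/Support/BranchProbability.h"
    #include <iterator>
    using llvm::BranchProbability;

    void orSplitExample() {
      BranchProbability TProb(1, 2), FProb(1, 2);
      // TProb/2 : FProb = 1/4 : 1/2 normalizes to 1/3 : 2/3, matching
      // "A/(1+B) and 2B/(1+B)" with A = TProb, B = FProb and A + B = 1.
      BranchProbability Probs[] = {TProb / 2, FProb};
      BranchProbability::normalizeProbabilities(std::begin(Probs),
                                                std::end(Probs));
      // Probs[0] ~ 1/3, Probs[1] ~ 2/3, up to fixed-point rounding.
    }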
@@ -1793,7 +1793,8 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) {
FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB,
Opcode,
getEdgeProbability(BrMBB, Succ0MBB),
- getEdgeProbability(BrMBB, Succ1MBB));
+ getEdgeProbability(BrMBB, Succ1MBB),
+ /*InvertCond=*/false);
// If the compares in later blocks need to use values not currently
// exported from this block, export them now. This block should always
// be the first entry.
@@ -2027,7 +2028,7 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
Entry.Node = StackSlot;
Entry.Ty = FnTy->getParamType(0);
if (Fn->hasAttribute(1, Attribute::AttrKind::InReg))
- Entry.isInReg = true;
+ Entry.IsInReg = true;
Args.push_back(Entry);
TargetLowering::CallLoweringInfo CLI(DAG);
@@ -2581,13 +2582,13 @@ void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) {
Flags.setNoSignedWrap(nsw);
Flags.setNoUnsignedWrap(nuw);
Flags.setVectorReduction(vec_redux);
- if (EnableFMFInDAG) {
- Flags.setAllowReciprocal(FMF.allowReciprocal());
- Flags.setNoInfs(FMF.noInfs());
- Flags.setNoNaNs(FMF.noNaNs());
- Flags.setNoSignedZeros(FMF.noSignedZeros());
- Flags.setUnsafeAlgebra(FMF.unsafeAlgebra());
- }
+ Flags.setAllowReciprocal(FMF.allowReciprocal());
+ Flags.setAllowContract(FMF.allowContract());
+ Flags.setNoInfs(FMF.noInfs());
+ Flags.setNoNaNs(FMF.noNaNs());
+ Flags.setNoSignedZeros(FMF.noSignedZeros());
+ Flags.setUnsafeAlgebra(FMF.unsafeAlgebra());
+
SDValue BinNodeValue = DAG.getNode(OpCode, getCurSDLoc(), Op1.getValueType(),
Op1, Op2, &Flags);
setValue(&I, BinNodeValue);
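
This hunk drops the -enable-fmf-dag escape hatch: fast-math flags now always propagate from the IR onto the DAG node, and the new 'contract' flag comes along. A hedged sketch of the IR-side query involved, as a hypothetical helper assuming this revision's FastMathFlags API:

    #include "llvm/IR/Operator.h"
    using namespace llvm;

    // All IR-level fast-math bits now reach the DAG unconditionally;
    // 'contract' (permission to form fused multiply-adds) is the new one.
    bool mayFormFMA(const Instruction &I) {
      if (const auto *FPOp = dyn_cast<FPMathOperator>(&I))
        return FPOp->getFastMathFlags().allowContract();
      return false;
    }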
@@ -2914,7 +2915,7 @@ void SelectionDAGBuilder::visitBitCast(const User &I) {
DestVT, N)); // convert types.
// Check if the original LLVM IR Operand was a ConstantInt, because getValue()
// might fold any kind of constant expression to an integer constant and that
- // is not what we are looking for. Only regcognize a bitcast of a genuine
+ // is not what we are looking for. Only recognize a bitcast of a genuine
// constant integer as an opaque constant.
else if(ConstantInt *C = dyn_cast<ConstantInt>(I.getOperand(0)))
setValue(&I, DAG.getConstant(C->getValue(), dl, DestVT, /*isTarget=*/false,
@@ -3067,14 +3068,10 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
if (SrcNumElts > MaskNumElts) {
// Analyze the access pattern of the vector to see if we can extract
- // two subvectors and do the shuffle. The analysis is done by calculating
- // the range of elements the mask access on both vectors.
- int MinRange[2] = { static_cast<int>(SrcNumElts),
- static_cast<int>(SrcNumElts)};
- int MaxRange[2] = {-1, -1};
-
- for (unsigned i = 0; i != MaskNumElts; ++i) {
- int Idx = Mask[i];
+ // two subvectors and do the shuffle.
+ int StartIdx[2] = { -1, -1 }; // StartIdx to extract from
+ bool CanExtract = true;
+ for (int Idx : Mask) {
unsigned Input = 0;
if (Idx < 0)
continue;
@@ -3083,41 +3080,28 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
Input = 1;
Idx -= SrcNumElts;
}
- if (Idx > MaxRange[Input])
- MaxRange[Input] = Idx;
- if (Idx < MinRange[Input])
- MinRange[Input] = Idx;
- }
-
- // Check if the access is smaller than the vector size and can we find
- // a reasonable extract index.
- int RangeUse[2] = { -1, -1 }; // 0 = Unused, 1 = Extract, -1 = Can not
- // Extract.
- int StartIdx[2]; // StartIdx to extract from
- for (unsigned Input = 0; Input < 2; ++Input) {
- if (MinRange[Input] >= (int)SrcNumElts && MaxRange[Input] < 0) {
- RangeUse[Input] = 0; // Unused
- StartIdx[Input] = 0;
- continue;
- }
- // Find a good start index that is a multiple of the mask length. Then
- // see if the rest of the elements are in range.
- StartIdx[Input] = (MinRange[Input]/MaskNumElts)*MaskNumElts;
- if (MaxRange[Input] - StartIdx[Input] < (int)MaskNumElts &&
- StartIdx[Input] + MaskNumElts <= SrcNumElts)
- RangeUse[Input] = 1; // Extract from a multiple of the mask length.
+      // If all the indices come from the same MaskNumElts-sized portion of
+      // the sources, we can use extract. Also make sure the extract wouldn't
+      // read past the end of the source.
+ int NewStartIdx = alignDown(Idx, MaskNumElts);
+ if (NewStartIdx + MaskNumElts > SrcNumElts ||
+ (StartIdx[Input] >= 0 && StartIdx[Input] != NewStartIdx))
+ CanExtract = false;
+ // Make sure we always update StartIdx as we use it to track if all
+ // elements are undef.
+ StartIdx[Input] = NewStartIdx;
}
- if (RangeUse[0] == 0 && RangeUse[1] == 0) {
+ if (StartIdx[0] < 0 && StartIdx[1] < 0) {
setValue(&I, DAG.getUNDEF(VT)); // Vectors are not used.
return;
}
- if (RangeUse[0] >= 0 && RangeUse[1] >= 0) {
+ if (CanExtract) {
// Extract appropriate subvector and generate a vector shuffle
for (unsigned Input = 0; Input < 2; ++Input) {
SDValue &Src = Input == 0 ? Src1 : Src2;
- if (RangeUse[Input] == 0)
+ if (StartIdx[Input] < 0)
Src = DAG.getUNDEF(VT);
else {
Src = DAG.getNode(
@@ -3128,16 +3112,12 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
}
// Calculate new mask.
- SmallVector<int, 8> MappedOps;
- for (unsigned i = 0; i != MaskNumElts; ++i) {
- int Idx = Mask[i];
- if (Idx >= 0) {
- if (Idx < (int)SrcNumElts)
- Idx -= StartIdx[0];
- else
- Idx -= SrcNumElts + StartIdx[1] - MaskNumElts;
- }
- MappedOps.push_back(Idx);
+ SmallVector<int, 8> MappedOps(Mask.begin(), Mask.end());
+ for (int &Idx : MappedOps) {
+ if (Idx >= (int)SrcNumElts)
+ Idx -= SrcNumElts + StartIdx[1] - MaskNumElts;
+ else if (Idx >= 0)
+ Idx -= StartIdx[0];
}
setValue(&I, DAG.getVectorShuffle(VT, DL, Src1, Src2, MappedOps));
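
The alignDown test admits exactly the masks whose indices all fall in one MaskNumElts-aligned window per source. A self-contained sketch of that check with assumed toy values (a plain function, not the DAG code), using llvm::alignDown from MathExtras.h:

    #include "llvm/Support/MathExtras.h"
    #include <cassert>

    void extractionCheckExample() {
      const unsigned MaskNumElts = 4, SrcNumElts = 8;
      const int Mask[] = {5, 6, 7, 4}; // all indices within one source
      int StartIdx = -1;
      bool CanExtract = true;
      for (int Idx : Mask) {
        int NewStartIdx = llvm::alignDown(Idx, MaskNumElts);
        // Reject if the window runs off the source, or if two different
        // windows would be needed for the same source.
        if (NewStartIdx + MaskNumElts > SrcNumElts ||
            (StartIdx >= 0 && StartIdx != NewStartIdx))
          CanExtract = false;
        StartIdx = NewStartIdx;
      }
      assert(CanExtract && StartIdx == 4); // EXTRACT_SUBVECTOR at element 4
    }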
@@ -3151,8 +3131,7 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
EVT EltVT = VT.getVectorElementType();
EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
SmallVector<SDValue,8> Ops;
- for (unsigned i = 0; i != MaskNumElts; ++i) {
- int Idx = Mask[i];
+ for (int Idx : Mask) {
SDValue Res;
if (Idx < 0) {
@@ -3281,7 +3260,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
// N = N + Offset
uint64_t Offset = DL->getStructLayout(StTy)->getElementOffset(Field);
- // In an inbouds GEP with an offset that is nonnegative even when
+ // In an inbounds GEP with an offset that is nonnegative even when
// interpreted as signed, assume there is no unsigned overflow.
SDNodeFlags Flags;
if (int64_t(Offset) >= 0 && cast<GEPOperator>(I).isInBounds())
@@ -4752,7 +4731,7 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
else
FuncInfo.ArgDbgValues.push_back(
BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE))
- .addOperand(*Op)
+ .add(*Op)
.addImm(Offset)
.addMetadata(Variable)
.addMetadata(Expr));
@@ -4764,7 +4743,7 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
SDDbgValue *SelectionDAGBuilder::getDbgValue(SDValue N,
DILocalVariable *Variable,
DIExpression *Expr, int64_t Offset,
- DebugLoc dl,
+ const DebugLoc &dl,
unsigned DbgSDNodeOrder) {
SDDbgValue *SDV;
auto *FISDN = dyn_cast<FrameIndexSDNode>(N.getNode());
@@ -4794,9 +4773,9 @@ SDDbgValue *SelectionDAGBuilder::getDbgValue(SDValue N,
# define setjmp_undefined_for_msvc
#endif
-/// visitIntrinsicCall - Lower the call to the specified intrinsic function. If
-/// we want to emit this as a call to a named external function, return the name
-/// otherwise lower it and return null.
+/// Lower the call to the specified intrinsic function. If we want to emit this
+/// as a call to a named external function, return the name. Otherwise, lower it
+/// and return null.
const char *
SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -4929,14 +4908,12 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
report_fatal_error("Unsupported element size");
TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(sdl)
- .setChain(getRoot())
- .setCallee(TLI.getLibcallCallingConv(LibraryCall),
- Type::getVoidTy(*DAG.getContext()),
- DAG.getExternalSymbol(
- TLI.getLibcallName(LibraryCall),
- TLI.getPointerTy(DAG.getDataLayout())),
- std::move(Args));
+ CLI.setDebugLoc(sdl).setChain(getRoot()).setLibCallee(
+ TLI.getLibcallCallingConv(LibraryCall),
+ Type::getVoidTy(*DAG.getContext()),
+ DAG.getExternalSymbol(TLI.getLibcallName(LibraryCall),
+ TLI.getPointerTy(DAG.getDataLayout())),
+ std::move(Args));
std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
DAG.setRoot(CallResult.second);
@@ -5301,6 +5278,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
getValue(I.getArgOperand(1)),
getValue(I.getArgOperand(2))));
return nullptr;
+ case Intrinsic::experimental_constrained_fadd:
+ case Intrinsic::experimental_constrained_fsub:
+ case Intrinsic::experimental_constrained_fmul:
+ case Intrinsic::experimental_constrained_fdiv:
+ case Intrinsic::experimental_constrained_frem:
+ visitConstrainedFPIntrinsic(I, Intrinsic);
+ return nullptr;
case Intrinsic::fmuladd: {
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict &&
@@ -5537,7 +5521,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::trap: {
StringRef TrapFuncName =
I.getAttributes()
- .getAttribute(AttributeSet::FunctionIndex, "trap-func-name")
+ .getAttribute(AttributeList::FunctionIndex, "trap-func-name")
.getValueAsString();
if (TrapFuncName.empty()) {
ISD::NodeType Op = (Intrinsic == Intrinsic::trap) ?
@@ -5548,7 +5532,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
TargetLowering::ArgListTy Args;
TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(sdl).setChain(getRoot()).setCallee(
+ CLI.setDebugLoc(sdl).setChain(getRoot()).setLibCallee(
CallingConv::C, I.getType(),
DAG.getExternalSymbol(TrapFuncName.data(),
TLI.getPointerTy(DAG.getDataLayout())),
@@ -5749,6 +5733,46 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
}
}
+void SelectionDAGBuilder::visitConstrainedFPIntrinsic(const CallInst &I,
+ unsigned Intrinsic) {
+ SDLoc sdl = getCurSDLoc();
+ unsigned Opcode;
+ switch (Intrinsic) {
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+ case Intrinsic::experimental_constrained_fadd:
+ Opcode = ISD::STRICT_FADD;
+ break;
+ case Intrinsic::experimental_constrained_fsub:
+ Opcode = ISD::STRICT_FSUB;
+ break;
+ case Intrinsic::experimental_constrained_fmul:
+ Opcode = ISD::STRICT_FMUL;
+ break;
+ case Intrinsic::experimental_constrained_fdiv:
+ Opcode = ISD::STRICT_FDIV;
+ break;
+ case Intrinsic::experimental_constrained_frem:
+ Opcode = ISD::STRICT_FREM;
+ break;
+ }
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDValue Chain = getRoot();
+ SDValue Ops[3] = { Chain, getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1)) };
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, DAG.getDataLayout(), I.getType(), ValueVTs);
+ ValueVTs.push_back(MVT::Other); // Out chain
+
+ SDVTList VTs = DAG.getVTList(ValueVTs);
+ SDValue Result = DAG.getNode(Opcode, sdl, VTs, Ops);
+
+ assert(Result.getNode()->getNumValues() == 2);
+ SDValue OutChain = Result.getValue(1);
+ DAG.setRoot(OutChain);
+ SDValue FPResult = Result.getValue(0);
+ setValue(&I, FPResult);
+}
+
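
For context, a source-level illustration of why the strict nodes carry a chain: the FP operation must stay ordered with respect to rounding-mode changes and status-flag reads. This is hedged illustrative user code, not part of the patch:

    #include <cfenv>

    double roundedAdd(double a, double b) {
      std::fesetround(FE_UPWARD);    // side effect the add must not move across
      double r = a + b;              // lowered through the constrained fadd
                                     // intrinsic when strict FP is requested
      std::fesetround(FE_TONEAREST);
      return r;
    }

The MVT::Other result of the STRICT_FADD node is what keeps the addition pinned between the two fesetround calls once it reaches the DAG.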
std::pair<SDValue, SDValue>
SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI,
const BasicBlock *EHPadBB) {
@@ -5827,7 +5851,6 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
Type *RetTy = CS.getType();
TargetLowering::ArgListTy Args;
- TargetLowering::ArgListEntry Entry;
Args.reserve(CS.arg_size());
const Value *SwiftErrorVal = nullptr;
@@ -5843,6 +5866,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
i != e; ++i) {
+ TargetLowering::ArgListEntry Entry;
const Value *V = *i;
// Skip empty types
@@ -5852,11 +5876,10 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
SDValue ArgNode = getValue(V);
Entry.Node = ArgNode; Entry.Ty = V->getType();
- // Skip the first return-type Attribute to get to params.
- Entry.setAttributes(&CS, i - CS.arg_begin() + 1);
+ Entry.setAttributes(&CS, i - CS.arg_begin());
// Use swifterror virtual register as input to the call.
- if (Entry.isSwiftError && TLI.supportSwiftError()) {
+ if (Entry.IsSwiftError && TLI.supportSwiftError()) {
SwiftErrorVal = V;
// We find the virtual register for the actual swifterror argument.
// Instead of using the Value, we use the virtual register instead.
@@ -5869,7 +5892,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
// If we have an explicit sret argument that is an Instruction, (i.e., it
// might point to function-local memory), we can't meaningfully tail-call.
- if (Entry.isSRet && isa<Instruction>(V))
+ if (Entry.IsSRet && isa<Instruction>(V))
isTailCall = false;
}
@@ -5912,8 +5935,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
}
}
-/// IsOnlyUsedInZeroEqualityComparison - Return true if it only matters that the
-/// value is equal or not-equal to zero.
+/// Return true if it only matters that the value is equal or not-equal to zero.
static bool IsOnlyUsedInZeroEqualityComparison(const Value *V) {
for (const User *U : V->users()) {
if (const ICmpInst *IC = dyn_cast<ICmpInst>(U))
@@ -5928,13 +5950,17 @@ static bool IsOnlyUsedInZeroEqualityComparison(const Value *V) {
}
static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT,
- Type *LoadTy,
SelectionDAGBuilder &Builder) {
// Check to see if this load can be trivially constant folded, e.g. if the
// input is from a string literal.
if (const Constant *LoadInput = dyn_cast<Constant>(PtrVal)) {
// Cast pointer to the type we really want to load.
+ Type *LoadTy =
+ Type::getIntNTy(PtrVal->getContext(), LoadVT.getScalarSizeInBits());
+ if (LoadVT.isVector())
+ LoadTy = VectorType::get(LoadTy, LoadVT.getVectorNumElements());
+
LoadInput = ConstantExpr::getBitCast(const_cast<Constant *>(LoadInput),
PointerType::getUnqual(LoadTy));
@@ -5967,8 +5993,8 @@ static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT,
return LoadVal;
}
-/// processIntegerCallValue - Record the value for an instruction that
-/// produces an integer result, converting the type where necessary.
+/// Record the value for an instruction that produces an integer result,
+/// converting the type where necessary.
void SelectionDAGBuilder::processIntegerCallValue(const Instruction &I,
SDValue Value,
bool IsSigned) {
@@ -5981,20 +6007,13 @@ void SelectionDAGBuilder::processIntegerCallValue(const Instruction &I,
setValue(&I, Value);
}
-/// visitMemCmpCall - See if we can lower a call to memcmp in an optimized form.
-/// If so, return true and lower it, otherwise return false and it will be
-/// lowered like a normal call.
+/// See if we can lower a memcmp call into an optimized form. If so, return
+/// true and lower it. Otherwise return false, and it will be lowered like a
+/// normal call.
+/// The caller already checked that \p I calls the appropriate LibFunc with a
+/// correct prototype.
bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) {
- // Verify that the prototype makes sense. int memcmp(void*,void*,size_t)
- if (I.getNumArgOperands() != 3)
- return false;
-
const Value *LHS = I.getArgOperand(0), *RHS = I.getArgOperand(1);
- if (!LHS->getType()->isPointerTy() || !RHS->getType()->isPointerTy() ||
- !I.getArgOperand(2)->getType()->isIntegerTy() ||
- !I.getType()->isIntegerTy())
- return false;
-
const Value *Size = I.getArgOperand(2);
const ConstantInt *CSize = dyn_cast<ConstantInt>(Size);
if (CSize && CSize->getZExtValue() == 0) {
@@ -6005,11 +6024,9 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) {
}
const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
- std::pair<SDValue, SDValue> Res =
- TSI.EmitTargetCodeForMemcmp(DAG, getCurSDLoc(), DAG.getRoot(),
- getValue(LHS), getValue(RHS), getValue(Size),
- MachinePointerInfo(LHS),
- MachinePointerInfo(RHS));
+ std::pair<SDValue, SDValue> Res = TSI.EmitTargetCodeForMemcmp(
+ DAG, getCurSDLoc(), DAG.getRoot(), getValue(LHS), getValue(RHS),
+ getValue(Size), MachinePointerInfo(LHS), MachinePointerInfo(RHS));
if (Res.first.getNode()) {
processIntegerCallValue(I, Res.first, true);
PendingLoads.push_back(Res.second);
@@ -6018,88 +6035,79 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) {
// memcmp(S1,S2,2) != 0 -> (*(short*)LHS != *(short*)RHS) != 0
// memcmp(S1,S2,4) != 0 -> (*(int*)LHS != *(int*)RHS) != 0
- if (CSize && IsOnlyUsedInZeroEqualityComparison(&I)) {
- bool ActuallyDoIt = true;
- MVT LoadVT;
- Type *LoadTy;
- switch (CSize->getZExtValue()) {
- default:
- LoadVT = MVT::Other;
- LoadTy = nullptr;
- ActuallyDoIt = false;
- break;
- case 2:
- LoadVT = MVT::i16;
- LoadTy = Type::getInt16Ty(CSize->getContext());
- break;
- case 4:
- LoadVT = MVT::i32;
- LoadTy = Type::getInt32Ty(CSize->getContext());
- break;
- case 8:
- LoadVT = MVT::i64;
- LoadTy = Type::getInt64Ty(CSize->getContext());
- break;
- /*
- case 16:
- LoadVT = MVT::v4i32;
- LoadTy = Type::getInt32Ty(CSize->getContext());
- LoadTy = VectorType::get(LoadTy, 4);
- break;
- */
- }
-
- // This turns into unaligned loads. We only do this if the target natively
- // supports the MVT we'll be loading or if it is small enough (<= 4) that
- // we'll only produce a small number of byte loads.
+ if (!CSize || !IsOnlyUsedInZeroEqualityComparison(&I))
+ return false;
- // Require that we can find a legal MVT, and only do this if the target
- // supports unaligned loads of that type. Expanding into byte loads would
- // bloat the code.
+ // If the target has a fast compare for the given size, it will return a
+ // preferred load type for that size. Require that the load VT is legal and
+ // that the target supports unaligned loads of that type. Otherwise, return
+ // INVALID.
+ auto hasFastLoadsAndCompare = [&](unsigned NumBits) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- if (ActuallyDoIt && CSize->getZExtValue() > 4) {
- unsigned DstAS = LHS->getType()->getPointerAddressSpace();
- unsigned SrcAS = RHS->getType()->getPointerAddressSpace();
+ MVT LVT = TLI.hasFastEqualityCompare(NumBits);
+ if (LVT != MVT::INVALID_SIMPLE_VALUE_TYPE) {
// TODO: Handle 5 byte compare as 4-byte + 1 byte.
// TODO: Handle 8 byte compare on x86-32 as two 32-bit loads.
// TODO: Check alignment of src and dest ptrs.
- if (!TLI.isTypeLegal(LoadVT) ||
- !TLI.allowsMisalignedMemoryAccesses(LoadVT, SrcAS) ||
- !TLI.allowsMisalignedMemoryAccesses(LoadVT, DstAS))
- ActuallyDoIt = false;
+ unsigned DstAS = LHS->getType()->getPointerAddressSpace();
+ unsigned SrcAS = RHS->getType()->getPointerAddressSpace();
+ if (!TLI.isTypeLegal(LVT) ||
+ !TLI.allowsMisalignedMemoryAccesses(LVT, SrcAS) ||
+ !TLI.allowsMisalignedMemoryAccesses(LVT, DstAS))
+ LVT = MVT::INVALID_SIMPLE_VALUE_TYPE;
}
- if (ActuallyDoIt) {
- SDValue LHSVal = getMemCmpLoad(LHS, LoadVT, LoadTy, *this);
- SDValue RHSVal = getMemCmpLoad(RHS, LoadVT, LoadTy, *this);
+ return LVT;
+ };
- SDValue Res = DAG.getSetCC(getCurSDLoc(), MVT::i1, LHSVal, RHSVal,
- ISD::SETNE);
- processIntegerCallValue(I, Res, false);
- return true;
- }
+ // This turns into unaligned loads. We only do this if the target natively
+ // supports the MVT we'll be loading or if it is small enough (<= 4) that
+ // we'll only produce a small number of byte loads.
+ MVT LoadVT;
+ unsigned NumBitsToCompare = CSize->getZExtValue() * 8;
+ switch (NumBitsToCompare) {
+ default:
+ return false;
+ case 16:
+ LoadVT = MVT::i16;
+ break;
+ case 32:
+ LoadVT = MVT::i32;
+ break;
+ case 64:
+ case 128:
+ case 256:
+ LoadVT = hasFastLoadsAndCompare(NumBitsToCompare);
+ break;
}
+ if (LoadVT == MVT::INVALID_SIMPLE_VALUE_TYPE)
+ return false;
- return false;
+ SDValue LoadL = getMemCmpLoad(LHS, LoadVT, *this);
+ SDValue LoadR = getMemCmpLoad(RHS, LoadVT, *this);
+
+ // Bitcast to a wide integer type if the loads are vectors.
+ if (LoadVT.isVector()) {
+ EVT CmpVT = EVT::getIntegerVT(LHS->getContext(), LoadVT.getSizeInBits());
+ LoadL = DAG.getBitcast(CmpVT, LoadL);
+ LoadR = DAG.getBitcast(CmpVT, LoadR);
+ }
+
+ SDValue Cmp = DAG.getSetCC(getCurSDLoc(), MVT::i1, LoadL, LoadR, ISD::SETNE);
+ processIntegerCallValue(I, Cmp, false);
+ return true;
}
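
At the source level, the transformation replaces a small equality-only memcmp with one wide load per side and a single compare. A hedged C++ analogue of the 32-bit case (ordinary code, not SelectionDAG):

    #include <cstdint>
    #include <cstring>

    // memcmp(a, b, 4), used only for a zero/non-zero test, becomes one
    // (possibly unaligned) 32-bit load per pointer plus one compare.
    bool equal4(const void *LHS, const void *RHS) {
      std::uint32_t L, R;
      std::memcpy(&L, LHS, sizeof L); // the DAG form is an i32 load
      std::memcpy(&R, RHS, sizeof R);
      return L == R; // the builder emits the inverse, an ISD::SETNE setcc
    }

For the 128- and 256-bit cases the loads are vectors, which is why the code above bitcasts them to wide integers before the setcc.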
-/// visitMemChrCall -- See if we can lower a memchr call into an optimized
-/// form. If so, return true and lower it, otherwise return false and it
-/// will be lowered like a normal call.
+/// See if we can lower a memchr call into an optimized form. If so, return
+/// true and lower it. Otherwise return false, and it will be lowered like a
+/// normal call.
+/// The caller already checked that \p I calls the appropriate LibFunc with a
+/// correct prototype.
bool SelectionDAGBuilder::visitMemChrCall(const CallInst &I) {
- // Verify that the prototype makes sense. void *memchr(void *, int, size_t)
- if (I.getNumArgOperands() != 3)
- return false;
-
const Value *Src = I.getArgOperand(0);
const Value *Char = I.getArgOperand(1);
const Value *Length = I.getArgOperand(2);
- if (!Src->getType()->isPointerTy() ||
- !Char->getType()->isIntegerTy() ||
- !Length->getType()->isIntegerTy() ||
- !I.getType()->isPointerTy())
- return false;
const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
std::pair<SDValue, SDValue> Res =
@@ -6115,15 +6123,12 @@ bool SelectionDAGBuilder::visitMemChrCall(const CallInst &I) {
return false;
}
-///
-/// visitMemPCpyCall -- lower a mempcpy call as a memcpy followed by code to
-/// to adjust the dst pointer by the size of the copied memory.
+/// See if we can lower a mempcpy call into an optimized form. If so, return
+/// true and lower it. Otherwise return false, and it will be lowered like a
+/// normal call.
+/// The caller already checked that \p I calls the appropriate LibFunc with a
+/// correct prototype.
bool SelectionDAGBuilder::visitMemPCpyCall(const CallInst &I) {
-
- // Verify argument count: void *mempcpy(void *, const void *, size_t)
- if (I.getNumArgOperands() != 3)
- return false;
-
SDValue Dst = getValue(I.getArgOperand(0));
SDValue Src = getValue(I.getArgOperand(1));
SDValue Size = getValue(I.getArgOperand(2));
@@ -6158,19 +6163,13 @@ bool SelectionDAGBuilder::visitMemPCpyCall(const CallInst &I) {
return true;
}
-/// visitStrCpyCall -- See if we can lower a strcpy or stpcpy call into an
-/// optimized form. If so, return true and lower it, otherwise return false
-/// and it will be lowered like a normal call.
+/// See if we can lower a strcpy call into an optimized form. If so, return
+/// true and lower it, otherwise return false and it will be lowered like a
+/// normal call.
+/// The caller already checked that \p I calls the appropriate LibFunc with a
+/// correct prototype.
bool SelectionDAGBuilder::visitStrCpyCall(const CallInst &I, bool isStpcpy) {
- // Verify that the prototype makes sense. char *strcpy(char *, char *)
- if (I.getNumArgOperands() != 2)
- return false;
-
const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1);
- if (!Arg0->getType()->isPointerTy() ||
- !Arg1->getType()->isPointerTy() ||
- !I.getType()->isPointerTy())
- return false;
const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
std::pair<SDValue, SDValue> Res =
@@ -6187,19 +6186,13 @@ bool SelectionDAGBuilder::visitStrCpyCall(const CallInst &I, bool isStpcpy) {
return false;
}
-/// visitStrCmpCall - See if we can lower a call to strcmp in an optimized form.
-/// If so, return true and lower it, otherwise return false and it will be
-/// lowered like a normal call.
+/// See if we can lower a strcmp call into an optimized form. If so, return
+/// true and lower it, otherwise return false and it will be lowered like a
+/// normal call.
+/// The caller already checked that \p I calls the appropriate LibFunc with a
+/// correct prototype.
bool SelectionDAGBuilder::visitStrCmpCall(const CallInst &I) {
- // Verify that the prototype makes sense. int strcmp(void*,void*)
- if (I.getNumArgOperands() != 2)
- return false;
-
const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1);
- if (!Arg0->getType()->isPointerTy() ||
- !Arg1->getType()->isPointerTy() ||
- !I.getType()->isIntegerTy())
- return false;
const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
std::pair<SDValue, SDValue> Res =
@@ -6216,17 +6209,13 @@ bool SelectionDAGBuilder::visitStrCmpCall(const CallInst &I) {
return false;
}
-/// visitStrLenCall -- See if we can lower a strlen call into an optimized
-/// form. If so, return true and lower it, otherwise return false and it
-/// will be lowered like a normal call.
+/// See if we can lower a strlen call into an optimized form. If so, return
+/// true and lower it, otherwise return false and it will be lowered like a
+/// normal call.
+/// The caller already checked that \p I calls the appropriate LibFunc with a
+/// correct prototype.
bool SelectionDAGBuilder::visitStrLenCall(const CallInst &I) {
- // Verify that the prototype makes sense. size_t strlen(char *)
- if (I.getNumArgOperands() != 1)
- return false;
-
const Value *Arg0 = I.getArgOperand(0);
- if (!Arg0->getType()->isPointerTy() || !I.getType()->isIntegerTy())
- return false;
const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
std::pair<SDValue, SDValue> Res =
@@ -6241,19 +6230,13 @@ bool SelectionDAGBuilder::visitStrLenCall(const CallInst &I) {
return false;
}
-/// visitStrNLenCall -- See if we can lower a strnlen call into an optimized
-/// form. If so, return true and lower it, otherwise return false and it
-/// will be lowered like a normal call.
+/// See if we can lower a strnlen call into an optimized form. If so, return
+/// true and lower it, otherwise return false and it will be lowered like a
+/// normal call.
+/// The caller already checked that \p I calls the appropriate LibFunc with a
+/// correct prototype.
bool SelectionDAGBuilder::visitStrNLenCall(const CallInst &I) {
- // Verify that the prototype makes sense. size_t strnlen(char *, size_t)
- if (I.getNumArgOperands() != 2)
- return false;
-
const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1);
- if (!Arg0->getType()->isPointerTy() ||
- !Arg1->getType()->isIntegerTy() ||
- !I.getType()->isIntegerTy())
- return false;
const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
std::pair<SDValue, SDValue> Res =
@@ -6269,16 +6252,15 @@ bool SelectionDAGBuilder::visitStrNLenCall(const CallInst &I) {
return false;
}
-/// visitUnaryFloatCall - If a call instruction is a unary floating-point
-/// operation (as expected), translate it to an SDNode with the specified opcode
-/// and return true.
+/// See if we can lower a unary floating-point operation into an SDNode with
+/// the specified Opcode. If so, return true and lower it, otherwise return
+/// false and it will be lowered like a normal call.
+/// The caller already checked that \p I calls the appropriate LibFunc with a
+/// correct prototype.
bool SelectionDAGBuilder::visitUnaryFloatCall(const CallInst &I,
unsigned Opcode) {
- // Sanity check that it really is a unary floating-point call.
- if (I.getNumArgOperands() != 1 ||
- !I.getArgOperand(0)->getType()->isFloatingPointTy() ||
- I.getType() != I.getArgOperand(0)->getType() ||
- !I.onlyReadsMemory())
+ // We already checked this call's prototype; verify it doesn't modify errno.
+ if (!I.onlyReadsMemory())
return false;
SDValue Tmp = getValue(I.getArgOperand(0));
@@ -6286,17 +6268,15 @@ bool SelectionDAGBuilder::visitUnaryFloatCall(const CallInst &I,
return true;
}
-/// visitBinaryFloatCall - If a call instruction is a binary floating-point
-/// operation (as expected), translate it to an SDNode with the specified opcode
-/// and return true.
+/// See if we can lower a binary floating-point operation into an SDNode with
+/// the specified Opcode. If so, return true and lower it. Otherwise return
+/// false, and it will be lowered like a normal call.
+/// The caller already checked that \p I calls the appropriate LibFunc with a
+/// correct prototype.
bool SelectionDAGBuilder::visitBinaryFloatCall(const CallInst &I,
unsigned Opcode) {
- // Sanity check that it really is a binary floating-point call.
- if (I.getNumArgOperands() != 2 ||
- !I.getArgOperand(0)->getType()->isFloatingPointTy() ||
- I.getType() != I.getArgOperand(0)->getType() ||
- I.getType() != I.getArgOperand(1)->getType() ||
- !I.onlyReadsMemory())
+ // We already checked this call's prototype; verify it doesn't modify errno.
+ if (!I.onlyReadsMemory())
return false;
SDValue Tmp0 = getValue(I.getArgOperand(0));
@@ -6336,20 +6316,18 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
// Check for well-known libc/libm calls. If the function is internal, it
// can't be a library call. Don't do the check if marked as nobuiltin for
// some reason.
- LibFunc::Func Func;
+ LibFunc Func;
if (!I.isNoBuiltin() && !F->hasLocalLinkage() && F->hasName() &&
- LibInfo->getLibFunc(F->getName(), Func) &&
+ LibInfo->getLibFunc(*F, Func) &&
LibInfo->hasOptimizedCodeGen(Func)) {
switch (Func) {
default: break;
- case LibFunc::copysign:
- case LibFunc::copysignf:
- case LibFunc::copysignl:
- if (I.getNumArgOperands() == 2 && // Basic sanity checks.
- I.getArgOperand(0)->getType()->isFloatingPointTy() &&
- I.getType() == I.getArgOperand(0)->getType() &&
- I.getType() == I.getArgOperand(1)->getType() &&
- I.onlyReadsMemory()) {
+ case LibFunc_copysign:
+ case LibFunc_copysignf:
+ case LibFunc_copysignl:
+ // We already checked this call's prototype; verify it doesn't modify
+ // errno.
+ if (I.onlyReadsMemory()) {
SDValue LHS = getValue(I.getArgOperand(0));
SDValue RHS = getValue(I.getArgOperand(1));
setValue(&I, DAG.getNode(ISD::FCOPYSIGN, getCurSDLoc(),
@@ -6357,122 +6335,122 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
return;
}
break;
- case LibFunc::fabs:
- case LibFunc::fabsf:
- case LibFunc::fabsl:
+ case LibFunc_fabs:
+ case LibFunc_fabsf:
+ case LibFunc_fabsl:
if (visitUnaryFloatCall(I, ISD::FABS))
return;
break;
- case LibFunc::fmin:
- case LibFunc::fminf:
- case LibFunc::fminl:
+ case LibFunc_fmin:
+ case LibFunc_fminf:
+ case LibFunc_fminl:
if (visitBinaryFloatCall(I, ISD::FMINNUM))
return;
break;
- case LibFunc::fmax:
- case LibFunc::fmaxf:
- case LibFunc::fmaxl:
+ case LibFunc_fmax:
+ case LibFunc_fmaxf:
+ case LibFunc_fmaxl:
if (visitBinaryFloatCall(I, ISD::FMAXNUM))
return;
break;
- case LibFunc::sin:
- case LibFunc::sinf:
- case LibFunc::sinl:
+ case LibFunc_sin:
+ case LibFunc_sinf:
+ case LibFunc_sinl:
if (visitUnaryFloatCall(I, ISD::FSIN))
return;
break;
- case LibFunc::cos:
- case LibFunc::cosf:
- case LibFunc::cosl:
+ case LibFunc_cos:
+ case LibFunc_cosf:
+ case LibFunc_cosl:
if (visitUnaryFloatCall(I, ISD::FCOS))
return;
break;
- case LibFunc::sqrt:
- case LibFunc::sqrtf:
- case LibFunc::sqrtl:
- case LibFunc::sqrt_finite:
- case LibFunc::sqrtf_finite:
- case LibFunc::sqrtl_finite:
+ case LibFunc_sqrt:
+ case LibFunc_sqrtf:
+ case LibFunc_sqrtl:
+ case LibFunc_sqrt_finite:
+ case LibFunc_sqrtf_finite:
+ case LibFunc_sqrtl_finite:
if (visitUnaryFloatCall(I, ISD::FSQRT))
return;
break;
- case LibFunc::floor:
- case LibFunc::floorf:
- case LibFunc::floorl:
+ case LibFunc_floor:
+ case LibFunc_floorf:
+ case LibFunc_floorl:
if (visitUnaryFloatCall(I, ISD::FFLOOR))
return;
break;
- case LibFunc::nearbyint:
- case LibFunc::nearbyintf:
- case LibFunc::nearbyintl:
+ case LibFunc_nearbyint:
+ case LibFunc_nearbyintf:
+ case LibFunc_nearbyintl:
if (visitUnaryFloatCall(I, ISD::FNEARBYINT))
return;
break;
- case LibFunc::ceil:
- case LibFunc::ceilf:
- case LibFunc::ceill:
+ case LibFunc_ceil:
+ case LibFunc_ceilf:
+ case LibFunc_ceill:
if (visitUnaryFloatCall(I, ISD::FCEIL))
return;
break;
- case LibFunc::rint:
- case LibFunc::rintf:
- case LibFunc::rintl:
+ case LibFunc_rint:
+ case LibFunc_rintf:
+ case LibFunc_rintl:
if (visitUnaryFloatCall(I, ISD::FRINT))
return;
break;
- case LibFunc::round:
- case LibFunc::roundf:
- case LibFunc::roundl:
+ case LibFunc_round:
+ case LibFunc_roundf:
+ case LibFunc_roundl:
if (visitUnaryFloatCall(I, ISD::FROUND))
return;
break;
- case LibFunc::trunc:
- case LibFunc::truncf:
- case LibFunc::truncl:
+ case LibFunc_trunc:
+ case LibFunc_truncf:
+ case LibFunc_truncl:
if (visitUnaryFloatCall(I, ISD::FTRUNC))
return;
break;
- case LibFunc::log2:
- case LibFunc::log2f:
- case LibFunc::log2l:
+ case LibFunc_log2:
+ case LibFunc_log2f:
+ case LibFunc_log2l:
if (visitUnaryFloatCall(I, ISD::FLOG2))
return;
break;
- case LibFunc::exp2:
- case LibFunc::exp2f:
- case LibFunc::exp2l:
+ case LibFunc_exp2:
+ case LibFunc_exp2f:
+ case LibFunc_exp2l:
if (visitUnaryFloatCall(I, ISD::FEXP2))
return;
break;
- case LibFunc::memcmp:
+ case LibFunc_memcmp:
if (visitMemCmpCall(I))
return;
break;
- case LibFunc::mempcpy:
+ case LibFunc_mempcpy:
if (visitMemPCpyCall(I))
return;
break;
- case LibFunc::memchr:
+ case LibFunc_memchr:
if (visitMemChrCall(I))
return;
break;
- case LibFunc::strcpy:
+ case LibFunc_strcpy:
if (visitStrCpyCall(I, false))
return;
break;
- case LibFunc::stpcpy:
+ case LibFunc_stpcpy:
if (visitStrCpyCall(I, true))
return;
break;
- case LibFunc::strcmp:
+ case LibFunc_strcmp:
if (visitStrCmpCall(I))
return;
break;
- case LibFunc::strlen:
+ case LibFunc_strlen:
if (visitStrLenCall(I))
return;
break;
- case LibFunc::strnlen:
+ case LibFunc_strnlen:
if (visitStrNLenCall(I))
return;
break;
@@ -7361,7 +7339,7 @@ void SelectionDAGBuilder::populateCallLoweringInfo(
// Populate the argument list.
// Attributes for args start at offset 1, after the return attribute.
- for (unsigned ArgI = ArgIdx, ArgE = ArgIdx + NumArgs, AttrI = ArgIdx + 1;
+ for (unsigned ArgI = ArgIdx, ArgE = ArgIdx + NumArgs;
ArgI != ArgE; ++ArgI) {
const Value *V = CS->getOperand(ArgI);
@@ -7370,7 +7348,7 @@ void SelectionDAGBuilder::populateCallLoweringInfo(
TargetLowering::ArgListEntry Entry;
Entry.Node = getValue(V);
Entry.Ty = V->getType();
- Entry.setAttributes(&CS, AttrI);
+ Entry.setAttributes(&CS, ArgIdx);
Args.push_back(Entry);
}
@@ -7631,9 +7609,9 @@ void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS,
FuncInfo.MF->getFrameInfo().setHasPatchPoint();
}
-/// Returns an AttributeSet representing the attributes applied to the return
+/// Returns an AttributeList representing the attributes applied to the return
/// value of the given call.
-static AttributeSet getReturnAttrs(TargetLowering::CallLoweringInfo &CLI) {
+static AttributeList getReturnAttrs(TargetLowering::CallLoweringInfo &CLI) {
SmallVector<Attribute::AttrKind, 2> Attrs;
if (CLI.RetSExt)
Attrs.push_back(Attribute::SExt);
@@ -7642,8 +7620,8 @@ static AttributeSet getReturnAttrs(TargetLowering::CallLoweringInfo &CLI) {
if (CLI.IsInReg)
Attrs.push_back(Attribute::InReg);
- return AttributeSet::get(CLI.RetTy->getContext(), AttributeSet::ReturnIndex,
- Attrs);
+ return AttributeList::get(CLI.RetTy->getContext(), AttributeList::ReturnIndex,
+ Attrs);
}
/// TargetLowering::LowerCallTo - This is the default LowerCallTo
@@ -7683,15 +7661,15 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
ArgListEntry Entry;
Entry.Node = DemoteStackSlot;
Entry.Ty = StackSlotPtrType;
- Entry.isSExt = false;
- Entry.isZExt = false;
- Entry.isInReg = false;
- Entry.isSRet = true;
- Entry.isNest = false;
- Entry.isByVal = false;
- Entry.isReturned = false;
- Entry.isSwiftSelf = false;
- Entry.isSwiftError = false;
+ Entry.IsSExt = false;
+ Entry.IsZExt = false;
+ Entry.IsInReg = false;
+ Entry.IsSRet = true;
+ Entry.IsNest = false;
+ Entry.IsByVal = false;
+ Entry.IsReturned = false;
+ Entry.IsSwiftSelf = false;
+ Entry.IsSwiftError = false;
Entry.Alignment = Align;
CLI.getArgs().insert(CLI.getArgs().begin(), Entry);
CLI.RetTy = Type::getVoidTy(CLI.RetTy->getContext());
@@ -7724,7 +7702,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
ArgListTy &Args = CLI.getArgs();
if (supportSwiftError()) {
for (unsigned i = 0, e = Args.size(); i != e; ++i) {
- if (Args[i].isSwiftError) {
+ if (Args[i].IsSwiftError) {
ISD::InputArg MyFlags;
MyFlags.VT = getPointerTy(DL);
MyFlags.ArgVT = EVT(getPointerTy(DL));
@@ -7741,7 +7719,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(*this, DL, Args[i].Ty, ValueVTs);
Type *FinalType = Args[i].Ty;
- if (Args[i].isByVal)
+ if (Args[i].IsByVal)
FinalType = cast<PointerType>(Args[i].Ty)->getElementType();
bool NeedsRegBlock = functionArgumentNeedsConsecutiveRegisters(
FinalType, CLI.CallConv, CLI.IsVarArg);
@@ -7754,11 +7732,11 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
ISD::ArgFlagsTy Flags;
unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy);
- if (Args[i].isZExt)
+ if (Args[i].IsZExt)
Flags.setZExt();
- if (Args[i].isSExt)
+ if (Args[i].IsSExt)
Flags.setSExt();
- if (Args[i].isInReg) {
+ if (Args[i].IsInReg) {
// If we are using vectorcall calling convention, a structure that is
// passed InReg - is surely an HVA
if (CLI.CallConv == CallingConv::X86_VectorCall &&
@@ -7771,15 +7749,15 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
// Set InReg Flag
Flags.setInReg();
}
- if (Args[i].isSRet)
+ if (Args[i].IsSRet)
Flags.setSRet();
- if (Args[i].isSwiftSelf)
+ if (Args[i].IsSwiftSelf)
Flags.setSwiftSelf();
- if (Args[i].isSwiftError)
+ if (Args[i].IsSwiftError)
Flags.setSwiftError();
- if (Args[i].isByVal)
+ if (Args[i].IsByVal)
Flags.setByVal();
- if (Args[i].isInAlloca) {
+ if (Args[i].IsInAlloca) {
Flags.setInAlloca();
// Set the byval flag for CCAssignFn callbacks that don't know about
// inalloca. This way we can know how many bytes we should've allocated
@@ -7788,7 +7766,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
// in the various CC lowering callbacks.
Flags.setByVal();
}
- if (Args[i].isByVal || Args[i].isInAlloca) {
+ if (Args[i].IsByVal || Args[i].IsInAlloca) {
PointerType *Ty = cast<PointerType>(Args[i].Ty);
Type *ElementTy = Ty->getElementType();
Flags.setByValSize(DL.getTypeAllocSize(ElementTy));
@@ -7801,7 +7779,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
FrameAlign = getByValTypeAlignment(ElementTy, DL);
Flags.setByValAlign(FrameAlign);
}
- if (Args[i].isNest)
+ if (Args[i].IsNest)
Flags.setNest();
if (NeedsRegBlock)
Flags.setInConsecutiveRegs();
@@ -7812,13 +7790,13 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
SmallVector<SDValue, 4> Parts(NumParts);
ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
- if (Args[i].isSExt)
+ if (Args[i].IsSExt)
ExtendKind = ISD::SIGN_EXTEND;
- else if (Args[i].isZExt)
+ else if (Args[i].IsZExt)
ExtendKind = ISD::ZERO_EXTEND;
// Conservatively only handle 'returned' on non-vectors for now
- if (Args[i].isReturned && !Op.getValueType().isVector()) {
+ if (Args[i].IsReturned && !Op.getValueType().isVector()) {
assert(CLI.RetTy == Args[i].Ty && RetTys.size() == NumValues &&
"unexpected use of 'returned'");
// Before passing 'returned' to the target lowering code, ensure that
@@ -7832,9 +7810,9 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
// parameter extension method is not compatible with the return
// extension method
if ((NumParts * PartVT.getSizeInBits() == VT.getSizeInBits()) ||
- (ExtendKind != ISD::ANY_EXTEND &&
- CLI.RetSExt == Args[i].isSExt && CLI.RetZExt == Args[i].isZExt))
- Flags.setReturned();
+ (ExtendKind != ISD::ANY_EXTEND && CLI.RetSExt == Args[i].IsSExt &&
+ CLI.RetZExt == Args[i].IsZExt))
+ Flags.setReturned();
}
getCopyToParts(CLI.DAG, CLI.DL, Op, &Parts[0], NumParts, PartVT,
@@ -8010,6 +7988,173 @@ static bool isOnlyUsedInEntryBlock(const Argument *A, bool FastISel) {
return true;
}
+typedef DenseMap<const Argument *,
+ std::pair<const AllocaInst *, const StoreInst *>>
+ ArgCopyElisionMapTy;
+
+/// Scan the entry block of the function in FuncInfo for arguments that look
+/// like copies into a local alloca. Record any copied arguments in
+/// ArgCopyElisionCandidates.
+static void
+findArgumentCopyElisionCandidates(const DataLayout &DL,
+ FunctionLoweringInfo *FuncInfo,
+ ArgCopyElisionMapTy &ArgCopyElisionCandidates) {
+ // Record the state of every static alloca used in the entry block. Argument
+ // allocas are all used in the entry block, so we need approximately as many
+ // entries as we have arguments.
+ enum StaticAllocaInfo { Unknown, Clobbered, Elidable };
+ SmallDenseMap<const AllocaInst *, StaticAllocaInfo, 8> StaticAllocas;
+ unsigned NumArgs = FuncInfo->Fn->arg_size();
+ StaticAllocas.reserve(NumArgs * 2);
+
+ auto GetInfoIfStaticAlloca = [&](const Value *V) -> StaticAllocaInfo * {
+ if (!V)
+ return nullptr;
+ V = V->stripPointerCasts();
+ const auto *AI = dyn_cast<AllocaInst>(V);
+ if (!AI || !AI->isStaticAlloca() || !FuncInfo->StaticAllocaMap.count(AI))
+ return nullptr;
+ auto Iter = StaticAllocas.insert({AI, Unknown});
+ return &Iter.first->second;
+ };
+
+ // Look for stores of arguments to static allocas. Look through bitcasts and
+ // GEPs to handle type coercions, as long as the alloca is fully initialized
+ // by the store. Any non-store use of an alloca escapes it and any subsequent
+ // unanalyzed store might write it.
+ // FIXME: Handle structs initialized with multiple stores.
+ for (const Instruction &I : FuncInfo->Fn->getEntryBlock()) {
+ // Look for stores, and handle non-store uses conservatively.
+ const auto *SI = dyn_cast<StoreInst>(&I);
+ if (!SI) {
+ // We will look through cast uses, so ignore them completely.
+ if (I.isCast())
+ continue;
+ // Ignore debug info intrinsics, they don't escape or store to allocas.
+ if (isa<DbgInfoIntrinsic>(I))
+ continue;
+ // This is an unknown instruction. Assume it escapes or writes to all
+ // static alloca operands.
+ for (const Use &U : I.operands()) {
+ if (StaticAllocaInfo *Info = GetInfoIfStaticAlloca(U))
+ *Info = StaticAllocaInfo::Clobbered;
+ }
+ continue;
+ }
+
+ // If the stored value is a static alloca, mark it as escaped.
+ if (StaticAllocaInfo *Info = GetInfoIfStaticAlloca(SI->getValueOperand()))
+ *Info = StaticAllocaInfo::Clobbered;
+
+ // Check if the destination is a static alloca.
+ const Value *Dst = SI->getPointerOperand()->stripPointerCasts();
+ StaticAllocaInfo *Info = GetInfoIfStaticAlloca(Dst);
+ if (!Info)
+ continue;
+ const AllocaInst *AI = cast<AllocaInst>(Dst);
+
+ // Skip allocas that have been initialized or clobbered.
+ if (*Info != StaticAllocaInfo::Unknown)
+ continue;
+
+ // Check if the stored value is an argument, and that this store fully
+ // initializes the alloca. Don't elide copies from the same argument twice.
+ const Value *Val = SI->getValueOperand()->stripPointerCasts();
+ const auto *Arg = dyn_cast<Argument>(Val);
+ if (!Arg || Arg->hasInAllocaAttr() || Arg->hasByValAttr() ||
+ Arg->getType()->isEmptyTy() ||
+ DL.getTypeStoreSize(Arg->getType()) !=
+ DL.getTypeAllocSize(AI->getAllocatedType()) ||
+ ArgCopyElisionCandidates.count(Arg)) {
+ *Info = StaticAllocaInfo::Clobbered;
+ continue;
+ }
+
+ DEBUG(dbgs() << "Found argument copy elision candidate: " << *AI << '\n');
+
+ // Mark this alloca and store for argument copy elision.
+ *Info = StaticAllocaInfo::Elidable;
+ ArgCopyElisionCandidates.insert({Arg, {AI, SI}});
+
+ // Stop scanning if we've seen all arguments. This will happen early in -O0
+ // builds, which is useful, because -O0 builds have large entry blocks and
+ // many allocas.
+ if (ArgCopyElisionCandidates.size() == NumArgs)
+ break;
+ }
+}
+
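
The pattern being recorded is the frontend's argument spill. A hedged sketch of where such candidates come from (the exact IR a frontend emits may differ):

    extern void use(int *);

    void f(int x) {
      // Taking x's address forces an entry-block spill:
      //   %x.addr = alloca i32
      //   store i32 %x, i32* %x.addr
      // The pair (x, {%x.addr, store}) is recorded as an elision candidate,
      // assuming nothing clobbers or escapes the alloca first. If the target
      // later reports that x arrived in a fixed stack slot, the store is
      // elided and %x.addr aliases that incoming slot directly.
      use(&x);
    }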
+/// Try to elide argument copies from memory into a local alloca. Succeeds if
+/// ArgVal is a load from a suitable fixed stack object.
+static void tryToElideArgumentCopy(
+ FunctionLoweringInfo *FuncInfo, SmallVectorImpl<SDValue> &Chains,
+ DenseMap<int, int> &ArgCopyElisionFrameIndexMap,
+ SmallPtrSetImpl<const Instruction *> &ElidedArgCopyInstrs,
+ ArgCopyElisionMapTy &ArgCopyElisionCandidates, const Argument &Arg,
+ SDValue ArgVal, bool &ArgHasUses) {
+ // Check if this is a load from a fixed stack object.
+ auto *LNode = dyn_cast<LoadSDNode>(ArgVal);
+ if (!LNode)
+ return;
+ auto *FINode = dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode());
+ if (!FINode)
+ return;
+
+ // Check that the fixed stack object is the right size and alignment.
+ // Look at the alignment that the user wrote on the alloca instead of looking
+ // at the stack object.
+ auto ArgCopyIter = ArgCopyElisionCandidates.find(&Arg);
+ assert(ArgCopyIter != ArgCopyElisionCandidates.end());
+ const AllocaInst *AI = ArgCopyIter->second.first;
+ int FixedIndex = FINode->getIndex();
+ int &AllocaIndex = FuncInfo->StaticAllocaMap[AI];
+ int OldIndex = AllocaIndex;
+ MachineFrameInfo &MFI = FuncInfo->MF->getFrameInfo();
+ if (MFI.getObjectSize(FixedIndex) != MFI.getObjectSize(OldIndex)) {
+ DEBUG(dbgs() << " argument copy elision failed due to bad fixed stack "
+ "object size\n");
+ return;
+ }
+ unsigned RequiredAlignment = AI->getAlignment();
+ if (!RequiredAlignment) {
+ RequiredAlignment = FuncInfo->MF->getDataLayout().getABITypeAlignment(
+ AI->getAllocatedType());
+ }
+ if (MFI.getObjectAlignment(FixedIndex) < RequiredAlignment) {
+ DEBUG(dbgs() << " argument copy elision failed: alignment of alloca "
+ "greater than stack argument alignment ("
+ << RequiredAlignment << " vs "
+ << MFI.getObjectAlignment(FixedIndex) << ")\n");
+ return;
+ }
+
+ // Perform the elision. Delete the old stack object and replace its only use
+ // in the variable info map. Mark the stack object as mutable.
+ DEBUG({
+ dbgs() << "Eliding argument copy from " << Arg << " to " << *AI << '\n'
+ << " Replacing frame index " << OldIndex << " with " << FixedIndex
+ << '\n';
+ });
+ MFI.RemoveStackObject(OldIndex);
+ MFI.setIsImmutableObjectIndex(FixedIndex, false);
+ AllocaIndex = FixedIndex;
+ ArgCopyElisionFrameIndexMap.insert({OldIndex, FixedIndex});
+ Chains.push_back(ArgVal.getValue(1));
+
+ // Avoid emitting code for the store implementing the copy.
+ const StoreInst *SI = ArgCopyIter->second.second;
+ ElidedArgCopyInstrs.insert(SI);
+
+ // Check for uses of the argument again so that we can avoid exporting ArgVal
+  // if it isn't used by anything other than the store.
+ for (const Value *U : Arg.users()) {
+ if (U != SI) {
+ ArgHasUses = true;
+ break;
+ }
+ }
+}
+
void SelectionDAGISel::LowerArguments(const Function &F) {
SelectionDAG &DAG = SDB->DAG;
SDLoc dl = SDB->getCurSDLoc();
@@ -8032,15 +8177,21 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
Ins.push_back(RetArg);
}
+ // Look for stores of arguments to static allocas. Mark such arguments with a
+ // flag to ask the target to give us the memory location of that argument if
+ // available.
+ ArgCopyElisionMapTy ArgCopyElisionCandidates;
+ findArgumentCopyElisionCandidates(DL, FuncInfo, ArgCopyElisionCandidates);
+
// Set up the incoming argument description vector.
- unsigned Idx = 1;
- for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end();
- I != E; ++I, ++Idx) {
+ unsigned Idx = 0;
+ for (const Argument &Arg : F.args()) {
+ ++Idx;
SmallVector<EVT, 4> ValueVTs;
- ComputeValueVTs(*TLI, DAG.getDataLayout(), I->getType(), ValueVTs);
- bool isArgValueUsed = !I->use_empty();
+ ComputeValueVTs(*TLI, DAG.getDataLayout(), Arg.getType(), ValueVTs);
+ bool isArgValueUsed = !Arg.use_empty();
unsigned PartBase = 0;
- Type *FinalType = I->getType();
+ Type *FinalType = Arg.getType();
if (F.getAttributes().hasAttribute(Idx, Attribute::ByVal))
FinalType = cast<PointerType>(FinalType)->getElementType();
bool NeedsRegBlock = TLI->functionArgumentNeedsConsecutiveRegisters(
@@ -8060,7 +8211,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
// If we are using vectorcall calling convention, a structure that is
// passed InReg - is surely an HVA
if (F.getCallingConv() == CallingConv::X86_VectorCall &&
- isa<StructType>(I->getType())) {
+ isa<StructType>(Arg.getType())) {
// The first value of a structure is marked
if (0 == Value)
Flags.setHvaStart();
@@ -8092,7 +8243,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
Flags.setByVal();
}
if (Flags.isByVal() || Flags.isInAlloca()) {
- PointerType *Ty = cast<PointerType>(I->getType());
+ PointerType *Ty = cast<PointerType>(Arg.getType());
Type *ElementTy = Ty->getElementType();
Flags.setByValSize(DL.getTypeAllocSize(ElementTy));
// For ByVal, alignment should be passed from FE. BE will guess if
@@ -8109,6 +8260,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
if (NeedsRegBlock)
Flags.setInConsecutiveRegs();
Flags.setOrigAlign(OriginalAlignment);
+ if (ArgCopyElisionCandidates.count(&Arg))
+ Flags.setCopyElisionCandidate();
MVT RegisterVT = TLI->getRegisterType(*CurDAG->getContext(), VT);
unsigned NumRegs = TLI->getNumRegisters(*CurDAG->getContext(), VT);
@@ -8155,7 +8308,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
// Set up the argument values.
unsigned i = 0;
- Idx = 1;
+ Idx = 0;
if (!FuncInfo->CanLowerReturn) {
// Create a virtual register for the sret pointer, and put in a copy
// from the sret argument into it.
@@ -8181,25 +8334,39 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
++i;
}
- for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E;
- ++I, ++Idx) {
+ SmallVector<SDValue, 4> Chains;
+ DenseMap<int, int> ArgCopyElisionFrameIndexMap;
+ for (const Argument &Arg : F.args()) {
+ ++Idx;
SmallVector<SDValue, 4> ArgValues;
SmallVector<EVT, 4> ValueVTs;
- ComputeValueVTs(*TLI, DAG.getDataLayout(), I->getType(), ValueVTs);
+ ComputeValueVTs(*TLI, DAG.getDataLayout(), Arg.getType(), ValueVTs);
unsigned NumValues = ValueVTs.size();
+ if (NumValues == 0)
+ continue;
+
+ bool ArgHasUses = !Arg.use_empty();
+
+ // Elide the copying store if the target loaded this argument from a
+ // suitable fixed stack object.
+ if (Ins[i].Flags.isCopyElisionCandidate()) {
+ tryToElideArgumentCopy(FuncInfo, Chains, ArgCopyElisionFrameIndexMap,
+ ElidedArgCopyInstrs, ArgCopyElisionCandidates, Arg,
+ InVals[i], ArgHasUses);
+ }
// If this argument is unused then remember its value. It is used to generate
// debugging information.
bool isSwiftErrorArg =
TLI->supportSwiftError() &&
F.getAttributes().hasAttribute(Idx, Attribute::SwiftError);
- if (I->use_empty() && NumValues && !isSwiftErrorArg) {
- SDB->setUnusedArgValue(&*I, InVals[i]);
+ if (!ArgHasUses && !isSwiftErrorArg) {
+ SDB->setUnusedArgValue(&Arg, InVals[i]);
// Also remember any frame index for use in FastISel.
if (FrameIndexSDNode *FI =
dyn_cast<FrameIndexSDNode>(InVals[i].getNode()))
- FuncInfo->setArgumentFrameIndex(&*I, FI->getIndex());
+ FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex());
}
for (unsigned Val = 0; Val != NumValues; ++Val) {
@@ -8210,16 +8377,15 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
// Even an apparent 'unused' swifterror argument needs to be returned. So
// we do generate a copy for it that can be used on return from the
// function.
- if (!I->use_empty() || isSwiftErrorArg) {
+ if (ArgHasUses || isSwiftErrorArg) {
Optional<ISD::NodeType> AssertOp;
if (F.getAttributes().hasAttribute(Idx, Attribute::SExt))
AssertOp = ISD::AssertSext;
else if (F.getAttributes().hasAttribute(Idx, Attribute::ZExt))
AssertOp = ISD::AssertZext;
- ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i],
- NumParts, PartVT, VT,
- nullptr, AssertOp));
+ ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i], NumParts,
+ PartVT, VT, nullptr, AssertOp));
}
i += NumParts;
@@ -8232,18 +8398,18 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
// Note down frame index.
if (FrameIndexSDNode *FI =
dyn_cast<FrameIndexSDNode>(ArgValues[0].getNode()))
- FuncInfo->setArgumentFrameIndex(&*I, FI->getIndex());
+ FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex());
SDValue Res = DAG.getMergeValues(makeArrayRef(ArgValues.data(), NumValues),
SDB->getCurSDLoc());
- SDB->setValue(&*I, Res);
+ SDB->setValue(&Arg, Res);
if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::BUILD_PAIR) {
if (LoadSDNode *LNode =
dyn_cast<LoadSDNode>(Res.getOperand(0).getNode()))
if (FrameIndexSDNode *FI =
dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode()))
- FuncInfo->setArgumentFrameIndex(&*I, FI->getIndex());
+ FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex());
}
// Update the SwiftErrorVRegDefMap.
@@ -8263,18 +8429,36 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
// uses with vregs.
unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg();
if (TargetRegisterInfo::isVirtualRegister(Reg)) {
- FuncInfo->ValueMap[&*I] = Reg;
+ FuncInfo->ValueMap[&Arg] = Reg;
continue;
}
}
- if (!isOnlyUsedInEntryBlock(&*I, TM.Options.EnableFastISel)) {
- FuncInfo->InitializeRegForValue(&*I);
- SDB->CopyToExportRegsIfNeeded(&*I);
+ if (!isOnlyUsedInEntryBlock(&Arg, TM.Options.EnableFastISel)) {
+ FuncInfo->InitializeRegForValue(&Arg);
+ SDB->CopyToExportRegsIfNeeded(&Arg);
}
}
+ if (!Chains.empty()) {
+ Chains.push_back(NewRoot);
+ NewRoot = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
+ }
+
+ DAG.setRoot(NewRoot);
+
assert(i == InVals.size() && "Argument register count mismatch!");
+ // If any argument copy elisions occurred and we have debug info, update the
+ // stale frame indices used in the dbg.declare variable info table.
+  MachineFunction::VariableDbgInfoMapTy &DbgDeclareInfo =
+      MF->getVariableDbgInfo();
+ if (!DbgDeclareInfo.empty() && !ArgCopyElisionFrameIndexMap.empty()) {
+ for (MachineFunction::VariableDbgInfo &VI : DbgDeclareInfo) {
+ auto I = ArgCopyElisionFrameIndexMap.find(VI.Slot);
+ if (I != ArgCopyElisionFrameIndexMap.end())
+ VI.Slot = I->second;
+ }
+ }
+
// Finally, if the target has anything special to do, allow it to do so.
EmitFunctionEntryCode();
}