author     Dimitry Andric <dim@FreeBSD.org>    2017-12-18 20:10:56 +0000
committer  Dimitry Andric <dim@FreeBSD.org>    2017-12-18 20:10:56 +0000
commit     044eb2f6afba375a914ac9d8024f8f5142bb912e (patch)
tree       1475247dc9f9fe5be155ebd4c9069c75aadf8c20 /lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
parent     eb70dddbd77e120e5d490bd8fbe7ff3f8fa81c6b (diff)
Diffstat (limited to 'lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp')
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp  664
1 file changed, 428 insertions, 236 deletions
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 127312076207..71cb8cb78f6d 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -1,4 +1,4 @@ -//===-- SelectionDAGBuilder.cpp - Selection-DAG building ------------------===// +//===- SelectionDAGBuilder.cpp - Selection-DAG building -------------------===// // // The LLVM Compiler Infrastructure // @@ -12,63 +12,113 @@ //===----------------------------------------------------------------------===// #include "SelectionDAGBuilder.h" -#include "SDNodeDbgValue.h" +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitVector.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Triple.h" +#include "llvm/ADT/Twine.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/Loads.h" +#include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/VectorUtils.h" #include "llvm/CodeGen/Analysis.h" -#include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/GCMetadata.h" -#include "llvm/CodeGen/GCStrategy.h" +#include "llvm/CodeGen/ISDOpcodes.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineValueType.h" +#include "llvm/CodeGen/RuntimeLibcalls.h" #include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/SelectionDAGTargetInfo.h" #include "llvm/CodeGen/StackMaps.h" +#include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetOpcodes.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/CodeGen/ValueTypes.h" #include "llvm/CodeGen/WinEHFuncInfo.h" +#include "llvm/IR/Argument.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/CallSite.h" #include "llvm/IR/CallingConv.h" +#include "llvm/IR/Constant.h" #include "llvm/IR/ConstantRange.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" -#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/DebugLoc.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/GetElementPtrTypeIterator.h" -#include "llvm/IR/GlobalVariable.h" #include "llvm/IR/InlineAsm.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include 
"llvm/IR/LLVMContext.h" +#include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" #include "llvm/IR/Statepoint.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCSymbol.h" +#include "llvm/Support/AtomicOrdering.h" +#include "llvm/Support/BranchProbability.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CodeGen.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetIntrinsicInfo.h" -#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> +#include <cassert> +#include <cstddef> +#include <cstdint> +#include <cstring> +#include <iterator> +#include <limits> +#include <numeric> +#include <tuple> #include <utility> +#include <vector> + using namespace llvm; #define DEBUG_TYPE "isel" @@ -78,11 +128,18 @@ using namespace llvm; static unsigned LimitFloatPrecision; static cl::opt<unsigned, true> -LimitFPPrecision("limit-float-precision", - cl::desc("Generate low-precision inline sequences " - "for some float libcalls"), - cl::location(LimitFloatPrecision), - cl::init(0)); + LimitFPPrecision("limit-float-precision", + cl::desc("Generate low-precision inline sequences " + "for some float libcalls"), + cl::location(LimitFloatPrecision), cl::Hidden, + cl::init(0)); + +static cl::opt<unsigned> SwitchPeelThreshold( + "switch-peel-threshold", cl::Hidden, cl::init(66), + cl::desc("Set the case probability threshold for peeling the case from a " + "switch statement. A value greater than 100 will void this " + "optimization")); + // Limit the width of DAG chains. This is important in general to prevent // DAG-based analysis from blowing up. For example, alias analysis and // load clustering may not complete in reasonable time. It is difficult to @@ -101,7 +158,7 @@ static const unsigned MaxParallelChains = 64; // True if the Value passed requires ABI mangling as it is a parameter to a // function or a return value from a function which is not an intrinsic. 
-static bool isABIRegCopy(const Value * V) { +static bool isABIRegCopy(const Value *V) { const bool IsRetInst = V && isa<ReturnInst>(V); const bool IsCallInst = V && isa<CallInst>(V); const bool IsInLineAsm = @@ -554,7 +611,6 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, const Value *V, bool IsABIRegCopy) { - EVT ValueVT = Val.getValueType(); assert(ValueVT.isVector() && "Not a vector"); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); @@ -600,7 +656,6 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL, Val = DAG.getNode( ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val, DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); - } else { assert(PartVT.getSizeInBits() > ValueVT.getSizeInBits() && "lossy conversion of vector to scalar type"); @@ -677,8 +732,6 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL, } } -RegsForValue::RegsForValue() { IsABIMangled = false; } - RegsForValue::RegsForValue(const SmallVector<unsigned, 4> ®s, MVT regvt, EVT valuevt, bool IsABIMangledValue) : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs), @@ -888,7 +941,24 @@ void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching, SDValue Res = DAG.getTargetConstant(Flag, dl, MVT::i32); Ops.push_back(Res); - unsigned SP = TLI.getStackPointerRegisterToSaveRestore(); + if (Code == InlineAsm::Kind_Clobber) { + // Clobbers should always have a 1:1 mapping with registers, and may + // reference registers that have illegal (e.g. vector) types. Hence, we + // shouldn't try to apply any sort of splitting logic to them. + assert(Regs.size() == RegVTs.size() && Regs.size() == ValueVTs.size() && + "No 1:1 mapping from clobbers to regs?"); + unsigned SP = TLI.getStackPointerRegisterToSaveRestore(); + (void)SP; + for (unsigned I = 0, E = ValueVTs.size(); I != E; ++I) { + Ops.push_back(DAG.getRegister(Regs[I], RegVTs[I])); + assert( + (Regs[I] != SP || + DAG.getMachineFunction().getFrameInfo().hasOpaqueSPAdjustment()) && + "If we clobbered the stack pointer, MFI should know about it."); + } + return; + } + for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) { unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVTs[Value]); MVT RegisterVT = RegVTs[Value]; @@ -896,11 +966,6 @@ void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching, assert(Reg < Regs.size() && "Mismatch in # registers expected"); unsigned TheReg = Regs[Reg++]; Ops.push_back(DAG.getRegister(TheReg, RegisterVT)); - - if (TheReg == SP && Code == InlineAsm::Kind_Clobber) { - // If we clobbered the stack pointer, MFI should know about it. 
- assert(DAG.getMachineFunction().getFrameInfo().hasOpaqueSPAdjustment()); - } } } } @@ -1025,12 +1090,10 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V, DIExpression *Expr = DI->getExpression(); assert(Variable->isValidLocationForIntrinsic(dl) && "Expected inlined-at fields to agree"); - uint64_t Offset = DI->getOffset(); SDDbgValue *SDV; if (Val.getNode()) { - if (!EmitFuncArgumentDbgValue(V, Variable, Expr, dl, Offset, false, - Val)) { - SDV = getDbgValue(Val, Variable, Expr, Offset, dl, DbgSDNodeOrder); + if (!EmitFuncArgumentDbgValue(V, Variable, Expr, dl, false, Val)) { + SDV = getDbgValue(Val, Variable, Expr, dl, DbgSDNodeOrder); DAG.AddDbgValue(SDV, Val.getNode(), false); } } else @@ -1409,7 +1472,9 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { // Leave Outs empty so that LowerReturn won't try to load return // registers the usual way. SmallVector<EVT, 1> PtrValueVTs; - ComputeValueVTs(TLI, DL, PointerType::getUnqual(F->getReturnType()), + ComputeValueVTs(TLI, DL, + F->getReturnType()->getPointerTo( + DAG.getDataLayout().getAllocaAddrSpace()), PtrValueVTs); SDValue RetPtr = DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), @@ -1421,22 +1486,15 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs, &Offsets); unsigned NumValues = ValueVTs.size(); - // An aggregate return value cannot wrap around the address space, so - // offsets to its parts don't wrap either. - SDNodeFlags Flags; - Flags.setNoUnsignedWrap(true); - SmallVector<SDValue, 4> Chains(NumValues); for (unsigned i = 0; i != NumValues; ++i) { - SDValue Add = DAG.getNode(ISD::ADD, getCurSDLoc(), - RetPtr.getValueType(), RetPtr, - DAG.getIntPtrConstant(Offsets[i], - getCurSDLoc()), - Flags); - Chains[i] = DAG.getStore(Chain, getCurSDLoc(), - SDValue(RetOp.getNode(), RetOp.getResNo() + i), - // FIXME: better loc info would be nice. - Add, MachinePointerInfo()); + // An aggregate return value cannot wrap around the address space, so + // offsets to its parts don't wrap either. + SDValue Ptr = DAG.getObjectPtrOffset(getCurSDLoc(), RetPtr, Offsets[i]); + Chains[i] = DAG.getStore( + Chain, getCurSDLoc(), SDValue(RetOp.getNode(), RetOp.getResNo() + i), + // FIXME: better loc info would be nice. + Ptr, MachinePointerInfo::getUnknownStack(DAG.getMachineFunction())); } Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), @@ -1515,9 +1573,9 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { EVT(TLI.getPointerTy(DL)))); } - bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg(); + bool isVarArg = DAG.getMachineFunction().getFunction().isVarArg(); CallingConv::ID CallConv = - DAG.getMachineFunction().getFunction()->getCallingConv(); + DAG.getMachineFunction().getFunction().getCallingConv(); Chain = DAG.getTargetLoweringInfo().LowerReturn( Chain, CallConv, isVarArg, Outs, OutVals, getCurSDLoc(), DAG); @@ -1623,7 +1681,6 @@ static bool InBlock(const Value *V, const BasicBlock *BB) { /// EmitBranchForMergedCondition - Helper method for FindMergedConditions. /// This function emits a branch and is used at the leaves of an OR or an /// AND operator tree. 
-/// void SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond, MachineBasicBlock *TBB, @@ -1659,7 +1716,7 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond, } CaseBlock CB(Condition, BOp->getOperand(0), BOp->getOperand(1), nullptr, - TBB, FBB, CurBB, TProb, FProb); + TBB, FBB, CurBB, getCurSDLoc(), TProb, FProb); SwitchCases.push_back(CB); return; } @@ -1668,7 +1725,7 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond, // Create a CaseBlock record representing this branch. ISD::CondCode Opc = InvertCond ? ISD::SETNE : ISD::SETEQ; CaseBlock CB(Opc, Cond, ConstantInt::getTrue(*DAG.getContext()), - nullptr, TBB, FBB, CurBB, TProb, FProb); + nullptr, TBB, FBB, CurBB, getCurSDLoc(), TProb, FProb); SwitchCases.push_back(CB); } @@ -1712,7 +1769,7 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond, // If this node is not part of the or/and tree, emit it as a branch. if (!BOp || !(isa<BinaryOperator>(BOp) || isa<CmpInst>(BOp)) || - BOpc != Opc || !BOp->hasOneUse() || + BOpc != unsigned(Opc) || !BOp->hasOneUse() || BOp->getParent() != CurBB->getBasicBlock() || !InBlock(BOp->getOperand(0), CurBB->getBasicBlock()) || !InBlock(BOp->getOperand(1), CurBB->getBasicBlock())) { @@ -1867,7 +1924,6 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) { // je foo // cmp D, E // jle foo - // if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) { Instruction::BinaryOps Opcode = BOp->getOpcode(); if (!DAG.getTargetLoweringInfo().isJumpExpensive() && BOp->hasOneUse() && @@ -1907,7 +1963,7 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) { // Create a CaseBlock record representing this branch. CaseBlock CB(ISD::SETEQ, CondVal, ConstantInt::getTrue(*DAG.getContext()), - nullptr, Succ0MBB, Succ1MBB, BrMBB); + nullptr, Succ0MBB, Succ1MBB, BrMBB, getCurSDLoc()); // Use visitSwitchCase to actually insert the fast branch sequence for this // cond branch. @@ -1920,7 +1976,7 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, MachineBasicBlock *SwitchBB) { SDValue Cond; SDValue CondLHS = getValue(CB.CmpLHS); - SDLoc dl = getCurSDLoc(); + SDLoc dl = CB.DL; // Build the setcc now. if (!CB.CmpMHS) { @@ -2054,7 +2110,7 @@ static SDValue getLoadStackGuard(SelectionDAG &DAG, const SDLoc &DL, const TargetLowering &TLI = DAG.getTargetLoweringInfo(); EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout()); MachineFunction &MF = DAG.getMachineFunction(); - Value *Global = TLI.getSDagStackGuard(*MF.getFunction()->getParent()); + Value *Global = TLI.getSDagStackGuard(*MF.getFunction().getParent()); MachineSDNode *Node = DAG.getMachineNode(TargetOpcode::LOAD_STACK_GUARD, DL, PtrTy, Chain); if (Global) { @@ -2088,15 +2144,18 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, SDValue Guard; SDLoc dl = getCurSDLoc(); SDValue StackSlotPtr = DAG.getFrameIndex(FI, PtrTy); - const Module &M = *ParentBB->getParent()->getFunction()->getParent(); + const Module &M = *ParentBB->getParent()->getFunction().getParent(); unsigned Align = DL->getPrefTypeAlignment(Type::getInt8PtrTy(M.getContext())); // Generate code to load the content of the guard slot. 
- SDValue StackSlot = DAG.getLoad( + SDValue GuardVal = DAG.getLoad( PtrTy, dl, DAG.getEntryNode(), StackSlotPtr, MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), Align, MachineMemOperand::MOVolatile); + if (TLI.useStackGuardXorFP()) + GuardVal = TLI.emitStackGuardXorFP(DAG, GuardVal, dl); + // Retrieve guard check function, nullptr if instrumentation is inlined. if (const Value *GuardCheck = TLI.getSSPStackGuardCheck(M)) { // The target provides a guard check function to validate the guard value. @@ -2108,7 +2167,7 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; - Entry.Node = StackSlot; + Entry.Node = GuardVal; Entry.Ty = FnTy->getParamType(0); if (Fn->hasAttribute(1, Attribute::AttrKind::InReg)) Entry.IsInReg = true; @@ -2141,7 +2200,7 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, // Perform the comparison via a subtract/getsetcc. EVT VT = Guard.getValueType(); - SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, Guard, StackSlot); + SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, Guard, GuardVal); SDValue Cmp = DAG.getSetCC(dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), @@ -2151,7 +2210,7 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, // If the sub is not 0, then we know the guard/stackslot do not equal, so // branch to failure MBB. SDValue BrCond = DAG.getNode(ISD::BRCOND, dl, - MVT::Other, StackSlot.getOperand(0), + MVT::Other, GuardVal.getOperand(0), Cmp, DAG.getBasicBlock(SPD.getFailureMBB())); // Otherwise branch to success MBB. SDValue Br = DAG.getNode(ISD::BR, dl, @@ -2530,7 +2589,7 @@ static bool isVectorReductionOp(const User *I) { case Instruction::FAdd: case Instruction::FMul: if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(Inst)) - if (FPOp->getFastMathFlags().unsafeAlgebra()) + if (FPOp->getFastMathFlags().isFast()) break; LLVM_FALLTHROUGH; default: @@ -2576,7 +2635,7 @@ static bool isVectorReductionOp(const User *I) { if (Inst->getOpcode() == OpCode || isa<PHINode>(U)) { if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(Inst)) - if (!isa<PHINode>(FPOp) && !FPOp->getFastMathFlags().unsafeAlgebra()) + if (!isa<PHINode>(FPOp) && !FPOp->getFastMathFlags().isFast()) return false; UsersToVisit.push_back(U); } else if (const ShuffleVectorInst *ShufInst = @@ -2670,7 +2729,7 @@ void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) { Flags.setNoInfs(FMF.noInfs()); Flags.setNoNaNs(FMF.noNaNs()); Flags.setNoSignedZeros(FMF.noSignedZeros()); - Flags.setUnsafeAlgebra(FMF.unsafeAlgebra()); + Flags.setUnsafeAlgebra(FMF.isFast()); SDValue BinNodeValue = DAG.getNode(OpCode, getCurSDLoc(), Op1.getValueType(), Op1, Op2, Flags); @@ -2779,7 +2838,7 @@ void SelectionDAGBuilder::visitFCmp(const User &I) { // Check if the condition of the select has one use or two users that are both // selects with the same condition. 
static bool hasOnlySelectUsers(const Value *Cond) { - return all_of(Cond->users(), [](const Value *V) { + return llvm::all_of(Cond->users(), [](const Value *V) { return isa<SelectInst>(V); }); } @@ -3447,7 +3506,7 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { SDValue AllocSize = getValue(I.getArraySize()); - EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout()); + EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout(), DL.getAllocaAddrSpace()); if (AllocSize.getValueType() != IntPtr) AllocSize = DAG.getZExtOrTrunc(AllocSize, dl, IntPtr); @@ -3468,17 +3527,15 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { // an address inside an alloca. SDNodeFlags Flags; Flags.setNoUnsignedWrap(true); - AllocSize = DAG.getNode(ISD::ADD, dl, - AllocSize.getValueType(), AllocSize, - DAG.getIntPtrConstant(StackAlign - 1, dl), Flags); + AllocSize = DAG.getNode(ISD::ADD, dl, AllocSize.getValueType(), AllocSize, + DAG.getConstant(StackAlign - 1, dl, IntPtr), Flags); // Mask out the low bits for alignment purposes. - AllocSize = DAG.getNode(ISD::AND, dl, - AllocSize.getValueType(), AllocSize, - DAG.getIntPtrConstant(~(uint64_t)(StackAlign - 1), - dl)); + AllocSize = + DAG.getNode(ISD::AND, dl, AllocSize.getValueType(), AllocSize, + DAG.getConstant(~(uint64_t)(StackAlign - 1), dl, IntPtr)); - SDValue Ops[] = { getRoot(), AllocSize, DAG.getIntPtrConstant(Align, dl) }; + SDValue Ops[] = {getRoot(), AllocSize, DAG.getConstant(Align, dl, IntPtr)}; SDVTList VTs = DAG.getVTList(AllocSize.getValueType(), MVT::Other); SDValue DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, dl, VTs, Ops); setValue(&I, DSA); @@ -3807,18 +3864,16 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I, // // When the first GEP operand is a single pointer - it is the uniform base we // are looking for. If first operand of the GEP is a splat vector - we -// extract the spalt value and use it as a uniform base. +// extract the splat value and use it as a uniform base. // In all other cases the function returns 'false'. -// static bool getUniformBase(const Value* &Ptr, SDValue& Base, SDValue& Index, SelectionDAGBuilder* SDB) { - SelectionDAG& DAG = SDB->DAG; LLVMContext &Context = *DAG.getContext(); assert(Ptr->getType()->isVectorTy() && "Uexpected pointer type"); const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr); - if (!GEP || GEP->getNumOperands() > 2) + if (!GEP) return false; const Value *GEPPtr = GEP->getPointerOperand(); @@ -3827,7 +3882,15 @@ static bool getUniformBase(const Value* &Ptr, SDValue& Base, SDValue& Index, else if (!(Ptr = getSplatValue(GEPPtr))) return false; - Value *IndexVal = GEP->getOperand(1); + unsigned FinalIndex = GEP->getNumOperands() - 1; + Value *IndexVal = GEP->getOperand(FinalIndex); + + // Ensure all the other indices are 0. + for (unsigned i = 1; i < FinalIndex; ++i) { + auto *C = dyn_cast<ConstantInt>(GEP->getOperand(i)); + if (!C || !C->isZero()) + return false; + } // The operands of the GEP may be defined in another basic block. // In this case we'll not find nodes for the operands. @@ -3837,13 +3900,6 @@ static bool getUniformBase(const Value* &Ptr, SDValue& Base, SDValue& Index, Base = SDB->getValue(Ptr); Index = SDB->getValue(IndexVal); - // Suppress sign extension. 
- if (SExtInst* Sext = dyn_cast<SExtInst>(IndexVal)) { - if (SDB->findValue(Sext->getOperand(0))) { - IndexVal = Sext->getOperand(0); - Index = SDB->getValue(IndexVal); - } - } if (!Index.getValueType().isVector()) { unsigned GEPWidth = GEP->getType()->getVectorNumElements(); EVT VT = EVT::getVectorVT(Context, Index.getValueType(), GEPWidth); @@ -4082,7 +4138,8 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); - if (I.getAlignment() < VT.getSizeInBits() / 8) + if (!TLI.supportsUnalignedAtomics() && + I.getAlignment() < VT.getStoreSize()) report_fatal_error("Cannot generate unaligned atomic load"); MachineMemOperand *MMO = @@ -4118,7 +4175,7 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) { EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getValueOperand()->getType()); - if (I.getAlignment() < VT.getSizeInBits() / 8) + if (I.getAlignment() < VT.getStoreSize()) report_fatal_error("Cannot generate unaligned atomic store"); SDValue OutChain = @@ -4157,7 +4214,9 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, // Info is set by getTgtMemInstrinsic TargetLowering::IntrinsicInfo Info; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I, Intrinsic); + bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I, + DAG.getMachineFunction(), + Intrinsic); // Add the intrinsic ID as an integer operand if it's not a target intrinsic. if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID || @@ -4183,11 +4242,10 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, SDValue Result; if (IsTgtIntrinsic) { // This is target intrinsic that touches memory - Result = DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), - VTs, Ops, Info.memVT, - MachinePointerInfo(Info.ptrVal, Info.offset), - Info.align, Info.vol, - Info.readMem, Info.writeMem, Info.size); + Result = DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), VTs, + Ops, Info.memVT, + MachinePointerInfo(Info.ptrVal, Info.offset), Info.align, + Info.flags, Info.size); } else if (!HasChain) { Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops); } else if (!I.getType()->isVoidTy()) { @@ -4370,7 +4428,6 @@ static SDValue expandExp(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, /// limited-precision mode. static SDValue expandLog(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, const TargetLowering &TLI) { - // TODO: What fast-math-flags should be set on the floating-point nodes? if (Op.getValueType() == MVT::f32 && @@ -4469,7 +4526,6 @@ static SDValue expandLog(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, /// limited-precision mode. static SDValue expandLog2(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, const TargetLowering &TLI) { - // TODO: What fast-math-flags should be set on the floating-point nodes? if (Op.getValueType() == MVT::f32 && @@ -4567,7 +4623,6 @@ static SDValue expandLog2(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, /// limited-precision mode. static SDValue expandLog10(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, const TargetLowering &TLI) { - // TODO: What fast-math-flags should be set on the floating-point nodes? if (Op.getValueType() == MVT::f32 && @@ -4695,7 +4750,6 @@ static SDValue expandPow(const SDLoc &dl, SDValue LHS, SDValue RHS, return DAG.getNode(ISD::FPOW, dl, LHS.getValueType(), LHS, RHS); } - /// ExpandPowI - Expand a llvm.powi intrinsic. 
static SDValue ExpandPowI(const SDLoc &DL, SDValue LHS, SDValue RHS, SelectionDAG &DAG) { @@ -4712,8 +4766,8 @@ static SDValue ExpandPowI(const SDLoc &DL, SDValue LHS, SDValue RHS, if (Val == 0) return DAG.getConstantFP(1.0, DL, LHS.getValueType()); - const Function *F = DAG.getMachineFunction().getFunction(); - if (!F->optForSize() || + const Function &F = DAG.getMachineFunction().getFunction(); + if (!F.optForSize() || // If optimizing for size, don't insert too many multiplies. // This inserts up to 5 multiplies. countPopulation(Val) + Log2_32(Val) < 7) { @@ -4766,12 +4820,12 @@ static unsigned getUnderlyingArgReg(const SDValue &N) { } } -/// EmitFuncArgumentDbgValue - If the DbgValueInst is a dbg_value of a function -/// argument, create the corresponding DBG_VALUE machine instruction for it now. -/// At the end of instruction selection, they will be inserted to the entry BB. +/// If the DbgValueInst is a dbg_value of a function argument, create the +/// corresponding DBG_VALUE machine instruction for it now. At the end of +/// instruction selection, they will be inserted to the entry BB. bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( const Value *V, DILocalVariable *Variable, DIExpression *Expr, - DILocation *DL, int64_t Offset, bool IsDbgDeclare, const SDValue &N) { + DILocation *DL, bool IsDbgDeclare, const SDValue &N) { const Argument *Arg = dyn_cast<Argument>(V); if (!Arg) return false; @@ -4779,17 +4833,11 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( MachineFunction &MF = DAG.getMachineFunction(); const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo(); - // Ignore inlined function arguments here. - // - // FIXME: Should we be checking DL->inlinedAt() to determine this? - if (!Variable->getScope()->getSubprogram()->describes(MF.getFunction())) - return false; - bool IsIndirect = false; Optional<MachineOperand> Op; // Some arguments' frame index is recorded during argument lowering. int FI = FuncInfo.getArgumentFrameIndex(Arg); - if (FI != INT_MAX) + if (FI != std::numeric_limits<int>::max()) Op = MachineOperand::CreateFI(FI); if (!Op && N.getNode()) { @@ -4806,22 +4854,48 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( } } + if (!Op && N.getNode()) + // Check if frame index is available. + if (LoadSDNode *LNode = dyn_cast<LoadSDNode>(N.getNode())) + if (FrameIndexSDNode *FINode = + dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode())) + Op = MachineOperand::CreateFI(FINode->getIndex()); + if (!Op) { // Check if ValueMap has reg number. DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V); if (VMI != FuncInfo.ValueMap.end()) { + const auto &TLI = DAG.getTargetLoweringInfo(); + RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), VMI->second, + V->getType(), isABIRegCopy(V)); + unsigned NumRegs = + std::accumulate(RFV.RegCount.begin(), RFV.RegCount.end(), 0); + if (NumRegs > 1) { + unsigned I = 0; + unsigned Offset = 0; + auto RegisterVT = RFV.RegVTs.begin(); + for (auto RegCount : RFV.RegCount) { + unsigned RegisterSize = (RegisterVT++)->getSizeInBits(); + for (unsigned E = I + RegCount; I != E; ++I) { + // The vregs are guaranteed to be allocated in sequence. 
+ Op = MachineOperand::CreateReg(VMI->second + I, false); + auto FragmentExpr = DIExpression::createFragmentExpression( + Expr, Offset, RegisterSize); + if (!FragmentExpr) + continue; + FuncInfo.ArgDbgValues.push_back( + BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsDbgDeclare, + Op->getReg(), Variable, *FragmentExpr)); + Offset += RegisterSize; + } + } + return true; + } Op = MachineOperand::CreateReg(VMI->second, false); IsIndirect = IsDbgDeclare; } } - if (!Op && N.getNode()) - // Check if frame index is available. - if (LoadSDNode *LNode = dyn_cast<LoadSDNode>(N.getNode())) - if (FrameIndexSDNode *FINode = - dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode())) - Op = MachineOperand::CreateFI(FINode->getIndex()); - if (!Op) return false; @@ -4830,12 +4904,12 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( if (Op->isReg()) FuncInfo.ArgDbgValues.push_back( BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsIndirect, - Op->getReg(), Offset, Variable, Expr)); + Op->getReg(), Variable, Expr)); else FuncInfo.ArgDbgValues.push_back( BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE)) .add(*Op) - .addImm(Offset) + .addImm(0) .addMetadata(Variable) .addMetadata(Expr)); @@ -4845,18 +4919,18 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( /// Return the appropriate SDDbgValue based on N. SDDbgValue *SelectionDAGBuilder::getDbgValue(SDValue N, DILocalVariable *Variable, - DIExpression *Expr, int64_t Offset, + DIExpression *Expr, const DebugLoc &dl, unsigned DbgSDNodeOrder) { if (auto *FISDN = dyn_cast<FrameIndexSDNode>(N.getNode())) { // Construct a FrameIndexDbgValue for FrameIndexSDNodes so we can describe // stack slot locations as such instead of as indirectly addressed // locations. - return DAG.getFrameIndexDbgValue(Variable, Expr, FISDN->getIndex(), 0, dl, + return DAG.getFrameIndexDbgValue(Variable, Expr, FISDN->getIndex(), dl, DbgSDNodeOrder); } - return DAG.getDbgValue(Variable, Expr, N.getNode(), N.getResNo(), false, - Offset, dl, DbgSDNodeOrder); + return DAG.getDbgValue(Variable, Expr, N.getNode(), N.getResNo(), false, dl, + DbgSDNodeOrder); } // VisualStudio defines setjmp as _setjmp @@ -4971,8 +5045,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return nullptr; } case Intrinsic::memcpy_element_unordered_atomic: { - const ElementUnorderedAtomicMemCpyInst &MI = - cast<ElementUnorderedAtomicMemCpyInst>(I); + const AtomicMemCpyInst &MI = cast<AtomicMemCpyInst>(I); SDValue Dst = getValue(MI.getRawDest()); SDValue Src = getValue(MI.getRawSource()); SDValue Length = getValue(MI.getLength()); @@ -5010,7 +5083,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return nullptr; } case Intrinsic::memmove_element_unordered_atomic: { - auto &MI = cast<ElementUnorderedAtomicMemMoveInst>(I); + auto &MI = cast<AtomicMemMoveInst>(I); SDValue Dst = getValue(MI.getRawDest()); SDValue Src = getValue(MI.getRawSource()); SDValue Length = getValue(MI.getLength()); @@ -5048,7 +5121,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return nullptr; } case Intrinsic::memset_element_unordered_atomic: { - auto &MI = cast<ElementUnorderedAtomicMemSetInst>(I); + auto &MI = cast<AtomicMemSetInst>(I); SDValue Dst = getValue(MI.getRawDest()); SDValue Val = getValue(MI.getValue()); SDValue Length = getValue(MI.getLength()); @@ -5086,30 +5159,48 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { DAG.setRoot(CallResult.second); return nullptr; } + case 
Intrinsic::dbg_addr: case Intrinsic::dbg_declare: { - const DbgDeclareInst &DI = cast<DbgDeclareInst>(I); + const DbgInfoIntrinsic &DI = cast<DbgInfoIntrinsic>(I); DILocalVariable *Variable = DI.getVariable(); DIExpression *Expression = DI.getExpression(); - const Value *Address = DI.getAddress(); assert(Variable && "Missing variable"); - if (!Address) { - DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); - return nullptr; - } // Check if address has undef value. - if (isa<UndefValue>(Address) || + const Value *Address = DI.getVariableLocation(); + if (!Address || isa<UndefValue>(Address) || (Address->use_empty() && !isa<Argument>(Address))) { DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); return nullptr; } - // Byval arguments with frame indices were already handled after argument - // lowering and before isel. - const auto *Arg = - dyn_cast<Argument>(Address->stripInBoundsConstantOffsets()); - if (Arg && FuncInfo.getArgumentFrameIndex(Arg) != INT_MAX) + bool isParameter = Variable->isParameter() || isa<Argument>(Address); + + // Check if this variable can be described by a frame index, typically + // either as a static alloca or a byval parameter. + int FI = std::numeric_limits<int>::max(); + if (const auto *AI = + dyn_cast<AllocaInst>(Address->stripInBoundsConstantOffsets())) { + if (AI->isStaticAlloca()) { + auto I = FuncInfo.StaticAllocaMap.find(AI); + if (I != FuncInfo.StaticAllocaMap.end()) + FI = I->second; + } + } else if (const auto *Arg = dyn_cast<Argument>( + Address->stripInBoundsConstantOffsets())) { + FI = FuncInfo.getArgumentFrameIndex(Arg); + } + + // llvm.dbg.addr is control dependent and always generates indirect + // DBG_VALUE instructions. llvm.dbg.declare is handled as a frame index in + // the MachineFunction variable table. + if (FI != std::numeric_limits<int>::max()) { + if (Intrinsic == Intrinsic::dbg_addr) + DAG.AddDbgValue(DAG.getFrameIndexDbgValue(Variable, Expression, FI, dl, + SDNodeOrder), + getRoot().getNode(), isParameter); return nullptr; + } SDValue &N = NodeMap[Address]; if (!N.getNode() && isa<Argument>(Address)) @@ -5120,26 +5211,25 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address)) Address = BCI->getOperand(0); // Parameters are handled specially. - bool isParameter = Variable->isParameter() || isa<Argument>(Address); auto FINode = dyn_cast<FrameIndexSDNode>(N.getNode()); if (isParameter && FINode) { // Byval parameter. We have a frame index at this point. SDV = DAG.getFrameIndexDbgValue(Variable, Expression, - FINode->getIndex(), 0, dl, SDNodeOrder); + FINode->getIndex(), dl, SDNodeOrder); } else if (isa<Argument>(Address)) { // Address is an argument, so try to emit its dbg value using // virtual register info from the FuncInfo.ValueMap. - EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, 0, true, N); + EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, true, N); return nullptr; } else { SDV = DAG.getDbgValue(Variable, Expression, N.getNode(), N.getResNo(), - true, 0, dl, SDNodeOrder); + true, dl, SDNodeOrder); } DAG.AddDbgValue(SDV, N.getNode(), isParameter); } else { // If Address is an argument then try to emit its dbg value using // virtual register info from the FuncInfo.ValueMap. 
- if (!EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, 0, true, + if (!EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, true, N)) { DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); } @@ -5152,15 +5242,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { DILocalVariable *Variable = DI.getVariable(); DIExpression *Expression = DI.getExpression(); - uint64_t Offset = DI.getOffset(); const Value *V = DI.getValue(); if (!V) return nullptr; SDDbgValue *SDV; if (isa<ConstantInt>(V) || isa<ConstantFP>(V) || isa<UndefValue>(V)) { - SDV = DAG.getConstantDbgValue(Variable, Expression, V, Offset, dl, - SDNodeOrder); + SDV = DAG.getConstantDbgValue(Variable, Expression, V, dl, SDNodeOrder); DAG.AddDbgValue(SDV, nullptr, false); return nullptr; } @@ -5171,10 +5259,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { if (!N.getNode() && isa<Argument>(V)) // Check unused arguments map. N = UnusedArgNodeMap[V]; if (N.getNode()) { - if (EmitFuncArgumentDbgValue(V, Variable, Expression, dl, Offset, false, - N)) + if (EmitFuncArgumentDbgValue(V, Variable, Expression, dl, false, N)) return nullptr; - SDV = getDbgValue(N, Variable, Expression, Offset, dl, SDNodeOrder); + SDV = getDbgValue(N, Variable, Expression, dl, SDNodeOrder); DAG.AddDbgValue(SDV, N.getNode(), false); return nullptr; } @@ -5213,12 +5300,11 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::eh_unwind_init: DAG.getMachineFunction().setCallsUnwindInit(true); return nullptr; - case Intrinsic::eh_dwarf_cfa: { + case Intrinsic::eh_dwarf_cfa: setValue(&I, DAG.getNode(ISD::EH_DWARF_CFA, sdl, TLI.getPointerTy(DAG.getDataLayout()), getValue(I.getArgOperand(0)))); return nullptr; - } case Intrinsic::eh_sjlj_callsite: { MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(0)); @@ -5247,17 +5333,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { DAG.setRoot(Op.getValue(1)); return nullptr; } - case Intrinsic::eh_sjlj_longjmp: { + case Intrinsic::eh_sjlj_longjmp: DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_LONGJMP, sdl, MVT::Other, getRoot(), getValue(I.getArgOperand(0)))); return nullptr; - } - case Intrinsic::eh_sjlj_setup_dispatch: { + case Intrinsic::eh_sjlj_setup_dispatch: DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_SETUP_DISPATCH, sdl, MVT::Other, getRoot())); return nullptr; - } - case Intrinsic::masked_gather: visitMaskedGather(I); return nullptr; @@ -5430,6 +5513,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::experimental_constrained_fmul: case Intrinsic::experimental_constrained_fdiv: case Intrinsic::experimental_constrained_frem: + case Intrinsic::experimental_constrained_fma: case Intrinsic::experimental_constrained_sqrt: case Intrinsic::experimental_constrained_pow: case Intrinsic::experimental_constrained_powi: @@ -5534,11 +5618,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { DAG.setRoot(Res.getValue(1)); return nullptr; } - case Intrinsic::stackrestore: { + case Intrinsic::stackrestore: Res = getValue(I.getArgOperand(0)); DAG.setRoot(DAG.getNode(ISD::STACKRESTORE, sdl, MVT::Other, getRoot(), Res)); return nullptr; - } case Intrinsic::get_dynamic_area_offset: { SDValue Op = getRoot(); EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout()); @@ -5557,7 +5640,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, 
unsigned Intrinsic) { case Intrinsic::stackguard: { EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout()); MachineFunction &MF = DAG.getMachineFunction(); - const Module &M = *MF.getFunction()->getParent(); + const Module &M = *MF.getFunction().getParent(); SDValue Chain = getRoot(); if (TLI.useLoadStackGuardNode()) { Res = getLoadStackGuard(DAG, sdl, Chain); @@ -5568,6 +5651,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { MachinePointerInfo(Global, 0), Align, MachineMemOperand::MOVolatile); } + if (TLI.useStackGuardXorFP()) + Res = TLI.emitStackGuardXorFP(DAG, Res, sdl); DAG.setRoot(Chain); setValue(&I, Res); return nullptr; @@ -5624,9 +5709,22 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return nullptr; case Intrinsic::assume: case Intrinsic::var_annotation: - // Discard annotate attributes and assumptions + case Intrinsic::sideeffect: + // Discard annotate attributes, assumptions, and artificial side-effects. return nullptr; + case Intrinsic::codeview_annotation: { + // Emit a label associated with this metadata. + MachineFunction &MF = DAG.getMachineFunction(); + MCSymbol *Label = + MF.getMMI().getContext().createTempSymbol("annotation", true); + Metadata *MD = cast<MetadataAsValue>(I.getArgOperand(0))->getMetadata(); + MF.addCodeViewAnnotation(Label, cast<MDNode>(MD)); + Res = DAG.getLabelNode(ISD::ANNOTATION_LABEL, sdl, getRoot(), Label); + DAG.setRoot(Res); + return nullptr; + } + case Intrinsic::init_trampoline: { const Function *F = cast<Function>(I.getArgOperand(1)->stripPointerCasts()); @@ -5643,17 +5741,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { DAG.setRoot(Res); return nullptr; } - case Intrinsic::adjust_trampoline: { + case Intrinsic::adjust_trampoline: setValue(&I, DAG.getNode(ISD::ADJUST_TRAMPOLINE, sdl, TLI.getPointerTy(DAG.getDataLayout()), getValue(I.getArgOperand(0)))); return nullptr; - } case Intrinsic::gcroot: { - MachineFunction &MF = DAG.getMachineFunction(); - const Function *F = MF.getFunction(); - (void)F; - assert(F->hasGC() && + assert(DAG.getMachineFunction().getFunction().hasGC() && "only valid in functions with gc specified, enforced by Verifier"); assert(GFI && "implied by previous"); const Value *Alloca = I.getArgOperand(0)->stripPointerCasts(); @@ -5670,11 +5764,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, sdl, MVT::i32)); return nullptr; - case Intrinsic::expect: { + case Intrinsic::expect: // Just replace __builtin_expect(exp, c) with EXP. setValue(&I, getValue(I.getArgOperand(0))); return nullptr; - } case Intrinsic::debugtrap: case Intrinsic::trap: { @@ -5728,6 +5821,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::prefetch: { SDValue Ops[5]; unsigned rw = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue(); + auto Flags = rw == 0 ? 
MachineMemOperand::MOLoad :MachineMemOperand::MOStore; Ops[0] = getRoot(); Ops[1] = getValue(I.getArgOperand(0)); Ops[2] = getValue(I.getArgOperand(1)); @@ -5738,9 +5832,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { EVT::getIntegerVT(*Context, 8), MachinePointerInfo(I.getArgOperand(0)), 0, /* align */ - false, /* volatile */ - rw==0, /* read */ - rw==1)); /* write */ + Flags)); return nullptr; } case Intrinsic::lifetime_start: @@ -5792,27 +5884,22 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::donothing: // ignore return nullptr; - case Intrinsic::experimental_stackmap: { + case Intrinsic::experimental_stackmap: visitStackmap(I); return nullptr; - } case Intrinsic::experimental_patchpoint_void: - case Intrinsic::experimental_patchpoint_i64: { + case Intrinsic::experimental_patchpoint_i64: visitPatchpoint(&I); return nullptr; - } - case Intrinsic::experimental_gc_statepoint: { + case Intrinsic::experimental_gc_statepoint: LowerStatepoint(ImmutableStatepoint(&I)); return nullptr; - } - case Intrinsic::experimental_gc_result: { + case Intrinsic::experimental_gc_result: visitGCResult(cast<GCResultInst>(I)); return nullptr; - } - case Intrinsic::experimental_gc_relocate: { + case Intrinsic::experimental_gc_relocate: visitGCRelocate(cast<GCRelocateInst>(I)); return nullptr; - } case Intrinsic::instrprof_increment: llvm_unreachable("instrprof failed to lower an increment"); case Intrinsic::instrprof_value_profile: @@ -5851,7 +5938,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { // Get the symbol that defines the frame offset. auto *Fn = cast<Function>(I.getArgOperand(0)->stripPointerCasts()); auto *Idx = cast<ConstantInt>(I.getArgOperand(2)); - unsigned IdxVal = unsigned(Idx->getLimitedValue(INT_MAX)); + unsigned IdxVal = + unsigned(Idx->getLimitedValue(std::numeric_limits<int>::max())); MCSymbol *FrameAllocSym = MF.getMMI().getContext().getOrCreateFrameAllocSymbol( GlobalValue::dropLLVMManglingEscape(Fn->getName()), IdxVal); @@ -5932,12 +6020,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::experimental_vector_reduce_umax: case Intrinsic::experimental_vector_reduce_umin: case Intrinsic::experimental_vector_reduce_fmax: - case Intrinsic::experimental_vector_reduce_fmin: { + case Intrinsic::experimental_vector_reduce_fmin: visitVectorReduce(I, Intrinsic); return nullptr; } - - } } void SelectionDAGBuilder::visitConstrainedFPIntrinsic( @@ -5961,6 +6047,9 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic( case Intrinsic::experimental_constrained_frem: Opcode = ISD::STRICT_FREM; break; + case Intrinsic::experimental_constrained_fma: + Opcode = ISD::STRICT_FMA; + break; case Intrinsic::experimental_constrained_sqrt: Opcode = ISD::STRICT_FSQRT; break; @@ -6007,10 +6096,15 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic( SDVTList VTs = DAG.getVTList(ValueVTs); SDValue Result; if (FPI.isUnaryOp()) - Result = DAG.getNode(Opcode, sdl, VTs, + Result = DAG.getNode(Opcode, sdl, VTs, { Chain, getValue(FPI.getArgOperand(0)) }); + else if (FPI.isTernaryOp()) + Result = DAG.getNode(Opcode, sdl, VTs, + { Chain, getValue(FPI.getArgOperand(0)), + getValue(FPI.getArgOperand(1)), + getValue(FPI.getArgOperand(2)) }); else - Result = DAG.getNode(Opcode, sdl, VTs, + Result = DAG.getNode(Opcode, sdl, VTs, { Chain, getValue(FPI.getArgOperand(0)), getValue(FPI.getArgOperand(1)) }); @@ -6081,7 +6175,7 @@ 
SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI, if (MF.hasEHFunclets()) { assert(CLI.CS); WinEHFuncInfo *EHInfo = DAG.getMachineFunction().getWinEHFuncInfo(); - EHInfo->addIPToStateRange(cast<InvokeInst>(CLI.CS->getInstruction()), + EHInfo->addIPToStateRange(cast<InvokeInst>(CLI.CS.getInstruction()), BeginLabel, EndLabel); } else { MF.addInvoke(FuncInfo.MBBMap[EHPadBB], BeginLabel, EndLabel); @@ -6189,7 +6283,6 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT, SelectionDAGBuilder &Builder) { - // Check to see if this load can be trivially constant folded, e.g. if the // input is from a string literal. if (const Constant *LoadInput = dyn_cast<Constant>(PtrVal)) { @@ -6553,10 +6646,10 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { // Check for well-known libc/libm calls. If the function is internal, it // can't be a library call. Don't do the check if marked as nobuiltin for - // some reason. + // some reason or the call site requires strict floating point semantics. LibFunc Func; - if (!I.isNoBuiltin() && !F->hasLocalLinkage() && F->hasName() && - LibInfo->getLibFunc(*F, Func) && + if (!I.isNoBuiltin() && !I.isStrictFP() && !F->hasLocalLinkage() && + F->hasName() && LibInfo->getLibFunc(*F, Func) && LibInfo->hasOptimizedCodeGen(Func)) { switch (Func) { default: break; @@ -6735,7 +6828,7 @@ public: RegsForValue AssignedRegs; explicit SDISelAsmOperandInfo(const TargetLowering::AsmOperandInfo &info) - : TargetLowering::AsmOperandInfo(info), CallOperand(nullptr,0) { + : TargetLowering::AsmOperandInfo(info), CallOperand(nullptr, 0) { } /// Whether or not this operand accesses memory @@ -6767,7 +6860,7 @@ public: // If this is an indirect operand, the operand is a pointer to the // accessed type. if (isIndirect) { - llvm::PointerType *PtrTy = dyn_cast<PointerType>(OpTy); + PointerType *PtrTy = dyn_cast<PointerType>(OpTy); if (!PtrTy) report_fatal_error("Indirect operand for inline asm not a pointer!"); OpTy = PtrTy->getElementType(); @@ -6799,7 +6892,7 @@ public: } }; -typedef SmallVector<SDISelAsmOperandInfo,16> SDISelAsmOperandInfoVector; +using SDISelAsmOperandInfoVector = SmallVector<SDISelAsmOperandInfo, 16>; } // end anonymous namespace @@ -6879,7 +6972,6 @@ static SDValue getAddressForMemoryInput(SDValue Chain, const SDLoc &Location, /// allocation. This produces generally horrible, but correct, code. /// /// OpInfo describes the operand. -/// static void GetRegistersForValue(SelectionDAG &DAG, const TargetLowering &TLI, const SDLoc &DL, SDISelAsmOperandInfo &OpInfo) { @@ -7013,6 +7105,8 @@ static bool createVirtualRegs(SmallVector<unsigned, 4> &Regs, unsigned NumRegs, return true; } +namespace { + class ExtraFlags { unsigned Flags = 0; @@ -7028,7 +7122,7 @@ public: Flags |= IA->getDialect() * InlineAsm::Extra_AsmDialect; } - void update(const llvm::TargetLowering::AsmOperandInfo &OpInfo) { + void update(const TargetLowering::AsmOperandInfo &OpInfo) { // Ideally, we would only check against memory constraints. However, the // meaning of an Other constraint can be target-specific and we can't easily // reason about it. Therefore, be conservative and set MayLoad/MayStore @@ -7047,8 +7141,9 @@ public: unsigned get() const { return Flags; } }; +} // end anonymous namespace + /// visitInlineAsm - Handle a call to an InlineAsm object. 
-/// void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue()); @@ -7207,13 +7302,13 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { RegsForValue RetValRegs; // IndirectStoresToEmit - The set of stores to emit after the inline asm node. - std::vector<std::pair<RegsForValue, Value*> > IndirectStoresToEmit; + std::vector<std::pair<RegsForValue, Value *>> IndirectStoresToEmit; for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) { SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i]; switch (OpInfo.Type) { - case InlineAsm::isOutput: { + case InlineAsm::isOutput: if (OpInfo.ConstraintType != TargetLowering::C_RegisterClass && OpInfo.ConstraintType != TargetLowering::C_Register) { // Memory output, or 'other' output (e.g. 'X' constraint). @@ -7264,7 +7359,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { : InlineAsm::Kind_RegDef, false, 0, getCurSDLoc(), DAG, AsmNodeOperands); break; - } + case InlineAsm::isInput: { SDValue InOperandVal = OpInfo.CallOperand; @@ -7397,7 +7492,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { dl, DAG, AsmNodeOperands); break; } - case InlineAsm::isClobber: { + case InlineAsm::isClobber: // Add the clobbered value to the operand list, so that the register // allocator is aware that the physreg got clobbered. if (!OpInfo.AssignedRegs.Regs.empty()) @@ -7406,7 +7501,6 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { AsmNodeOperands); break; } - } } // Finish up input operands. Set the input chain and add the flag last. @@ -7453,7 +7547,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { return; } - std::vector<std::pair<SDValue, const Value *> > StoresToEmit; + std::vector<std::pair<SDValue, const Value *>> StoresToEmit; // Process indirect outputs, first output all of the flagged copies out of // physregs. 
@@ -7865,13 +7959,13 @@ void SelectionDAGBuilder::visitVectorReduce(const CallInst &I, switch (Intrinsic) { case Intrinsic::experimental_vector_reduce_fadd: - if (FMF.unsafeAlgebra()) + if (FMF.isFast()) Res = DAG.getNode(ISD::VECREDUCE_FADD, dl, VT, Op2); else Res = DAG.getNode(ISD::VECREDUCE_STRICT_FADD, dl, VT, Op1, Op2); break; case Intrinsic::experimental_vector_reduce_fmul: - if (FMF.unsafeAlgebra()) + if (FMF.isFast()) Res = DAG.getNode(ISD::VECREDUCE_FMUL, dl, VT, Op2); else Res = DAG.getNode(ISD::VECREDUCE_STRICT_FMUL, dl, VT, Op1, Op2); @@ -7903,14 +7997,12 @@ void SelectionDAGBuilder::visitVectorReduce(const CallInst &I, case Intrinsic::experimental_vector_reduce_umin: Res = DAG.getNode(ISD::VECREDUCE_UMIN, dl, VT, Op1); break; - case Intrinsic::experimental_vector_reduce_fmax: { + case Intrinsic::experimental_vector_reduce_fmax: Res = DAG.getNode(ISD::VECREDUCE_FMAX, dl, VT, Op1, SDFlags); break; - } - case Intrinsic::experimental_vector_reduce_fmin: { + case Intrinsic::experimental_vector_reduce_fmin: Res = DAG.getNode(ISD::VECREDUCE_FMIN, dl, VT, Op1, SDFlags); break; - } default: llvm_unreachable("Unhandled vector reduce intrinsic"); } @@ -7955,10 +8047,10 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { uint64_t Offset = OldOffsets[i]; MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), RetVT); unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), RetVT); - unsigned RegisterVTSize = RegisterVT.getSizeInBits(); + unsigned RegisterVTByteSZ = RegisterVT.getSizeInBits() / 8; RetTys.append(NumRegs, RegisterVT); for (unsigned j = 0; j != NumRegs; ++j) - Offsets.push_back(Offset + j * RegisterVTSize); + Offsets.push_back(Offset + j * RegisterVTByteSZ); } } @@ -7996,6 +8088,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { Entry.IsSwiftError = false; Entry.Alignment = Align; CLI.getArgs().insert(CLI.getArgs().begin(), Entry); + CLI.NumFixedArgs += 1; CLI.RetTy = Type::getVoidTy(CLI.RetTy->getContext()); // sret demotion isn't compatible with tail-calls, since the sret argument @@ -8148,8 +8241,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { } getCopyToParts(CLI.DAG, CLI.DL, Op, &Parts[0], NumParts, PartVT, - CLI.CS ? CLI.CS->getInstruction() : nullptr, ExtendKind, - true); + CLI.CS.getInstruction(), ExtendKind, true); for (unsigned j = 0; j != NumParts; ++j) { // if it isn't first piece, alignment must be 1 @@ -8209,7 +8301,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { // The instruction result is the result of loading from the // hidden sret parameter. SmallVector<EVT, 1> PVTs; - Type *PtrRetTy = PointerType::getUnqual(OrigRetTy); + Type *PtrRetTy = OrigRetTy->getPointerTo(DL.getAllocaAddrSpace()); ComputeValueVTs(*this, DL, PtrRetTy, PVTs); assert(PVTs.size() == 1 && "Pointers should fit in one register"); @@ -8326,9 +8418,9 @@ static bool isOnlyUsedInEntryBlock(const Argument *A, bool FastISel) { return true; } -typedef DenseMap<const Argument *, - std::pair<const AllocaInst *, const StoreInst *>> - ArgCopyElisionMapTy; +using ArgCopyElisionMapTy = + DenseMap<const Argument *, + std::pair<const AllocaInst *, const StoreInst *>>; /// Scan the entry block of the function in FuncInfo for arguments that look /// like copies into a local alloca. Record any copied arguments in @@ -8503,7 +8595,9 @@ void SelectionDAGISel::LowerArguments(const Function &F) { // Put in an sret pointer parameter before all the other parameters. 
SmallVector<EVT, 1> ValueVTs; ComputeValueVTs(*TLI, DAG.getDataLayout(), - PointerType::getUnqual(F.getReturnType()), ValueVTs); + F.getReturnType()->getPointerTo( + DAG.getDataLayout().getAllocaAddrSpace()), + ValueVTs); // NOTE: Assuming that a pointer will never break down to more than one VT // or one register. @@ -8657,7 +8751,9 @@ void SelectionDAGISel::LowerArguments(const Function &F) { // from the sret argument into it. SmallVector<EVT, 1> ValueVTs; ComputeValueVTs(*TLI, DAG.getDataLayout(), - PointerType::getUnqual(F.getReturnType()), ValueVTs); + F.getReturnType()->getPointerTo( + DAG.getDataLayout().getAllocaAddrSpace()), + ValueVTs); MVT VT = ValueVTs[0].getSimpleVT(); MVT RegVT = TLI->getRegisterType(*CurDAG->getContext(), VT); Optional<ISD::NodeType> AssertOp = None; @@ -8749,11 +8845,19 @@ void SelectionDAGISel::LowerArguments(const Function &F) { SDB->setValue(&Arg, Res); if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::BUILD_PAIR) { + // We want to associate the argument with the frame index, among + // involved operands, that correspond to the lowest address. The + // getCopyFromParts function, called earlier, is swapping the order of + // the operands to BUILD_PAIR depending on endianness. The result of + // that swapping is that the least significant bits of the argument will + // be in the first operand of the BUILD_PAIR node, and the most + // significant bits will be in the second operand. + unsigned LowAddressOp = DAG.getDataLayout().isBigEndian() ? 1 : 0; if (LoadSDNode *LNode = - dyn_cast<LoadSDNode>(Res.getOperand(0).getNode())) + dyn_cast<LoadSDNode>(Res.getOperand(LowAddressOp).getNode())) if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode())) - FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex()); + FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex()); } // Update the SwiftErrorVRegDefMap. @@ -8813,7 +8917,6 @@ void SelectionDAGISel::LowerArguments(const Function &F) { /// directly add them, because expansion might result in multiple MBB's for one /// BB. As such, the start of the BB might correspond to a different MBB than /// the end. -/// void SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { const TerminatorInst *TI = LLVMBB->getTerminator(); @@ -9249,10 +9352,12 @@ bool SelectionDAGBuilder::buildBitTests(CaseClusterVector &Clusters, BitTestInfo BTI; std::sort(CBV.begin(), CBV.end(), [](const CaseBits &a, const CaseBits &b) { - // Sort by probability first, number of bits second. + // Sort by probability first, number of bits second, bit mask third. if (a.ExtraProb != b.ExtraProb) return a.ExtraProb > b.ExtraProb; - return a.Bits > b.Bits; + if (a.Bits != b.Bits) + return a.Bits > b.Bits; + return a.Mask < b.Mask; }); for (auto &CB : CBV) { @@ -9441,10 +9546,15 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, } if (TM.getOptLevel() != CodeGenOpt::None) { - // Order cases by probability so the most likely case will be checked first. + // Here, we order cases by probability so the most likely case will be + // checked first. However, two clusters can have the same probability in + // which case their relative ordering is non-deterministic. So we use Low + // as a tie-breaker as clusters are guaranteed to never overlap. std::sort(W.FirstCluster, W.LastCluster + 1, [](const CaseCluster &a, const CaseCluster &b) { - return a.Prob > b.Prob; + return a.Prob != b.Prob ? 
+             a.Prob > b.Prob :
+             a.Low->getValue().slt(b.Low->getValue());
    });

    // Rearrange the case blocks so that the last one falls through if possible
@@ -9570,8 +9680,8 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
      }

      // The false probability is the sum of all unhandled cases.
-      CaseBlock CB(CC, LHS, RHS, MHS, I->MBB, Fallthrough, CurMBB, I->Prob,
-                   UnhandledProbs);
+      CaseBlock CB(CC, LHS, RHS, MHS, I->MBB, Fallthrough, CurMBB,
+                   getCurSDLoc(), I->Prob, UnhandledProbs);

      if (CurMBB == SwitchMBB)
        visitSwitchCase(CB, SwitchMBB);
@@ -9627,7 +9737,7 @@ void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList,
    I++;
  }

-  for (;;) {
+  while (true) {
    // Our binary search tree differs from a typical BST in that ours can have up
    // to three values in each leaf. The pivot selection above doesn't take that
    // into account, which means the tree might require more nodes and be less
@@ -9722,7 +9832,7 @@ void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList,

  // Create the CaseBlock record that will be used to lower the branch.
  CaseBlock CB(ISD::SETLT, Cond, Pivot, nullptr, LeftMBB, RightMBB, W.MBB,
-               LeftProb, RightProb);
+               getCurSDLoc(), LeftProb, RightProb);

  if (W.MBB == SwitchMBB)
    visitSwitchCase(CB, SwitchMBB);
@@ -9730,6 +9840,76 @@ void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList,
    SwitchCases.push_back(CB);
}

+// Scale CaseProb after peeling a case with the probablity of PeeledCaseProb
+// from the swith statement.
+static BranchProbability scaleCaseProbality(BranchProbability CaseProb,
+                                            BranchProbability PeeledCaseProb) {
+  if (PeeledCaseProb == BranchProbability::getOne())
+    return BranchProbability::getZero();
+  BranchProbability SwitchProb = PeeledCaseProb.getCompl();
+
+  uint32_t Numerator = CaseProb.getNumerator();
+  uint32_t Denominator = SwitchProb.scale(CaseProb.getDenominator());
+  return BranchProbability(Numerator, std::max(Numerator, Denominator));
+}
+
+// Try to peel the top probability case if it exceeds the threshold.
+// Return current MachineBasicBlock for the switch statement if the peeling
+// does not occur.
+// If the peeling is performed, return the newly created MachineBasicBlock
+// for the peeled switch statement. Also update Clusters to remove the peeled
+// case. PeeledCaseProb is the BranchProbability for the peeled case.
+MachineBasicBlock *SelectionDAGBuilder::peelDominantCaseCluster(
+    const SwitchInst &SI, CaseClusterVector &Clusters,
+    BranchProbability &PeeledCaseProb) {
+  MachineBasicBlock *SwitchMBB = FuncInfo.MBB;
+  // Don't perform if there is only one cluster or optimizing for size.
+  if (SwitchPeelThreshold > 100 || !FuncInfo.BPI || Clusters.size() < 2 ||
+      TM.getOptLevel() == CodeGenOpt::None ||
+      SwitchMBB->getParent()->getFunction().optForMinSize())
+    return SwitchMBB;
+
+  BranchProbability TopCaseProb = BranchProbability(SwitchPeelThreshold, 100);
+  unsigned PeeledCaseIndex = 0;
+  bool SwitchPeeled = false;
+  for (unsigned Index = 0; Index < Clusters.size(); ++Index) {
+    CaseCluster &CC = Clusters[Index];
+    if (CC.Prob < TopCaseProb)
+      continue;
+    TopCaseProb = CC.Prob;
+    PeeledCaseIndex = Index;
+    SwitchPeeled = true;
+  }
+  if (!SwitchPeeled)
+    return SwitchMBB;
+
+  DEBUG(dbgs() << "Peeled one top case in switch stmt, prob: " << TopCaseProb
+               << "\n");
+
+  // Record the MBB for the peeled switch statement.
+  MachineFunction::iterator BBI(SwitchMBB);
+  ++BBI;
+  MachineBasicBlock *PeeledSwitchMBB =
+      FuncInfo.MF->CreateMachineBasicBlock(SwitchMBB->getBasicBlock());
+  FuncInfo.MF->insert(BBI, PeeledSwitchMBB);
+
+  ExportFromCurrentBlock(SI.getCondition());
+  auto PeeledCaseIt = Clusters.begin() + PeeledCaseIndex;
+  SwitchWorkListItem W = {SwitchMBB, PeeledCaseIt, PeeledCaseIt,
+                          nullptr, nullptr, TopCaseProb.getCompl()};
+  lowerWorkItem(W, SI.getCondition(), SwitchMBB, PeeledSwitchMBB);
+
+  Clusters.erase(PeeledCaseIt);
+  for (CaseCluster &CC : Clusters) {
+    DEBUG(dbgs() << "Scale the probablity for one cluster, before scaling: "
+                 << CC.Prob << "\n");
+    CC.Prob = scaleCaseProbality(CC.Prob, TopCaseProb);
+    DEBUG(dbgs() << "After scaling: " << CC.Prob << "\n");
+  }
+  PeeledCaseProb = TopCaseProb;
+  return PeeledSwitchMBB;
+}
+
void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
  // Extract cases from the switch.
  BranchProbabilityInfo *BPI = FuncInfo.BPI;
@@ -9783,9 +9963,15 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
    }
  }

+  // The branch probablity of the peeled case.
+  BranchProbability PeeledCaseProb = BranchProbability::getZero();
+  MachineBasicBlock *PeeledSwitchMBB =
+      peelDominantCaseCluster(SI, Clusters, PeeledCaseProb);
+
  // If there is only the default destination, jump there directly.
  MachineBasicBlock *SwitchMBB = FuncInfo.MBB;
  if (Clusters.empty()) {
+    assert(PeeledSwitchMBB == SwitchMBB);
    SwitchMBB->addSuccessor(DefaultMBB);
    if (DefaultMBB != NextBlock(SwitchMBB)) {
      DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other,
@@ -9817,8 +10003,14 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
  SwitchWorkList WorkList;
  CaseClusterIt First = Clusters.begin();
  CaseClusterIt Last = Clusters.end() - 1;
-  auto DefaultProb = getEdgeProbability(SwitchMBB, DefaultMBB);
-  WorkList.push_back({SwitchMBB, First, Last, nullptr, nullptr, DefaultProb});
+  auto DefaultProb = getEdgeProbability(PeeledSwitchMBB, DefaultMBB);
+  // Scale the branchprobability for DefaultMBB if the peel occurs and
+  // DefaultMBB is not replaced.
+  if (PeeledCaseProb != BranchProbability::getZero() &&
+      DefaultMBB == FuncInfo.MBBMap[SI.getDefaultDest()])
+    DefaultProb = scaleCaseProbality(DefaultProb, PeeledCaseProb);
+  WorkList.push_back(
+      {PeeledSwitchMBB, First, Last, nullptr, nullptr, DefaultProb});

  while (!WorkList.empty()) {
    SwitchWorkListItem W = WorkList.back();
@@ -9826,7 +10018,7 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {

    unsigned NumClusters = W.LastCluster - W.FirstCluster + 1;
    if (NumClusters > 3 && TM.getOptLevel() != CodeGenOpt::None &&
-        !DefaultMBB->getParent()->getFunction()->optForMinSize()) {
+        !DefaultMBB->getParent()->getFunction().optForMinSize()) {
      // For optimized builds, lower large range as a balanced binary tree.
      splitWorkItem(WorkList, W, SI.getCondition(), SwitchMBB);
      continue;
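For readers following the new switch-peeling hunks above, the rescaling done by the added scaleCaseProbality helper is just C / (1 - P): once a case with probability P has been peeled off, each remaining case probability C is renormalized against the rest of the switch, saturating at 1. The standalone sketch below is illustrative only; Prob and scaleAfterPeel are made-up names rather than LLVM API, and a plain numerator/denominator pair stands in for BranchProbability. It reproduces the same arithmetic, including the clamp the patch expresses as std::max(Numerator, Denominator).

// Standalone illustration (not part of the patch): after peeling a case with
// probability P from a switch, each remaining case probability C becomes
// C / (1 - P), clamped to at most 1.
#include <algorithm>
#include <cstdint>
#include <cstdio>

struct Prob {
  uint32_t Num; // numerator
  uint32_t Den; // denominator
};

// Hypothetical helper mirroring the patch's scaling rule.
static Prob scaleAfterPeel(Prob CaseProb, Prob PeeledProb) {
  if (PeeledProb.Num == PeeledProb.Den) // the peeled case took everything
    return {0, 1};
  // Complement of the peeled probability is (Den - Num) / Den.
  uint64_t ComplNum = PeeledProb.Den - PeeledProb.Num;
  // C / (1 - P) expressed on a common denominator.
  uint64_t Num = (uint64_t)CaseProb.Num * PeeledProb.Den;
  uint64_t Den = (uint64_t)CaseProb.Den * ComplNum;
  // Saturate at 1, like std::max(Numerator, Denominator) in the patch.
  Den = std::max(Num, Den);
  return {(uint32_t)Num, (uint32_t)Den};
}

int main() {
  Prob Peeled = {80, 100};    // dominant case: 80%
  Prob Remaining = {10, 100}; // another case: 10%
  Prob Scaled = scaleAfterPeel(Remaining, Peeled);
  std::printf("%u/%u\n", Scaled.Num, Scaled.Den); // prints 1000/2000, i.e. 50%
  return 0;
}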