Diffstat (limited to 'lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp')
-rw-r--r-- | lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 2305 |
1 file changed, 1166 insertions(+), 1139 deletions(-)
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 871ab9b29881..e818dd27c05e 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -1,9 +1,8 @@
 //===- SelectionDAGBuilder.cpp - Selection-DAG building -------------------===//
 //
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -55,6 +54,7 @@
 #include "llvm/CodeGen/SelectionDAGNodes.h"
 #include "llvm/CodeGen/SelectionDAGTargetInfo.h"
 #include "llvm/CodeGen/StackMaps.h"
+#include "llvm/CodeGen/SwiftErrorValueTracking.h"
 #include "llvm/CodeGen/TargetFrameLowering.h"
 #include "llvm/CodeGen/TargetInstrInfo.h"
 #include "llvm/CodeGen/TargetLowering.h"
@@ -109,6 +109,7 @@
 #include "llvm/Target/TargetIntrinsicInfo.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
+#include "llvm/Transforms/Utils/Local.h"
 #include <algorithm>
 #include <cassert>
 #include <cstddef>
@@ -123,6 +124,7 @@
 
 using namespace llvm;
 using namespace PatternMatch;
+using namespace SwitchCG;
 
 #define DEBUG_TYPE "isel"
@@ -215,8 +217,8 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL,
     unsigned ValueBits = ValueVT.getSizeInBits();
 
     // Assemble the power of 2 part.
-    unsigned RoundParts = NumParts & (NumParts - 1) ?
-      1 << Log2_32(NumParts) : NumParts;
+    unsigned RoundParts =
+        (NumParts & (NumParts - 1)) ? 1 << Log2_32(NumParts) : NumParts;
     unsigned RoundBits = PartBits * RoundParts;
     EVT RoundVT = RoundBits == ValueBits ?
       ValueVT : EVT::getIntegerVT(*DAG.getContext(), RoundBits);
@@ -322,7 +324,15 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL,
     return DAG.getNode(ISD::FP_EXTEND, DL, ValueVT, Val);
   }
 
-  llvm_unreachable("Unknown mismatch!");
+  // Handle MMX to a narrower integer type by bitcasting MMX to integer and
+  // then truncating.
+  if (PartEVT == MVT::x86mmx && ValueVT.isInteger() &&
+      ValueVT.bitsLT(PartEVT)) {
+    Val = DAG.getNode(ISD::BITCAST, DL, MVT::i64, Val);
+    return DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
+  }
+
+  report_fatal_error("Unknown mismatch in getCopyFromParts!");
 }
 
 static void diagnosePossiblyInvalidConstraint(LLVMContext &Ctx, const Value *V,
@@ -573,7 +583,8 @@ static void getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val,
     unsigned RoundBits = RoundParts * PartBits;
     unsigned OddParts = NumParts - RoundParts;
     SDValue OddVal = DAG.getNode(ISD::SRL, DL, ValueVT, Val,
-                                 DAG.getIntPtrConstant(RoundBits, DL));
+      DAG.getShiftAmountConstant(RoundBits, ValueVT, DL, /*LegalTypes*/false));
+
     getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT, V,
                    CallConv);
@@ -1003,6 +1014,7 @@ void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis *aa,
   DL = &DAG.getDataLayout();
   Context = DAG.getContext();
   LPadToCallSiteMap.clear();
+  SL->init(DAG.getTargetLoweringInfo(), TM, DAG.getDataLayout());
 }
 
 void SelectionDAGBuilder::clear() {
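
A note on the RoundParts rewrite in the @@ -215 hunk above: `NumParts & (NumParts - 1)` is the standard power-of-two test, and `1 << Log2_32(NumParts)` rounds down to the nearest power of two. A minimal standalone C++ sketch of the same computation (Log2_32 reimplemented locally for the example; not code from this patch):

    #include <cassert>
    #include <cstdint>

    // Local stand-in for llvm/Support/MathExtras.h's Log2_32: index of the
    // highest set bit (call only with nonzero values here).
    static unsigned Log2_32(uint32_t V) {
      unsigned L = 0;
      while (V >>= 1)
        ++L;
      return L;
    }

    // Mirrors the RoundParts computation: a power-of-two count is kept as-is;
    // anything else is rounded down to the largest power of two below it, so
    // that half can be assembled as a balanced tree and the odd remainder
    // handled separately.
    static unsigned roundToPow2Parts(unsigned NumParts) {
      return (NumParts & (NumParts - 1)) ? 1u << Log2_32(NumParts) : NumParts;
    }

    int main() {
      assert(roundToPow2Parts(8) == 8); // already a power of two
      assert(roundToPow2Parts(6) == 4); // rounded down
      assert(roundToPow2Parts(9) == 8);
      return 0;
    }
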
@@ -1032,19 +1044,7 @@ SDValue SelectionDAGBuilder::getRoot() {
   }
 
   // Otherwise, we have to make a token factor node.
-  // If we have >= 2^16 loads then split across multiple token factors as
-  // there's a 64k limit on the number of SDNode operands.
-  SDValue Root;
-  size_t Limit = (1 << 16) - 1;
-  while (PendingLoads.size() > Limit) {
-    unsigned SliceIdx = PendingLoads.size() - Limit;
-    auto ExtractedTFs = ArrayRef<SDValue>(PendingLoads).slice(SliceIdx, Limit);
-    SDValue NewTF =
-        DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, ExtractedTFs);
-    PendingLoads.erase(PendingLoads.begin() + SliceIdx, PendingLoads.end());
-    PendingLoads.emplace_back(NewTF);
-  }
-  Root = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, PendingLoads);
+  SDValue Root = DAG.getTokenFactor(getCurSDLoc(), PendingLoads);
   PendingLoads.clear();
   DAG.setRoot(Root);
   return Root;
@@ -1144,6 +1144,13 @@ void SelectionDAGBuilder::dropDanglingDebugInfo(const DILocalVariable *Variable,
 
   for (auto &DDIMI : DanglingDebugInfoMap) {
     DanglingDebugInfoVector &DDIV = DDIMI.second;
+
+    // If debug info is to be dropped, run it through final checks to see
+    // whether it can be salvaged.
+    for (auto &DDI : DDIV)
+      if (isMatchingDbgValue(DDI))
+        salvageUnresolvedDbgValue(DDI);
+
     DDIV.erase(remove_if(DDIV, isMatchingDbgValue), DDIV.end());
   }
 }
@@ -1169,6 +1176,12 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V,
            "Expected inlined-at fields to agree");
     SDDbgValue *SDV;
     if (Val.getNode()) {
+      // FIXME: I doubt that it is correct to resolve a dangling DbgValue as a
+      // FuncArgumentDbgValue (it would be hoisted to the function entry, and if
+      // we couldn't resolve it directly when examining the DbgValue intrinsic
+      // in the first place we should not be more successful here). Unless we
+      // have some test case that proves this to be correct we should avoid
+      // calling EmitFuncArgumentDbgValue here.
       if (!EmitFuncArgumentDbgValue(V, Variable, Expr, dl, false, Val)) {
         LLVM_DEBUG(dbgs() << "Resolve dangling debug info [order="
                           << DbgSDNodeOrder << "] for:\n  " << *DI << "\n");
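
The getRoot() hunk above replaces the open-coded 64k-operand chunking with DAG.getTokenFactor, which now owns that logic. A standalone sketch of the chunking shape the deleted loop implemented, with plain integers standing in for SDValues (illustrative only):

    #include <cassert>
    #include <numeric>
    #include <vector>

    // Illustrative stand-in for building one TokenFactor node: just sum.
    static int makeNode(const std::vector<int> &Ops) {
      return std::accumulate(Ops.begin(), Ops.end(), 0);
    }

    // Combine all pending operands while never handing more than Limit
    // operands to a single node: each overflow slice is collapsed into one
    // value that is pushed back as an ordinary operand, exactly the shape of
    // the loop that moved into DAG.getTokenFactor.
    static int combineWithLimit(std::vector<int> Pending, size_t Limit) {
      while (Pending.size() > Limit) {
        size_t SliceIdx = Pending.size() - Limit;
        std::vector<int> Slice(Pending.begin() + SliceIdx, Pending.end());
        Pending.erase(Pending.begin() + SliceIdx, Pending.end());
        Pending.push_back(makeNode(Slice));
      }
      return makeNode(Pending);
    }

    int main() {
      std::vector<int> Ops(100000, 1);
      // With a 65535-operand limit, every operand must still be seen once.
      assert(combineWithLimit(Ops, (1u << 16) - 1) == 100000);
      return 0;
    }
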
@@ -1186,12 +1199,173 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V,
       } else
         LLVM_DEBUG(dbgs() << "Resolved dangling debug info for " << *DI
                           << "in EmitFuncArgumentDbgValue\n");
-    } else
+    } else {
       LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
+      auto Undef =
+          UndefValue::get(DDI.getDI()->getVariableLocation()->getType());
+      auto SDV =
+          DAG.getConstantDbgValue(Variable, Expr, Undef, dl, DbgSDNodeOrder);
+      DAG.AddDbgValue(SDV, nullptr, false);
+    }
   }
   DDIV.clear();
 }
 
+void SelectionDAGBuilder::salvageUnresolvedDbgValue(DanglingDebugInfo &DDI) {
+  Value *V = DDI.getDI()->getValue();
+  DILocalVariable *Var = DDI.getDI()->getVariable();
+  DIExpression *Expr = DDI.getDI()->getExpression();
+  DebugLoc DL = DDI.getdl();
+  DebugLoc InstDL = DDI.getDI()->getDebugLoc();
+  unsigned SDOrder = DDI.getSDNodeOrder();
+
+  // Currently we consider only dbg.value intrinsics -- we tell the salvager
+  // that DW_OP_stack_value is desired.
+  assert(isa<DbgValueInst>(DDI.getDI()));
+  bool StackValue = true;
+
+  // Can this Value be encoded without any further work?
+  if (handleDebugValue(V, Var, Expr, DL, InstDL, SDOrder))
+    return;
+
+  // Attempt to salvage back through as many instructions as possible. Bail if
+  // a non-instruction is seen, such as a constant expression or global
+  // variable. FIXME: Further work could recover those too.
+  while (isa<Instruction>(V)) {
+    Instruction &VAsInst = *cast<Instruction>(V);
+    DIExpression *NewExpr = salvageDebugInfoImpl(VAsInst, Expr, StackValue);
+
+    // If we cannot salvage any further, and haven't yet found a suitable debug
+    // expression, bail out.
+    if (!NewExpr)
+      break;
+
+    // New value and expr now represent this debuginfo.
+    V = VAsInst.getOperand(0);
+    Expr = NewExpr;
+
+    // Some kind of simplification occurred: check whether the operand of the
+    // salvaged debug expression can be encoded in this DAG.
+    if (handleDebugValue(V, Var, Expr, DL, InstDL, SDOrder)) {
+      LLVM_DEBUG(dbgs() << "Salvaged debug location info for:\n  "
                        << DDI.getDI() << "\nBy stripping back to:\n  " << V);
+      return;
+    }
+  }
+
+  // This was the final opportunity to salvage this debug information, and it
+  // couldn't be done. Place an undef DBG_VALUE at this location to terminate
+  // any earlier variable location.
+  auto Undef = UndefValue::get(DDI.getDI()->getVariableLocation()->getType());
+  auto SDV = DAG.getConstantDbgValue(Var, Expr, Undef, DL, SDNodeOrder);
+  DAG.AddDbgValue(SDV, nullptr, false);
+
+  LLVM_DEBUG(dbgs() << "Dropping debug value info for:\n  " << DDI.getDI()
+                    << "\n");
+  LLVM_DEBUG(dbgs() << "  Last seen at:\n    " << *DDI.getDI()->getOperand(0)
+                    << "\n");
+}
+
+bool SelectionDAGBuilder::handleDebugValue(const Value *V, DILocalVariable *Var,
+                                           DIExpression *Expr, DebugLoc dl,
+                                           DebugLoc InstDL, unsigned Order) {
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  SDDbgValue *SDV;
+  if (isa<ConstantInt>(V) || isa<ConstantFP>(V) || isa<UndefValue>(V) ||
+      isa<ConstantPointerNull>(V)) {
+    SDV = DAG.getConstantDbgValue(Var, Expr, V, dl, SDNodeOrder);
+    DAG.AddDbgValue(SDV, nullptr, false);
+    return true;
+  }
+
+  // If the Value is a frame index, we can create a FrameIndex debug value
+  // without relying on the DAG at all.
+  if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
+    auto SI = FuncInfo.StaticAllocaMap.find(AI);
+    if (SI != FuncInfo.StaticAllocaMap.end()) {
+      auto SDV =
+          DAG.getFrameIndexDbgValue(Var, Expr, SI->second,
+                                    /*IsIndirect*/ false, dl, SDNodeOrder);
+      // Do not attach the SDNodeDbgValue to an SDNode: this variable location
+      // is still available even if the SDNode gets optimized out.
+      DAG.AddDbgValue(SDV, nullptr, false);
+      return true;
+    }
+  }
+
+  // Do not use getValue() in here; we don't want to generate code at
+  // this point if it hasn't been done yet.
+  SDValue N = NodeMap[V];
+  if (!N.getNode() && isa<Argument>(V)) // Check unused arguments map.
+    N = UnusedArgNodeMap[V];
+  if (N.getNode()) {
+    if (EmitFuncArgumentDbgValue(V, Var, Expr, dl, false, N))
+      return true;
+    SDV = getDbgValue(N, Var, Expr, dl, SDNodeOrder);
+    DAG.AddDbgValue(SDV, N.getNode(), false);
+    return true;
+  }
+
+  // Special rules apply for the first dbg.values of parameter variables in a
+  // function. Identify them by the fact they reference Argument Values, that
+  // they're parameters, and they are parameters of the current function. We
+  // need to let them dangle until they get an SDNode.
+  bool IsParamOfFunc = isa<Argument>(V) && Var->isParameter() &&
+                       !InstDL.getInlinedAt();
+  if (!IsParamOfFunc) {
+    // The value is not used in this block yet (or it would have an SDNode).
+    // We still want the value to appear for the user if possible -- if it has
+    // an associated VReg, we can refer to that instead.
+    auto VMI = FuncInfo.ValueMap.find(V);
+    if (VMI != FuncInfo.ValueMap.end()) {
+      unsigned Reg = VMI->second;
+      // If this is a PHI node, it may be split up into several MI PHI nodes
+      // (in FunctionLoweringInfo::set).
+      RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg,
+                       V->getType(), None);
+      if (RFV.occupiesMultipleRegs()) {
+        unsigned Offset = 0;
+        unsigned BitsToDescribe = 0;
+        if (auto VarSize = Var->getSizeInBits())
+          BitsToDescribe = *VarSize;
+        if (auto Fragment = Expr->getFragmentInfo())
+          BitsToDescribe = Fragment->SizeInBits;
+        for (auto RegAndSize : RFV.getRegsAndSizes()) {
+          unsigned RegisterSize = RegAndSize.second;
+          // Bail out if all bits are described already.
+          if (Offset >= BitsToDescribe)
+            break;
+          unsigned FragmentSize = (Offset + RegisterSize > BitsToDescribe)
+              ? BitsToDescribe - Offset
+              : RegisterSize;
+          auto FragmentExpr = DIExpression::createFragmentExpression(
+              Expr, Offset, FragmentSize);
+          if (!FragmentExpr)
+            continue;
+          SDV = DAG.getVRegDbgValue(Var, *FragmentExpr, RegAndSize.first,
+                                    false, dl, SDNodeOrder);
+          DAG.AddDbgValue(SDV, nullptr, false);
+          Offset += RegisterSize;
+        }
+      } else {
+        SDV = DAG.getVRegDbgValue(Var, Expr, Reg, false, dl, SDNodeOrder);
+        DAG.AddDbgValue(SDV, nullptr, false);
+      }
+      return true;
+    }
+  }
+
+  return false;
+}
+
+void SelectionDAGBuilder::resolveOrClearDbgInfo() {
+  // Try to fixup any remaining dangling debug info -- and drop it if we can't.
+  for (auto &Pair : DanglingDebugInfoMap)
+    for (auto &DDI : Pair.second)
+      salvageUnresolvedDbgValue(DDI);
+  clearDanglingDebugInfo();
+}
+
 /// getCopyFromRegs - If there was virtual register allocated for the value V
 /// emit CopyFromReg of the specified type Ty. Return empty SDValue() otherwise.
 SDValue SelectionDAGBuilder::getCopyFromRegs(const Value *V, Type *Ty) {
@@ -1469,6 +1643,36 @@ void SelectionDAGBuilder::visitCleanupPad(const CleanupPadInst &CPI) {
   }
 }
 
+// For wasm, there's always a single catch pad attached to a catchswitch, and
+// the control flow always stops at the single catch pad, as it does for a
+// cleanup pad. In case the exception caught is not of the types the catch pad
+// catches, it will be rethrown by a rethrow.
+static void findWasmUnwindDestinations(
+    FunctionLoweringInfo &FuncInfo, const BasicBlock *EHPadBB,
+    BranchProbability Prob,
+    SmallVectorImpl<std::pair<MachineBasicBlock *, BranchProbability>>
+        &UnwindDests) {
+  while (EHPadBB) {
+    const Instruction *Pad = EHPadBB->getFirstNonPHI();
+    if (isa<CleanupPadInst>(Pad)) {
+      // Stop on cleanup pads.
+      UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob);
+      UnwindDests.back().first->setIsEHScopeEntry();
+      break;
+    } else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) {
+      // Add the catchpad handlers to the possible destinations. We don't
+      // continue to the unwind destination of the catchswitch for wasm.
+      for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) {
+        UnwindDests.emplace_back(FuncInfo.MBBMap[CatchPadBB], Prob);
+        UnwindDests.back().first->setIsEHScopeEntry();
+      }
+      break;
+    } else {
+      continue;
+    }
+  }
+}
+
 /// When an invoke or a cleanupret unwinds to the next EH pad, there are
 /// many places it could ultimately go. In the IR, we have a single unwind
 /// destination, but in the machine CFG, we enumerate all the possible blocks.
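
handleDebugValue, added earlier in this hunk, describes a value spread over several registers by emitting one DW_OP_LLVM_fragment per register and clamping the final fragment to the variable's size. The offset arithmetic in isolation (hypothetical register sizes; not LLVM code):

    #include <cassert>
    #include <vector>

    struct Fragment { unsigned OffsetInBits, SizeInBits; };

    // Mirror of the Offset/FragmentSize loop in handleDebugValue: walk the
    // registers covering a value, emit one fragment per register, stop once
    // the variable's bits are fully described, and clamp the last piece.
    static std::vector<Fragment> splitIntoFragments(
        const std::vector<unsigned> &RegSizesInBits, unsigned BitsToDescribe) {
      std::vector<Fragment> Frags;
      unsigned Offset = 0;
      for (unsigned RegisterSize : RegSizesInBits) {
        if (Offset >= BitsToDescribe)
          break; // all bits are described already
        unsigned FragmentSize = (Offset + RegisterSize > BitsToDescribe)
                                    ? BitsToDescribe - Offset
                                    : RegisterSize;
        Frags.push_back({Offset, FragmentSize});
        Offset += RegisterSize;
      }
      return Frags;
    }

    int main() {
      // A 96-bit variable held in two 64-bit registers: the second fragment
      // is clamped to the 32 bits that remain.
      auto F = splitIntoFragments({64, 64}, 96);
      assert(F.size() == 2 && F[1].OffsetInBits == 64 && F[1].SizeInBits == 32);
      return 0;
    }
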
@@ -1489,6 +1693,13 @@ static void findUnwindDestinations(
   bool IsWasmCXX = Personality == EHPersonality::Wasm_CXX;
   bool IsSEH = isAsynchronousEHPersonality(Personality);
 
+  if (IsWasmCXX) {
+    findWasmUnwindDestinations(FuncInfo, EHPadBB, Prob, UnwindDests);
+    assert(UnwindDests.size() <= 1 &&
+           "There should be at most one unwind destination for wasm");
+    return;
+  }
+
   while (EHPadBB) {
     const Instruction *Pad = EHPadBB->getFirstNonPHI();
     BasicBlock *NewEHPadBB = nullptr;
@@ -1501,8 +1712,7 @@ static void findUnwindDestinations(
       // personalities.
       UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob);
       UnwindDests.back().first->setIsEHScopeEntry();
-      if (!IsWasmCXX)
-        UnwindDests.back().first->setIsEHFuncletEntry();
+      UnwindDests.back().first->setIsEHFuncletEntry();
       break;
     } else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) {
       // Add the catchpad handlers to the possible destinations.
@@ -1588,9 +1798,10 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
                                 DemoteReg, PtrValueVTs[0]);
     SDValue RetOp = getValue(I.getOperand(0));
 
-    SmallVector<EVT, 4> ValueVTs;
+    SmallVector<EVT, 4> ValueVTs, MemVTs;
     SmallVector<uint64_t, 4> Offsets;
-    ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs, &Offsets);
+    ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs, &MemVTs,
+                    &Offsets);
     unsigned NumValues = ValueVTs.size();
 
     SmallVector<SDValue, 4> Chains(NumValues);
@@ -1598,8 +1809,11 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
       // An aggregate return value cannot wrap around the address space, so
      // offsets to its parts don't wrap either.
       SDValue Ptr = DAG.getObjectPtrOffset(getCurSDLoc(), RetPtr, Offsets[i]);
-      Chains[i] = DAG.getStore(
-          Chain, getCurSDLoc(), SDValue(RetOp.getNode(), RetOp.getResNo() + i),
+
+      SDValue Val = RetOp.getValue(i);
+      if (MemVTs[i] != ValueVTs[i])
+        Val = DAG.getPtrExtOrTrunc(Val, getCurSDLoc(), MemVTs[i]);
+      Chains[i] = DAG.getStore(Chain, getCurSDLoc(), Val,
           // FIXME: better loc info would be nice.
           Ptr, MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));
     }
 
@@ -1615,6 +1829,10 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
 
     const Function *F = I.getParent()->getParent();
 
+    bool NeedsRegBlock = TLI.functionArgumentNeedsConsecutiveRegisters(
+        I.getOperand(0)->getType(), F->getCallingConv(),
+        /*IsVarArg*/ false);
+
     ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
     if (F->getAttributes().hasAttribute(AttributeList::ReturnIndex,
                                         Attribute::SExt))
@@ -1647,6 +1865,18 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
       if (RetInReg)
         Flags.setInReg();
 
+      if (I.getOperand(0)->getType()->isPointerTy()) {
+        Flags.setPointer();
+        Flags.setPointerAddrSpace(
+            cast<PointerType>(I.getOperand(0)->getType())->getAddressSpace());
+      }
+
+      if (NeedsRegBlock) {
+        Flags.setInConsecutiveRegs();
+        if (j == NumValues - 1)
+          Flags.setInConsecutiveRegsLast();
+      }
+
       // Propagate extension type if any
       if (ExtendKind == ISD::SIGN_EXTEND)
         Flags.setSExt();
@@ -1668,7 +1898,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
   const Function *F = I.getParent()->getParent();
   if (TLI.supportSwiftError() &&
       F->getAttributes().hasAttrSomewhere(Attribute::SwiftError)) {
-    assert(FuncInfo.SwiftErrorArg && "Need a swift error argument");
+    assert(SwiftError.getFunctionArg() && "Need a swift error argument");
     ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
     Flags.setSwiftError();
     Outs.push_back(ISD::OutputArg(Flags, EVT(TLI.getPointerTy(DL)) /*vt*/,
@@ -1677,8 +1907,8 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
                                   0 /*partOffs*/));
     // Create SDNode for the swifterror virtual register.
     OutVals.push_back(
-        DAG.getRegister(FuncInfo.getOrCreateSwiftErrorVRegUseAt(
-                            &I, FuncInfo.MBB, FuncInfo.SwiftErrorArg).first,
+        DAG.getRegister(SwiftError.getOrCreateVRegUseAt(
+                            &I, FuncInfo.MBB, SwiftError.getFunctionArg()),
                         EVT(TLI.getPointerTy(DL))));
   }
 
@@ -1825,7 +2055,7 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
       CaseBlock CB(Condition, BOp->getOperand(0), BOp->getOperand(1), nullptr,
                    TBB, FBB, CurBB, getCurSDLoc(), TProb, FProb);
-      SwitchCases.push_back(CB);
+      SL->SwitchCases.push_back(CB);
       return;
     }
   }
@@ -1834,7 +2064,7 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
   ISD::CondCode Opc = InvertCond ? ISD::SETNE : ISD::SETEQ;
   CaseBlock CB(Opc, Cond, ConstantInt::getTrue(*DAG.getContext()),
                nullptr, TBB, FBB, CurBB, getCurSDLoc(), TProb, FProb);
-  SwitchCases.push_back(CB);
+  SL->SwitchCases.push_back(CB);
 }
 
 void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
@@ -2043,27 +2273,27 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) {
     // If the compares in later blocks need to use values not currently
     // exported from this block, export them now.  This block should always
     // be the first entry.
-    assert(SwitchCases[0].ThisBB == BrMBB && "Unexpected lowering!");
+    assert(SL->SwitchCases[0].ThisBB == BrMBB && "Unexpected lowering!");
 
     // Allow some cases to be rejected.
-    if (ShouldEmitAsBranches(SwitchCases)) {
-      for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i) {
-        ExportFromCurrentBlock(SwitchCases[i].CmpLHS);
-        ExportFromCurrentBlock(SwitchCases[i].CmpRHS);
+    if (ShouldEmitAsBranches(SL->SwitchCases)) {
+      for (unsigned i = 1, e = SL->SwitchCases.size(); i != e; ++i) {
+        ExportFromCurrentBlock(SL->SwitchCases[i].CmpLHS);
+        ExportFromCurrentBlock(SL->SwitchCases[i].CmpRHS);
       }
 
       // Emit the branch for this block.
-      visitSwitchCase(SwitchCases[0], BrMBB);
-      SwitchCases.erase(SwitchCases.begin());
+      visitSwitchCase(SL->SwitchCases[0], BrMBB);
+      SL->SwitchCases.erase(SL->SwitchCases.begin());
       return;
     }
 
     // Okay, we decided not to do this, remove any inserted MBB's and clear
     // SwitchCases.
-    for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i)
-      FuncInfo.MF->erase(SwitchCases[i].ThisBB);
+    for (unsigned i = 1, e = SL->SwitchCases.size(); i != e; ++i)
+      FuncInfo.MF->erase(SL->SwitchCases[i].ThisBB);
 
-    SwitchCases.clear();
+    SL->SwitchCases.clear();
   }
 }
 
@@ -2084,6 +2314,20 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
   SDValue CondLHS = getValue(CB.CmpLHS);
   SDLoc dl = CB.DL;
 
+  if (CB.CC == ISD::SETTRUE) {
+    // Branch or fall through to TrueBB.
+    addSuccessorWithProb(SwitchBB, CB.TrueBB, CB.TrueProb);
+    SwitchBB->normalizeSuccProbs();
+    if (CB.TrueBB != NextBlock(SwitchBB)) {
+      DAG.setRoot(DAG.getNode(ISD::BR, dl, MVT::Other, getControlRoot(),
+                              DAG.getBasicBlock(CB.TrueBB)));
+    }
+    return;
+  }
+
+  auto &TLI = DAG.getTargetLoweringInfo();
+  EVT MemVT = TLI.getMemValueType(DAG.getDataLayout(), CB.CmpLHS->getType());
+
   // Build the setcc now.
   if (!CB.CmpMHS) {
     // Fold "(X == true)" to X and "(X == false)" to !X to
@@ -2095,8 +2339,18 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
         CB.CC == ISD::SETEQ) {
       SDValue True = DAG.getConstant(1, dl, CondLHS.getValueType());
       Cond = DAG.getNode(ISD::XOR, dl, CondLHS.getValueType(), CondLHS, True);
-    } else
-      Cond = DAG.getSetCC(dl, MVT::i1, CondLHS, getValue(CB.CmpRHS), CB.CC);
+    } else {
+      SDValue CondRHS = getValue(CB.CmpRHS);
+
+      // If a pointer's DAG type is larger than its memory type then the DAG
+      // values are zero-extended. This breaks signed comparisons so truncate
+      // back to the underlying type before doing the compare.
+      if (CondLHS.getValueType() != MemVT) {
+        CondLHS = DAG.getPtrExtOrTrunc(CondLHS, getCurSDLoc(), MemVT);
+        CondRHS = DAG.getPtrExtOrTrunc(CondRHS, getCurSDLoc(), MemVT);
+      }
+      Cond = DAG.getSetCC(dl, MVT::i1, CondLHS, CondRHS, CB.CC);
+    }
   } else {
     assert(CB.CC == ISD::SETLE && "Can handle only LE ranges now");
@@ -2147,7 +2401,7 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
 }
 
 /// visitJumpTable - Emit JumpTable node in the current MBB
-void SelectionDAGBuilder::visitJumpTable(JumpTable &JT) {
+void SelectionDAGBuilder::visitJumpTable(SwitchCG::JumpTable &JT) {
   // Emit the code for the jump table
   assert(JT.Reg != -1U && "Should lower JT Header first!");
   EVT PTy = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
@@ -2162,14 +2416,12 @@ void SelectionDAGBuilder::visitJumpTable(JumpTable &JT) {
 
 /// visitJumpTableHeader - This function emits necessary code to produce index
 /// in the JumpTable from switch case.
-void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT,
+void SelectionDAGBuilder::visitJumpTableHeader(SwitchCG::JumpTable &JT,
                                                JumpTableHeader &JTH,
                                                MachineBasicBlock *SwitchBB) {
   SDLoc dl = getCurSDLoc();
 
-  // Subtract the lowest switch case value from the value being switched on and
-  // conditional branch to default mbb if the result is greater than the
-  // difference between smallest and largest cases.
+  // Subtract the lowest switch case value from the value being switched on.
   SDValue SwitchOp = getValue(JTH.SValue);
   EVT VT = SwitchOp.getValueType();
   SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, SwitchOp,
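
The new getPtrExtOrTrunc calls in visitSwitchCase above (and the matching ones in visitICmp further down) exist because a pointer's DAG type can be wider than its memory type, with the extra bits zero-extended; signed compares must therefore be done at the memory width. A small model of the failure mode, with plain integers standing in for the two widths (assumes a 32-bit pointer carried zero-extended in a 64-bit register):

    #include <cassert>
    #include <cstdint>

    // Signed comparison done at the wide register width vs. at the narrow
    // memory width, as the truncation above restores.
    static bool signedLess64(uint64_t A, uint64_t B) {
      return (int64_t)A < (int64_t)B;
    }
    static bool signedLess32(uint64_t A, uint64_t B) {
      return (int32_t)A < (int32_t)B;
    }

    int main() {
      uint64_t P = 0x80000000u; // high-half pointer, zero-extended
      uint64_t Q = 0x00000001u;
      assert(signedLess64(Q, P)); // wrong: zext makes P look large positive
      assert(signedLess32(P, Q)); // right: at 32 bits P is negative
      return 0;
    }
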
@@ -2189,24 +2441,33 @@ void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT,
                                     JumpTableReg, SwitchOp);
   JT.Reg = JumpTableReg;
 
-  // Emit the range check for the jump table, and branch to the default block
-  // for the switch statement if the value being switched on exceeds the largest
-  // case in the switch.
-  SDValue CMP = DAG.getSetCC(
-      dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
-                                 Sub.getValueType()),
-      Sub, DAG.getConstant(JTH.Last - JTH.First, dl, VT), ISD::SETUGT);
-
-  SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
-                               MVT::Other, CopyTo, CMP,
-                               DAG.getBasicBlock(JT.Default));
-
-  // Avoid emitting unnecessary branches to the next block.
-  if (JT.MBB != NextBlock(SwitchBB))
-    BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
-                         DAG.getBasicBlock(JT.MBB));
-
-  DAG.setRoot(BrCond);
+  if (!JTH.OmitRangeCheck) {
+    // Emit the range check for the jump table, and branch to the default block
+    // for the switch statement if the value being switched on exceeds the
+    // largest case in the switch.
+    SDValue CMP = DAG.getSetCC(
+        dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
+                                   Sub.getValueType()),
+        Sub, DAG.getConstant(JTH.Last - JTH.First, dl, VT), ISD::SETUGT);
+
+    SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
+                                 MVT::Other, CopyTo, CMP,
+                                 DAG.getBasicBlock(JT.Default));
+
+    // Avoid emitting unnecessary branches to the next block.
+    if (JT.MBB != NextBlock(SwitchBB))
+      BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
+                           DAG.getBasicBlock(JT.MBB));
+
+    DAG.setRoot(BrCond);
+  } else {
+    // Avoid emitting unnecessary branches to the next block.
+    if (JT.MBB != NextBlock(SwitchBB))
+      DAG.setRoot(DAG.getNode(ISD::BR, dl, MVT::Other, CopyTo,
+                              DAG.getBasicBlock(JT.MBB)));
+    else
+      DAG.setRoot(CopyTo);
+  }
 }
 
 /// Create a LOAD_STACK_GUARD node, and let it carry the target specific global
@@ -2215,6 +2476,7 @@ static SDValue getLoadStackGuard(SelectionDAG &DAG, const SDLoc &DL,
                                  SDValue &Chain) {
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
+  EVT PtrMemTy = TLI.getPointerMemTy(DAG.getDataLayout());
   MachineFunction &MF = DAG.getMachineFunction();
   Value *Global = TLI.getSDagStackGuard(*MF.getFunction().getParent());
   MachineSDNode *Node =
@@ -2227,6 +2489,8 @@ static SDValue getLoadStackGuard(SelectionDAG &DAG, const SDLoc &DL,
         MPInfo, Flags, PtrTy.getSizeInBits() / 8, DAG.getEVTAlignment(PtrTy));
     DAG.setNodeMemRefs(Node, {MemRef});
   }
+  if (PtrTy != PtrMemTy)
+    return DAG.getPtrExtOrTrunc(SDValue(Node, 0), DL, PtrMemTy);
   return SDValue(Node, 0);
 }
 
@@ -2242,6 +2506,7 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
   // First create the loads to the guard/stack slot for the comparison.
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
+  EVT PtrMemTy = TLI.getPointerMemTy(DAG.getDataLayout());
 
   MachineFrameInfo &MFI = ParentBB->getParent()->getFrameInfo();
   int FI = MFI.getStackProtectorIndex();
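
In visitJumpTableHeader above, subtracting JTH.First lets a single unsigned SETUGT compare replace both bounds checks, and the new JTH.OmitRangeCheck flag skips the check entirely when the cases are known to cover every value. The folded check as standalone C++ (illustrative only):

    #include <cassert>
    #include <cstdint>

    // One unsigned compare replaces (V < First || V > Last): after the
    // subtraction, any V below First wraps around to a huge unsigned index,
    // so "Index > Last - First" catches both out-of-range directions. This
    // is the check emitted with ISD::SETUGT above.
    static bool inJumpTableRange(int64_t V, int64_t First, int64_t Last) {
      uint64_t Index = (uint64_t)V - (uint64_t)First;
      return Index <= (uint64_t)(Last - First);
    }

    int main() {
      assert(inJumpTableRange(5, 3, 9));
      assert(!inJumpTableRange(2, 3, 9));  // below: wraps to a huge index
      assert(!inJumpTableRange(10, 3, 9)); // above
      return 0;
    }
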
@@ -2254,7 +2519,7 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
 
   // Generate code to load the content of the guard slot.
   SDValue GuardVal = DAG.getLoad(
-      PtrTy, dl, DAG.getEntryNode(), StackSlotPtr,
+      PtrMemTy, dl, DAG.getEntryNode(), StackSlotPtr,
       MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), Align,
       MachineMemOperand::MOVolatile);
 
@@ -2262,27 +2527,26 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
     GuardVal = TLI.emitStackGuardXorFP(DAG, GuardVal, dl);
 
   // Retrieve guard check function, nullptr if instrumentation is inlined.
-  if (const Value *GuardCheck = TLI.getSSPStackGuardCheck(M)) {
+  if (const Function *GuardCheckFn = TLI.getSSPStackGuardCheck(M)) {
     // The target provides a guard check function to validate the guard value.
     // Generate a call to that function with the content of the guard slot as
     // argument.
-    auto *Fn = cast<Function>(GuardCheck);
-    FunctionType *FnTy = Fn->getFunctionType();
+    FunctionType *FnTy = GuardCheckFn->getFunctionType();
     assert(FnTy->getNumParams() == 1 && "Invalid function signature");
 
     TargetLowering::ArgListTy Args;
     TargetLowering::ArgListEntry Entry;
     Entry.Node = GuardVal;
     Entry.Ty = FnTy->getParamType(0);
-    if (Fn->hasAttribute(1, Attribute::AttrKind::InReg))
+    if (GuardCheckFn->hasAttribute(1, Attribute::AttrKind::InReg))
       Entry.IsInReg = true;
     Args.push_back(Entry);
 
     TargetLowering::CallLoweringInfo CLI(DAG);
     CLI.setDebugLoc(getCurSDLoc())
-      .setChain(DAG.getEntryNode())
-      .setCallee(Fn->getCallingConv(), FnTy->getReturnType(),
-                 getValue(GuardCheck), std::move(Args));
+        .setChain(DAG.getEntryNode())
+        .setCallee(GuardCheckFn->getCallingConv(), FnTy->getReturnType(),
+                   getValue(GuardCheckFn), std::move(Args));
 
     std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
     DAG.setRoot(Result.second);
@@ -2298,9 +2562,9 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
     const Value *IRGuard = TLI.getSDagStackGuard(M);
     SDValue GuardPtr = getValue(IRGuard);
 
-    Guard =
-        DAG.getLoad(PtrTy, dl, Chain, GuardPtr, MachinePointerInfo(IRGuard, 0),
-                    Align, MachineMemOperand::MOVolatile);
+    Guard = DAG.getLoad(PtrMemTy, dl, Chain, GuardPtr,
+                        MachinePointerInfo(IRGuard, 0), Align,
+                        MachineMemOperand::MOVolatile);
   }
 
   // Perform the comparison via a subtract/getsetcc.
@@ -2339,6 +2603,12 @@ SelectionDAGBuilder::visitSPDescriptorFailure(StackProtectorDescriptor &SPD) {
   SDValue Chain =
       TLI.makeLibCall(DAG, RTLIB::STACKPROTECTOR_CHECK_FAIL, MVT::isVoid,
                       None, false, getCurSDLoc(), false, false).second;
+
+  // On PS4, the "return address" must still be within the calling function,
+  // even if it's at the very end, so emit an explicit TRAP here.
+  // Passing 'true' for doesNotReturn above won't generate the trap for us.
+  if (TM.getTargetTriple().isPS4CPU())
+    Chain = DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, Chain);
+
   DAG.setRoot(Chain);
 }
 
@@ -2493,6 +2763,20 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
     case Intrinsic::experimental_gc_statepoint:
       LowerStatepoint(ImmutableStatepoint(&I), EHPadBB);
       break;
+    case Intrinsic::wasm_rethrow_in_catch: {
+      // This is usually done in visitTargetIntrinsic, but this intrinsic is
+      // special because it can be invoked, so we manually lower it to a DAG
+      // node here.
+      SmallVector<SDValue, 8> Ops;
+      Ops.push_back(getRoot()); // inchain
+      const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+      Ops.push_back(
+          DAG.getTargetConstant(Intrinsic::wasm_rethrow_in_catch,
+                                getCurSDLoc(),
+                                TLI.getPointerTy(DAG.getDataLayout())));
+      SDVTList VTs = DAG.getVTList(ArrayRef<EVT>({MVT::Other})); // outchain
+      DAG.setRoot(DAG.getNode(ISD::INTRINSIC_VOID, getCurSDLoc(), VTs, Ops));
+      break;
+    }
     }
   } else if (I.countOperandBundlesOfType(LLVMContext::OB_deopt)) {
     // Currently we do not lower any intrinsic calls with deopt operand bundles.
@@ -2528,6 +2812,35 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
   InvokeMBB->normalizeSuccProbs();
 
   // Drop into normal successor.
+  DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, getControlRoot(),
+                          DAG.getBasicBlock(Return)));
+}
+
+void SelectionDAGBuilder::visitCallBr(const CallBrInst &I) {
+  MachineBasicBlock *CallBrMBB = FuncInfo.MBB;
+
+  // Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't
+  // have to do anything here to lower funclet bundles.
+  assert(!I.hasOperandBundlesOtherThan(
+             {LLVMContext::OB_deopt, LLVMContext::OB_funclet}) &&
+         "Cannot lower callbrs with arbitrary operand bundles yet!");
+
+  assert(isa<InlineAsm>(I.getCalledValue()) &&
+         "Only know how to handle inlineasm callbr");
+  visitInlineAsm(&I);
+
+  // Retrieve successors.
+  MachineBasicBlock *Return = FuncInfo.MBBMap[I.getDefaultDest()];
+
+  // Update successor info.
+  addSuccessorWithProb(CallBrMBB, Return);
+  for (unsigned i = 0, e = I.getNumIndirectDests(); i < e; ++i) {
+    MachineBasicBlock *Target = FuncInfo.MBBMap[I.getIndirectDest(i)];
+    addSuccessorWithProb(CallBrMBB, Target);
+  }
+  CallBrMBB->normalizeSuccProbs();
+
+  // Drop into default successor.
   DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, getControlRoot(),
                           DAG.getBasicBlock(Return)));
 }
 
@@ -2585,49 +2898,17 @@ void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) {
   setValue(&LP, Res);
 }
 
-void SelectionDAGBuilder::sortAndRangeify(CaseClusterVector &Clusters) {
-#ifndef NDEBUG
-  for (const CaseCluster &CC : Clusters)
-    assert(CC.Low == CC.High && "Input clusters must be single-case");
-#endif
-
-  llvm::sort(Clusters, [](const CaseCluster &a, const CaseCluster &b) {
-    return a.Low->getValue().slt(b.Low->getValue());
-  });
-
-  // Merge adjacent clusters with the same destination.
-  const unsigned N = Clusters.size();
-  unsigned DstIndex = 0;
-  for (unsigned SrcIndex = 0; SrcIndex < N; ++SrcIndex) {
-    CaseCluster &CC = Clusters[SrcIndex];
-    const ConstantInt *CaseVal = CC.Low;
-    MachineBasicBlock *Succ = CC.MBB;
-
-    if (DstIndex != 0 && Clusters[DstIndex - 1].MBB == Succ &&
-        (CaseVal->getValue() - Clusters[DstIndex - 1].High->getValue()) == 1) {
-      // If this case has the same successor and is a neighbour, merge it into
-      // the previous cluster.
-      Clusters[DstIndex - 1].High = CaseVal;
-      Clusters[DstIndex - 1].Prob += CC.Prob;
-    } else {
-      std::memmove(&Clusters[DstIndex++], &Clusters[SrcIndex],
-                   sizeof(Clusters[SrcIndex]));
-    }
-  }
-  Clusters.resize(DstIndex);
-}
-
 void SelectionDAGBuilder::UpdateSplitBlock(MachineBasicBlock *First,
                                            MachineBasicBlock *Last) {
   // Update JTCases.
-  for (unsigned i = 0, e = JTCases.size(); i != e; ++i)
-    if (JTCases[i].first.HeaderBB == First)
-      JTCases[i].first.HeaderBB = Last;
+  for (unsigned i = 0, e = SL->JTCases.size(); i != e; ++i)
+    if (SL->JTCases[i].first.HeaderBB == First)
+      SL->JTCases[i].first.HeaderBB = Last;
 
   // Update BitTestCases.
-  for (unsigned i = 0, e = BitTestCases.size(); i != e; ++i)
-    if (BitTestCases[i].Parent == First)
-      BitTestCases[i].Parent = Last;
+  for (unsigned i = 0, e = SL->BitTestCases.size(); i != e; ++i)
+    if (SL->BitTestCases[i].Parent == First)
+      SL->BitTestCases[i].Parent = Last;
 }
 
 void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) {
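
sortAndRangeify is deleted above; judging by the new `using namespace SwitchCG` and the SL-> members throughout this patch, switch lowering is being factored into shared SwitchCG utilities. The algorithm it implemented, in simplified standalone form (probability accumulation omitted, plain longs standing in for ConstantInt case values):

    #include <algorithm>
    #include <cassert>
    #include <vector>

    // Simplified case cluster: [Low, High] case values -> destination block.
    struct Cluster { long Low, High; int MBB; };

    // Same shape as the removed sortAndRangeify: sort single-case clusters by
    // value, then merge a cluster into its predecessor when it targets the
    // same destination and its value is the immediate neighbour (High + 1).
    static void sortAndRangeify(std::vector<Cluster> &Clusters) {
      std::sort(Clusters.begin(), Clusters.end(),
                [](const Cluster &a, const Cluster &b) { return a.Low < b.Low; });
      unsigned DstIndex = 0;
      for (unsigned SrcIndex = 0; SrcIndex < Clusters.size(); ++SrcIndex) {
        const Cluster &CC = Clusters[SrcIndex];
        if (DstIndex != 0 && Clusters[DstIndex - 1].MBB == CC.MBB &&
            CC.Low - Clusters[DstIndex - 1].High == 1)
          Clusters[DstIndex - 1].High = CC.Low; // extend the previous range
        else
          Clusters[DstIndex++] = CC;
      }
      Clusters.resize(DstIndex);
    }

    int main() {
      // Cases 1,2,3 -> block 7 collapse to one range; 5 -> block 9 is kept.
      std::vector<Cluster> C = {{3, 3, 7}, {1, 1, 7}, {5, 5, 9}, {2, 2, 7}};
      sortAndRangeify(C);
      assert(C.size() == 2 && C[0].Low == 1 && C[0].High == 3 && C[1].Low == 5);
      return 0;
    }
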
@@ -2916,6 +3197,18 @@ void SelectionDAGBuilder::visitICmp(const User &I) {
   SDValue Op2 = getValue(I.getOperand(1));
   ISD::CondCode Opcode = getICmpCondCode(predicate);
 
+  auto &TLI = DAG.getTargetLoweringInfo();
+  EVT MemVT =
+      TLI.getMemValueType(DAG.getDataLayout(), I.getOperand(0)->getType());
+
+  // If a pointer's DAG type is larger than its memory type then the DAG values
+  // are zero-extended. This breaks signed comparisons so truncate back to the
+  // underlying type before doing the compare.
+  if (Op1.getValueType() != MemVT) {
+    Op1 = DAG.getPtrExtOrTrunc(Op1, getCurSDLoc(), MemVT);
+    Op2 = DAG.getPtrExtOrTrunc(Op2, getCurSDLoc(), MemVT);
+  }
+
   EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
                                                         I.getType());
   setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Opcode));
@@ -2963,6 +3256,8 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
   ISD::NodeType OpCode = Cond.getValueType().isVector() ?
                          ISD::VSELECT : ISD::SELECT;
 
+  bool IsUnaryAbs = false;
+
   // Min/max matching is only viable if all output VTs are the same.
   if (is_splat(ValueVTs)) {
     EVT VT = ValueVTs[0];
@@ -3023,10 +3318,16 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
         break;
       }
       break;
+    case SPF_ABS:
+      IsUnaryAbs = true;
+      Opc = ISD::ABS;
+      break;
+    case SPF_NABS:
+      // TODO: we need to produce sub(0, abs(X)).
     default: break;
     }
 
-    if (Opc != ISD::DELETED_NODE &&
+    if (!IsUnaryAbs && Opc != ISD::DELETED_NODE &&
        (TLI.isOperationLegalOrCustom(Opc, VT) ||
         (UseScalarMinMax &&
          TLI.isOperationLegalOrCustom(Opc, VT.getScalarType()))) &&
@@ -3039,15 +3340,30 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
       RHSVal = getValue(RHS);
       BaseOps = {};
     }
+
+    if (IsUnaryAbs) {
+      OpCode = Opc;
+      LHSVal = getValue(LHS);
+      BaseOps = {};
+    }
   }
 
-  for (unsigned i = 0; i != NumValues; ++i) {
-    SmallVector<SDValue, 3> Ops(BaseOps.begin(), BaseOps.end());
-    Ops.push_back(SDValue(LHSVal.getNode(), LHSVal.getResNo() + i));
-    Ops.push_back(SDValue(RHSVal.getNode(), RHSVal.getResNo() + i));
-    Values[i] = DAG.getNode(OpCode, getCurSDLoc(),
-                            LHSVal.getNode()->getValueType(LHSVal.getResNo()+i),
-                            Ops);
+  if (IsUnaryAbs) {
+    for (unsigned i = 0; i != NumValues; ++i) {
+      Values[i] =
+          DAG.getNode(OpCode, getCurSDLoc(),
+                      LHSVal.getNode()->getValueType(LHSVal.getResNo() + i),
+                      SDValue(LHSVal.getNode(), LHSVal.getResNo() + i));
+    }
+  } else {
+    for (unsigned i = 0; i != NumValues; ++i) {
+      SmallVector<SDValue, 3> Ops(BaseOps.begin(), BaseOps.end());
+      Ops.push_back(SDValue(LHSVal.getNode(), LHSVal.getResNo() + i));
+      Ops.push_back(SDValue(RHSVal.getNode(), RHSVal.getResNo() + i));
+      Values[i] = DAG.getNode(
+          OpCode, getCurSDLoc(),
+          LHSVal.getNode()->getValueType(LHSVal.getResNo() + i), Ops);
+    }
   }
 
   setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
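
visitSelect above now classifies SPF_ABS selects and emits the unary ISD::ABS node instead of a two-operand select. What the matched IR pattern computes, next to one common branchless expansion a target might use (the expansion is this editor's illustration, not taken from the patch):

    #include <cassert>

    // The select pattern matcher classifies "(x < 0) ? -x : x" and variants
    // as SPF_ABS; above it is lowered as a single unary abs node.
    static int absPattern(int X) {
      return X < 0 ? -X : X; // what the IR select computes
    }
    static int absUnary(int X) {
      int Mask = X >> 31; // arithmetic shift: 0 or -1
      return (X + Mask) ^ Mask;
    }

    int main() {
      for (int X : {-5, 0, 7})
        assert(absPattern(X) == absUnary(X));
      return 0;
    }
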
@@ -3135,18 +3451,26 @@ void SelectionDAGBuilder::visitPtrToInt(const User &I) {
   // What to do depends on the size of the integer and the size of the pointer.
   // We can either truncate, zero extend, or no-op, accordingly.
   SDValue N = getValue(I.getOperand(0));
+  auto &TLI = DAG.getTargetLoweringInfo();
   EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
                                                         I.getType());
-  setValue(&I, DAG.getZExtOrTrunc(N, getCurSDLoc(), DestVT));
+  EVT PtrMemVT =
+      TLI.getMemValueType(DAG.getDataLayout(), I.getOperand(0)->getType());
+  N = DAG.getPtrExtOrTrunc(N, getCurSDLoc(), PtrMemVT);
+  N = DAG.getZExtOrTrunc(N, getCurSDLoc(), DestVT);
+  setValue(&I, N);
 }
 
 void SelectionDAGBuilder::visitIntToPtr(const User &I) {
   // What to do depends on the size of the integer and the size of the pointer.
   // We can either truncate, zero extend, or no-op, accordingly.
   SDValue N = getValue(I.getOperand(0));
-  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
-                                                        I.getType());
-  setValue(&I, DAG.getZExtOrTrunc(N, getCurSDLoc(), DestVT));
+  auto &TLI = DAG.getTargetLoweringInfo();
+  EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+  EVT PtrMemVT = TLI.getMemValueType(DAG.getDataLayout(), I.getType());
+  N = DAG.getZExtOrTrunc(N, getCurSDLoc(), PtrMemVT);
+  N = DAG.getPtrExtOrTrunc(N, getCurSDLoc(), DestVT);
+  setValue(&I, N);
 }
 
 void SelectionDAGBuilder::visitBitCast(const User &I) {
@@ -3284,12 +3608,8 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
     MOps1[0] = Src1;
     MOps2[0] = Src2;
 
-    Src1 = Src1.isUndef()
-               ? DAG.getUNDEF(PaddedVT)
-               : DAG.getNode(ISD::CONCAT_VECTORS, DL, PaddedVT, MOps1);
-    Src2 = Src2.isUndef()
-               ? DAG.getUNDEF(PaddedVT)
-               : DAG.getNode(ISD::CONCAT_VECTORS, DL, PaddedVT, MOps2);
+    Src1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, PaddedVT, MOps1);
+    Src2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, PaddedVT, MOps2);
 
     // Readjust mask for new input vector length.
     SmallVector<int, 8> MappedOps(PaddedMaskNumElts, -1);
@@ -3498,6 +3818,9 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
   unsigned AS = Op0->getType()->getScalarType()->getPointerAddressSpace();
   SDValue N = getValue(Op0);
   SDLoc dl = getCurSDLoc();
+  auto &TLI = DAG.getTargetLoweringInfo();
+  MVT PtrTy = TLI.getPointerTy(DAG.getDataLayout(), AS);
+  MVT PtrMemTy = TLI.getPointerMemTy(DAG.getDataLayout(), AS);
 
   // Normalize Vector GEP - all scalar operands should be converted to the
   // splat vector.
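
The visitPtrToInt/visitIntToPtr hunks above split each conversion into two steps: through the pointer's memory type first, then to the destination type. Modeled with plain integers, again assuming 32-bit pointers carried in 64-bit registers:

    #include <cassert>
    #include <cstdint>

    // visitPtrToInt: narrow the register value to the memory type
    // (getPtrExtOrTrunc), then zext/trunc to the destination integer type,
    // so the result never exposes the wider register representation.
    static uint64_t ptrToInt64(uint64_t PtrReg) {
      uint32_t MemRepr = (uint32_t)PtrReg; // to PtrMemVT
      return (uint64_t)MemRepr;            // to DestVT
    }
    // visitIntToPtr: the reverse order of the same two steps.
    static uint64_t intToPtrReg(uint64_t I) {
      uint32_t MemRepr = (uint32_t)I;      // to PtrMemVT
      return (uint64_t)MemRepr;            // to DestVT (zero-extended)
    }

    int main() {
      assert(ptrToInt64(0xDEADBEEF12345678ULL) == 0x12345678ULL);
      assert(intToPtrReg(0xFFFFFFFF00000001ULL) == 1ULL);
      return 0;
    }
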
@@ -3555,6 +3878,8 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
         if (Offs.isNonNegative() && cast<GEPOperator>(I).isInBounds())
           Flags.setNoUnsignedWrap(true);
 
+        OffsVal = DAG.getSExtOrTrunc(OffsVal, dl, N.getValueType());
+
         N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, OffsVal, Flags);
         continue;
       }
@@ -3580,7 +3905,8 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
                              N.getValueType(), IdxN,
                              DAG.getConstant(Amt, dl, IdxN.getValueType()));
         } else {
-          SDValue Scale = DAG.getConstant(ElementSize, dl, IdxN.getValueType());
+          SDValue Scale = DAG.getConstant(ElementSize.getZExtValue(), dl,
+                                          IdxN.getValueType());
           IdxN = DAG.getNode(ISD::MUL, dl,
                              N.getValueType(), IdxN, Scale);
         }
@@ -3591,6 +3917,9 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
     }
   }
 
+  if (PtrMemTy != PtrTy && !cast<GEPOperator>(I).isInBounds())
+    N = DAG.getPtrExtendInReg(N, dl, PtrMemTy);
+
   setValue(&I, N);
 }
 
@@ -3675,16 +4004,17 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
   bool isVolatile = I.isVolatile();
   bool isNonTemporal = I.getMetadata(LLVMContext::MD_nontemporal) != nullptr;
   bool isInvariant = I.getMetadata(LLVMContext::MD_invariant_load) != nullptr;
-  bool isDereferenceable = isDereferenceablePointer(SV, DAG.getDataLayout());
+  bool isDereferenceable =
+      isDereferenceablePointer(SV, I.getType(), DAG.getDataLayout());
   unsigned Alignment = I.getAlignment();
 
   AAMDNodes AAInfo;
   I.getAAMetadata(AAInfo);
   const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
 
-  SmallVector<EVT, 4> ValueVTs;
+  SmallVector<EVT, 4> ValueVTs, MemVTs;
   SmallVector<uint64_t, 4> Offsets;
-  ComputeValueVTs(TLI, DAG.getDataLayout(), Ty, ValueVTs, &Offsets);
+  ComputeValueVTs(TLI, DAG.getDataLayout(), Ty, ValueVTs, &MemVTs, &Offsets);
   unsigned NumValues = ValueVTs.size();
   if (NumValues == 0)
     return;
@@ -3750,12 +4080,15 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
       MMOFlags |= MachineMemOperand::MODereferenceable;
     MMOFlags |= TLI.getMMOFlags(I);
 
-    SDValue L = DAG.getLoad(ValueVTs[i], dl, Root, A,
+    SDValue L = DAG.getLoad(MemVTs[i], dl, Root, A,
                             MachinePointerInfo(SV, Offsets[i]), Alignment,
                             MMOFlags, AAInfo, Ranges);
+    Chains[ChainI] = L.getValue(1);
+
+    if (MemVTs[i] != ValueVTs[i])
+      L = DAG.getZExtOrTrunc(L, dl, ValueVTs[i]);
 
     Values[i] = L;
-    Chains[ChainI] = L.getValue(1);
   }
 
   if (!ConstantMemory) {
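
The visitGetElementPtr hunks above keep the usual address arithmetic, now building the scale constant from ElementSize.getZExtValue(). The computation in scalar form (illustrative only):

    #include <cassert>
    #include <cstdint>

    // Address computation performed by visitGetElementPtr, reduced to
    // scalars: each constant struct/array offset is added directly, and each
    // variable index is multiplied by the element store size first (the
    // ISD::MUL by "Scale" above, unless the size is a power of two and the
    // multiply becomes a shift).
    static uint64_t gepAddress(uint64_t Base, uint64_t ConstOffset,
                               int64_t Index, uint64_t ElementSize) {
      return Base + ConstOffset + (uint64_t)(Index * (int64_t)ElementSize);
    }

    int main() {
      // &((struct {int a; int b[8];} *)0x1000)->b[3]: offset 4, index 3,
      // element size 4 => 0x1000 + 4 + 12.
      assert(gepAddress(0x1000, 4, 3, 4) == 0x1010);
      return 0;
    }
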
@@ -3785,15 +4118,13 @@ void SelectionDAGBuilder::visitStoreToSwiftError(const StoreInst &I) {
   SDValue Src = getValue(SrcV);
   // Create a virtual register, then update the virtual register.
-  unsigned VReg; bool CreatedVReg;
-  std::tie(VReg, CreatedVReg) = FuncInfo.getOrCreateSwiftErrorVRegDefAt(&I);
+  unsigned VReg =
+      SwiftError.getOrCreateVRegDefAt(&I, FuncInfo.MBB, I.getPointerOperand());
   // Chain, DL, Reg, N or Chain, DL, Reg, N, Glue
   // Chain can be getRoot or getControlRoot.
   SDValue CopyNode = DAG.getCopyToReg(getRoot(), getCurSDLoc(), VReg,
                                       SDValue(Src.getNode(), Src.getResNo()));
   DAG.setRoot(CopyNode);
-  if (CreatedVReg)
-    FuncInfo.setCurrentSwiftErrorVReg(FuncInfo.MBB, I.getOperand(1), VReg);
 }
 
 void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) {
@@ -3826,8 +4157,7 @@ void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) {
   // Chain, DL, Reg, VT, Glue or Chain, DL, Reg, VT
   SDValue L = DAG.getCopyFromReg(
       getRoot(), getCurSDLoc(),
-      FuncInfo.getOrCreateSwiftErrorVRegUseAt(&I, FuncInfo.MBB, SV).first,
-      ValueVTs[0]);
+      SwiftError.getOrCreateVRegUseAt(&I, FuncInfo.MBB, SV), ValueVTs[0]);
 
   setValue(&I, L);
 }
@@ -3854,10 +4184,10 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {
     }
   }
 
-  SmallVector<EVT, 4> ValueVTs;
+  SmallVector<EVT, 4> ValueVTs, MemVTs;
   SmallVector<uint64_t, 4> Offsets;
   ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(),
-                  SrcV->getType(), ValueVTs, &Offsets);
+                  SrcV->getType(), ValueVTs, &MemVTs, &Offsets);
   unsigned NumValues = ValueVTs.size();
   if (NumValues == 0)
     return;
@@ -3899,9 +4229,12 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {
     }
     SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr,
                               DAG.getConstant(Offsets[i], dl, PtrVT), Flags);
-    SDValue St = DAG.getStore(
-        Root, dl, SDValue(Src.getNode(), Src.getResNo() + i), Add,
-        MachinePointerInfo(PtrV, Offsets[i]), Alignment, MMOFlags, AAInfo);
+    SDValue Val = SDValue(Src.getNode(), Src.getResNo() + i);
+    if (MemVTs[i] != ValueVTs[i])
+      Val = DAG.getPtrExtOrTrunc(Val, dl, MemVTs[i]);
+    SDValue St =
+        DAG.getStore(Root, dl, Val, Add, MachinePointerInfo(PtrV, Offsets[i]),
+                     Alignment, MMOFlags, AAInfo);
     Chains[ChainI] = St;
   }
 
@@ -4181,19 +4514,34 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) {
 
 void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) {
   SDLoc dl = getCurSDLoc();
-  AtomicOrdering SuccessOrder = I.getSuccessOrdering();
-  AtomicOrdering FailureOrder = I.getFailureOrdering();
+  AtomicOrdering SuccessOrdering = I.getSuccessOrdering();
+  AtomicOrdering FailureOrdering = I.getFailureOrdering();
 
   SyncScope::ID SSID = I.getSyncScopeID();
 
   SDValue InChain = getRoot();
 
   MVT MemVT = getValue(I.getCompareOperand()).getSimpleValueType();
   SDVTList VTs = DAG.getVTList(MemVT, MVT::i1, MVT::Other);
-  SDValue L = DAG.getAtomicCmpSwap(
-      ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, dl, MemVT, VTs, InChain,
-      getValue(I.getPointerOperand()), getValue(I.getCompareOperand()),
-      getValue(I.getNewValOperand()), MachinePointerInfo(I.getPointerOperand()),
-      /*Alignment=*/ 0, SuccessOrder, FailureOrder, SSID);
+
+  auto Alignment = DAG.getEVTAlignment(MemVT);
+
+  auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
+  if (I.isVolatile())
+    Flags |= MachineMemOperand::MOVolatile;
+  Flags |= DAG.getTargetLoweringInfo().getMMOFlags(I);
+
+  MachineFunction &MF = DAG.getMachineFunction();
+  MachineMemOperand *MMO =
+      MF.getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()),
+                              Flags, MemVT.getStoreSize(), Alignment,
+                              AAMDNodes(), nullptr, SSID, SuccessOrdering,
+                              FailureOrdering);
+
+  SDValue L = DAG.getAtomicCmpSwap(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS,
+                                   dl, MemVT, VTs, InChain,
+                                   getValue(I.getPointerOperand()),
+                                   getValue(I.getCompareOperand()),
+                                   getValue(I.getNewValOperand()), MMO);
 
   SDValue OutChain = L.getValue(2);
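
visitAtomicCmpXchg above (and the atomicrmw/atomic load/store paths that follow) now build an explicit MachineMemOperand, assembling its flags up front instead of passing orderings loose. The flag assembly in isolation, with hypothetical constants standing in for MachineMemOperand::Flags bits:

    #include <cassert>

    // Hypothetical stand-ins for MachineMemOperand::Flags values.
    enum Flags : unsigned {
      MOLoad = 1u << 0,
      MOStore = 1u << 1,
      MOVolatile = 1u << 2,
    };

    // Shape of the computation above: an atomic read-modify-write is both a
    // load and a store, volatility is carried over from the IR instruction,
    // and the target may OR in extra bits via getMMOFlags.
    static unsigned cmpxchgFlags(bool IsVolatile, unsigned TargetExtraFlags) {
      unsigned F = MOLoad | MOStore;
      if (IsVolatile)
        F |= MOVolatile;
      return F | TargetExtraFlags;
    }

    int main() {
      assert(cmpxchgFlags(true, 0) == (MOLoad | MOStore | MOVolatile));
      assert((cmpxchgFlags(false, 0) & MOVolatile) == 0);
      return 0;
    }
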
@@ -4217,20 +4565,32 @@ void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) {
   case AtomicRMWInst::Min:  NT = ISD::ATOMIC_LOAD_MIN; break;
   case AtomicRMWInst::UMax: NT = ISD::ATOMIC_LOAD_UMAX; break;
   case AtomicRMWInst::UMin: NT = ISD::ATOMIC_LOAD_UMIN; break;
+  case AtomicRMWInst::FAdd: NT = ISD::ATOMIC_LOAD_FADD; break;
+  case AtomicRMWInst::FSub: NT = ISD::ATOMIC_LOAD_FSUB; break;
   }
-  AtomicOrdering Order = I.getOrdering();
+  AtomicOrdering Ordering = I.getOrdering();
   SyncScope::ID SSID = I.getSyncScopeID();
 
   SDValue InChain = getRoot();
 
+  auto MemVT = getValue(I.getValOperand()).getSimpleValueType();
+  auto Alignment = DAG.getEVTAlignment(MemVT);
+
+  auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
+  if (I.isVolatile())
+    Flags |= MachineMemOperand::MOVolatile;
+  Flags |= DAG.getTargetLoweringInfo().getMMOFlags(I);
+
+  MachineFunction &MF = DAG.getMachineFunction();
+  MachineMemOperand *MMO =
+      MF.getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()), Flags,
+                              MemVT.getStoreSize(), Alignment, AAMDNodes(),
+                              nullptr, SSID, Ordering);
+
   SDValue L =
-    DAG.getAtomic(NT, dl,
-                  getValue(I.getValOperand()).getSimpleValueType(),
-                  InChain,
-                  getValue(I.getPointerOperand()),
-                  getValue(I.getValOperand()),
-                  I.getPointerOperand(),
-                  /* Alignment=*/ 0, Order, SSID);
+      DAG.getAtomic(NT, dl, MemVT, InChain,
+                    getValue(I.getPointerOperand()), getValue(I.getValOperand()),
+                    MMO);
 
   SDValue OutChain = L.getValue(1);
 
@@ -4259,27 +4619,39 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+  EVT MemVT = TLI.getMemValueType(DAG.getDataLayout(), I.getType());
 
   if (!TLI.supportsUnalignedAtomics() &&
-      I.getAlignment() < VT.getStoreSize())
+      I.getAlignment() < MemVT.getSizeInBits() / 8)
     report_fatal_error("Cannot generate unaligned atomic load");
 
+  auto Flags = MachineMemOperand::MOLoad;
+  if (I.isVolatile())
+    Flags |= MachineMemOperand::MOVolatile;
+  if (I.getMetadata(LLVMContext::MD_invariant_load) != nullptr)
+    Flags |= MachineMemOperand::MOInvariant;
+  if (isDereferenceablePointer(I.getPointerOperand(), I.getType(),
+                               DAG.getDataLayout()))
+    Flags |= MachineMemOperand::MODereferenceable;
+
+  Flags |= TLI.getMMOFlags(I);
+
   MachineMemOperand *MMO =
       DAG.getMachineFunction().
           getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()),
-                               MachineMemOperand::MOVolatile |
-                               MachineMemOperand::MOLoad,
-                               VT.getStoreSize(),
+                               Flags, MemVT.getStoreSize(),
                                I.getAlignment() ? I.getAlignment() :
-                                   DAG.getEVTAlignment(VT),
+                                   DAG.getEVTAlignment(MemVT),
                                AAMDNodes(), nullptr, SSID, Order);
 
   InChain = TLI.prepareVolatileOrAtomicLoad(InChain, dl, DAG);
   SDValue L =
-      DAG.getAtomic(ISD::ATOMIC_LOAD, dl, VT, VT, InChain,
+      DAG.getAtomic(ISD::ATOMIC_LOAD, dl, MemVT, MemVT, InChain,
                     getValue(I.getPointerOperand()), MMO);
 
   SDValue OutChain = L.getValue(1);
+  if (MemVT != VT)
+    L = DAG.getPtrExtOrTrunc(L, dl, VT);
 
   setValue(&I, L);
   DAG.setRoot(OutChain);
@@ -4288,25 +4660,36 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
 void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) {
   SDLoc dl = getCurSDLoc();
 
-  AtomicOrdering Order = I.getOrdering();
+  AtomicOrdering Ordering = I.getOrdering();
   SyncScope::ID SSID = I.getSyncScopeID();
 
   SDValue InChain = getRoot();
 
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
-  EVT VT =
-      TLI.getValueType(DAG.getDataLayout(), I.getValueOperand()->getType());
+  EVT MemVT =
+      TLI.getMemValueType(DAG.getDataLayout(), I.getValueOperand()->getType());
 
-  if (I.getAlignment() < VT.getStoreSize())
+  if (I.getAlignment() < MemVT.getSizeInBits() / 8)
     report_fatal_error("Cannot generate unaligned atomic store");
 
-  SDValue OutChain =
-    DAG.getAtomic(ISD::ATOMIC_STORE, dl, VT,
-                  InChain,
-                  getValue(I.getPointerOperand()),
-                  getValue(I.getValueOperand()),
-                  I.getPointerOperand(), I.getAlignment(),
-                  Order, SSID);
+  auto Flags = MachineMemOperand::MOStore;
+  if (I.isVolatile())
+    Flags |= MachineMemOperand::MOVolatile;
+  Flags |= TLI.getMMOFlags(I);
+
+  MachineFunction &MF = DAG.getMachineFunction();
+  MachineMemOperand *MMO =
+      MF.getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()), Flags,
+                              MemVT.getStoreSize(), I.getAlignment(),
+                              AAMDNodes(), nullptr, SSID, Ordering);
+
+  SDValue Val = getValue(I.getValueOperand());
+  if (Val.getValueType() != MemVT)
+    Val = DAG.getPtrExtOrTrunc(Val, dl, MemVT);
+
+  SDValue OutChain = DAG.getAtomic(ISD::ATOMIC_STORE, dl, MemVT, InChain,
+                                   getValue(I.getPointerOperand()), Val, MMO);
 
   DAG.setRoot(OutChain);
 }
 
@@ -4364,10 +4747,12 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
   SDValue Result;
   if (IsTgtIntrinsic) {
     // This is target intrinsic that touches memory
-    Result = DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), VTs,
-      Ops, Info.memVT,
-      MachinePointerInfo(Info.ptrVal, Info.offset), Info.align,
-      Info.flags, Info.size);
+    AAMDNodes AAInfo;
+    I.getAAMetadata(AAInfo);
+    Result =
+        DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), VTs, Ops, Info.memVT,
+                                MachinePointerInfo(Info.ptrVal, Info.offset),
+                                Info.align, Info.flags, Info.size, AAInfo);
   } else if (!HasChain) {
     Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops);
   } else if (!I.getType()->isVoidTy()) {
@@ -4889,7 +5274,7 @@ static SDValue ExpandPowI(const SDLoc &DL, SDValue LHS, SDValue RHS,
       return DAG.getConstantFP(1.0, DL, LHS.getValueType());
 
     const Function &F = DAG.getMachineFunction().getFunction();
-    if (!F.optForSize() ||
+    if (!F.hasOptSize() ||
         // If optimizing for size, don't insert too many multiplies.
         // This inserts up to 5 multiplies.
        countPopulation(Val) + Log2_32(Val) < 7) {
@@ -4952,6 +5337,71 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
   if (!Arg)
     return false;
 
+  if (!IsDbgDeclare) {
+    // ArgDbgValues are hoisted to the beginning of the entry block. So we
+    // should only emit as ArgDbgValue if the dbg.value intrinsic is found in
+    // the entry block.
+    bool IsInEntryBlock = FuncInfo.MBB == &FuncInfo.MF->front();
+    if (!IsInEntryBlock)
+      return false;
+
+    // ArgDbgValues are hoisted to the beginning of the entry block. So we
+    // should only emit as ArgDbgValue if the dbg.value intrinsic describes a
+    // variable that also is a param.
+    //
+    // Although, if we are at the top of the entry block already, we can still
+    // emit using ArgDbgValue. This might catch some situations when the
+    // dbg.value refers to an argument that isn't used in the entry block, so
+    // any CopyToReg node would be optimized out and the only way to express
+    // this DBG_VALUE is by using the physical reg (or FI) as done in this
+    // method. ArgDbgValues are hoisted to the beginning of the entry block. So
+    // we should only emit as ArgDbgValue if the Variable is an argument to the
+    // current function, and the dbg.value intrinsic is found in the entry
+    // block.
+    bool VariableIsFunctionInputArg = Variable->isParameter() &&
+                                      !DL->getInlinedAt();
+    bool IsInPrologue = SDNodeOrder == LowestSDNodeOrder;
+    if (!IsInPrologue && !VariableIsFunctionInputArg)
+      return false;
+
+    // Here we assume that a function argument on IR level only can be used to
+    // describe one input parameter on source level. If we for example have
+    // source code like this
+    //
+    //    struct A { long x, y; };
+    //    void foo(struct A a, long b) {
+    //      ...
+    //      b = a.x;
+    //      ...
+    //    }
+    //
+    // and IR like this
+    //
+    //  define void @foo(i32 %a1, i32 %a2, i32 %b)  {
+    //  entry:
+    //    call void @llvm.dbg.value(metadata i32 %a1, "a", DW_OP_LLVM_fragment
+    //    call void @llvm.dbg.value(metadata i32 %a2, "a", DW_OP_LLVM_fragment
+    //    call void @llvm.dbg.value(metadata i32 %b, "b",
+    //    ...
+    //    call void @llvm.dbg.value(metadata i32 %a1, "b"
+    //    ...
+    //
+    // then the last dbg.value is describing a parameter "b" using a value that
+    // is an argument. But since we already have used %a1 to describe a
+    // parameter we should not handle that last dbg.value here (that would
+    // result in an incorrect hoisting of the DBG_VALUE to the function entry).
+    // Notice that we allow one dbg.value per IR level argument, to accommodate
+    // for the situation with fragments above.
+    if (VariableIsFunctionInputArg) {
+      unsigned ArgNo = Arg->getArgNo();
+      if (ArgNo >= FuncInfo.DescribedArgs.size())
+        FuncInfo.DescribedArgs.resize(ArgNo + 1, false);
+      else if (!IsInPrologue && FuncInfo.DescribedArgs.test(ArgNo))
+        return false;
+      FuncInfo.DescribedArgs.set(ArgNo);
+    }
+  }
+
   MachineFunction &MF = DAG.getMachineFunction();
   const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo();
 
@@ -4976,12 +5426,14 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
     }
   }
 
-  if (!Op && N.getNode())
+  if (!Op && N.getNode()) {
     // Check if frame index is available.
-    if (LoadSDNode *LNode = dyn_cast<LoadSDNode>(N.getNode()))
+    SDValue LCandidate = peekThroughBitcasts(N);
+    if (LoadSDNode *LNode = dyn_cast<LoadSDNode>(LCandidate.getNode()))
       if (FrameIndexSDNode *FINode =
          dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode()))
        Op = MachineOperand::CreateFI(FINode->getIndex());
+  }
 
   if (!Op) {
     // Check if ValueMap has reg number.
@@ -5055,11 +5507,29 @@ SDDbgValue *SelectionDAGBuilder::getDbgValue(SDValue N,
 #  define setjmp_undefined_for_msvc
 #endif
 
-/// Lower the call to the specified intrinsic function. If we want to emit this
-/// as a call to a named external function, return the name. Otherwise, lower it
-/// and return null.
-const char *
-SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
+static unsigned FixedPointIntrinsicToOpcode(unsigned Intrinsic) {
+  switch (Intrinsic) {
+  case Intrinsic::smul_fix:
+    return ISD::SMULFIX;
+  case Intrinsic::umul_fix:
+    return ISD::UMULFIX;
+  default:
+    llvm_unreachable("Unhandled fixed point intrinsic");
+  }
+}
+
+void SelectionDAGBuilder::lowerCallToExternalSymbol(const CallInst &I,
+                                                    const char *FunctionName) {
+  assert(FunctionName && "FunctionName must not be nullptr");
+  SDValue Callee = DAG.getExternalSymbol(
+      FunctionName,
+      DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()));
+  LowerCallTo(&I, Callee, I.isTailCall());
+}
+
+/// Lower the call to the specified intrinsic function.
+void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
+                                             unsigned Intrinsic) {
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   SDLoc sdl = getCurSDLoc();
   DebugLoc dl = getCurDebugLoc();
@@ -5069,28 +5539,28 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
   default:
     // By default, turn this into a target intrinsic node.
     visitTargetIntrinsic(I, Intrinsic);
-    return nullptr;
-  case Intrinsic::vastart:  visitVAStart(I); return nullptr;
-  case Intrinsic::vaend:    visitVAEnd(I); return nullptr;
-  case Intrinsic::vacopy:   visitVACopy(I); return nullptr;
+    return;
+  case Intrinsic::vastart:  visitVAStart(I); return;
+  case Intrinsic::vaend:    visitVAEnd(I); return;
+  case Intrinsic::vacopy:   visitVACopy(I); return;
   case Intrinsic::returnaddress:
     setValue(&I, DAG.getNode(ISD::RETURNADDR, sdl,
                              TLI.getPointerTy(DAG.getDataLayout()),
                             getValue(I.getArgOperand(0))));
-    return nullptr;
+    return;
   case Intrinsic::addressofreturnaddress:
     setValue(&I, DAG.getNode(ISD::ADDROFRETURNADDR, sdl,
                              TLI.getPointerTy(DAG.getDataLayout())));
-    return nullptr;
+    return;
   case Intrinsic::sponentry:
     setValue(&I, DAG.getNode(ISD::SPONENTRY, sdl,
                              TLI.getPointerTy(DAG.getDataLayout())));
-    return nullptr;
+    return;
   case Intrinsic::frameaddress:
     setValue(&I, DAG.getNode(ISD::FRAMEADDR, sdl,
                              TLI.getPointerTy(DAG.getDataLayout()),
                              getValue(I.getArgOperand(0))));
-    return nullptr;
+    return;
   case Intrinsic::read_register: {
     Value *Reg = I.getArgOperand(0);
     SDValue Chain = getRoot();
@@ -5101,7 +5571,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
       DAG.getVTList(VT, MVT::Other), Chain, RegName);
     setValue(&I, Res);
     DAG.setRoot(Res.getValue(1));
-    return nullptr;
+    return;
   }
   case Intrinsic::write_register: {
     Value *Reg = I.getArgOperand(0);
@@ -5111,12 +5581,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
       DAG.getMDNode(cast<MDNode>(cast<MetadataAsValue>(Reg)->getMetadata()));
     DAG.setRoot(DAG.getNode(ISD::WRITE_REGISTER, sdl, MVT::Other, Chain,
                             RegName, getValue(RegValue)));
-    return nullptr;
+    return;
   }
   case Intrinsic::setjmp:
-    return &"_setjmp"[!TLI.usesUnderscoreSetJmp()];
+    lowerCallToExternalSymbol(I, &"_setjmp"[!TLI.usesUnderscoreSetJmp()]);
+    return;
   case Intrinsic::longjmp:
-    return &"_longjmp"[!TLI.usesUnderscoreLongJmp()];
+    lowerCallToExternalSymbol(I, &"_longjmp"[!TLI.usesUnderscoreLongJmp()]);
+    return;
   case Intrinsic::memcpy: {
     const auto &MCI = cast<MemCpyInst>(I);
     SDValue Op1 = getValue(I.getArgOperand(0));
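
The setjmp/longjmp cases keep the `&"_setjmp"[!TLI.usesUnderscoreSetJmp()]` idiom while routing through the new lowerCallToExternalSymbol helper: indexing into the string literal simply skips the leading underscore when the target does not use it. Demonstrated standalone:

    #include <cassert>
    #include <cstring>

    // &"_setjmp"[0] is the whole literal; &"_setjmp"[1] points past the
    // leading underscore. One expression picks the right symbol name.
    static const char *setjmpSymbol(bool UsesUnderscore) {
      return &"_setjmp"[!UsesUnderscore];
    }

    int main() {
      assert(std::strcmp(setjmpSymbol(true), "_setjmp") == 0);
      assert(std::strcmp(setjmpSymbol(false), "setjmp") == 0);
      return 0;
    }
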
Intrinsic::memset: { const auto &MSI = cast<MemSetInst>(I); @@ -5149,7 +5621,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDValue MS = DAG.getMemset(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, isTC, MachinePointerInfo(I.getArgOperand(0))); updateDAGForMaybeTailCall(MS); - return nullptr; + return; } case Intrinsic::memmove: { const auto &MMI = cast<MemMoveInst>(I); @@ -5168,7 +5640,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { isTC, MachinePointerInfo(I.getArgOperand(0)), MachinePointerInfo(I.getArgOperand(1))); updateDAGForMaybeTailCall(MM); - return nullptr; + return; } case Intrinsic::memcpy_element_unordered_atomic: { const AtomicMemCpyInst &MI = cast<AtomicMemCpyInst>(I); @@ -5186,7 +5658,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { MachinePointerInfo(MI.getRawDest()), MachinePointerInfo(MI.getRawSource())); updateDAGForMaybeTailCall(MC); - return nullptr; + return; } case Intrinsic::memmove_element_unordered_atomic: { auto &MI = cast<AtomicMemMoveInst>(I); @@ -5204,7 +5676,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { MachinePointerInfo(MI.getRawDest()), MachinePointerInfo(MI.getRawSource())); updateDAGForMaybeTailCall(MC); - return nullptr; + return; } case Intrinsic::memset_element_unordered_atomic: { auto &MI = cast<AtomicMemSetInst>(I); @@ -5220,7 +5692,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { LengthTy, ElemSz, isTC, MachinePointerInfo(MI.getRawDest())); updateDAGForMaybeTailCall(MC); - return nullptr; + return; } case Intrinsic::dbg_addr: case Intrinsic::dbg_declare: { @@ -5235,7 +5707,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { if (!Address || isa<UndefValue>(Address) || (Address->use_empty() && !isa<Argument>(Address))) { LLVM_DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); - return nullptr; + return; } bool isParameter = Variable->isParameter() || isa<Argument>(Address); @@ -5264,7 +5736,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { Variable, Expression, FI, /*IsIndirect*/ true, dl, SDNodeOrder); DAG.AddDbgValue(SDV, getRoot().getNode(), isParameter); } - return nullptr; + return; } SDValue &N = NodeMap[Address]; @@ -5286,7 +5758,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { // Address is an argument, so try to emit its dbg value using // virtual register info from the FuncInfo.ValueMap. 
EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, true, N); - return nullptr; + return; } else { SDV = DAG.getDbgValue(Variable, Expression, N.getNode(), N.getResNo(), true, dl, SDNodeOrder); @@ -5300,7 +5772,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { LLVM_DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); } } - return nullptr; + return; } case Intrinsic::dbg_label: { const DbgLabelInst &DI = cast<DbgLabelInst>(I); @@ -5310,7 +5782,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDDbgLabel *SDV; SDV = DAG.getDbgLabel(Label, dl, SDNodeOrder); DAG.AddDbgLabel(SDV); - return nullptr; + return; } case Intrinsic::dbg_value: { const DbgValueInst &DI = cast<DbgValueInst>(I); @@ -5321,88 +5793,19 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { dropDanglingDebugInfo(Variable, Expression); const Value *V = DI.getValue(); if (!V) - return nullptr; - - SDDbgValue *SDV; - if (isa<ConstantInt>(V) || isa<ConstantFP>(V) || isa<UndefValue>(V) || - isa<ConstantPointerNull>(V)) { - SDV = DAG.getConstantDbgValue(Variable, Expression, V, dl, SDNodeOrder); - DAG.AddDbgValue(SDV, nullptr, false); - return nullptr; - } - - // Do not use getValue() in here; we don't want to generate code at - // this point if it hasn't been done yet. - SDValue N = NodeMap[V]; - if (!N.getNode() && isa<Argument>(V)) // Check unused arguments map. - N = UnusedArgNodeMap[V]; - if (N.getNode()) { - if (EmitFuncArgumentDbgValue(V, Variable, Expression, dl, false, N)) - return nullptr; - SDV = getDbgValue(N, Variable, Expression, dl, SDNodeOrder); - DAG.AddDbgValue(SDV, N.getNode(), false); - return nullptr; - } - - // PHI nodes have already been selected, so we should know which VReg that - // is assigns to already. - if (isa<PHINode>(V)) { - auto VMI = FuncInfo.ValueMap.find(V); - if (VMI != FuncInfo.ValueMap.end()) { - unsigned Reg = VMI->second; - // The PHI node may be split up into several MI PHI nodes (in - // FunctionLoweringInfo::set). - RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg, - V->getType(), None); - if (RFV.occupiesMultipleRegs()) { - unsigned Offset = 0; - unsigned BitsToDescribe = 0; - if (auto VarSize = Variable->getSizeInBits()) - BitsToDescribe = *VarSize; - if (auto Fragment = Expression->getFragmentInfo()) - BitsToDescribe = Fragment->SizeInBits; - for (auto RegAndSize : RFV.getRegsAndSizes()) { - unsigned RegisterSize = RegAndSize.second; - // Bail out if all bits are described already. - if (Offset >= BitsToDescribe) - break; - unsigned FragmentSize = (Offset + RegisterSize > BitsToDescribe) - ? BitsToDescribe - Offset - : RegisterSize; - auto FragmentExpr = DIExpression::createFragmentExpression( - Expression, Offset, FragmentSize); - if (!FragmentExpr) - continue; - SDV = DAG.getVRegDbgValue(Variable, *FragmentExpr, RegAndSize.first, - false, dl, SDNodeOrder); - DAG.AddDbgValue(SDV, nullptr, false); - Offset += RegisterSize; - } - } else { - SDV = DAG.getVRegDbgValue(Variable, Expression, Reg, false, dl, - SDNodeOrder); - DAG.AddDbgValue(SDV, nullptr, false); - } - return nullptr; - } - } + return; - // TODO: When we get here we will either drop the dbg.value completely, or - // we try to move it forward by letting it dangle for awhile. So we should - // probably add an extra DbgValue to the DAG here, with a reference to - // "noreg", to indicate that we have lost the debug location for the - // variable. 
+ if (handleDebugValue(V, Variable, Expression, dl, DI.getDebugLoc(), + SDNodeOrder)) + return; - if (!V->use_empty() ) { - // Do not call getValue(V) yet, as we don't want to generate code. - // Remember it for later. - DanglingDebugInfoMap[V].emplace_back(&DI, dl, SDNodeOrder); - return nullptr; - } + // TODO: Dangling debug info will eventually either be resolved or produce + // an Undef DBG_VALUE. However in the resolution case, a gap may appear + // between the original dbg.value location and its resolved DBG_VALUE, which + // we should ideally fill with an extra Undef DBG_VALUE. - LLVM_DEBUG(dbgs() << "Dropping debug location info for:\n " << DI << "\n"); - LLVM_DEBUG(dbgs() << " Last seen at:\n " << *V << "\n"); - return nullptr; + DanglingDebugInfoMap[V].emplace_back(&DI, dl, SDNodeOrder); + return; } case Intrinsic::eh_typeid_for: { @@ -5411,7 +5814,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { unsigned TypeID = DAG.getMachineFunction().getTypeIDFor(GV); Res = DAG.getConstant(TypeID, sdl, MVT::i32); setValue(&I, Res); - return nullptr; + return; } case Intrinsic::eh_return_i32: @@ -5422,15 +5825,15 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { getControlRoot(), getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)))); - return nullptr; + return; case Intrinsic::eh_unwind_init: DAG.getMachineFunction().setCallsUnwindInit(true); - return nullptr; + return; case Intrinsic::eh_dwarf_cfa: setValue(&I, DAG.getNode(ISD::EH_DWARF_CFA, sdl, TLI.getPointerTy(DAG.getDataLayout()), getValue(I.getArgOperand(0)))); - return nullptr; + return; case Intrinsic::eh_sjlj_callsite: { MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(0)); @@ -5438,7 +5841,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { assert(MMI.getCurrentCallSite() == 0 && "Overlapping call sites!"); MMI.setCurrentCallSite(CI->getZExtValue()); - return nullptr; + return; } case Intrinsic::eh_sjlj_functioncontext: { // Get and store the index of the function context. 
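// ---- [Editor's aside: illustrative sketch, not part of the patch] ----
// The dbg.value hunks above defer records whose operand has no SDNode yet
// (DanglingDebugInfoMap) and replay them once the value is lowered. A
// self-contained model of that bookkeeping; the names below are
// hypothetical, not LLVM API:
#include <functional>
#include <map>
#include <vector>

struct PendingDbgValue {
  unsigned Order;              // SDNodeOrder where the dbg.value sat.
  std::function<void()> Emit;  // Deferred DBG_VALUE emission.
};

class DanglingDebugMap {
  std::map<const void *, std::vector<PendingDbgValue>> Pending;

public:
  // Operand not lowered yet: remember the record instead of dropping it.
  void defer(const void *V, unsigned Order, std::function<void()> Emit) {
    Pending[V].push_back({Order, std::move(Emit)});
  }

  // V finally got an SDNode: replay everything recorded for it. In
  // SelectionDAG this is where DAG.getDbgValue/AddDbgValue would run.
  void resolve(const void *V) {
    auto It = Pending.find(V);
    if (It == Pending.end())
      return;
    for (PendingDbgValue &P : It->second)
      P.Emit();
    Pending.erase(It);
  }
};
// ---- [End of editor's aside] ----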
@@ -5447,7 +5850,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { cast<AllocaInst>(I.getArgOperand(0)->stripPointerCasts()); int FI = FuncInfo.StaticAllocaMap[FnCtx]; MFI.setFunctionContextIndex(FI); - return nullptr; + return; } case Intrinsic::eh_sjlj_setjmp: { SDValue Ops[2]; @@ -5457,34 +5860,34 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { DAG.getVTList(MVT::i32, MVT::Other), Ops); setValue(&I, Op.getValue(0)); DAG.setRoot(Op.getValue(1)); - return nullptr; + return; } case Intrinsic::eh_sjlj_longjmp: DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_LONGJMP, sdl, MVT::Other, getRoot(), getValue(I.getArgOperand(0)))); - return nullptr; + return; case Intrinsic::eh_sjlj_setup_dispatch: DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_SETUP_DISPATCH, sdl, MVT::Other, getRoot())); - return nullptr; + return; case Intrinsic::masked_gather: visitMaskedGather(I); - return nullptr; + return; case Intrinsic::masked_load: visitMaskedLoad(I); - return nullptr; + return; case Intrinsic::masked_scatter: visitMaskedScatter(I); - return nullptr; + return; case Intrinsic::masked_store: visitMaskedStore(I); - return nullptr; + return; case Intrinsic::masked_expandload: visitMaskedLoad(I, true /* IsExpanding */); - return nullptr; + return; case Intrinsic::masked_compressstore: visitMaskedStore(I, true /* IsCompressing */); - return nullptr; + return; case Intrinsic::x86_mmx_pslli_w: case Intrinsic::x86_mmx_pslli_d: case Intrinsic::x86_mmx_pslli_q: @@ -5496,7 +5899,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDValue ShAmt = getValue(I.getArgOperand(1)); if (isa<ConstantSDNode>(ShAmt)) { visitTargetIntrinsic(I, Intrinsic); - return nullptr; + return; } unsigned NewIntrinsic = 0; EVT ShAmtVT = MVT::v2i32; @@ -5542,31 +5945,31 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { DAG.getConstant(NewIntrinsic, sdl, MVT::i32), getValue(I.getArgOperand(0)), ShAmt); setValue(&I, Res); - return nullptr; + return; } case Intrinsic::powi: setValue(&I, ExpandPowI(sdl, getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), DAG)); - return nullptr; + return; case Intrinsic::log: setValue(&I, expandLog(sdl, getValue(I.getArgOperand(0)), DAG, TLI)); - return nullptr; + return; case Intrinsic::log2: setValue(&I, expandLog2(sdl, getValue(I.getArgOperand(0)), DAG, TLI)); - return nullptr; + return; case Intrinsic::log10: setValue(&I, expandLog10(sdl, getValue(I.getArgOperand(0)), DAG, TLI)); - return nullptr; + return; case Intrinsic::exp: setValue(&I, expandExp(sdl, getValue(I.getArgOperand(0)), DAG, TLI)); - return nullptr; + return; case Intrinsic::exp2: setValue(&I, expandExp2(sdl, getValue(I.getArgOperand(0)), DAG, TLI)); - return nullptr; + return; case Intrinsic::pow: setValue(&I, expandPow(sdl, getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), DAG, TLI)); - return nullptr; + return; case Intrinsic::sqrt: case Intrinsic::fabs: case Intrinsic::sin: @@ -5597,61 +6000,71 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { setValue(&I, DAG.getNode(Opcode, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)))); - return nullptr; + return; } - case Intrinsic::minnum: { - auto VT = getValue(I.getArgOperand(0)).getValueType(); - unsigned Opc = - I.hasNoNaNs() && TLI.isOperationLegalOrCustom(ISD::FMINIMUM, VT) - ? 
ISD::FMINIMUM - : ISD::FMINNUM; - setValue(&I, DAG.getNode(Opc, sdl, VT, + case Intrinsic::lround: + case Intrinsic::llround: + case Intrinsic::lrint: + case Intrinsic::llrint: { + unsigned Opcode; + switch (Intrinsic) { + default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. + case Intrinsic::lround: Opcode = ISD::LROUND; break; + case Intrinsic::llround: Opcode = ISD::LLROUND; break; + case Intrinsic::lrint: Opcode = ISD::LRINT; break; + case Intrinsic::llrint: Opcode = ISD::LLRINT; break; + } + + EVT RetVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); + setValue(&I, DAG.getNode(Opcode, sdl, RetVT, + getValue(I.getArgOperand(0)))); + return; + } + case Intrinsic::minnum: + setValue(&I, DAG.getNode(ISD::FMINNUM, sdl, + getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)))); - return nullptr; - } - case Intrinsic::maxnum: { - auto VT = getValue(I.getArgOperand(0)).getValueType(); - unsigned Opc = - I.hasNoNaNs() && TLI.isOperationLegalOrCustom(ISD::FMAXIMUM, VT) - ? ISD::FMAXIMUM - : ISD::FMAXNUM; - setValue(&I, DAG.getNode(Opc, sdl, VT, + return; + case Intrinsic::maxnum: + setValue(&I, DAG.getNode(ISD::FMAXNUM, sdl, + getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)))); - return nullptr; - } + return; case Intrinsic::minimum: setValue(&I, DAG.getNode(ISD::FMINIMUM, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)))); - return nullptr; + return; case Intrinsic::maximum: setValue(&I, DAG.getNode(ISD::FMAXIMUM, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)))); - return nullptr; + return; case Intrinsic::copysign: setValue(&I, DAG.getNode(ISD::FCOPYSIGN, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)))); - return nullptr; + return; case Intrinsic::fma: setValue(&I, DAG.getNode(ISD::FMA, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), getValue(I.getArgOperand(2)))); - return nullptr; + return; case Intrinsic::experimental_constrained_fadd: case Intrinsic::experimental_constrained_fsub: case Intrinsic::experimental_constrained_fmul: case Intrinsic::experimental_constrained_fdiv: case Intrinsic::experimental_constrained_frem: case Intrinsic::experimental_constrained_fma: + case Intrinsic::experimental_constrained_fptrunc: + case Intrinsic::experimental_constrained_fpext: case Intrinsic::experimental_constrained_sqrt: case Intrinsic::experimental_constrained_pow: case Intrinsic::experimental_constrained_powi: @@ -5671,7 +6084,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::experimental_constrained_round: case Intrinsic::experimental_constrained_trunc: visitConstrainedFPIntrinsic(cast<ConstrainedFPIntrinsic>(I)); - return nullptr; + return; case Intrinsic::fmuladd: { EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict && @@ -5693,7 +6106,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { getValue(I.getArgOperand(2))); setValue(&I, Add); } - return nullptr; + return; } case Intrinsic::convert_to_fp16: setValue(&I, DAG.getNode(ISD::BITCAST, sdl, MVT::i16, @@ -5701,17 +6114,17 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { 
getValue(I.getArgOperand(0)), DAG.getTargetConstant(0, sdl, MVT::i32)))); - return nullptr; + return; case Intrinsic::convert_from_fp16: setValue(&I, DAG.getNode(ISD::FP_EXTEND, sdl, TLI.getValueType(DAG.getDataLayout(), I.getType()), DAG.getNode(ISD::BITCAST, sdl, MVT::f16, getValue(I.getArgOperand(0))))); - return nullptr; + return; case Intrinsic::pcmarker: { SDValue Tmp = getValue(I.getArgOperand(0)); DAG.setRoot(DAG.getNode(ISD::PCMARKER, sdl, MVT::Other, getRoot(), Tmp)); - return nullptr; + return; } case Intrinsic::readcyclecounter: { SDValue Op = getRoot(); @@ -5719,25 +6132,25 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { DAG.getVTList(MVT::i64, MVT::Other), Op); setValue(&I, Res); DAG.setRoot(Res.getValue(1)); - return nullptr; + return; } case Intrinsic::bitreverse: setValue(&I, DAG.getNode(ISD::BITREVERSE, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)))); - return nullptr; + return; case Intrinsic::bswap: setValue(&I, DAG.getNode(ISD::BSWAP, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)))); - return nullptr; + return; case Intrinsic::cttz: { SDValue Arg = getValue(I.getArgOperand(0)); ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1)); EVT Ty = Arg.getValueType(); setValue(&I, DAG.getNode(CI->isZero() ? ISD::CTTZ : ISD::CTTZ_ZERO_UNDEF, sdl, Ty, Arg)); - return nullptr; + return; } case Intrinsic::ctlz: { SDValue Arg = getValue(I.getArgOperand(0)); @@ -5745,13 +6158,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { EVT Ty = Arg.getValueType(); setValue(&I, DAG.getNode(CI->isZero() ? ISD::CTLZ : ISD::CTLZ_ZERO_UNDEF, sdl, Ty, Arg)); - return nullptr; + return; } case Intrinsic::ctpop: { SDValue Arg = getValue(I.getArgOperand(0)); EVT Ty = Arg.getValueType(); setValue(&I, DAG.getNode(ISD::CTPOP, sdl, Ty, Arg)); - return nullptr; + return; } case Intrinsic::fshl: case Intrinsic::fshr: { @@ -5767,7 +6180,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { auto FunnelOpcode = IsFSHL ? ISD::FSHL : ISD::FSHR; if (TLI.isOperationLegalOrCustom(FunnelOpcode, VT)) { setValue(&I, DAG.getNode(FunnelOpcode, sdl, VT, X, Y, Z)); - return nullptr; + return; } // When X == Y, this is rotate. If the data type has a power-of-2 size, we @@ -5777,7 +6190,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { auto RotateOpcode = IsFSHL ? ISD::ROTL : ISD::ROTR; if (TLI.isOperationLegalOrCustom(RotateOpcode, VT)) { setValue(&I, DAG.getNode(RotateOpcode, sdl, VT, X, Z)); - return nullptr; + return; } // Some targets only rotate one way. Try the opposite direction. @@ -5786,7 +6199,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { // Negate the shift amount because it is safe to ignore the high bits. SDValue NegShAmt = DAG.getNode(ISD::SUB, sdl, VT, Zero, Z); setValue(&I, DAG.getNode(RotateOpcode, sdl, VT, X, NegShAmt)); - return nullptr; + return; } // fshl (rotl): (X << (Z % BW)) | (X >> ((0 - Z) % BW)) @@ -5796,7 +6209,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDValue ShX = DAG.getNode(ISD::SHL, sdl, VT, X, IsFSHL ? ShAmt : NShAmt); SDValue ShY = DAG.getNode(ISD::SRL, sdl, VT, X, IsFSHL ? 
NShAmt : ShAmt); setValue(&I, DAG.getNode(ISD::OR, sdl, VT, ShX, ShY)); - return nullptr; + return; } // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW))) @@ -5816,39 +6229,48 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { // For fshr, 0-shift returns the 2nd arg (Y). SDValue IsZeroShift = DAG.getSetCC(sdl, CCVT, ShAmt, Zero, ISD::SETEQ); setValue(&I, DAG.getSelect(sdl, VT, IsZeroShift, IsFSHL ? X : Y, Or)); - return nullptr; + return; } case Intrinsic::sadd_sat: { SDValue Op1 = getValue(I.getArgOperand(0)); SDValue Op2 = getValue(I.getArgOperand(1)); setValue(&I, DAG.getNode(ISD::SADDSAT, sdl, Op1.getValueType(), Op1, Op2)); - return nullptr; + return; } case Intrinsic::uadd_sat: { SDValue Op1 = getValue(I.getArgOperand(0)); SDValue Op2 = getValue(I.getArgOperand(1)); setValue(&I, DAG.getNode(ISD::UADDSAT, sdl, Op1.getValueType(), Op1, Op2)); - return nullptr; + return; } case Intrinsic::ssub_sat: { SDValue Op1 = getValue(I.getArgOperand(0)); SDValue Op2 = getValue(I.getArgOperand(1)); setValue(&I, DAG.getNode(ISD::SSUBSAT, sdl, Op1.getValueType(), Op1, Op2)); - return nullptr; + return; } case Intrinsic::usub_sat: { SDValue Op1 = getValue(I.getArgOperand(0)); SDValue Op2 = getValue(I.getArgOperand(1)); setValue(&I, DAG.getNode(ISD::USUBSAT, sdl, Op1.getValueType(), Op1, Op2)); - return nullptr; + return; } - case Intrinsic::smul_fix: { + case Intrinsic::smul_fix: + case Intrinsic::umul_fix: { SDValue Op1 = getValue(I.getArgOperand(0)); SDValue Op2 = getValue(I.getArgOperand(1)); SDValue Op3 = getValue(I.getArgOperand(2)); - setValue(&I, - DAG.getNode(ISD::SMULFIX, sdl, Op1.getValueType(), Op1, Op2, Op3)); - return nullptr; + setValue(&I, DAG.getNode(FixedPointIntrinsicToOpcode(Intrinsic), sdl, + Op1.getValueType(), Op1, Op2, Op3)); + return; + } + case Intrinsic::smul_fix_sat: { + SDValue Op1 = getValue(I.getArgOperand(0)); + SDValue Op2 = getValue(I.getArgOperand(1)); + SDValue Op3 = getValue(I.getArgOperand(2)); + setValue(&I, DAG.getNode(ISD::SMULFIXSAT, sdl, Op1.getValueType(), Op1, Op2, + Op3)); + return; } case Intrinsic::stacksave: { SDValue Op = getRoot(); @@ -5857,26 +6279,26 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { DAG.getVTList(TLI.getPointerTy(DAG.getDataLayout()), MVT::Other), Op); setValue(&I, Res); DAG.setRoot(Res.getValue(1)); - return nullptr; + return; } case Intrinsic::stackrestore: Res = getValue(I.getArgOperand(0)); DAG.setRoot(DAG.getNode(ISD::STACKRESTORE, sdl, MVT::Other, getRoot(), Res)); - return nullptr; + return; case Intrinsic::get_dynamic_area_offset: { SDValue Op = getRoot(); EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout()); EVT ResTy = TLI.getValueType(DAG.getDataLayout(), I.getType()); // Result type for @llvm.get.dynamic.area.offset should match PtrTy for // target. - if (PtrTy != ResTy) + if (PtrTy.getSizeInBits() < ResTy.getSizeInBits()) report_fatal_error("Wrong result type for @llvm.get.dynamic.area.offset" " intrinsic!"); Res = DAG.getNode(ISD::GET_DYNAMIC_AREA_OFFSET, sdl, DAG.getVTList(ResTy), Op); DAG.setRoot(Op); setValue(&I, Res); - return nullptr; + return; } case Intrinsic::stackguard: { EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout()); @@ -5896,7 +6318,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { Res = TLI.emitStackGuardXorFP(DAG, Res, sdl); DAG.setRoot(Chain); setValue(&I, Res); - return nullptr; + return; } case Intrinsic::stackprotector: { // Emit code into the DAG to store the stack guard onto the stack. 
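// ---- [Editor's aside: illustrative sketch, not part of the patch] ----
// Semantics of the fixed-point multiply intrinsics routed through
// FixedPointIntrinsicToOpcode above (ISD::SMULFIX / UMULFIX / SMULFIXSAT):
// multiply in double width, then drop 'Scale' fractional bits. A reference
// model for the 32-bit signed case (helper names are hypothetical):
#include <cstdint>

int32_t smul_fix_i32(int32_t A, int32_t B, unsigned Scale) {
  // Full 64-bit product; the arithmetic right shift discards the Scale
  // fractional bits (rounding towards negative infinity).
  int64_t Prod = static_cast<int64_t>(A) * static_cast<int64_t>(B);
  return static_cast<int32_t>(Prod >> Scale);
}

// smul.fix.sat additionally clamps the shifted product to the i32 range,
// which is what the separate ISD::SMULFIXSAT node expresses.
int32_t smul_fix_sat_i32(int32_t A, int32_t B, unsigned Scale) {
  int64_t Shifted =
      (static_cast<int64_t>(A) * static_cast<int64_t>(B)) >> Scale;
  if (Shifted > INT32_MAX)
    return INT32_MAX;
  if (Shifted < INT32_MIN)
    return INT32_MIN;
  return static_cast<int32_t>(Shifted);
}
// ---- [End of editor's aside] ----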
@@ -5923,7 +6345,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { /* Alignment = */ 0, MachineMemOperand::MOVolatile); setValue(&I, Res); DAG.setRoot(Res); - return nullptr; + return; } case Intrinsic::objectsize: { // If we don't know by now, we're never going to know. @@ -5940,14 +6362,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { Res = DAG.getConstant(0, sdl, Ty); setValue(&I, Res); - return nullptr; + return; } case Intrinsic::is_constant: // If this wasn't constant-folded away by now, then it's not a // constant. setValue(&I, DAG.getConstant(0, sdl, MVT::i1)); - return nullptr; + return; case Intrinsic::annotation: case Intrinsic::ptr_annotation: @@ -5955,12 +6377,12 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::strip_invariant_group: // Drop the intrinsic, but forward the value setValue(&I, getValue(I.getOperand(0))); - return nullptr; + return; case Intrinsic::assume: case Intrinsic::var_annotation: case Intrinsic::sideeffect: // Discard annotate attributes, assumptions, and artificial side-effects. - return nullptr; + return; case Intrinsic::codeview_annotation: { // Emit a label associated with this metadata. @@ -5971,7 +6393,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { MF.addCodeViewAnnotation(Label, cast<MDNode>(MD)); Res = DAG.getLabelNode(ISD::ANNOTATION_LABEL, sdl, getRoot(), Label); DAG.setRoot(Res); - return nullptr; + return; } case Intrinsic::init_trampoline: { @@ -5988,13 +6410,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { Res = DAG.getNode(ISD::INIT_TRAMPOLINE, sdl, MVT::Other, Ops); DAG.setRoot(Res); - return nullptr; + return; } case Intrinsic::adjust_trampoline: setValue(&I, DAG.getNode(ISD::ADJUST_TRAMPOLINE, sdl, TLI.getPointerTy(DAG.getDataLayout()), getValue(I.getArgOperand(0)))); - return nullptr; + return; case Intrinsic::gcroot: { assert(DAG.getMachineFunction().getFunction().hasGC() && "only valid in functions with gc specified, enforced by Verifier"); @@ -6004,19 +6426,19 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { FrameIndexSDNode *FI = cast<FrameIndexSDNode>(getValue(Alloca).getNode()); GFI->addStackRoot(FI->getIndex(), TypeMap); - return nullptr; + return; } case Intrinsic::gcread: case Intrinsic::gcwrite: llvm_unreachable("GC failed to lower gcread/gcwrite intrinsics!"); case Intrinsic::flt_rounds: setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, sdl, MVT::i32)); - return nullptr; + return; case Intrinsic::expect: // Just replace __builtin_expect(exp, c) with EXP. setValue(&I, getValue(I.getArgOperand(0))); - return nullptr; + return; case Intrinsic::debugtrap: case Intrinsic::trap: { @@ -6028,7 +6450,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { ISD::NodeType Op = (Intrinsic == Intrinsic::trap) ? 
ISD::TRAP : ISD::DEBUGTRAP; DAG.setRoot(DAG.getNode(Op, sdl,MVT::Other, getRoot())); - return nullptr; + return; } TargetLowering::ArgListTy Args; @@ -6041,7 +6463,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI); DAG.setRoot(Result.second); - return nullptr; + return; } case Intrinsic::uadd_with_overflow: @@ -6063,9 +6485,15 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDValue Op1 = getValue(I.getArgOperand(0)); SDValue Op2 = getValue(I.getArgOperand(1)); - SDVTList VTs = DAG.getVTList(Op1.getValueType(), MVT::i1); + EVT ResultVT = Op1.getValueType(); + EVT OverflowVT = MVT::i1; + if (ResultVT.isVector()) + OverflowVT = EVT::getVectorVT( + *Context, OverflowVT, ResultVT.getVectorNumElements()); + + SDVTList VTs = DAG.getVTList(ResultVT, OverflowVT); setValue(&I, DAG.getNode(Op, sdl, VTs, Op1, Op2)); - return nullptr; + return; } case Intrinsic::prefetch: { SDValue Ops[5]; @@ -6088,21 +6516,24 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { PendingLoads.push_back(Result); Result = getRoot(); DAG.setRoot(Result); - return nullptr; + return; } case Intrinsic::lifetime_start: case Intrinsic::lifetime_end: { bool IsStart = (Intrinsic == Intrinsic::lifetime_start); // Stack coloring is not enabled in O0, discard region information. if (TM.getOptLevel() == CodeGenOpt::None) - return nullptr; + return; - SmallVector<Value *, 4> Allocas; - GetUnderlyingObjects(I.getArgOperand(1), Allocas, *DL); + const int64_t ObjectSize = + cast<ConstantInt>(I.getArgOperand(0))->getSExtValue(); + Value *const ObjectPtr = I.getArgOperand(1); + SmallVector<const Value *, 4> Allocas; + GetUnderlyingObjects(ObjectPtr, Allocas, *DL); - for (SmallVectorImpl<Value*>::iterator Object = Allocas.begin(), + for (SmallVectorImpl<const Value*>::iterator Object = Allocas.begin(), E = Allocas.end(); Object != E; ++Object) { - AllocaInst *LifetimeObject = dyn_cast_or_null<AllocaInst>(*Object); + const AllocaInst *LifetimeObject = dyn_cast_or_null<AllocaInst>(*Object); // Could not find an Alloca. if (!LifetimeObject) @@ -6112,49 +6543,50 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { // valid frame index. auto SI = FuncInfo.StaticAllocaMap.find(LifetimeObject); if (SI == FuncInfo.StaticAllocaMap.end()) - return nullptr; - - int FI = SI->second; - - SDValue Ops[2]; - Ops[0] = getRoot(); - Ops[1] = - DAG.getFrameIndex(FI, TLI.getFrameIndexTy(DAG.getDataLayout()), true); - unsigned Opcode = (IsStart ? ISD::LIFETIME_START : ISD::LIFETIME_END); + return; - Res = DAG.getNode(Opcode, sdl, MVT::Other, Ops); + const int FrameIndex = SI->second; + int64_t Offset; + if (GetPointerBaseWithConstantOffset( + ObjectPtr, Offset, DAG.getDataLayout()) != LifetimeObject) + Offset = -1; // Cannot determine offset from alloca to lifetime object. + Res = DAG.getLifetimeNode(IsStart, sdl, getRoot(), FrameIndex, ObjectSize, + Offset); DAG.setRoot(Res); } - return nullptr; + return; } case Intrinsic::invariant_start: // Discard region information. setValue(&I, DAG.getUNDEF(TLI.getPointerTy(DAG.getDataLayout()))); - return nullptr; + return; case Intrinsic::invariant_end: // Discard region information. - return nullptr; + return; case Intrinsic::clear_cache: - return TLI.getClearCacheBuiltinName(); + /// FunctionName may be null. 
+ if (const char *FunctionName = TLI.getClearCacheBuiltinName()) + lowerCallToExternalSymbol(I, FunctionName); + return; case Intrinsic::donothing: // ignore - return nullptr; + return; case Intrinsic::experimental_stackmap: visitStackmap(I); - return nullptr; + return; case Intrinsic::experimental_patchpoint_void: case Intrinsic::experimental_patchpoint_i64: visitPatchpoint(&I); - return nullptr; + return; case Intrinsic::experimental_gc_statepoint: LowerStatepoint(ImmutableStatepoint(&I)); - return nullptr; + return; case Intrinsic::experimental_gc_result: visitGCResult(cast<GCResultInst>(I)); - return nullptr; + return; case Intrinsic::experimental_gc_relocate: visitGCRelocate(cast<GCRelocateInst>(I)); - return nullptr; + return; case Intrinsic::instrprof_increment: llvm_unreachable("instrprof failed to lower an increment"); case Intrinsic::instrprof_value_profile: @@ -6182,7 +6614,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { .addFrameIndex(FI); } - return nullptr; + return; } case Intrinsic::localrecover: { @@ -6211,7 +6643,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDValue Add = DAG.getNode(ISD::ADD, sdl, PtrVT, FPVal, OffsetVal); setValue(&I, Add); - return nullptr; + return; } case Intrinsic::eh_exceptionpointer: @@ -6226,7 +6658,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { if (Intrinsic == Intrinsic::eh_exceptioncode) N = DAG.getZExtOrTrunc(N, getCurSDLoc(), MVT::i32); setValue(&I, N); - return nullptr; + return; } case Intrinsic::xray_customevent: { // Here we want to make sure that the intrinsic behaves as if it has a @@ -6234,7 +6666,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { // FIXME: Support other platforms later. const auto &Triple = DAG.getTarget().getTargetTriple(); if (Triple.getArch() != Triple::x86_64 || !Triple.isOSLinux()) - return nullptr; + return; SDLoc DL = getCurSDLoc(); SmallVector<SDValue, 8> Ops; @@ -6257,7 +6689,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDValue patchableNode = SDValue(MN, 0); DAG.setRoot(patchableNode); setValue(&I, patchableNode); - return nullptr; + return; } case Intrinsic::xray_typedevent: { // Here we want to make sure that the intrinsic behaves as if it has a @@ -6265,7 +6697,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { // FIXME: Support other platforms later. 
const auto &Triple = DAG.getTarget().getTargetTriple(); if (Triple.getArch() != Triple::x86_64 || !Triple.isOSLinux()) - return nullptr; + return; SDLoc DL = getCurSDLoc(); SmallVector<SDValue, 8> Ops; @@ -6292,14 +6724,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDValue patchableNode = SDValue(MN, 0); DAG.setRoot(patchableNode); setValue(&I, patchableNode); - return nullptr; + return; } case Intrinsic::experimental_deoptimize: LowerDeoptimizeCall(&I); - return nullptr; + return; - case Intrinsic::experimental_vector_reduce_fadd: - case Intrinsic::experimental_vector_reduce_fmul: + case Intrinsic::experimental_vector_reduce_v2_fadd: + case Intrinsic::experimental_vector_reduce_v2_fmul: case Intrinsic::experimental_vector_reduce_add: case Intrinsic::experimental_vector_reduce_mul: case Intrinsic::experimental_vector_reduce_and: @@ -6312,11 +6744,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::experimental_vector_reduce_fmax: case Intrinsic::experimental_vector_reduce_fmin: visitVectorReduce(I, Intrinsic); - return nullptr; + return; case Intrinsic::icall_branch_funnel: { SmallVector<SDValue, 16> Ops; - Ops.push_back(DAG.getRoot()); Ops.push_back(getValue(I.getArgOperand(0))); int64_t Offset; @@ -6359,20 +6790,34 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { Ops.push_back(T.Target); } + Ops.push_back(DAG.getRoot()); // Chain SDValue N(DAG.getMachineNode(TargetOpcode::ICALL_BRANCH_FUNNEL, getCurSDLoc(), MVT::Other, Ops), 0); DAG.setRoot(N); setValue(&I, N); HasTailCall = true; - return nullptr; + return; } case Intrinsic::wasm_landingpad_index: // Information this intrinsic contained has been transferred to // MachineFunction in SelectionDAGISel::PrepareEHLandingPad. We can safely // delete it now. 
- return nullptr; + return; + + case Intrinsic::aarch64_settag: + case Intrinsic::aarch64_settag_zero: { + const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo(); + bool ZeroMemory = Intrinsic == Intrinsic::aarch64_settag_zero; + SDValue Val = TSI.EmitTargetCodeForSetTag( + DAG, getCurSDLoc(), getRoot(), getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)), MachinePointerInfo(I.getArgOperand(0)), + ZeroMemory); + DAG.setRoot(Val); + setValue(&I, Val); + return; + } } } @@ -6400,6 +6845,12 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic( case Intrinsic::experimental_constrained_fma: Opcode = ISD::STRICT_FMA; break; + case Intrinsic::experimental_constrained_fptrunc: + Opcode = ISD::STRICT_FP_ROUND; + break; + case Intrinsic::experimental_constrained_fpext: + Opcode = ISD::STRICT_FP_EXTEND; + break; case Intrinsic::experimental_constrained_sqrt: Opcode = ISD::STRICT_FSQRT; break; @@ -6463,7 +6914,12 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic( SDVTList VTs = DAG.getVTList(ValueVTs); SDValue Result; - if (FPI.isUnaryOp()) + if (Opcode == ISD::STRICT_FP_ROUND) + Result = DAG.getNode(Opcode, sdl, VTs, + { Chain, getValue(FPI.getArgOperand(0)), + DAG.getTargetConstant(0, sdl, + TLI.getPointerTy(DAG.getDataLayout())) }); + else if (FPI.isUnaryOp()) Result = DAG.getNode(Opcode, sdl, VTs, { Chain, getValue(FPI.getArgOperand(0)) }); else if (FPI.isTernaryOp()) @@ -6476,6 +6932,13 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic( { Chain, getValue(FPI.getArgOperand(0)), getValue(FPI.getArgOperand(1)) }); + if (FPI.getExceptionBehavior() != + ConstrainedFPIntrinsic::ExceptionBehavior::ebIgnore) { + SDNodeFlags Flags; + Flags.setFPExcept(true); + Result->setFlags(Flags); + } + assert(Result.getNode()->getNumValues() == 2); SDValue OutChain = Result.getValue(1); DAG.setRoot(OutChain); @@ -6596,11 +7059,9 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, SwiftErrorVal = V; // We find the virtual register for the actual swifterror argument. // Instead of using the Value, we use the virtual register instead. - Entry.Node = DAG.getRegister(FuncInfo - .getOrCreateSwiftErrorVRegUseAt( - CS.getInstruction(), FuncInfo.MBB, V) - .first, - EVT(TLI.getPointerTy(DL))); + Entry.Node = DAG.getRegister( + SwiftError.getOrCreateVRegUseAt(CS.getInstruction(), FuncInfo.MBB, V), + EVT(TLI.getPointerTy(DL))); } Args.push_back(Entry); @@ -6641,13 +7102,9 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, if (SwiftErrorVal && TLI.supportSwiftError()) { // Get the last element of InVals. SDValue Src = CLI.InVals.back(); - unsigned VReg; bool CreatedVReg; - std::tie(VReg, CreatedVReg) = - FuncInfo.getOrCreateSwiftErrorVRegDefAt(CS.getInstruction()); + unsigned VReg = SwiftError.getOrCreateVRegDefAt( + CS.getInstruction(), FuncInfo.MBB, SwiftErrorVal); SDValue CopyNode = CLI.DAG.getCopyToReg(Result.second, CLI.DL, VReg, Src); - // We update the virtual register for the actual swifterror argument. - if (CreatedVReg) - FuncInfo.setCurrentSwiftErrorVReg(FuncInfo.MBB, SwiftErrorVal, VReg); DAG.setRoot(CopyNode); } } @@ -6995,10 +7452,6 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { return; } - MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); - computeUsesVAFloatArgument(I, MMI); - - const char *RenameFn = nullptr; if (Function *F = I.getCalledFunction()) { if (F->isDeclaration()) { // Is this an LLVM intrinsic or a target-specific intrinsic? 
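// ---- [Editor's aside: illustrative sketch, not part of the patch] ----
// The LowerCallTo changes above replace FuncInfo's swifterror bookkeeping
// with SwiftErrorValueTracking (getOrCreateVRegUseAt / getOrCreateVRegDefAt).
// Conceptually that is a map from (basic block, swifterror value) to the
// virtual register holding the error value in that block; the real tracker
// also propagates values between blocks. A minimal model, hypothetical names:
#include <map>
#include <utility>

class SwiftErrorVRegMap {
  std::map<std::pair<const void *, const void *>, unsigned> VRegs;
  unsigned NextVReg = 1;

public:
  // Reuse the vreg already recorded for (MBB, error value), or mint a
  // fresh one, which is roughly what both use- and def-sites need.
  unsigned getOrCreateVReg(const void *MBB, const void *ErrorValue) {
    auto Key = std::make_pair(MBB, ErrorValue);
    auto It = VRegs.find(Key);
    if (It != VRegs.end())
      return It->second;
    return VRegs[Key] = NextVReg++;
  }
};
// ---- [End of editor's aside] ----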
@@ -7008,9 +7461,8 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { IID = II->getIntrinsicID(F); if (IID) { - RenameFn = visitIntrinsicCall(I, IID); - if (!RenameFn) - return; + visitIntrinsicCall(I, IID); + return; } } @@ -7159,20 +7611,14 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { } } - SDValue Callee; - if (!RenameFn) - Callee = getValue(I.getCalledValue()); - else - Callee = DAG.getExternalSymbol( - RenameFn, - DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout())); - // Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't // have to do anything here to lower funclet bundles. assert(!I.hasOperandBundlesOtherThan( {LLVMContext::OB_deopt, LLVMContext::OB_funclet}) && "Cannot lower calls with arbitrary operand bundles!"); + SDValue Callee = getValue(I.getCalledValue()); + if (I.countOperandBundlesOfType(LLVMContext::OB_deopt)) LowerCallSiteWithDeoptBundle(&I, Callee, nullptr); else @@ -7328,8 +7774,9 @@ static SDValue getAddressForMemoryInput(SDValue Chain, const SDLoc &Location, MachineFunction &MF = DAG.getMachineFunction(); int SSFI = MF.getFrameInfo().CreateStackObject(TySize, Align, false); SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getFrameIndexTy(DL)); - Chain = DAG.getStore(Chain, Location, OpInfo.CallOperand, StackSlot, - MachinePointerInfo::getFixedStack(MF, SSFI)); + Chain = DAG.getTruncStore(Chain, Location, OpInfo.CallOperand, StackSlot, + MachinePointerInfo::getFixedStack(MF, SSFI), + TLI.getMemValueType(DL, Ty)); OpInfo.CallOperand = StackSlot; return Chain; @@ -7353,6 +7800,10 @@ static void GetRegistersForValue(SelectionDAG &DAG, const SDLoc &DL, SmallVector<unsigned, 4> Regs; const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); + // No work to do for memory operations. + if (OpInfo.ConstraintType == TargetLowering::C_Memory) + return; + // If this is a constraint for a single physreg, or a constraint for a // register class, find it. unsigned AssignedReg; @@ -7435,7 +7886,7 @@ static void GetRegistersForValue(SelectionDAG &DAG, const SDLoc &DL, for (; NumRegs; --NumRegs, ++I) { assert(I != RC->end() && "Ran out of registers to allocate!"); - auto R = (AssignedReg) ? *I : RegInfo.createVirtualRegister(RC); + Register R = AssignedReg ? Register(*I) : RegInfo.createVirtualRegister(RC); Regs.push_back(R); } @@ -7509,9 +7960,9 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints( DAG.getDataLayout(), DAG.getSubtarget().getRegisterInfo(), CS); - bool hasMemory = false; - - // Remember the HasSideEffect, AlignStack, AsmDialect, MayLoad and MayStore + // First Pass: Calculate HasSideEffects and ExtraFlags (AlignStack, + // AsmDialect, MayLoad, MayStore). + bool HasSideEffect = IA->hasSideEffects(); ExtraFlags ExtraInfo(CS); unsigned ArgNo = 0; // ArgNo - The argument of the CallInst. @@ -7527,7 +7978,14 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // Process the call argument. BasicBlocks are labels, currently appearing // only in asm's. 
- if (const BasicBlock *BB = dyn_cast<BasicBlock>(OpInfo.CallOperandVal)) { + const Instruction *I = CS.getInstruction(); + if (isa<CallBrInst>(I) && + (ArgNo - 1) >= (cast<CallBrInst>(I)->getNumArgOperands() - + cast<CallBrInst>(I)->getNumIndirectDests())) { + const auto *BA = cast<BlockAddress>(OpInfo.CallOperandVal); + EVT VT = TLI.getValueType(DAG.getDataLayout(), BA->getType(), true); + OpInfo.CallOperand = DAG.getTargetBlockAddress(BA, VT); + } else if (const auto *BB = dyn_cast<BasicBlock>(OpInfo.CallOperandVal)) { OpInfo.CallOperand = DAG.getBasicBlock(FuncInfo.MBBMap[BB]); } else { OpInfo.CallOperand = getValue(OpInfo.CallOperandVal); @@ -7554,8 +8012,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { OpInfo.ConstraintVT = MVT::Other; } - if (!hasMemory) - hasMemory = OpInfo.hasMemory(TLI); + if (!HasSideEffect) + HasSideEffect = OpInfo.hasMemory(TLI); // Determine if this InlineAsm MayLoad or MayStore based on the constraints. // FIXME: Could we compute this on OpInfo rather than T? @@ -7566,17 +8024,20 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { ExtraInfo.update(T); } - SDValue Chain, Flag; // We won't need to flush pending loads if this asm doesn't touch // memory and is nonvolatile. - if (hasMemory || IA->hasSideEffects()) - Chain = getRoot(); - else - Chain = DAG.getRoot(); + SDValue Flag, Chain = (HasSideEffect) ? getRoot() : DAG.getRoot(); + + bool IsCallBr = isa<CallBrInst>(CS.getInstruction()); + if (IsCallBr) { + // If this is a callbr we need to flush pending exports since inlineasm_br + // is a terminator. We need to do this before nodes are glued to + // the inlineasm_br node. + Chain = getControlRoot(); + } - // Second pass over the constraints: compute which constraint option to use - // and assign registers to constraints that want a specific physreg. + // Second pass over the constraints: compute which constraint option to use. for (SDISelAsmOperandInfo &OpInfo : ConstraintOperands) { // If this is an output operand with a matching input operand, look up the // matching input. If their types mismatch, e.g. one is an integer, the @@ -7612,28 +8073,6 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { OpInfo.isIndirect = true; } - // If this constraint is for a specific register, allocate it before - // anything else. - SDISelAsmOperandInfo &RefOpInfo = - OpInfo.isMatchingInputConstraint() - ? ConstraintOperands[OpInfo.getMatchedOperand()] - : OpInfo; - if (RefOpInfo.ConstraintType == TargetLowering::C_Register) - GetRegistersForValue(DAG, getCurSDLoc(), OpInfo, RefOpInfo); - } - - // Third pass - Loop over all of the operands, assigning virtual or physregs - // to register class operands. - for (SDISelAsmOperandInfo &OpInfo : ConstraintOperands) { - SDISelAsmOperandInfo &RefOpInfo = - OpInfo.isMatchingInputConstraint() - ? ConstraintOperands[OpInfo.getMatchedOperand()] - : OpInfo; - - // C_Register operands have already been allocated, Other/Memory don't need - // to be. - if (RefOpInfo.ConstraintType == TargetLowering::C_RegisterClass) - GetRegistersForValue(DAG, getCurSDLoc(), OpInfo, RefOpInfo); } // AsmNodeOperands - The operands for the ISD::INLINEASM node. @@ -7653,21 +8092,21 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { AsmNodeOperands.push_back(DAG.getTargetConstant( ExtraInfo.get(), getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout()))); - // Loop over all of the inputs, copying the operand values into the - // appropriate registers and processing the output regs. 
-  RegsForValue RetValRegs;
-
-  // IndirectStoresToEmit - The set of stores to emit after the inline asm node.
-  std::vector<std::pair<RegsForValue, Value *>> IndirectStoresToEmit;
-
+  // Third pass: Loop over operands to prepare DAG-level operands. As part of
+  // this, assign virtual and physical registers for inputs and output.
   for (SDISelAsmOperandInfo &OpInfo : ConstraintOperands) {
+    // Assign registers.
+    SDISelAsmOperandInfo &RefOpInfo =
+        OpInfo.isMatchingInputConstraint()
+            ? ConstraintOperands[OpInfo.getMatchedOperand()]
+            : OpInfo;
+    GetRegistersForValue(DAG, getCurSDLoc(), OpInfo, RefOpInfo);
+
     switch (OpInfo.Type) {
     case InlineAsm::isOutput:
-      if (OpInfo.ConstraintType != TargetLowering::C_RegisterClass &&
-          OpInfo.ConstraintType != TargetLowering::C_Register) {
-        // Memory output, or 'other' output (e.g. 'X' constraint).
-        assert(OpInfo.isIndirect && "Memory output must be indirect operand");
-
+      if (OpInfo.ConstraintType == TargetLowering::C_Memory ||
+          (OpInfo.ConstraintType == TargetLowering::C_Other &&
+           OpInfo.isIndirect)) {
         unsigned ConstraintID =
             TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode);
         assert(ConstraintID != InlineAsm::Constraint_Unknown &&
@@ -7680,38 +8119,27 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
                                                         MVT::i32));
         AsmNodeOperands.push_back(OpInfo.CallOperand);
         break;
-      }
-
-      // Otherwise, this is a register or register class output.
-
-      // Copy the output from the appropriate register.  Find a register that
-      // we can use.
-      if (OpInfo.AssignedRegs.Regs.empty()) {
-        emitInlineAsmError(
-            CS, "couldn't allocate output register for constraint '" +
-                    Twine(OpInfo.ConstraintCode) + "'");
-        return;
-      }
+      } else if ((OpInfo.ConstraintType == TargetLowering::C_Other &&
+                  !OpInfo.isIndirect) ||
+                 OpInfo.ConstraintType == TargetLowering::C_Register ||
+                 OpInfo.ConstraintType == TargetLowering::C_RegisterClass) {
+        // Otherwise, this outputs to a register (directly for C_Register /
+        // C_RegisterClass, and in a target-defined fashion for C_Other).
+        // Find a register that we can use.
+        if (OpInfo.AssignedRegs.Regs.empty()) {
+          emitInlineAsmError(
+              CS, "couldn't allocate output register for constraint '" +
+                      Twine(OpInfo.ConstraintCode) + "'");
+          return;
+        }
-      // If this is an indirect operand, store through the pointer after the
-      // asm.
-      if (OpInfo.isIndirect) {
-        IndirectStoresToEmit.push_back(std::make_pair(OpInfo.AssignedRegs,
-                                                      OpInfo.CallOperandVal));
-      } else {
-        // This is the result value of the call.
-        assert(!CS.getType()->isVoidTy() && "Bad inline asm!");
-        // Concatenate this output onto the outputs list.
-        RetValRegs.append(OpInfo.AssignedRegs);
+        // Add information to the INLINEASM node to know that this register is
+        // set.
+        OpInfo.AssignedRegs.AddInlineAsmOperands(
+            OpInfo.isEarlyClobber ? InlineAsm::Kind_RegDefEarlyClobber
+                                  : InlineAsm::Kind_RegDef,
+            false, 0, getCurSDLoc(), DAG, AsmNodeOperands);
       }
-
-      // Add information to the INLINEASM node to know that this register is
-      // set.
-      OpInfo.AssignedRegs
-          .AddInlineAsmOperands(OpInfo.isEarlyClobber
-                                    ? InlineAsm::Kind_RegDefEarlyClobber
-                                    : InlineAsm::Kind_RegDef,
-                                false, 0, getCurSDLoc(), DAG, AsmNodeOperands);
       break;

     case InlineAsm::isInput: {
ISD::INLINEASM_BR : ISD::INLINEASM; + Chain = DAG.getNode(ISDOpc, getCurSDLoc(), DAG.getVTList(MVT::Other, MVT::Glue), AsmNodeOperands); Flag = Chain.getValue(1); - // If this asm returns a register value, copy the result from that register - // and set it as the value of the call. - if (!RetValRegs.Regs.empty()) { - SDValue Val = RetValRegs.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), - Chain, &Flag, CS.getInstruction()); - - llvm::Type *CSResultType = CS.getType(); - unsigned numRet; - ArrayRef<Type *> ResultTypes; - SmallVector<SDValue, 1> ResultValues(1); - if (StructType *StructResult = dyn_cast<StructType>(CSResultType)) { - numRet = StructResult->getNumElements(); - assert(Val->getNumOperands() == numRet && - "Mismatch in number of output operands in asm result"); - ResultTypes = StructResult->elements(); - ArrayRef<SDUse> ValueUses = Val->ops(); - ResultValues.resize(numRet); - std::transform(ValueUses.begin(), ValueUses.end(), ResultValues.begin(), - [](const SDUse &u) -> SDValue { return u.get(); }); - } else { - numRet = 1; - ResultValues[0] = Val; - ResultTypes = makeArrayRef(CSResultType); - } - SmallVector<EVT, 1> ResultVTs(numRet); - for (unsigned i = 0; i < numRet; i++) { - EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), ResultTypes[i]); - SDValue Val = ResultValues[i]; - assert(ResultTypes[i]->isSized() && "Unexpected unsized type"); - // If the type of the inline asm call site return value is different but - // has same size as the type of the asm output bitcast it. One example - // of this is for vectors with different width / number of elements. - // This can happen for register classes that can contain multiple - // different value types. The preg or vreg allocated may not have the - // same VT as was expected. - // - // This can also happen for a return value that disagrees with the - // register class it is put in, eg. a double in a general-purpose - // register on a 32-bit machine. - if (ResultVT != Val.getValueType() && - ResultVT.getSizeInBits() == Val.getValueSizeInBits()) - Val = DAG.getNode(ISD::BITCAST, getCurSDLoc(), ResultVT, Val); - else if (ResultVT != Val.getValueType() && ResultVT.isInteger() && - Val.getValueType().isInteger()) { - // If a result value was tied to an input value, the computed result - // may have a wider width than the expected result. Extract the - // relevant portion. - Val = DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), ResultVT, Val); - } + // Do additional work to generate outputs. - assert(ResultVT == Val.getValueType() && "Asm result value mismatch!"); - ResultVTs[i] = ResultVT; - ResultValues[i] = Val; - } + SmallVector<EVT, 1> ResultVTs; + SmallVector<SDValue, 1> ResultValues; + SmallVector<SDValue, 8> OutChains; - Val = DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), - DAG.getVTList(ResultVTs), ResultValues); - setValue(CS.getInstruction(), Val); - // Don't need to use this as a chain in this case. 
-    if (!IA->hasSideEffects() && !hasMemory && IndirectStoresToEmit.empty())
-      return;
-  }
+  llvm::Type *CSResultType = CS.getType();
+  ArrayRef<Type *> ResultTypes;
+  if (StructType *StructResult = dyn_cast<StructType>(CSResultType))
+    ResultTypes = StructResult->elements();
+  else if (!CSResultType->isVoidTy())
+    ResultTypes = makeArrayRef(CSResultType);
+
+  auto CurResultType = ResultTypes.begin();
+  auto handleRegAssign = [&](SDValue V) {
+    assert(CurResultType != ResultTypes.end() && "Unexpected value");
+    assert((*CurResultType)->isSized() && "Unexpected unsized type");
+    EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), *CurResultType);
+    ++CurResultType;
+    // If the type of the inline asm call site return value is different but
+    // has the same size as the type of the asm output, bitcast it. One
+    // example of this is for vectors with different width / number of
+    // elements. This can happen for register classes that can contain
+    // multiple different value types. The preg or vreg allocated may not
+    // have the same VT as was expected.
+    //
+    // This can also happen for a return value that disagrees with the
+    // register class it is put in, e.g. a double in a general-purpose
+    // register on a 32-bit machine.
+    if (ResultVT != V.getValueType() &&
+        ResultVT.getSizeInBits() == V.getValueSizeInBits())
+      V = DAG.getNode(ISD::BITCAST, getCurSDLoc(), ResultVT, V);
+    else if (ResultVT != V.getValueType() && ResultVT.isInteger() &&
+             V.getValueType().isInteger()) {
+      // If a result value was tied to an input value, the computed result
+      // may have a wider width than the expected result. Extract the
+      // relevant portion.
+      V = DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), ResultVT, V);
+    }
+    assert(ResultVT == V.getValueType() && "Asm result value mismatch!");
+    ResultVTs.push_back(ResultVT);
+    ResultValues.push_back(V);
+  };

-  std::vector<std::pair<SDValue, const Value *>> StoresToEmit;
+  // Deal with output operands.
+  for (SDISelAsmOperandInfo &OpInfo : ConstraintOperands) {
+    if (OpInfo.Type == InlineAsm::isOutput) {
+      SDValue Val;
+      // Skip trivial output operands.
+      if (OpInfo.AssignedRegs.Regs.empty())
+        continue;

-  // Process indirect outputs, first output all of the flagged copies out of
-  // physregs.
-  for (unsigned i = 0, e = IndirectStoresToEmit.size(); i != e; ++i) {
-    RegsForValue &OutRegs = IndirectStoresToEmit[i].first;
-    const Value *Ptr = IndirectStoresToEmit[i].second;
-    SDValue OutVal = OutRegs.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(),
-                                             Chain, &Flag, IA);
-    StoresToEmit.push_back(std::make_pair(OutVal, Ptr));
+      switch (OpInfo.ConstraintType) {
+      case TargetLowering::C_Register:
+      case TargetLowering::C_RegisterClass:
+        Val = OpInfo.AssignedRegs.getCopyFromRegs(
+            DAG, FuncInfo, getCurSDLoc(), Chain, &Flag, CS.getInstruction());
+        break;
+      case TargetLowering::C_Other:
+        Val = TLI.LowerAsmOutputForConstraint(Chain, Flag, getCurSDLoc(),
+                                              OpInfo, DAG);
+        break;
+      case TargetLowering::C_Memory:
+        break; // Already handled.
+      case TargetLowering::C_Unknown:
+        assert(false && "Unexpected unknown constraint");
+      }
+
+      // Indirect outputs manifest as stores. Record output chains.
+      if (OpInfo.isIndirect) {
+        const Value *Ptr = OpInfo.CallOperandVal;
+        assert(Ptr && "Expected value CallOperandVal for indirect asm operand");
+        SDValue Store = DAG.getStore(Chain, getCurSDLoc(), Val, getValue(Ptr),
+                                     MachinePointerInfo(Ptr));
+        OutChains.push_back(Store);
+      } else {
+        // Register outputs become the call's result values; collect them.
+ assert(!CS.getType()->isVoidTy() && "Bad inline asm!"); + if (Val.getOpcode() == ISD::MERGE_VALUES) { + for (const SDValue &V : Val->op_values()) + handleRegAssign(V); + } else + handleRegAssign(Val); + } + } } - // Emit the non-flagged stores from the physregs. - SmallVector<SDValue, 8> OutChains; - for (unsigned i = 0, e = StoresToEmit.size(); i != e; ++i) { - SDValue Val = DAG.getStore(Chain, getCurSDLoc(), StoresToEmit[i].first, - getValue(StoresToEmit[i].second), - MachinePointerInfo(StoresToEmit[i].second)); - OutChains.push_back(Val); + // Set results. + if (!ResultValues.empty()) { + assert(CurResultType == ResultTypes.end() && + "Mismatch in number of ResultTypes"); + assert(ResultValues.size() == ResultTypes.size() && + "Mismatch in number of output operands in asm result"); + + SDValue V = DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), + DAG.getVTList(ResultVTs), ResultValues); + setValue(CS.getInstruction(), V); } + // Collect store chains. if (!OutChains.empty()) Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, OutChains); - DAG.setRoot(Chain); + // Only Update Root if inline assembly has a memory effect. + if (ResultValues.empty() || HasSideEffect || !OutChains.empty() || IsCallBr) + DAG.setRoot(Chain); } void SelectionDAGBuilder::emitInlineAsmError(ImmutableCallSite CS, @@ -7989,12 +8436,16 @@ void SelectionDAGBuilder::visitVAStart(const CallInst &I) { void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); const DataLayout &DL = DAG.getDataLayout(); - SDValue V = DAG.getVAArg(TLI.getValueType(DAG.getDataLayout(), I.getType()), - getCurSDLoc(), getRoot(), getValue(I.getOperand(0)), - DAG.getSrcValue(I.getOperand(0)), - DL.getABITypeAlignment(I.getType())); - setValue(&I, V); + SDValue V = DAG.getVAArg( + TLI.getMemValueType(DAG.getDataLayout(), I.getType()), getCurSDLoc(), + getRoot(), getValue(I.getOperand(0)), DAG.getSrcValue(I.getOperand(0)), + DL.getABITypeAlignment(I.getType())); DAG.setRoot(V.getValue(1)); + + if (I.getType()->isPointerTy()) + V = DAG.getPtrExtOrTrunc( + V, getCurSDLoc(), TLI.getValueType(DAG.getDataLayout(), I.getType())); + setValue(&I, V); } void SelectionDAGBuilder::visitVAEnd(const CallInst &I) { @@ -8021,7 +8472,7 @@ SDValue SelectionDAGBuilder::lowerRangeToAssertZExt(SelectionDAG &DAG, return Op; ConstantRange CR = getConstantRangeFromMetadata(*Range); - if (CR.isFullSet() || CR.isEmptySet() || CR.isWrappedSet()) + if (CR.isFullSet() || CR.isEmptySet() || CR.isUpperWrapped()) return Op; APInt Lo = CR.getUnsignedMin(); @@ -8058,7 +8509,7 @@ SDValue SelectionDAGBuilder::lowerRangeToAssertZExt(SelectionDAG &DAG, /// convention or require stack pointer adjustment. Only a subset of the /// intrinsic's operands need to participate in the calling convention. void SelectionDAGBuilder::populateCallLoweringInfo( - TargetLowering::CallLoweringInfo &CLI, ImmutableCallSite CS, + TargetLowering::CallLoweringInfo &CLI, const CallBase *Call, unsigned ArgIdx, unsigned NumArgs, SDValue Callee, Type *ReturnTy, bool IsPatchPoint) { TargetLowering::ArgListTy Args; @@ -8068,21 +8519,21 @@ void SelectionDAGBuilder::populateCallLoweringInfo( // Attributes for args start at offset 1, after the return attribute. 
   for (unsigned ArgI = ArgIdx, ArgE = ArgIdx + NumArgs; ArgI != ArgE; ++ArgI) {
-    const Value *V = CS->getOperand(ArgI);
+    const Value *V = Call->getOperand(ArgI);

     assert(!V->getType()->isEmptyTy() && "Empty type passed to intrinsic.");

     TargetLowering::ArgListEntry Entry;
     Entry.Node = getValue(V);
     Entry.Ty = V->getType();
-    Entry.setAttributes(&CS, ArgI);
+    Entry.setAttributes(Call, ArgI);
     Args.push_back(Entry);
   }

   CLI.setDebugLoc(getCurSDLoc())
       .setChain(getRoot())
-      .setCallee(CS.getCallingConv(), ReturnTy, Callee, std::move(Args))
-      .setDiscardResult(CS->use_empty())
+      .setCallee(Call->getCallingConv(), ReturnTy, Callee, std::move(Args))
+      .setDiscardResult(Call->use_empty())
       .setIsPatchPoint(IsPatchPoint);
 }

@@ -8093,7 +8544,7 @@ void SelectionDAGBuilder::populateCallLoweringInfo(
/// avoid constant materialization and register allocation.
///
/// FrameIndex operands are converted to TargetFrameIndex so that ISEL does not
-/// generate addess computation nodes, and so ExpandISelPseudo can convert the
+/// generate address computation nodes, and so FinalizeISel can convert the
/// TargetFrameIndex into a DirectMemRefOp StackMap location. This avoids
/// address materialization and register allocation, but may also be required
/// for correctness. If a StackMap (or PatchPoint) intrinsic directly uses an
@@ -8226,8 +8677,8 @@ void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS,
                   IsAnyRegCC ? Type::getVoidTy(*DAG.getContext()) : CS->getType();

   TargetLowering::CallLoweringInfo CLI(DAG);
-  populateCallLoweringInfo(CLI, CS, NumMetaOpers, NumCallArgs, Callee, ReturnTy,
-                           true);
+  populateCallLoweringInfo(CLI, cast<CallBase>(CS.getInstruction()),
+                           NumMetaOpers, NumCallArgs, Callee, ReturnTy, true);
   std::pair<SDValue, SDValue> Result = lowerInvokable(CLI, EHPadBB);

   SDNode *CallEnd = Result.second.getNode();
@@ -8351,15 +8802,17 @@ void SelectionDAGBuilder::visitVectorReduce(const CallInst &I,
     FMF = I.getFastMathFlags();

   switch (Intrinsic) {
-  case Intrinsic::experimental_vector_reduce_fadd:
-    if (FMF.isFast())
-      Res = DAG.getNode(ISD::VECREDUCE_FADD, dl, VT, Op2);
+  case Intrinsic::experimental_vector_reduce_v2_fadd:
+    if (FMF.allowReassoc())
+      Res = DAG.getNode(ISD::FADD, dl, VT, Op1,
+                        DAG.getNode(ISD::VECREDUCE_FADD, dl, VT, Op2));
     else
       Res = DAG.getNode(ISD::VECREDUCE_STRICT_FADD, dl, VT, Op1, Op2);
     break;
-  case Intrinsic::experimental_vector_reduce_fmul:
-    if (FMF.isFast())
-      Res = DAG.getNode(ISD::VECREDUCE_FMUL, dl, VT, Op2);
+  case Intrinsic::experimental_vector_reduce_v2_fmul:
+    if (FMF.allowReassoc())
+      Res = DAG.getNode(ISD::FMUL, dl, VT, Op1,
+                        DAG.getNode(ISD::VECREDUCE_FMUL, dl, VT, Op2));
     else
       Res = DAG.getNode(ISD::VECREDUCE_STRICT_FMUL, dl, VT, Op1, Op2);
     break;
@@ -8433,8 +8886,11 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {

   if (CLI.IsPostTypeLegalization) {
     // If we are lowering a libcall after legalization, split the return type.
-    SmallVector<EVT, 4> OldRetTys = std::move(RetTys);
-    SmallVector<uint64_t, 4> OldOffsets = std::move(Offsets);
+    SmallVector<EVT, 4> OldRetTys;
+    SmallVector<uint64_t, 4> OldOffsets;
+    RetTys.swap(OldRetTys);
+    Offsets.swap(OldOffsets);
+
     for (size_t i = 0, e = OldRetTys.size(); i != e; ++i) {
       EVT RetVT = OldRetTys[i];
       uint64_t Offset = OldOffsets[i];
@@ -8489,7 +8945,15 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
     // points into the callers stack frame.
      CLI.IsTailCall = false;
  } else {
+    bool NeedsRegBlock = functionArgumentNeedsConsecutiveRegisters(
+        CLI.RetTy, CLI.CallConv, CLI.IsVarArg);
    for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
+      ISD::ArgFlagsTy Flags;
+      if (NeedsRegBlock) {
+        Flags.setInConsecutiveRegs();
+        if (I == RetTys.size() - 1)
+          Flags.setInConsecutiveRegsLast();
+      }
      EVT VT = RetTys[I];
      MVT RegisterVT = getRegisterTypeForCallingConv(CLI.RetTy->getContext(),
                                                     CLI.CallConv, VT);
@@ -8497,9 +8961,15 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
                                                       CLI.CallConv, VT);
      for (unsigned i = 0; i != NumRegs; ++i) {
        ISD::InputArg MyFlags;
+        MyFlags.Flags = Flags;
        MyFlags.VT = RegisterVT;
        MyFlags.ArgVT = VT;
        MyFlags.Used = CLI.IsReturnValueUsed;
+        if (CLI.RetTy->isPointerTy()) {
+          MyFlags.Flags.setPointer();
+          MyFlags.Flags.setPointerAddrSpace(
+              cast<PointerType>(CLI.RetTy)->getAddressSpace());
+        }
        if (CLI.RetSExt)
          MyFlags.Flags.setSExt();
        if (CLI.RetZExt)
@@ -8550,6 +9020,11 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
    // specify the alignment it wants.
    unsigned OriginalAlignment = getABIAlignmentForCallingConv(ArgTy, DL);
 
+    if (Args[i].Ty->isPointerTy()) {
+      Flags.setPointer();
+      Flags.setPointerAddrSpace(
+          cast<PointerType>(Args[i].Ty)->getAddressSpace());
+    }
    if (Args[i].IsZExt)
      Flags.setZExt();
    if (Args[i].IsSExt)
@@ -8587,8 +9062,11 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
    if (Args[i].IsByVal || Args[i].IsInAlloca) {
      PointerType *Ty = cast<PointerType>(Args[i].Ty);
      Type *ElementTy = Ty->getElementType();
-      Flags.setByValSize(DL.getTypeAllocSize(ElementTy));
-      // For ByVal, alignment should come from FE. BE will guess if this
+
+      unsigned FrameSize = DL.getTypeAllocSize(
+          Args[i].ByValType ? Args[i].ByValType : ElementTy);
+      Flags.setByValSize(FrameSize);
+
+      // For ByVal, alignment should come from FE. BE will guess if this
      // info is not there but there are cases it cannot get right.
      unsigned FrameAlign;
      if (Args[i].Alignment)
@@ -8619,8 +9097,11 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
    // for now.
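      // Illustrative declaration (hypothetical, not from this file):
      //   declare i8* @pass_through(i8* returned %p)
      // 'returned' promises the callee hands %p back unchanged, letting the
      // caller reuse the argument value in place of the call result once the
      // checks below succeed.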
if (Args[i].IsReturned && !Op.getValueType().isVector() && CanLowerReturn) { - assert(CLI.RetTy == Args[i].Ty && RetTys.size() == NumValues && - "unexpected use of 'returned'"); + assert((CLI.RetTy == Args[i].Ty || + (CLI.RetTy->isPointerTy() && Args[i].Ty->isPointerTy() && + CLI.RetTy->getPointerAddressSpace() == + Args[i].Ty->getPointerAddressSpace())) && + RetTys.size() == NumValues && "unexpected use of 'returned'"); // Before passing 'returned' to the target lowering code, ensure that // either the register MVT and the actual EVT are the same size or that // the return value and argument are extended in the same way; in these @@ -9023,7 +9504,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { unsigned PartBase = 0; Type *FinalType = Arg.getType(); if (Arg.hasAttribute(Attribute::ByVal)) - FinalType = cast<PointerType>(FinalType)->getElementType(); + FinalType = Arg.getParamByValType(); bool NeedsRegBlock = TLI->functionArgumentNeedsConsecutiveRegisters( FinalType, F.getCallingConv(), F.isVarArg()); for (unsigned Value = 0, NumValues = ValueVTs.size(); @@ -9038,6 +9519,11 @@ void SelectionDAGISel::LowerArguments(const Function &F) { unsigned OriginalAlignment = TLI->getABIAlignmentForCallingConv(ArgTy, DL); + if (Arg.getType()->isPointerTy()) { + Flags.setPointer(); + Flags.setPointerAddrSpace( + cast<PointerType>(Arg.getType())->getAddressSpace()); + } if (Arg.hasAttribute(Attribute::ZExt)) Flags.setZExt(); if (Arg.hasAttribute(Attribute::SExt)) @@ -9078,11 +9564,14 @@ void SelectionDAGISel::LowerArguments(const Function &F) { Flags.setByVal(); } if (Flags.isByVal() || Flags.isInAlloca()) { - PointerType *Ty = cast<PointerType>(Arg.getType()); - Type *ElementTy = Ty->getElementType(); - Flags.setByValSize(DL.getTypeAllocSize(ElementTy)); - // For ByVal, alignment should be passed from FE. BE will guess if - // this info is not there but there are cases it cannot get right. + Type *ElementTy = Arg.getParamByValType(); + + // For ByVal, size and alignment should be passed from FE. BE will + // guess if this info is not there but there are cases it cannot get + // right. + unsigned FrameSize = DL.getTypeAllocSize(Arg.getParamByValType()); + Flags.setByValSize(FrameSize); + unsigned FrameAlign; if (Arg.getParamAlignment()) FrameAlign = Arg.getParamAlignment(); @@ -9263,17 +9752,16 @@ void SelectionDAGISel::LowerArguments(const Function &F) { if (Res.getOpcode() == ISD::CopyFromReg && isSwiftErrorArg) { unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg(); if (TargetRegisterInfo::isVirtualRegister(Reg)) - FuncInfo->setCurrentSwiftErrorVReg(FuncInfo->MBB, - FuncInfo->SwiftErrorArg, Reg); + SwiftError->setCurrentVReg(FuncInfo->MBB, SwiftError->getFunctionArg(), + Reg); } // If this argument is live outside of the entry block, insert a copy from // wherever we got it to the vreg that other BB's will reference it as. - if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::CopyFromReg) { + if (Res.getOpcode() == ISD::CopyFromReg) { // If we can, though, try to skip creating an unnecessary vreg. // FIXME: This isn't very clean... it would be nice to make this more - // general. It's also subtly incompatible with the hacks FastISel - // uses with vregs. + // general. 
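      // Sketch with assumed values: if the argument lowered to
      // CopyFromReg(Chain, Register:i64 %5), then recording
      //   FuncInfo->ValueMap[&Arg] = %5;
      // lets other blocks read the argument from %5 directly instead of
      // copying it into a second entry-block virtual register.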
unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg(); if (TargetRegisterInfo::isVirtualRegister(Reg)) { FuncInfo->ValueMap[&Arg] = Reg; @@ -9354,7 +9842,7 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { if (const Constant *C = dyn_cast<Constant>(PHIOp)) { unsigned &RegOut = ConstantsOut[C]; if (RegOut == 0) { - RegOut = FuncInfo.CreateRegs(C->getType()); + RegOut = FuncInfo.CreateRegs(C); CopyValueToVirtualRegister(C, RegOut); } Reg = RegOut; @@ -9367,7 +9855,7 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { assert(isa<AllocaInst>(PHIOp) && FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(PHIOp)) && "Didn't codegen value into a register!??"); - Reg = FuncInfo.CreateRegs(PHIOp->getType()); + Reg = FuncInfo.CreateRegs(PHIOp); CopyValueToVirtualRegister(PHIOp, Reg); } } @@ -9432,450 +9920,6 @@ void SelectionDAGBuilder::updateDAGForMaybeTailCall(SDValue MaybeTC) { HasTailCall = true; } -uint64_t -SelectionDAGBuilder::getJumpTableRange(const CaseClusterVector &Clusters, - unsigned First, unsigned Last) const { - assert(Last >= First); - const APInt &LowCase = Clusters[First].Low->getValue(); - const APInt &HighCase = Clusters[Last].High->getValue(); - assert(LowCase.getBitWidth() == HighCase.getBitWidth()); - - // FIXME: A range of consecutive cases has 100% density, but only requires one - // comparison to lower. We should discriminate against such consecutive ranges - // in jump tables. - - return (HighCase - LowCase).getLimitedValue((UINT64_MAX - 1) / 100) + 1; -} - -uint64_t SelectionDAGBuilder::getJumpTableNumCases( - const SmallVectorImpl<unsigned> &TotalCases, unsigned First, - unsigned Last) const { - assert(Last >= First); - assert(TotalCases[Last] >= TotalCases[First]); - uint64_t NumCases = - TotalCases[Last] - (First == 0 ? 0 : TotalCases[First - 1]); - return NumCases; -} - -bool SelectionDAGBuilder::buildJumpTable(const CaseClusterVector &Clusters, - unsigned First, unsigned Last, - const SwitchInst *SI, - MachineBasicBlock *DefaultMBB, - CaseCluster &JTCluster) { - assert(First <= Last); - - auto Prob = BranchProbability::getZero(); - unsigned NumCmps = 0; - std::vector<MachineBasicBlock*> Table; - DenseMap<MachineBasicBlock*, BranchProbability> JTProbs; - - // Initialize probabilities in JTProbs. - for (unsigned I = First; I <= Last; ++I) - JTProbs[Clusters[I].MBB] = BranchProbability::getZero(); - - for (unsigned I = First; I <= Last; ++I) { - assert(Clusters[I].Kind == CC_Range); - Prob += Clusters[I].Prob; - const APInt &Low = Clusters[I].Low->getValue(); - const APInt &High = Clusters[I].High->getValue(); - NumCmps += (Low == High) ? 1 : 2; - if (I != First) { - // Fill the gap between this and the previous cluster. - const APInt &PreviousHigh = Clusters[I - 1].High->getValue(); - assert(PreviousHigh.slt(Low)); - uint64_t Gap = (Low - PreviousHigh).getLimitedValue() - 1; - for (uint64_t J = 0; J < Gap; J++) - Table.push_back(DefaultMBB); - } - uint64_t ClusterSize = (High - Low).getLimitedValue() + 1; - for (uint64_t J = 0; J < ClusterSize; ++J) - Table.push_back(Clusters[I].MBB); - JTProbs[Clusters[I].MBB] += Clusters[I].Prob; - } - - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - unsigned NumDests = JTProbs.size(); - if (TLI.isSuitableForBitTests( - NumDests, NumCmps, Clusters[First].Low->getValue(), - Clusters[Last].High->getValue(), DAG.getDataLayout())) { - // Clusters[First..Last] should be lowered as bit tests instead. 
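    // Illustrative numbers (assumed 64-bit target): four cases {0, 3, 8, 40}
    // that all branch to one destination give NumDests = 1 and NumCmps = 4
    // over a 41-value range; that fits in a word, so a single bit test
    // against (1ULL<<0 | 1ULL<<3 | 1ULL<<8 | 1ULL<<40) beats a 41-entry table.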
- return false; - } - - // Create the MBB that will load from and jump through the table. - // Note: We create it here, but it's not inserted into the function yet. - MachineFunction *CurMF = FuncInfo.MF; - MachineBasicBlock *JumpTableMBB = - CurMF->CreateMachineBasicBlock(SI->getParent()); - - // Add successors. Note: use table order for determinism. - SmallPtrSet<MachineBasicBlock *, 8> Done; - for (MachineBasicBlock *Succ : Table) { - if (Done.count(Succ)) - continue; - addSuccessorWithProb(JumpTableMBB, Succ, JTProbs[Succ]); - Done.insert(Succ); - } - JumpTableMBB->normalizeSuccProbs(); - - unsigned JTI = CurMF->getOrCreateJumpTableInfo(TLI.getJumpTableEncoding()) - ->createJumpTableIndex(Table); - - // Set up the jump table info. - JumpTable JT(-1U, JTI, JumpTableMBB, nullptr); - JumpTableHeader JTH(Clusters[First].Low->getValue(), - Clusters[Last].High->getValue(), SI->getCondition(), - nullptr, false); - JTCases.emplace_back(std::move(JTH), std::move(JT)); - - JTCluster = CaseCluster::jumpTable(Clusters[First].Low, Clusters[Last].High, - JTCases.size() - 1, Prob); - return true; -} - -void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters, - const SwitchInst *SI, - MachineBasicBlock *DefaultMBB) { -#ifndef NDEBUG - // Clusters must be non-empty, sorted, and only contain Range clusters. - assert(!Clusters.empty()); - for (CaseCluster &C : Clusters) - assert(C.Kind == CC_Range); - for (unsigned i = 1, e = Clusters.size(); i < e; ++i) - assert(Clusters[i - 1].High->getValue().slt(Clusters[i].Low->getValue())); -#endif - - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (!TLI.areJTsAllowed(SI->getParent()->getParent())) - return; - - const int64_t N = Clusters.size(); - const unsigned MinJumpTableEntries = TLI.getMinimumJumpTableEntries(); - const unsigned SmallNumberOfEntries = MinJumpTableEntries / 2; - - if (N < 2 || N < MinJumpTableEntries) - return; - - // TotalCases[i]: Total nbr of cases in Clusters[0..i]. - SmallVector<unsigned, 8> TotalCases(N); - for (unsigned i = 0; i < N; ++i) { - const APInt &Hi = Clusters[i].High->getValue(); - const APInt &Lo = Clusters[i].Low->getValue(); - TotalCases[i] = (Hi - Lo).getLimitedValue() + 1; - if (i != 0) - TotalCases[i] += TotalCases[i - 1]; - } - - // Cheap case: the whole range may be suitable for jump table. - uint64_t Range = getJumpTableRange(Clusters,0, N - 1); - uint64_t NumCases = getJumpTableNumCases(TotalCases, 0, N - 1); - assert(NumCases < UINT64_MAX / 100); - assert(Range >= NumCases); - if (TLI.isSuitableForJumpTable(SI, NumCases, Range)) { - CaseCluster JTCluster; - if (buildJumpTable(Clusters, 0, N - 1, SI, DefaultMBB, JTCluster)) { - Clusters[0] = JTCluster; - Clusters.resize(1); - return; - } - } - - // The algorithm below is not suitable for -O0. - if (TM.getOptLevel() == CodeGenOpt::None) - return; - - // Split Clusters into minimum number of dense partitions. The algorithm uses - // the same idea as Kannan & Proebsting "Correction to 'Producing Good Code - // for the Case Statement'" (1994), but builds the MinPartitions array in - // reverse order to make it easier to reconstruct the partitions in ascending - // order. In the choice between two optimal partitionings, it picks the one - // which yields more jump tables. - - // MinPartitions[i] is the minimum nbr of partitions of Clusters[i..N-1]. - SmallVector<unsigned, 8> MinPartitions(N); - // LastElement[i] is the last element of the partition starting at i. 
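  // Worked example (assumed density): for N = 3 where only Clusters[1..2]
  // are dense enough to share one table, the backward scan below yields
  // MinPartitions = {2, 1, 1} and LastElement = {0, 2, 2}: Clusters[0]
  // stands alone while Clusters[1..2] form a single jump table.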
- SmallVector<unsigned, 8> LastElement(N); - // PartitionsScore[i] is used to break ties when choosing between two - // partitionings resulting in the same number of partitions. - SmallVector<unsigned, 8> PartitionsScore(N); - // For PartitionsScore, a small number of comparisons is considered as good as - // a jump table and a single comparison is considered better than a jump - // table. - enum PartitionScores : unsigned { - NoTable = 0, - Table = 1, - FewCases = 1, - SingleCase = 2 - }; - - // Base case: There is only one way to partition Clusters[N-1]. - MinPartitions[N - 1] = 1; - LastElement[N - 1] = N - 1; - PartitionsScore[N - 1] = PartitionScores::SingleCase; - - // Note: loop indexes are signed to avoid underflow. - for (int64_t i = N - 2; i >= 0; i--) { - // Find optimal partitioning of Clusters[i..N-1]. - // Baseline: Put Clusters[i] into a partition on its own. - MinPartitions[i] = MinPartitions[i + 1] + 1; - LastElement[i] = i; - PartitionsScore[i] = PartitionsScore[i + 1] + PartitionScores::SingleCase; - - // Search for a solution that results in fewer partitions. - for (int64_t j = N - 1; j > i; j--) { - // Try building a partition from Clusters[i..j]. - uint64_t Range = getJumpTableRange(Clusters, i, j); - uint64_t NumCases = getJumpTableNumCases(TotalCases, i, j); - assert(NumCases < UINT64_MAX / 100); - assert(Range >= NumCases); - if (TLI.isSuitableForJumpTable(SI, NumCases, Range)) { - unsigned NumPartitions = 1 + (j == N - 1 ? 0 : MinPartitions[j + 1]); - unsigned Score = j == N - 1 ? 0 : PartitionsScore[j + 1]; - int64_t NumEntries = j - i + 1; - - if (NumEntries == 1) - Score += PartitionScores::SingleCase; - else if (NumEntries <= SmallNumberOfEntries) - Score += PartitionScores::FewCases; - else if (NumEntries >= MinJumpTableEntries) - Score += PartitionScores::Table; - - // If this leads to fewer partitions, or to the same number of - // partitions with better score, it is a better partitioning. - if (NumPartitions < MinPartitions[i] || - (NumPartitions == MinPartitions[i] && Score > PartitionsScore[i])) { - MinPartitions[i] = NumPartitions; - LastElement[i] = j; - PartitionsScore[i] = Score; - } - } - } - } - - // Iterate over the partitions, replacing some with jump tables in-place. - unsigned DstIndex = 0; - for (unsigned First = 0, Last; First < N; First = Last + 1) { - Last = LastElement[First]; - assert(Last >= First); - assert(DstIndex <= First); - unsigned NumClusters = Last - First + 1; - - CaseCluster JTCluster; - if (NumClusters >= MinJumpTableEntries && - buildJumpTable(Clusters, First, Last, SI, DefaultMBB, JTCluster)) { - Clusters[DstIndex++] = JTCluster; - } else { - for (unsigned I = First; I <= Last; ++I) - std::memmove(&Clusters[DstIndex++], &Clusters[I], sizeof(Clusters[I])); - } - } - Clusters.resize(DstIndex); -} - -bool SelectionDAGBuilder::buildBitTests(CaseClusterVector &Clusters, - unsigned First, unsigned Last, - const SwitchInst *SI, - CaseCluster &BTCluster) { - assert(First <= Last); - if (First == Last) - return false; - - BitVector Dests(FuncInfo.MF->getNumBlockIDs()); - unsigned NumCmps = 0; - for (int64_t I = First; I <= Last; ++I) { - assert(Clusters[I].Kind == CC_Range); - Dests.set(Clusters[I].MBB->getNumber()); - NumCmps += (Clusters[I].Low == Clusters[I].High) ? 
1 : 2; - } - unsigned NumDests = Dests.count(); - - APInt Low = Clusters[First].Low->getValue(); - APInt High = Clusters[Last].High->getValue(); - assert(Low.slt(High)); - - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - const DataLayout &DL = DAG.getDataLayout(); - if (!TLI.isSuitableForBitTests(NumDests, NumCmps, Low, High, DL)) - return false; - - APInt LowBound; - APInt CmpRange; - - const int BitWidth = TLI.getPointerTy(DL).getSizeInBits(); - assert(TLI.rangeFitsInWord(Low, High, DL) && - "Case range must fit in bit mask!"); - - // Check if the clusters cover a contiguous range such that no value in the - // range will jump to the default statement. - bool ContiguousRange = true; - for (int64_t I = First + 1; I <= Last; ++I) { - if (Clusters[I].Low->getValue() != Clusters[I - 1].High->getValue() + 1) { - ContiguousRange = false; - break; - } - } - - if (Low.isStrictlyPositive() && High.slt(BitWidth)) { - // Optimize the case where all the case values fit in a word without having - // to subtract minValue. In this case, we can optimize away the subtraction. - LowBound = APInt::getNullValue(Low.getBitWidth()); - CmpRange = High; - ContiguousRange = false; - } else { - LowBound = Low; - CmpRange = High - Low; - } - - CaseBitsVector CBV; - auto TotalProb = BranchProbability::getZero(); - for (unsigned i = First; i <= Last; ++i) { - // Find the CaseBits for this destination. - unsigned j; - for (j = 0; j < CBV.size(); ++j) - if (CBV[j].BB == Clusters[i].MBB) - break; - if (j == CBV.size()) - CBV.push_back( - CaseBits(0, Clusters[i].MBB, 0, BranchProbability::getZero())); - CaseBits *CB = &CBV[j]; - - // Update Mask, Bits and ExtraProb. - uint64_t Lo = (Clusters[i].Low->getValue() - LowBound).getZExtValue(); - uint64_t Hi = (Clusters[i].High->getValue() - LowBound).getZExtValue(); - assert(Hi >= Lo && Hi < 64 && "Invalid bit case!"); - CB->Mask |= (-1ULL >> (63 - (Hi - Lo))) << Lo; - CB->Bits += Hi - Lo + 1; - CB->ExtraProb += Clusters[i].Prob; - TotalProb += Clusters[i].Prob; - } - - BitTestInfo BTI; - llvm::sort(CBV, [](const CaseBits &a, const CaseBits &b) { - // Sort by probability first, number of bits second, bit mask third. - if (a.ExtraProb != b.ExtraProb) - return a.ExtraProb > b.ExtraProb; - if (a.Bits != b.Bits) - return a.Bits > b.Bits; - return a.Mask < b.Mask; - }); - - for (auto &CB : CBV) { - MachineBasicBlock *BitTestBB = - FuncInfo.MF->CreateMachineBasicBlock(SI->getParent()); - BTI.push_back(BitTestCase(CB.Mask, BitTestBB, CB.BB, CB.ExtraProb)); - } - BitTestCases.emplace_back(std::move(LowBound), std::move(CmpRange), - SI->getCondition(), -1U, MVT::Other, false, - ContiguousRange, nullptr, nullptr, std::move(BTI), - TotalProb); - - BTCluster = CaseCluster::bitTests(Clusters[First].Low, Clusters[Last].High, - BitTestCases.size() - 1, TotalProb); - return true; -} - -void SelectionDAGBuilder::findBitTestClusters(CaseClusterVector &Clusters, - const SwitchInst *SI) { -// Partition Clusters into as few subsets as possible, where each subset has a -// range that fits in a machine word and has <= 3 unique destinations. - -#ifndef NDEBUG - // Clusters must be sorted and contain Range or JumpTable clusters. 
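  // Illustrative invariant (assumed values): after sortAndRangeify, clusters
  // such as [1,3] [5,5] [9,12] satisfy High(i-1) < Low(i) pairwise, which is
  // precisely what the asserts below verify before bit tests are formed.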
- assert(!Clusters.empty()); - assert(Clusters[0].Kind == CC_Range || Clusters[0].Kind == CC_JumpTable); - for (const CaseCluster &C : Clusters) - assert(C.Kind == CC_Range || C.Kind == CC_JumpTable); - for (unsigned i = 1; i < Clusters.size(); ++i) - assert(Clusters[i-1].High->getValue().slt(Clusters[i].Low->getValue())); -#endif - - // The algorithm below is not suitable for -O0. - if (TM.getOptLevel() == CodeGenOpt::None) - return; - - // If target does not have legal shift left, do not emit bit tests at all. - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - const DataLayout &DL = DAG.getDataLayout(); - - EVT PTy = TLI.getPointerTy(DL); - if (!TLI.isOperationLegal(ISD::SHL, PTy)) - return; - - int BitWidth = PTy.getSizeInBits(); - const int64_t N = Clusters.size(); - - // MinPartitions[i] is the minimum nbr of partitions of Clusters[i..N-1]. - SmallVector<unsigned, 8> MinPartitions(N); - // LastElement[i] is the last element of the partition starting at i. - SmallVector<unsigned, 8> LastElement(N); - - // FIXME: This might not be the best algorithm for finding bit test clusters. - - // Base case: There is only one way to partition Clusters[N-1]. - MinPartitions[N - 1] = 1; - LastElement[N - 1] = N - 1; - - // Note: loop indexes are signed to avoid underflow. - for (int64_t i = N - 2; i >= 0; --i) { - // Find optimal partitioning of Clusters[i..N-1]. - // Baseline: Put Clusters[i] into a partition on its own. - MinPartitions[i] = MinPartitions[i + 1] + 1; - LastElement[i] = i; - - // Search for a solution that results in fewer partitions. - // Note: the search is limited by BitWidth, reducing time complexity. - for (int64_t j = std::min(N - 1, i + BitWidth - 1); j > i; --j) { - // Try building a partition from Clusters[i..j]. - - // Check the range. - if (!TLI.rangeFitsInWord(Clusters[i].Low->getValue(), - Clusters[j].High->getValue(), DL)) - continue; - - // Check nbr of destinations and cluster types. - // FIXME: This works, but doesn't seem very efficient. - bool RangesOnly = true; - BitVector Dests(FuncInfo.MF->getNumBlockIDs()); - for (int64_t k = i; k <= j; k++) { - if (Clusters[k].Kind != CC_Range) { - RangesOnly = false; - break; - } - Dests.set(Clusters[k].MBB->getNumber()); - } - if (!RangesOnly || Dests.count() > 3) - break; - - // Check if it's a better partition. - unsigned NumPartitions = 1 + (j == N - 1 ? 0 : MinPartitions[j + 1]); - if (NumPartitions < MinPartitions[i]) { - // Found a better partition. - MinPartitions[i] = NumPartitions; - LastElement[i] = j; - } - } - } - - // Iterate over the partitions, replacing with bit-test clusters in-place. 
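  // Illustrative trace (assumed partitions): given {0..2} {3} {4..5} where
  // only the first partition becomes a bit-test cluster, the loop below
  // writes [BitTests(0..2), Clusters[3], Clusters[4], Clusters[5]] and ends
  // with DstIndex = 4, so the final resize drops the two stale tail entries.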
- unsigned DstIndex = 0; - for (unsigned First = 0, Last; First < N; First = Last + 1) { - Last = LastElement[First]; - assert(First <= Last); - assert(DstIndex <= First); - - CaseCluster BitTestCluster; - if (buildBitTests(Clusters, First, Last, SI, BitTestCluster)) { - Clusters[DstIndex++] = BitTestCluster; - } else { - size_t NumClusters = Last - First + 1; - std::memmove(&Clusters[DstIndex], &Clusters[First], - sizeof(Clusters[0]) * NumClusters); - DstIndex += NumClusters; - } - } - Clusters.resize(DstIndex); -} - void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, MachineBasicBlock *SwitchMBB, MachineBasicBlock *DefaultMBB) { @@ -9977,10 +10021,13 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, MachineBasicBlock *CurMBB = W.MBB; for (CaseClusterIt I = W.FirstCluster, E = W.LastCluster; I <= E; ++I) { + bool FallthroughUnreachable = false; MachineBasicBlock *Fallthrough; if (I == W.LastCluster) { // For the last cluster, fall through to the default destination. Fallthrough = DefaultMBB; + FallthroughUnreachable = isa<UnreachableInst>( + DefaultMBB->getBasicBlock()->getFirstNonPHIOrDbg()); } else { Fallthrough = CurMF->CreateMachineBasicBlock(CurMBB->getBasicBlock()); CurMF->insert(BBI, Fallthrough); @@ -9992,8 +10039,8 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, switch (I->Kind) { case CC_JumpTable: { // FIXME: Optimize away range check based on pivot comparisons. - JumpTableHeader *JTH = &JTCases[I->JTCasesIndex].first; - JumpTable *JT = &JTCases[I->JTCasesIndex].second; + JumpTableHeader *JTH = &SL->JTCases[I->JTCasesIndex].first; + SwitchCG::JumpTable *JT = &SL->JTCases[I->JTCasesIndex].second; // The jump block hasn't been inserted yet; insert it here. MachineBasicBlock *JumpMBB = JT->MBB; @@ -10017,7 +10064,13 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, } } - addSuccessorWithProb(CurMBB, Fallthrough, FallthroughProb); + if (FallthroughUnreachable) { + // Skip the range check if the fallthrough block is unreachable. + JTH->OmitRangeCheck = true; + } + + if (!JTH->OmitRangeCheck) + addSuccessorWithProb(CurMBB, Fallthrough, FallthroughProb); addSuccessorWithProb(CurMBB, JumpMBB, JumpProb); CurMBB->normalizeSuccProbs(); @@ -10034,8 +10087,10 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, break; } case CC_BitTests: { + // FIXME: If Fallthrough is unreachable, skip the range check. + // FIXME: Optimize away range check based on pivot comparisons. - BitTestBlock *BTB = &BitTestCases[I->BTCasesIndex]; + BitTestBlock *BTB = &SL->BitTestCases[I->BTCasesIndex]; // The bit test blocks haven't been inserted yet; insert them here. for (BitTestCase &BTC : BTB->Cases) @@ -10078,6 +10133,10 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, RHS = I->High; } + // If Fallthrough is unreachable, fold away the comparison. + if (FallthroughUnreachable) + CC = ISD::SETTRUE; + // The false probability is the sum of all unhandled cases. 
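      // Illustrative probabilities (assumed, including a zero-weight default):
      // with three clusters weighted {0.5, 0.3, 0.2}, by the time the first
      // cluster's compare is emitted UnhandledProbs has dropped to 0.5, so CB
      // branches to I->MBB with probability 0.5 and falls through with the
      // remaining 0.5.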
      CaseBlock CB(CC, LHS, RHS, MHS, I->MBB, Fallthrough, CurMBB,
                   getCurSDLoc(), I->Prob, UnhandledProbs);
 
@@ -10085,7 +10144,7 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
      if (CurMBB == SwitchMBB)
        visitSwitchCase(CB, SwitchMBB);
      else
-        SwitchCases.push_back(CB);
+        SL->SwitchCases.push_back(CB);
 
      break;
    }
@@ -10236,7 +10295,7 @@ void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList,
  if (W.MBB == SwitchMBB)
    visitSwitchCase(CB, SwitchMBB);
  else
-    SwitchCases.push_back(CB);
+    SL->SwitchCases.push_back(CB);
}
 
// Scale CaseProb after peeling a case with the probability of PeeledCaseProb
@@ -10265,7 +10324,7 @@ MachineBasicBlock *SelectionDAGBuilder::peelDominantCaseCluster(
  // Don't perform if there is only one cluster or optimizing for size.
  if (SwitchPeelThreshold > 100 || !FuncInfo.BPI || Clusters.size() < 2 ||
      TM.getOptLevel() == CodeGenOpt::None ||
-      SwitchMBB->getParent()->getFunction().optForMinSize())
+      SwitchMBB->getParent()->getFunction().hasMinSize())
    return SwitchMBB;
 
  BranchProbability TopCaseProb = BranchProbability(SwitchPeelThreshold, 100);
@@ -10331,38 +10390,6 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
  // if there are many clusters.
  sortAndRangeify(Clusters);
 
-  if (TM.getOptLevel() != CodeGenOpt::None) {
-    // Replace an unreachable default with the most popular destination.
-    // FIXME: Exploit unreachable default more aggressively.
-    bool UnreachableDefault =
-        isa<UnreachableInst>(SI.getDefaultDest()->getFirstNonPHIOrDbg());
-    if (UnreachableDefault && !Clusters.empty()) {
-      DenseMap<const BasicBlock *, unsigned> Popularity;
-      unsigned MaxPop = 0;
-      const BasicBlock *MaxBB = nullptr;
-      for (auto I : SI.cases()) {
-        const BasicBlock *BB = I.getCaseSuccessor();
-        if (++Popularity[BB] > MaxPop) {
-          MaxPop = Popularity[BB];
-          MaxBB = BB;
-        }
-      }
-      // Set new default.
-      assert(MaxPop > 0 && MaxBB);
-      DefaultMBB = FuncInfo.MBBMap[MaxBB];
-
-      // Remove cases that were pointing to the destination that is now the
-      // default.
-      CaseClusterVector New;
-      New.reserve(Clusters.size());
-      for (CaseCluster &CC : Clusters) {
-        if (CC.MBB != DefaultMBB)
-          New.push_back(CC);
-      }
-      Clusters = std::move(New);
-    }
-  }
-
  // The branch probability of the peeled case.
  BranchProbability PeeledCaseProb = BranchProbability::getZero();
  MachineBasicBlock *PeeledSwitchMBB =
@@ -10380,8 +10407,8 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
    return;
  }
 
-  findJumpTables(Clusters, &SI, DefaultMBB);
-  findBitTestClusters(Clusters, &SI);
+  SL->findJumpTables(Clusters, &SI, DefaultMBB);
+  SL->findBitTestClusters(Clusters, &SI);
 
  LLVM_DEBUG({
    dbgs() << "Case clusters: ";
@@ -10420,7 +10447,7 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
    unsigned NumClusters = W.LastCluster - W.FirstCluster + 1;
 
    if (NumClusters > 3 && TM.getOptLevel() != CodeGenOpt::None &&
-        !DefaultMBB->getParent()->getFunction().optForMinSize()) {
+        !DefaultMBB->getParent()->getFunction().hasMinSize()) {
      // For optimized builds, lower large range as a balanced binary tree.
      splitWorkItem(WorkList, W, SI.getCondition(), SwitchMBB);
      continue;