diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2021-02-16 20:13:02 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2021-02-16 20:13:02 +0000 |
commit | b60736ec1405bb0a8dd40989f67ef4c93da068ab (patch) | |
tree | 5c43fbb7c9fc45f0f87e0e6795a86267dbd12f9d /llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | |
parent | cfca06d7963fa0909f90483b42a6d7d194d01e08 (diff) |
Diffstat (limited to 'llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp')
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 805 |
1 files changed, 465 insertions, 340 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 1d596c89c911..6638ff6a6358 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -14,15 +14,12 @@ #include "SDNodeDbgValue.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" -#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitVector.h" -#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" @@ -40,7 +37,6 @@ #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/GCMetadata.h" -#include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -53,17 +49,14 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RuntimeLibcalls.h" #include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/SelectionDAGTargetInfo.h" #include "llvm/CodeGen/StackMaps.h" #include "llvm/CodeGen/SwiftErrorValueTracking.h" #include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/CodeGen/TargetInstrInfo.h" -#include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" -#include "llvm/CodeGen/ValueTypes.h" #include "llvm/CodeGen/WinEHFuncInfo.h" #include "llvm/IR/Argument.h" #include "llvm/IR/Attributes.h" @@ -75,13 +68,11 @@ #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfoMetadata.h" -#include "llvm/IR/DebugLoc.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/InstrTypes.h" -#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" @@ -99,31 +90,22 @@ #include "llvm/MC/MCContext.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/AtomicOrdering.h" -#include "llvm/Support/BranchProbability.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/CodeGen.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MachineValueType.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetIntrinsicInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Transforms/Utils/Local.h" -#include <algorithm> -#include <cassert> #include <cstddef> -#include <cstdint> #include <cstring> #include <iterator> #include <limits> #include <numeric> #include <tuple> -#include <utility> -#include <vector> using namespace llvm; using namespace PatternMatch; @@ -169,32 +151,6 @@ static cl::opt<unsigned> SwitchPeelThreshold( // store [4096 x i8] %data, [4096 x i8]* %buffer static const unsigned MaxParallelChains = 64; -// Return the calling convention if the Value passed requires ABI mangling as it -// is a parameter to a function or a return value from a function which is not -// an intrinsic. -static Optional<CallingConv::ID> getABIRegCopyCC(const Value *V) { - if (auto *R = dyn_cast<ReturnInst>(V)) - return R->getParent()->getParent()->getCallingConv(); - - if (auto *CI = dyn_cast<CallInst>(V)) { - const bool IsInlineAsm = CI->isInlineAsm(); - const bool IsIndirectFunctionCall = - !IsInlineAsm && !CI->getCalledFunction(); - - // It is possible that the call instruction is an inline asm statement or an - // indirect function call in which case the return value of - // getCalledFunction() would be nullptr. - const bool IsInstrinsicCall = - !IsInlineAsm && !IsIndirectFunctionCall && - CI->getCalledFunction()->getIntrinsicID() != Intrinsic::not_intrinsic; - - if (!IsInlineAsm && !IsInstrinsicCall) - return CI->getCallingConv(); - } - - return None; -} - static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, const Value *V, @@ -409,7 +365,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL, // as appropriate. for (unsigned i = 0; i != NumParts; ++i) Ops[i] = getCopyFromParts(DAG, DL, &Parts[i], 1, - PartVT, IntermediateVT, V); + PartVT, IntermediateVT, V, CallConv); } else if (NumParts > 0) { // If the intermediate type was expanded, build the intermediate // operands from the parts. @@ -418,7 +374,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL, unsigned Factor = NumParts / NumIntermediates; for (unsigned i = 0; i != NumIntermediates; ++i) Ops[i] = getCopyFromParts(DAG, DL, &Parts[i * Factor], Factor, - PartVT, IntermediateVT, V); + PartVT, IntermediateVT, V, CallConv); } // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the @@ -448,10 +404,10 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL, // vector widening case (e.g. <2 x float> -> <4 x float>). Extract the // elements we want. if (PartEVT.getVectorElementType() == ValueVT.getVectorElementType()) { - assert((PartEVT.getVectorElementCount().Min > - ValueVT.getVectorElementCount().Min) && - (PartEVT.getVectorElementCount().Scalable == - ValueVT.getVectorElementCount().Scalable) && + assert((PartEVT.getVectorElementCount().getKnownMinValue() > + ValueVT.getVectorElementCount().getKnownMinValue()) && + (PartEVT.getVectorElementCount().isScalable() == + ValueVT.getVectorElementCount().isScalable()) && "Cannot narrow, it would be a lossy transformation"); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val, DAG.getVectorIdxConstant(0, DL)); @@ -479,7 +435,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL, // are the same size, this is an obvious bitcast. if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits()) { return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); - } else if (ValueVT.getSizeInBits() < PartEVT.getSizeInBits()) { + } else if (ValueVT.bitsLT(PartEVT)) { // Bitcast Val back the original type and extract the corresponding // vector we want. unsigned Elts = PartEVT.getSizeInBits() / ValueVT.getScalarSizeInBits(); @@ -709,14 +665,14 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL, // Promoted vector extract Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT); } else { - if (ValueVT.getVectorNumElements() == 1) { + if (ValueVT.getVectorElementCount().isScalar()) { Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val, DAG.getVectorIdxConstant(0, DL)); } else { - assert(PartVT.getSizeInBits() > ValueVT.getSizeInBits() && + uint64_t ValueSize = ValueVT.getFixedSizeInBits(); + assert(PartVT.getFixedSizeInBits() > ValueSize && "lossy conversion of vector to scalar type"); - EVT IntermediateType = - EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits()); + EVT IntermediateType = EVT::getIntegerVT(*DAG.getContext(), ValueSize); Val = DAG.getBitcast(IntermediateType, Val); Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT); } @@ -749,15 +705,15 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL, assert(IntermediateVT.isScalableVector() == ValueVT.isScalableVector() && "Mixing scalable and fixed vectors when copying in parts"); - ElementCount DestEltCnt; + Optional<ElementCount> DestEltCnt; if (IntermediateVT.isVector()) DestEltCnt = IntermediateVT.getVectorElementCount() * NumIntermediates; else - DestEltCnt = ElementCount(NumIntermediates, false); + DestEltCnt = ElementCount::getFixed(NumIntermediates); EVT BuiltVectorTy = EVT::getVectorVT( - *DAG.getContext(), IntermediateVT.getScalarType(), DestEltCnt); + *DAG.getContext(), IntermediateVT.getScalarType(), DestEltCnt.getValue()); if (ValueVT != BuiltVectorTy) { if (SDValue Widened = widenVectorToPartType(DAG, Val, DL, BuiltVectorTy)) Val = Widened; @@ -1001,7 +957,7 @@ void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching, // shouldn't try to apply any sort of splitting logic to them. assert(Regs.size() == RegVTs.size() && Regs.size() == ValueVTs.size() && "No 1:1 mapping from clobbers to regs?"); - unsigned SP = TLI.getStackPointerRegisterToSaveRestore(); + Register SP = TLI.getStackPointerRegisterToSaveRestore(); (void)SP; for (unsigned I = 0, E = ValueVTs.size(); I != E; ++I) { Ops.push_back(DAG.getRegister(Regs[I], RegVTs[I])); @@ -1024,14 +980,14 @@ void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching, } } -SmallVector<std::pair<unsigned, unsigned>, 4> +SmallVector<std::pair<unsigned, TypeSize>, 4> RegsForValue::getRegsAndSizes() const { - SmallVector<std::pair<unsigned, unsigned>, 4> OutVec; + SmallVector<std::pair<unsigned, TypeSize>, 4> OutVec; unsigned I = 0; for (auto CountAndVT : zip_first(RegCount, RegVTs)) { unsigned RegCount = std::get<0>(CountAndVT); MVT RegisterVT = std::get<1>(CountAndVT); - unsigned RegisterSize = RegisterVT.getSizeInBits(); + TypeSize RegisterSize = RegisterVT.getSizeInBits(); for (unsigned E = I + RegCount; I != E; ++I) OutVec.push_back(std::make_pair(Regs[I], RegisterSize)); } @@ -1140,25 +1096,6 @@ void SelectionDAGBuilder::visit(const Instruction &I) { visit(I.getOpcode(), I); - if (auto *FPMO = dyn_cast<FPMathOperator>(&I)) { - // ConstrainedFPIntrinsics handle their own FMF. - if (!isa<ConstrainedFPIntrinsic>(&I)) { - // Propagate the fast-math-flags of this IR instruction to the DAG node that - // maps to this instruction. - // TODO: We could handle all flags (nsw, etc) here. - // TODO: If an IR instruction maps to >1 node, only the final node will have - // flags set. - if (SDNode *Node = getNodeForIRValue(&I)) { - SDNodeFlags IncomingFlags; - IncomingFlags.copyFMF(*FPMO); - if (!Node->getFlags().isDefined()) - Node->setFlags(IncomingFlags); - else - Node->intersectFlagsWith(IncomingFlags); - } - } - } - if (!I.isTerminator() && !HasTailCall && !isa<GCStatepointInst>(I)) // statepoints handle their exports internally CopyToExportRegsIfNeeded(&I); @@ -1204,7 +1141,7 @@ void SelectionDAGBuilder::dropDanglingDebugInfo(const DILocalVariable *Variable, if (isMatchingDbgValue(DDI)) salvageUnresolvedDbgValue(DDI); - DDIV.erase(remove_if(DDIV, isMatchingDbgValue), DDIV.end()); + erase_if(DDIV, isMatchingDbgValue); } } @@ -1577,6 +1514,9 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { if (const BlockAddress *BA = dyn_cast<BlockAddress>(C)) return DAG.getBlockAddress(BA, VT); + if (const auto *Equiv = dyn_cast<DSOLocalEquivalent>(C)) + return getValue(Equiv->getGlobalValue()); + VectorType *VecTy = cast<VectorType>(V->getType()); // Now that we know the number and type of the elements, get that number of @@ -1624,7 +1564,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { unsigned InReg = FuncInfo.InitializeRegForValue(Inst); RegsForValue RFV(*DAG.getContext(), TLI, DAG.getDataLayout(), InReg, - Inst->getType(), getABIRegCopyCC(V)); + Inst->getType(), None); SDValue Chain = DAG.getEntryNode(); return RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V); } @@ -1697,10 +1637,32 @@ void SelectionDAGBuilder::visitCleanupPad(const CleanupPadInst &CPI) { } } -// For wasm, there's alwyas a single catch pad attached to a catchswitch, and -// the control flow always stops at the single catch pad, as it does for a -// cleanup pad. In case the exception caught is not of the types the catch pad -// catches, it will be rethrown by a rethrow. +// In wasm EH, even though a catchpad may not catch an exception if a tag does +// not match, it is OK to add only the first unwind destination catchpad to the +// successors, because there will be at least one invoke instruction within the +// catch scope that points to the next unwind destination, if one exists, so +// CFGSort cannot mess up with BB sorting order. +// (All catchpads with 'catch (type)' clauses have a 'llvm.rethrow' intrinsic +// call within them, and catchpads only consisting of 'catch (...)' have a +// '__cxa_end_catch' call within them, both of which generate invokes in case +// the next unwind destination exists, i.e., the next unwind destination is not +// the caller.) +// +// Having at most one EH pad successor is also simpler and helps later +// transformations. +// +// For example, +// current: +// invoke void @foo to ... unwind label %catch.dispatch +// catch.dispatch: +// %0 = catchswitch within ... [label %catch.start] unwind label %next +// catch.start: +// ... +// ... in this BB or some other child BB dominated by this BB there will be an +// invoke that points to 'next' BB as an unwind destination +// +// next: ; We don't need to add this to 'current' BB's successor +// ... static void findWasmUnwindDestinations( FunctionLoweringInfo &FuncInfo, const BasicBlock *EHPadBB, BranchProbability Prob, @@ -1863,7 +1825,8 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { for (unsigned i = 0; i != NumValues; ++i) { // An aggregate return value cannot wrap around the address space, so // offsets to its parts don't wrap either. - SDValue Ptr = DAG.getObjectPtrOffset(getCurSDLoc(), RetPtr, Offsets[i]); + SDValue Ptr = DAG.getObjectPtrOffset(getCurSDLoc(), RetPtr, + TypeSize::Fixed(Offsets[i])); SDValue Val = RetOp.getValue(RetOp.getResNo() + i); if (MemVTs[i] != ValueVTs[i]) @@ -2144,14 +2107,19 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond, } const Instruction *BOp = dyn_cast<Instruction>(Cond); + const Value *BOpOp0, *BOpOp1; // Compute the effective opcode for Cond, taking into account whether it needs // to be inverted, e.g. // and (not (or A, B)), C // gets lowered as // and (and (not A, not B), C) - unsigned BOpc = 0; + Instruction::BinaryOps BOpc = (Instruction::BinaryOps)0; if (BOp) { - BOpc = BOp->getOpcode(); + BOpc = match(BOp, m_LogicalAnd(m_Value(BOpOp0), m_Value(BOpOp1))) + ? Instruction::And + : (match(BOp, m_LogicalOr(m_Value(BOpOp0), m_Value(BOpOp1))) + ? Instruction::Or + : (Instruction::BinaryOps)0); if (InvertCond) { if (BOpc == Instruction::And) BOpc = Instruction::Or; @@ -2161,11 +2129,11 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond, } // If this node is not part of the or/and tree, emit it as a branch. - if (!BOp || !(isa<BinaryOperator>(BOp) || isa<CmpInst>(BOp)) || - BOpc != unsigned(Opc) || !BOp->hasOneUse() || - BOp->getParent() != CurBB->getBasicBlock() || - !InBlock(BOp->getOperand(0), CurBB->getBasicBlock()) || - !InBlock(BOp->getOperand(1), CurBB->getBasicBlock())) { + // Note that all nodes in the tree should have same opcode. + bool BOpIsInOrAndTree = BOpc && BOpc == Opc && BOp->hasOneUse(); + if (!BOpIsInOrAndTree || BOp->getParent() != CurBB->getBasicBlock() || + !InBlock(BOpOp0, CurBB->getBasicBlock()) || + !InBlock(BOpOp1, CurBB->getBasicBlock())) { EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB, TProb, FProb, InvertCond); return; @@ -2201,15 +2169,15 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond, auto NewTrueProb = TProb / 2; auto NewFalseProb = TProb / 2 + FProb; // Emit the LHS condition. - FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, SwitchBB, Opc, - NewTrueProb, NewFalseProb, InvertCond); + FindMergedConditions(BOpOp0, TBB, TmpBB, CurBB, SwitchBB, Opc, NewTrueProb, + NewFalseProb, InvertCond); // Normalize A/2 and B to get A/(1+B) and 2B/(1+B). SmallVector<BranchProbability, 2> Probs{TProb / 2, FProb}; BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end()); // Emit the RHS condition into TmpBB. - FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc, - Probs[0], Probs[1], InvertCond); + FindMergedConditions(BOpOp1, TBB, FBB, TmpBB, SwitchBB, Opc, Probs[0], + Probs[1], InvertCond); } else { assert(Opc == Instruction::And && "Unknown merge op!"); // Codegen X & Y as: @@ -2234,15 +2202,15 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond, auto NewTrueProb = TProb + FProb / 2; auto NewFalseProb = FProb / 2; // Emit the LHS condition. - FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, SwitchBB, Opc, - NewTrueProb, NewFalseProb, InvertCond); + FindMergedConditions(BOpOp0, TmpBB, FBB, CurBB, SwitchBB, Opc, NewTrueProb, + NewFalseProb, InvertCond); // Normalize A and B/2 to get 2A/(1+A) and B/(1+A). SmallVector<BranchProbability, 2> Probs{TProb, FProb / 2}; BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end()); // Emit the RHS condition into TmpBB. - FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc, - Probs[0], Probs[1], InvertCond); + FindMergedConditions(BOpOp1, TBB, FBB, TmpBB, SwitchBB, Opc, Probs[0], + Probs[1], InvertCond); } } @@ -2319,16 +2287,20 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) { // je foo // cmp D, E // jle foo - if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) { - Instruction::BinaryOps Opcode = BOp->getOpcode(); - Value *Vec, *BOp0 = BOp->getOperand(0), *BOp1 = BOp->getOperand(1); - if (!DAG.getTargetLoweringInfo().isJumpExpensive() && BOp->hasOneUse() && - !I.hasMetadata(LLVMContext::MD_unpredictable) && - (Opcode == Instruction::And || Opcode == Instruction::Or) && - !(match(BOp0, m_ExtractElt(m_Value(Vec), m_Value())) && - match(BOp1, m_ExtractElt(m_Specific(Vec), m_Value())))) { - FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB, - Opcode, + const Instruction *BOp = dyn_cast<Instruction>(CondVal); + if (!DAG.getTargetLoweringInfo().isJumpExpensive() && BOp && + BOp->hasOneUse() && !I.hasMetadata(LLVMContext::MD_unpredictable)) { + Value *Vec; + const Value *BOp0, *BOp1; + Instruction::BinaryOps Opcode = (Instruction::BinaryOps)0; + if (match(BOp, m_LogicalAnd(m_Value(BOp0), m_Value(BOp1)))) + Opcode = Instruction::And; + else if (match(BOp, m_LogicalOr(m_Value(BOp0), m_Value(BOp1)))) + Opcode = Instruction::Or; + + if (Opcode && !(match(BOp0, m_ExtractElt(m_Value(Vec), m_Value())) && + match(BOp1, m_ExtractElt(m_Specific(Vec), m_Value())))) { + FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB, Opcode, getEdgeProbability(BrMBB, Succ0MBB), getEdgeProbability(BrMBB, Succ1MBB), /*InvertCond=*/false); @@ -2577,7 +2549,7 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, SDLoc dl = getCurSDLoc(); SDValue StackSlotPtr = DAG.getFrameIndex(FI, PtrTy); const Module &M = *ParentBB->getParent()->getFunction().getParent(); - unsigned Align = DL->getPrefTypeAlignment(Type::getInt8PtrTy(M.getContext())); + Align Align = DL->getPrefTypeAlign(Type::getInt8PtrTy(M.getContext())); // Generate code to load the content of the guard slot. SDValue GuardVal = DAG.getLoad( @@ -2835,7 +2807,7 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { case Intrinsic::experimental_gc_statepoint: LowerStatepoint(cast<GCStatepointInst>(I), EHPadBB); break; - case Intrinsic::wasm_rethrow_in_catch: { + case Intrinsic::wasm_rethrow: { // This is usually done in visitTargetIntrinsic, but this intrinsic is // special because it can be invoked, so we manually lower it to a DAG // node here. @@ -2843,7 +2815,7 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { Ops.push_back(getRoot()); // inchain const TargetLowering &TLI = DAG.getTargetLoweringInfo(); Ops.push_back( - DAG.getTargetConstant(Intrinsic::wasm_rethrow_in_catch, getCurSDLoc(), + DAG.getTargetConstant(Intrinsic::wasm_rethrow, getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout()))); SDVTList VTs = DAG.getVTList(ArrayRef<EVT>({MVT::Other})); // outchain DAG.setRoot(DAG.getNode(ISD::INTRINSIC_VOID, getCurSDLoc(), VTs, Ops)); @@ -3025,20 +2997,6 @@ void SelectionDAGBuilder::visitUnreachable(const UnreachableInst &I) { DAG.setRoot(DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, DAG.getRoot())); } -void SelectionDAGBuilder::visitFSub(const User &I) { - // -0.0 - X --> fneg - Type *Ty = I.getType(); - if (isa<Constant>(I.getOperand(0)) && - I.getOperand(0) == ConstantFP::getZeroValueForNegation(Ty)) { - SDValue Op2 = getValue(I.getOperand(1)); - setValue(&I, DAG.getNode(ISD::FNEG, getCurSDLoc(), - Op2.getValueType(), Op2)); - return; - } - - visitBinary(I, ISD::FSUB); -} - void SelectionDAGBuilder::visitUnary(const User &I, unsigned Opcode) { SDNodeFlags Flags; @@ -3054,9 +3012,10 @@ void SelectionDAGBuilder::visitBinary(const User &I, unsigned Opcode) { Flags.setNoSignedWrap(OFBinOp->hasNoSignedWrap()); Flags.setNoUnsignedWrap(OFBinOp->hasNoUnsignedWrap()); } - if (auto *ExactOp = dyn_cast<PossiblyExactOperator>(&I)) { + if (auto *ExactOp = dyn_cast<PossiblyExactOperator>(&I)) Flags.setExact(ExactOp->isExact()); - } + if (auto *FPOp = dyn_cast<FPMathOperator>(&I)) + Flags.copyFMF(*FPOp); SDValue Op1 = getValue(I.getOperand(0)); SDValue Op2 = getValue(I.getOperand(1)); @@ -3166,10 +3125,14 @@ void SelectionDAGBuilder::visitFCmp(const User &I) { SDValue Op2 = getValue(I.getOperand(1)); ISD::CondCode Condition = getFCmpCondCode(predicate); - auto *FPMO = dyn_cast<FPMathOperator>(&I); - if ((FPMO && FPMO->hasNoNaNs()) || TM.Options.NoNaNsFPMath) + auto *FPMO = cast<FPMathOperator>(&I); + if (FPMO->hasNoNaNs() || TM.Options.NoNaNsFPMath) Condition = getFCmpCodeWithoutNaN(Condition); + SDNodeFlags Flags; + Flags.copyFMF(*FPMO); + SelectionDAG::FlagInserter FlagsInserter(DAG, Flags); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), I.getType()); setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Condition)); @@ -3199,6 +3162,11 @@ void SelectionDAGBuilder::visitSelect(const User &I) { Cond.getValueType().isVector() ? ISD::VSELECT : ISD::SELECT; bool IsUnaryAbs = false; + bool Negate = false; + + SDNodeFlags Flags; + if (auto *FPOp = dyn_cast<FPMathOperator>(&I)) + Flags.copyFMF(*FPOp); // Min/max matching is only viable if all output VTs are the same. if (is_splat(ValueVTs)) { @@ -3259,12 +3227,13 @@ void SelectionDAGBuilder::visitSelect(const User &I) { break; } break; + case SPF_NABS: + Negate = true; + LLVM_FALLTHROUGH; case SPF_ABS: IsUnaryAbs = true; Opc = ISD::ABS; break; - case SPF_NABS: - // TODO: we need to produce sub(0, abs(X)). default: break; } @@ -3291,10 +3260,13 @@ void SelectionDAGBuilder::visitSelect(const User &I) { if (IsUnaryAbs) { for (unsigned i = 0; i != NumValues; ++i) { + SDLoc dl = getCurSDLoc(); + EVT VT = LHSVal.getNode()->getValueType(LHSVal.getResNo() + i); Values[i] = - DAG.getNode(OpCode, getCurSDLoc(), - LHSVal.getNode()->getValueType(LHSVal.getResNo() + i), - SDValue(LHSVal.getNode(), LHSVal.getResNo() + i)); + DAG.getNode(OpCode, dl, VT, LHSVal.getValue(LHSVal.getResNo() + i)); + if (Negate) + Values[i] = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, dl, VT), + Values[i]); } } else { for (unsigned i = 0; i != NumValues; ++i) { @@ -3303,7 +3275,7 @@ void SelectionDAGBuilder::visitSelect(const User &I) { Ops.push_back(SDValue(RHSVal.getNode(), RHSVal.getResNo() + i)); Values[i] = DAG.getNode( OpCode, getCurSDLoc(), - LHSVal.getNode()->getValueType(LHSVal.getResNo() + i), Ops); + LHSVal.getNode()->getValueType(LHSVal.getResNo() + i), Ops, Flags); } } @@ -3445,7 +3417,7 @@ void SelectionDAGBuilder::visitAddrSpaceCast(const User &I) { unsigned SrcAS = SV->getType()->getPointerAddressSpace(); unsigned DestAS = I.getType()->getPointerAddressSpace(); - if (!TLI.isNoopAddrSpaceCast(SrcAS, DestAS)) + if (!TM.isNoopAddrSpaceCast(SrcAS, DestAS)) N = DAG.getAddrSpaceCast(getCurSDLoc(), DestVT, N, SrcAS, DestAS); setValue(&I, N); @@ -3773,20 +3745,18 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { SDValue N = getValue(Op0); SDLoc dl = getCurSDLoc(); auto &TLI = DAG.getTargetLoweringInfo(); - MVT PtrTy = TLI.getPointerTy(DAG.getDataLayout(), AS); - MVT PtrMemTy = TLI.getPointerMemTy(DAG.getDataLayout(), AS); // Normalize Vector GEP - all scalar operands should be converted to the // splat vector. bool IsVectorGEP = I.getType()->isVectorTy(); ElementCount VectorElementCount = IsVectorGEP ? cast<VectorType>(I.getType())->getElementCount() - : ElementCount(0, false); + : ElementCount::getFixed(0); if (IsVectorGEP && !N.getValueType().isVector()) { LLVMContext &Context = *DAG.getContext(); EVT VT = EVT::getVectorVT(Context, N.getValueType(), VectorElementCount); - if (VectorElementCount.Scalable) + if (VectorElementCount.isScalable()) N = DAG.getSplatVector(VT, dl, N); else N = DAG.getSplatBuildVector(VT, dl, N); @@ -3859,7 +3829,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { if (!IdxN.getValueType().isVector() && IsVectorGEP) { EVT VT = EVT::getVectorVT(*Context, IdxN.getValueType(), VectorElementCount); - if (VectorElementCount.Scalable) + if (VectorElementCount.isScalable()) IdxN = DAG.getSplatVector(VT, dl, IdxN); else IdxN = DAG.getSplatBuildVector(VT, dl, IdxN); @@ -3900,6 +3870,13 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { } } + MVT PtrTy = TLI.getPointerTy(DAG.getDataLayout(), AS); + MVT PtrMemTy = TLI.getPointerMemTy(DAG.getDataLayout(), AS); + if (IsVectorGEP) { + PtrTy = MVT::getVectorVT(PtrTy, VectorElementCount); + PtrMemTy = MVT::getVectorVT(PtrMemTy, VectorElementCount); + } + if (PtrMemTy != PtrTy && !cast<GEPOperator>(I).isInBounds()) N = DAG.getPtrExtendInReg(N, dl, PtrMemTy); @@ -4196,7 +4173,8 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { Root = Chain; ChainI = 0; } - SDValue Add = DAG.getMemBasePlusOffset(Ptr, Offsets[i], dl, Flags); + SDValue Add = + DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(Offsets[i]), dl, Flags); SDValue Val = SDValue(Src.getNode(), Src.getResNo() + i); if (MemVTs[i] != ValueVTs[i]) Val = DAG.getPtrExtOrTrunc(Val, dl, MemVTs[i]); @@ -4358,12 +4336,12 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) { if (!UniformBase) { Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout())); Index = getValue(Ptr); - IndexType = ISD::SIGNED_SCALED; + IndexType = ISD::SIGNED_UNSCALED; Scale = DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout())); } SDValue Ops[] = { getMemoryRoot(), Src0, Mask, Base, Index, Scale }; SDValue Scatter = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), VT, sdl, - Ops, MMO, IndexType); + Ops, MMO, IndexType, false); DAG.setRoot(Scatter); setValue(&I, Scatter); } @@ -4411,7 +4389,7 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) { // Do not serialize masked loads of constant memory with anything. MemoryLocation ML; if (VT.isScalableVector()) - ML = MemoryLocation(PtrOperand); + ML = MemoryLocation::getAfter(PtrOperand); else ML = MemoryLocation(PtrOperand, LocationSize::precise( DAG.getDataLayout().getTypeStoreSize(I.getType())), @@ -4469,12 +4447,12 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) { if (!UniformBase) { Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout())); Index = getValue(Ptr); - IndexType = ISD::SIGNED_SCALED; + IndexType = ISD::SIGNED_UNSCALED; Scale = DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout())); } SDValue Ops[] = { Root, Src0, Mask, Base, Index, Scale }; SDValue Gather = DAG.getMaskedGather(DAG.getVTList(VT, MVT::Other), VT, sdl, - Ops, MMO, IndexType); + Ops, MMO, IndexType, ISD::NON_EXTLOAD); PendingLoads.push_back(Gather.getValue(1)); setValue(&I, Gather); @@ -4901,7 +4879,7 @@ static SDValue getLimitedPrecisionExp2(SDValue t0, const SDLoc &dl, /// expandExp - Lower an exp intrinsic. Handles the special sequences for /// limited-precision mode. static SDValue expandExp(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, - const TargetLowering &TLI) { + const TargetLowering &TLI, SDNodeFlags Flags) { if (Op.getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { @@ -4917,13 +4895,13 @@ static SDValue expandExp(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, } // No special expansion. - return DAG.getNode(ISD::FEXP, dl, Op.getValueType(), Op); + return DAG.getNode(ISD::FEXP, dl, Op.getValueType(), Op, Flags); } /// expandLog - Lower a log intrinsic. Handles the special sequences for /// limited-precision mode. static SDValue expandLog(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, - const TargetLowering &TLI) { + const TargetLowering &TLI, SDNodeFlags Flags) { // TODO: What fast-math-flags should be set on the floating-point nodes? if (Op.getValueType() == MVT::f32 && @@ -5016,13 +4994,13 @@ static SDValue expandLog(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, } // No special expansion. - return DAG.getNode(ISD::FLOG, dl, Op.getValueType(), Op); + return DAG.getNode(ISD::FLOG, dl, Op.getValueType(), Op, Flags); } /// expandLog2 - Lower a log2 intrinsic. Handles the special sequences for /// limited-precision mode. static SDValue expandLog2(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, - const TargetLowering &TLI) { + const TargetLowering &TLI, SDNodeFlags Flags) { // TODO: What fast-math-flags should be set on the floating-point nodes? if (Op.getValueType() == MVT::f32 && @@ -5113,13 +5091,13 @@ static SDValue expandLog2(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, } // No special expansion. - return DAG.getNode(ISD::FLOG2, dl, Op.getValueType(), Op); + return DAG.getNode(ISD::FLOG2, dl, Op.getValueType(), Op, Flags); } /// expandLog10 - Lower a log10 intrinsic. Handles the special sequences for /// limited-precision mode. static SDValue expandLog10(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, - const TargetLowering &TLI) { + const TargetLowering &TLI, SDNodeFlags Flags) { // TODO: What fast-math-flags should be set on the floating-point nodes? if (Op.getValueType() == MVT::f32 && @@ -5203,25 +5181,26 @@ static SDValue expandLog10(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, } // No special expansion. - return DAG.getNode(ISD::FLOG10, dl, Op.getValueType(), Op); + return DAG.getNode(ISD::FLOG10, dl, Op.getValueType(), Op, Flags); } /// expandExp2 - Lower an exp2 intrinsic. Handles the special sequences for /// limited-precision mode. static SDValue expandExp2(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, - const TargetLowering &TLI) { + const TargetLowering &TLI, SDNodeFlags Flags) { if (Op.getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) return getLimitedPrecisionExp2(Op, dl, DAG); // No special expansion. - return DAG.getNode(ISD::FEXP2, dl, Op.getValueType(), Op); + return DAG.getNode(ISD::FEXP2, dl, Op.getValueType(), Op, Flags); } /// visitPow - Lower a pow intrinsic. Handles the special sequences for /// limited-precision mode with x == 10.0f. static SDValue expandPow(const SDLoc &dl, SDValue LHS, SDValue RHS, - SelectionDAG &DAG, const TargetLowering &TLI) { + SelectionDAG &DAG, const TargetLowering &TLI, + SDNodeFlags Flags) { bool IsExp10 = false; if (LHS.getValueType() == MVT::f32 && RHS.getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { @@ -5244,7 +5223,7 @@ static SDValue expandPow(const SDLoc &dl, SDValue LHS, SDValue RHS, } // No special expansion. - return DAG.getNode(ISD::FPOW, dl, LHS.getValueType(), LHS, RHS); + return DAG.getNode(ISD::FPOW, dl, LHS.getValueType(), LHS, RHS, Flags); } /// ExpandPowI - Expand a llvm.powi intrinsic. @@ -5369,7 +5348,7 @@ static SDValue expandDivFix(unsigned Opcode, const SDLoc &DL, // getUnderlyingArgRegs - Find underlying registers used for a truncated, // bitcasted, or split argument. Returns a list of <Register, size in bits> static void -getUnderlyingArgRegs(SmallVectorImpl<std::pair<unsigned, unsigned>> &Regs, +getUnderlyingArgRegs(SmallVectorImpl<std::pair<unsigned, TypeSize>> &Regs, const SDValue &N) { switch (N.getOpcode()) { case ISD::CopyFromReg: { @@ -5480,7 +5459,7 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( if (FI != std::numeric_limits<int>::max()) Op = MachineOperand::CreateFI(FI); - SmallVector<std::pair<unsigned, unsigned>, 8> ArgRegsAndSizes; + SmallVector<std::pair<unsigned, TypeSize>, 8> ArgRegsAndSizes; if (!Op && N.getNode()) { getUnderlyingArgRegs(ArgRegsAndSizes, N); Register Reg; @@ -5510,8 +5489,8 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( if (!Op) { // Create a DBG_VALUE for each decomposed value in ArgRegs to cover Reg - auto splitMultiRegDbgValue - = [&](ArrayRef<std::pair<unsigned, unsigned>> SplitRegs) { + auto splitMultiRegDbgValue = [&](ArrayRef<std::pair<unsigned, TypeSize>> + SplitRegs) { unsigned Offset = 0; for (auto RegAndSize : SplitRegs) { // If the expression is already a fragment, the current register @@ -5555,7 +5534,7 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( if (VMI != FuncInfo.ValueMap.end()) { const auto &TLI = DAG.getTargetLoweringInfo(); RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), VMI->second, - V->getType(), getABIRegCopyCC(V)); + V->getType(), None); if (RFV.occupiesMultipleRegs()) { splitMultiRegDbgValue(RFV.getRegsAndSizes()); return true; @@ -5665,6 +5644,10 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, DebugLoc dl = getCurDebugLoc(); SDValue Res; + SDNodeFlags Flags; + if (auto *FPOp = dyn_cast<FPMathOperator>(&I)) + Flags.copyFMF(*FPOp); + switch (Intrinsic) { default: // By default, turn this into a target intrinsic node. @@ -6079,23 +6062,26 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, getValue(I.getArgOperand(1)), DAG)); return; case Intrinsic::log: - setValue(&I, expandLog(sdl, getValue(I.getArgOperand(0)), DAG, TLI)); + setValue(&I, expandLog(sdl, getValue(I.getArgOperand(0)), DAG, TLI, Flags)); return; case Intrinsic::log2: - setValue(&I, expandLog2(sdl, getValue(I.getArgOperand(0)), DAG, TLI)); + setValue(&I, + expandLog2(sdl, getValue(I.getArgOperand(0)), DAG, TLI, Flags)); return; case Intrinsic::log10: - setValue(&I, expandLog10(sdl, getValue(I.getArgOperand(0)), DAG, TLI)); + setValue(&I, + expandLog10(sdl, getValue(I.getArgOperand(0)), DAG, TLI, Flags)); return; case Intrinsic::exp: - setValue(&I, expandExp(sdl, getValue(I.getArgOperand(0)), DAG, TLI)); + setValue(&I, expandExp(sdl, getValue(I.getArgOperand(0)), DAG, TLI, Flags)); return; case Intrinsic::exp2: - setValue(&I, expandExp2(sdl, getValue(I.getArgOperand(0)), DAG, TLI)); + setValue(&I, + expandExp2(sdl, getValue(I.getArgOperand(0)), DAG, TLI, Flags)); return; case Intrinsic::pow: setValue(&I, expandPow(sdl, getValue(I.getArgOperand(0)), - getValue(I.getArgOperand(1)), DAG, TLI)); + getValue(I.getArgOperand(1)), DAG, TLI, Flags)); return; case Intrinsic::sqrt: case Intrinsic::fabs: @@ -6128,7 +6114,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, setValue(&I, DAG.getNode(Opcode, sdl, getValue(I.getArgOperand(0)).getValueType(), - getValue(I.getArgOperand(0)))); + getValue(I.getArgOperand(0)), Flags)); return; } case Intrinsic::lround: @@ -6153,44 +6139,47 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, setValue(&I, DAG.getNode(ISD::FMINNUM, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)), - getValue(I.getArgOperand(1)))); + getValue(I.getArgOperand(1)), Flags)); return; case Intrinsic::maxnum: setValue(&I, DAG.getNode(ISD::FMAXNUM, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)), - getValue(I.getArgOperand(1)))); + getValue(I.getArgOperand(1)), Flags)); return; case Intrinsic::minimum: setValue(&I, DAG.getNode(ISD::FMINIMUM, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)), - getValue(I.getArgOperand(1)))); + getValue(I.getArgOperand(1)), Flags)); return; case Intrinsic::maximum: setValue(&I, DAG.getNode(ISD::FMAXIMUM, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)), - getValue(I.getArgOperand(1)))); + getValue(I.getArgOperand(1)), Flags)); return; case Intrinsic::copysign: setValue(&I, DAG.getNode(ISD::FCOPYSIGN, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)), - getValue(I.getArgOperand(1)))); + getValue(I.getArgOperand(1)), Flags)); return; case Intrinsic::fma: - setValue(&I, DAG.getNode(ISD::FMA, sdl, - getValue(I.getArgOperand(0)).getValueType(), - getValue(I.getArgOperand(0)), - getValue(I.getArgOperand(1)), - getValue(I.getArgOperand(2)))); + setValue(&I, DAG.getNode( + ISD::FMA, sdl, getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), + getValue(I.getArgOperand(2)), Flags)); return; #define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC) \ case Intrinsic::INTRINSIC: #include "llvm/IR/ConstrainedOps.def" visitConstrainedFPIntrinsic(cast<ConstrainedFPIntrinsic>(I)); return; +#define BEGIN_REGISTER_VP_INTRINSIC(VPID, ...) case Intrinsic::VPID: +#include "llvm/IR/VPIntrinsics.def" + visitVectorPredicationIntrinsic(cast<VPIntrinsic>(I)); + return; case Intrinsic::fmuladd: { EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict && @@ -6199,17 +6188,15 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), - getValue(I.getArgOperand(2)))); + getValue(I.getArgOperand(2)), Flags)); } else { // TODO: Intrinsic calls should have fast-math-flags. - SDValue Mul = DAG.getNode(ISD::FMUL, sdl, - getValue(I.getArgOperand(0)).getValueType(), - getValue(I.getArgOperand(0)), - getValue(I.getArgOperand(1))); + SDValue Mul = DAG.getNode( + ISD::FMUL, sdl, getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), Flags); SDValue Add = DAG.getNode(ISD::FADD, sdl, getValue(I.getArgOperand(0)).getValueType(), - Mul, - getValue(I.getArgOperand(2))); + Mul, getValue(I.getArgOperand(2)), Flags); setValue(&I, Add); } return; @@ -6227,6 +6214,20 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, DAG.getNode(ISD::BITCAST, sdl, MVT::f16, getValue(I.getArgOperand(0))))); return; + case Intrinsic::fptosi_sat: { + EVT Type = TLI.getValueType(DAG.getDataLayout(), I.getType()); + SDValue SatW = DAG.getConstant(Type.getScalarSizeInBits(), sdl, MVT::i32); + setValue(&I, DAG.getNode(ISD::FP_TO_SINT_SAT, sdl, Type, + getValue(I.getArgOperand(0)), SatW)); + return; + } + case Intrinsic::fptoui_sat: { + EVT Type = TLI.getValueType(DAG.getDataLayout(), I.getType()); + SDValue SatW = DAG.getConstant(Type.getScalarSizeInBits(), sdl, MVT::i32); + setValue(&I, DAG.getNode(ISD::FP_TO_UINT_SAT, sdl, Type, + getValue(I.getArgOperand(0)), SatW)); + return; + } case Intrinsic::pcmarker: { SDValue Tmp = getValue(I.getArgOperand(0)); DAG.setRoot(DAG.getNode(ISD::PCMARKER, sdl, MVT::Other, getRoot(), Tmp)); @@ -6279,62 +6280,14 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, SDValue Y = getValue(I.getArgOperand(1)); SDValue Z = getValue(I.getArgOperand(2)); EVT VT = X.getValueType(); - SDValue BitWidthC = DAG.getConstant(VT.getScalarSizeInBits(), sdl, VT); - SDValue Zero = DAG.getConstant(0, sdl, VT); - SDValue ShAmt = DAG.getNode(ISD::UREM, sdl, VT, Z, BitWidthC); - - auto FunnelOpcode = IsFSHL ? ISD::FSHL : ISD::FSHR; - if (TLI.isOperationLegalOrCustom(FunnelOpcode, VT)) { - setValue(&I, DAG.getNode(FunnelOpcode, sdl, VT, X, Y, Z)); - return; - } - // When X == Y, this is rotate. If the data type has a power-of-2 size, we - // avoid the select that is necessary in the general case to filter out - // the 0-shift possibility that leads to UB. - if (X == Y && isPowerOf2_32(VT.getScalarSizeInBits())) { + if (X == Y) { auto RotateOpcode = IsFSHL ? ISD::ROTL : ISD::ROTR; - if (TLI.isOperationLegalOrCustom(RotateOpcode, VT)) { - setValue(&I, DAG.getNode(RotateOpcode, sdl, VT, X, Z)); - return; - } - - // Some targets only rotate one way. Try the opposite direction. - RotateOpcode = IsFSHL ? ISD::ROTR : ISD::ROTL; - if (TLI.isOperationLegalOrCustom(RotateOpcode, VT)) { - // Negate the shift amount because it is safe to ignore the high bits. - SDValue NegShAmt = DAG.getNode(ISD::SUB, sdl, VT, Zero, Z); - setValue(&I, DAG.getNode(RotateOpcode, sdl, VT, X, NegShAmt)); - return; - } - - // fshl (rotl): (X << (Z % BW)) | (X >> ((0 - Z) % BW)) - // fshr (rotr): (X << ((0 - Z) % BW)) | (X >> (Z % BW)) - SDValue NegZ = DAG.getNode(ISD::SUB, sdl, VT, Zero, Z); - SDValue NShAmt = DAG.getNode(ISD::UREM, sdl, VT, NegZ, BitWidthC); - SDValue ShX = DAG.getNode(ISD::SHL, sdl, VT, X, IsFSHL ? ShAmt : NShAmt); - SDValue ShY = DAG.getNode(ISD::SRL, sdl, VT, X, IsFSHL ? NShAmt : ShAmt); - setValue(&I, DAG.getNode(ISD::OR, sdl, VT, ShX, ShY)); - return; + setValue(&I, DAG.getNode(RotateOpcode, sdl, VT, X, Z)); + } else { + auto FunnelOpcode = IsFSHL ? ISD::FSHL : ISD::FSHR; + setValue(&I, DAG.getNode(FunnelOpcode, sdl, VT, X, Y, Z)); } - - // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW))) - // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW)) - SDValue InvShAmt = DAG.getNode(ISD::SUB, sdl, VT, BitWidthC, ShAmt); - SDValue ShX = DAG.getNode(ISD::SHL, sdl, VT, X, IsFSHL ? ShAmt : InvShAmt); - SDValue ShY = DAG.getNode(ISD::SRL, sdl, VT, Y, IsFSHL ? InvShAmt : ShAmt); - SDValue Or = DAG.getNode(ISD::OR, sdl, VT, ShX, ShY); - - // If (Z % BW == 0), then the opposite direction shift is shift-by-bitwidth, - // and that is undefined. We must compare and select to avoid UB. - EVT CCVT = MVT::i1; - if (VT.isVector()) - CCVT = EVT::getVectorVT(*Context, CCVT, VT.getVectorNumElements()); - - // For fshl, 0-shift returns the 1st arg (X). - // For fshr, 0-shift returns the 2nd arg (Y). - SDValue IsZeroShift = DAG.getSetCC(sdl, CCVT, ShAmt, Zero, ISD::SETEQ); - setValue(&I, DAG.getSelect(sdl, VT, IsZeroShift, IsFSHL ? X : Y, Or)); return; } case Intrinsic::sadd_sat: { @@ -6361,6 +6314,18 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, setValue(&I, DAG.getNode(ISD::USUBSAT, sdl, Op1.getValueType(), Op1, Op2)); return; } + case Intrinsic::sshl_sat: { + SDValue Op1 = getValue(I.getArgOperand(0)); + SDValue Op2 = getValue(I.getArgOperand(1)); + setValue(&I, DAG.getNode(ISD::SSHLSAT, sdl, Op1.getValueType(), Op1, Op2)); + return; + } + case Intrinsic::ushl_sat: { + SDValue Op1 = getValue(I.getArgOperand(0)); + SDValue Op2 = getValue(I.getArgOperand(1)); + setValue(&I, DAG.getNode(ISD::USHLSAT, sdl, Op1.getValueType(), Op1, Op2)); + return; + } case Intrinsic::smul_fix: case Intrinsic::umul_fix: case Intrinsic::smul_fix_sat: @@ -6383,6 +6348,36 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, Op1, Op2, Op3, DAG, TLI)); return; } + case Intrinsic::smax: { + SDValue Op1 = getValue(I.getArgOperand(0)); + SDValue Op2 = getValue(I.getArgOperand(1)); + setValue(&I, DAG.getNode(ISD::SMAX, sdl, Op1.getValueType(), Op1, Op2)); + return; + } + case Intrinsic::smin: { + SDValue Op1 = getValue(I.getArgOperand(0)); + SDValue Op2 = getValue(I.getArgOperand(1)); + setValue(&I, DAG.getNode(ISD::SMIN, sdl, Op1.getValueType(), Op1, Op2)); + return; + } + case Intrinsic::umax: { + SDValue Op1 = getValue(I.getArgOperand(0)); + SDValue Op2 = getValue(I.getArgOperand(1)); + setValue(&I, DAG.getNode(ISD::UMAX, sdl, Op1.getValueType(), Op1, Op2)); + return; + } + case Intrinsic::umin: { + SDValue Op1 = getValue(I.getArgOperand(0)); + SDValue Op2 = getValue(I.getArgOperand(1)); + setValue(&I, DAG.getNode(ISD::UMIN, sdl, Op1.getValueType(), Op1, Op2)); + return; + } + case Intrinsic::abs: { + // TODO: Preserve "int min is poison" arg in SDAG? + SDValue Op1 = getValue(I.getArgOperand(0)); + setValue(&I, DAG.getNode(ISD::ABS, sdl, Op1.getValueType(), Op1)); + return; + } case Intrinsic::stacksave: { SDValue Op = getRoot(); EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); @@ -6401,7 +6396,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, EVT ResTy = TLI.getValueType(DAG.getDataLayout(), I.getType()); // Result type for @llvm.get.dynamic.area.offset should match PtrTy for // target. - if (PtrTy.getSizeInBits() < ResTy.getSizeInBits()) + if (PtrTy.getFixedSizeInBits() < ResTy.getFixedSizeInBits()) report_fatal_error("Wrong result type for @llvm.get.dynamic.area.offset" " intrinsic!"); Res = DAG.getNode(ISD::GET_DYNAMIC_AREA_OFFSET, sdl, DAG.getVTList(ResTy), @@ -6419,7 +6414,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, } else { EVT PtrTy = TLI.getValueType(DAG.getDataLayout(), I.getType()); const Value *Global = TLI.getSDagStackGuard(M); - unsigned Align = DL->getPrefTypeAlignment(Global->getType()); + Align Align = DL->getPrefTypeAlign(Global->getType()); Res = DAG.getLoad(PtrTy, sdl, Chain, getValue(Global), MachinePointerInfo(Global, 0), Align, MachineMemOperand::MOVolatile); @@ -6450,9 +6445,10 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, SDValue FIN = DAG.getFrameIndex(FI, PtrTy); // Store the stack protector onto the stack. - Res = DAG.getStore(Chain, sdl, Src, FIN, MachinePointerInfo::getFixedStack( - DAG.getMachineFunction(), FI), - /* Alignment = */ 0, MachineMemOperand::MOVolatile); + Res = DAG.getStore( + Chain, sdl, Src, FIN, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), + MaybeAlign(), MachineMemOperand::MOVolatile); setValue(&I, Res); DAG.setRoot(Res); return; @@ -6470,10 +6466,13 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, // Drop the intrinsic, but forward the value setValue(&I, getValue(I.getOperand(0))); return; + case Intrinsic::assume: + case Intrinsic::experimental_noalias_scope_decl: case Intrinsic::var_annotation: case Intrinsic::sideeffect: - // Discard annotate attributes, assumptions, and artificial side-effects. + // Discard annotate attributes, noalias scope declarations, assumptions, and + // artificial side-effects. return; case Intrinsic::codeview_annotation: { @@ -6534,6 +6533,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, setValue(&I, getValue(I.getArgOperand(0))); return; + case Intrinsic::ubsantrap: case Intrinsic::debugtrap: case Intrinsic::trap: { StringRef TrapFuncName = @@ -6541,12 +6541,31 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, .getAttribute(AttributeList::FunctionIndex, "trap-func-name") .getValueAsString(); if (TrapFuncName.empty()) { - ISD::NodeType Op = (Intrinsic == Intrinsic::trap) ? - ISD::TRAP : ISD::DEBUGTRAP; - DAG.setRoot(DAG.getNode(Op, sdl,MVT::Other, getRoot())); + switch (Intrinsic) { + case Intrinsic::trap: + DAG.setRoot(DAG.getNode(ISD::TRAP, sdl, MVT::Other, getRoot())); + break; + case Intrinsic::debugtrap: + DAG.setRoot(DAG.getNode(ISD::DEBUGTRAP, sdl, MVT::Other, getRoot())); + break; + case Intrinsic::ubsantrap: + DAG.setRoot(DAG.getNode( + ISD::UBSANTRAP, sdl, MVT::Other, getRoot(), + DAG.getTargetConstant( + cast<ConstantInt>(I.getArgOperand(0))->getZExtValue(), sdl, + MVT::i32))); + break; + default: llvm_unreachable("unknown trap intrinsic"); + } return; } TargetLowering::ArgListTy Args; + if (Intrinsic == Intrinsic::ubsantrap) { + Args.push_back(TargetLoweringBase::ArgListEntry()); + Args[0].Val = I.getArgOperand(0); + Args[0].Node = getValue(Args[0].Val); + Args[0].Ty = Args[0].Val->getType(); + } TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(sdl).setChain(getRoot()).setLibCallee( @@ -6583,7 +6602,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, EVT OverflowVT = MVT::i1; if (ResultVT.isVector()) OverflowVT = EVT::getVectorVT( - *Context, OverflowVT, ResultVT.getVectorNumElements()); + *Context, OverflowVT, ResultVT.getVectorElementCount()); SDVTList VTs = DAG.getVTList(ResultVT, OverflowVT); setValue(&I, DAG.getNode(Op, sdl, VTs, Op1, Op2)); @@ -6621,7 +6640,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, cast<ConstantInt>(I.getArgOperand(0))->getSExtValue(); Value *const ObjectPtr = I.getArgOperand(1); SmallVector<const Value *, 4> Allocas; - GetUnderlyingObjects(ObjectPtr, Allocas, *DL); + getUnderlyingObjects(ObjectPtr, Allocas); for (SmallVectorImpl<const Value*>::iterator Object = Allocas.begin(), E = Allocas.end(); Object != E; ++Object) { @@ -6648,6 +6667,14 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, } return; } + case Intrinsic::pseudoprobe: { + auto Guid = cast<ConstantInt>(I.getArgOperand(0))->getZExtValue(); + auto Index = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue(); + auto Attr = cast<ConstantInt>(I.getArgOperand(2))->getZExtValue(); + Res = DAG.getPseudoProbeNode(sdl, getRoot(), Guid, Index, Attr); + DAG.setRoot(Res); + return; + } case Intrinsic::invariant_start: // Discard region information. setValue(&I, DAG.getUNDEF(TLI.getPointerTy(DAG.getDataLayout()))); @@ -6758,7 +6785,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, // specific calling convention, and only for x86_64. // FIXME: Support other platforms later. const auto &Triple = DAG.getTarget().getTargetTriple(); - if (Triple.getArch() != Triple::x86_64 || !Triple.isOSLinux()) + if (Triple.getArch() != Triple::x86_64) return; SDLoc DL = getCurSDLoc(); @@ -6789,7 +6816,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, // specific calling convention, and only for x86_64. // FIXME: Support other platforms later. const auto &Triple = DAG.getTarget().getTargetTriple(); - if (Triple.getArch() != Triple::x86_64 || !Triple.isOSLinux()) + if (Triple.getArch() != Triple::x86_64) return; SDLoc DL = getCurSDLoc(); @@ -6823,19 +6850,19 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, LowerDeoptimizeCall(&I); return; - case Intrinsic::experimental_vector_reduce_v2_fadd: - case Intrinsic::experimental_vector_reduce_v2_fmul: - case Intrinsic::experimental_vector_reduce_add: - case Intrinsic::experimental_vector_reduce_mul: - case Intrinsic::experimental_vector_reduce_and: - case Intrinsic::experimental_vector_reduce_or: - case Intrinsic::experimental_vector_reduce_xor: - case Intrinsic::experimental_vector_reduce_smax: - case Intrinsic::experimental_vector_reduce_smin: - case Intrinsic::experimental_vector_reduce_umax: - case Intrinsic::experimental_vector_reduce_umin: - case Intrinsic::experimental_vector_reduce_fmax: - case Intrinsic::experimental_vector_reduce_fmin: + case Intrinsic::vector_reduce_fadd: + case Intrinsic::vector_reduce_fmul: + case Intrinsic::vector_reduce_add: + case Intrinsic::vector_reduce_mul: + case Intrinsic::vector_reduce_and: + case Intrinsic::vector_reduce_or: + case Intrinsic::vector_reduce_xor: + case Intrinsic::vector_reduce_smax: + case Intrinsic::vector_reduce_smin: + case Intrinsic::vector_reduce_umax: + case Intrinsic::vector_reduce_umin: + case Intrinsic::vector_reduce_fmax: + case Intrinsic::vector_reduce_fmin: visitVectorReduce(I, Intrinsic); return; @@ -6923,36 +6950,57 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, case Intrinsic::get_active_lane_mask: { auto DL = getCurSDLoc(); SDValue Index = getValue(I.getOperand(0)); - SDValue BTC = getValue(I.getOperand(1)); + SDValue TripCount = getValue(I.getOperand(1)); Type *ElementTy = I.getOperand(0)->getType(); EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); unsigned VecWidth = VT.getVectorNumElements(); - SmallVector<SDValue, 16> OpsBTC; + SmallVector<SDValue, 16> OpsTripCount; SmallVector<SDValue, 16> OpsIndex; SmallVector<SDValue, 16> OpsStepConstants; for (unsigned i = 0; i < VecWidth; i++) { - OpsBTC.push_back(BTC); + OpsTripCount.push_back(TripCount); OpsIndex.push_back(Index); - OpsStepConstants.push_back(DAG.getConstant(i, DL, MVT::getVT(ElementTy))); + OpsStepConstants.push_back( + DAG.getConstant(i, DL, EVT::getEVT(ElementTy))); } - EVT CCVT = MVT::i1; - CCVT = EVT::getVectorVT(I.getContext(), CCVT, VecWidth); + EVT CCVT = EVT::getVectorVT(I.getContext(), MVT::i1, VecWidth); - auto VecTy = MVT::getVT(FixedVectorType::get(ElementTy, VecWidth)); + auto VecTy = EVT::getEVT(FixedVectorType::get(ElementTy, VecWidth)); SDValue VectorIndex = DAG.getBuildVector(VecTy, DL, OpsIndex); SDValue VectorStep = DAG.getBuildVector(VecTy, DL, OpsStepConstants); SDValue VectorInduction = DAG.getNode( ISD::UADDO, DL, DAG.getVTList(VecTy, CCVT), VectorIndex, VectorStep); - SDValue VectorBTC = DAG.getBuildVector(VecTy, DL, OpsBTC); + SDValue VectorTripCount = DAG.getBuildVector(VecTy, DL, OpsTripCount); SDValue SetCC = DAG.getSetCC(DL, CCVT, VectorInduction.getValue(0), - VectorBTC, ISD::CondCode::SETULE); + VectorTripCount, ISD::CondCode::SETULT); setValue(&I, DAG.getNode(ISD::AND, DL, CCVT, DAG.getNOT(DL, VectorInduction.getValue(1), CCVT), SetCC)); return; } + case Intrinsic::experimental_vector_insert: { + auto DL = getCurSDLoc(); + + SDValue Vec = getValue(I.getOperand(0)); + SDValue SubVec = getValue(I.getOperand(1)); + SDValue Index = getValue(I.getOperand(2)); + EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); + setValue(&I, DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ResultVT, Vec, SubVec, + Index)); + return; + } + case Intrinsic::experimental_vector_extract: { + auto DL = getCurSDLoc(); + + SDValue Vec = getValue(I.getOperand(0)); + SDValue Index = getValue(I.getOperand(1)); + EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); + + setValue(&I, DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResultVT, Vec, Index)); + return; + } } } @@ -7068,6 +7116,41 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic( setValue(&FPI, FPResult); } +static unsigned getISDForVPIntrinsic(const VPIntrinsic &VPIntrin) { + Optional<unsigned> ResOPC; + switch (VPIntrin.getIntrinsicID()) { +#define BEGIN_REGISTER_VP_INTRINSIC(INTRIN, ...) case Intrinsic::INTRIN: +#define BEGIN_REGISTER_VP_SDNODE(VPSDID, ...) ResOPC = ISD::VPSDID; +#define END_REGISTER_VP_INTRINSIC(...) break; +#include "llvm/IR/VPIntrinsics.def" + } + + if (!ResOPC.hasValue()) + llvm_unreachable( + "Inconsistency: no SDNode available for this VPIntrinsic!"); + + return ResOPC.getValue(); +} + +void SelectionDAGBuilder::visitVectorPredicationIntrinsic( + const VPIntrinsic &VPIntrin) { + unsigned Opcode = getISDForVPIntrinsic(VPIntrin); + + SmallVector<EVT, 4> ValueVTs; + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + ComputeValueVTs(TLI, DAG.getDataLayout(), VPIntrin.getType(), ValueVTs); + SDVTList VTs = DAG.getVTList(ValueVTs); + + // Request operands. + SmallVector<SDValue, 7> OpValues; + for (int i = 0; i < (int)VPIntrin.getNumArgOperands(); ++i) + OpValues.push_back(getValue(VPIntrin.getArgOperand(i))); + + SDLoc DL = getCurSDLoc(); + SDValue Result = DAG.getNode(Opcode, DL, VTs, OpValues); + setValue(&VPIntrin, Result); +} + std::pair<SDValue, SDValue> SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI, const BasicBlock *EHPadBB) { @@ -7284,9 +7367,9 @@ static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT, } SDValue Ptr = Builder.getValue(PtrVal); - SDValue LoadVal = Builder.DAG.getLoad(LoadVT, Builder.getCurSDLoc(), Root, - Ptr, MachinePointerInfo(PtrVal), - /* Alignment = */ 1); + SDValue LoadVal = + Builder.DAG.getLoad(LoadVT, Builder.getCurSDLoc(), Root, Ptr, + MachinePointerInfo(PtrVal), Align(1)); if (!ConstantMemory) Builder.PendingLoads.push_back(LoadVal.getValue(1)); @@ -7307,12 +7390,12 @@ void SelectionDAGBuilder::processIntegerCallValue(const Instruction &I, setValue(&I, Value); } -/// See if we can lower a memcmp call into an optimized form. If so, return +/// See if we can lower a memcmp/bcmp call into an optimized form. If so, return /// true and lower it. Otherwise return false, and it will be lowered like a /// normal call. /// The caller already checked that \p I calls the appropriate LibFunc with a /// correct prototype. -bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { +bool SelectionDAGBuilder::visitMemCmpBCmpCall(const CallInst &I) { const Value *LHS = I.getArgOperand(0), *RHS = I.getArgOperand(1); const Value *Size = I.getArgOperand(2); const ConstantInt *CSize = dyn_cast<ConstantInt>(Size); @@ -7563,8 +7646,12 @@ bool SelectionDAGBuilder::visitUnaryFloatCall(const CallInst &I, if (!I.onlyReadsMemory()) return false; + SDNodeFlags Flags; + Flags.copyFMF(cast<FPMathOperator>(I)); + SDValue Tmp = getValue(I.getArgOperand(0)); - setValue(&I, DAG.getNode(Opcode, getCurSDLoc(), Tmp.getValueType(), Tmp)); + setValue(&I, + DAG.getNode(Opcode, getCurSDLoc(), Tmp.getValueType(), Tmp, Flags)); return true; } @@ -7579,10 +7666,13 @@ bool SelectionDAGBuilder::visitBinaryFloatCall(const CallInst &I, if (!I.onlyReadsMemory()) return false; + SDNodeFlags Flags; + Flags.copyFMF(cast<FPMathOperator>(I)); + SDValue Tmp0 = getValue(I.getArgOperand(0)); SDValue Tmp1 = getValue(I.getArgOperand(1)); EVT VT = Tmp0.getValueType(); - setValue(&I, DAG.getNode(Opcode, getCurSDLoc(), VT, Tmp0, Tmp1)); + setValue(&I, DAG.getNode(Opcode, getCurSDLoc(), VT, Tmp0, Tmp1, Flags)); return true; } @@ -7616,6 +7706,10 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { LibInfo->hasOptimizedCodeGen(Func)) { switch (Func) { default: break; + case LibFunc_bcmp: + if (visitMemCmpBCmpCall(I)) + return; + break; case LibFunc_copysign: case LibFunc_copysignf: case LibFunc_copysignl: @@ -7717,7 +7811,7 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { return; break; case LibFunc_memcmp: - if (visitMemCmpCall(I)) + if (visitMemCmpBCmpCall(I)) return; break; case LibFunc_mempcpy: @@ -8137,10 +8231,9 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call) { OpInfo.CallOperand = getValue(OpInfo.CallOperandVal); } - OpInfo.ConstraintVT = - OpInfo - .getCallOperandValEVT(*DAG.getContext(), TLI, DAG.getDataLayout()) - .getSimpleVT(); + EVT VT = OpInfo.getCallOperandValEVT(*DAG.getContext(), TLI, + DAG.getDataLayout()); + OpInfo.ConstraintVT = VT.isSimple() ? VT.getSimpleVT() : MVT::Other; } else if (OpInfo.Type == InlineAsm::isOutput && !OpInfo.isIndirect) { // The return value of the call is this value. As such, there is no // corresponding argument. @@ -8402,7 +8495,7 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call) { InlineAsm::getFlagWord(InlineAsm::Kind_Imm, Ops.size()); AsmNodeOperands.push_back(DAG.getTargetConstant( ResOpType, getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout()))); - AsmNodeOperands.insert(AsmNodeOperands.end(), Ops.begin(), Ops.end()); + llvm::append_range(AsmNodeOperands, Ops); break; } @@ -8982,57 +9075,59 @@ void SelectionDAGBuilder::visitVectorReduce(const CallInst &I, SDLoc dl = getCurSDLoc(); EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); SDValue Res; - FastMathFlags FMF; - if (isa<FPMathOperator>(I)) - FMF = I.getFastMathFlags(); + SDNodeFlags SDFlags; + if (auto *FPMO = dyn_cast<FPMathOperator>(&I)) + SDFlags.copyFMF(*FPMO); switch (Intrinsic) { - case Intrinsic::experimental_vector_reduce_v2_fadd: - if (FMF.allowReassoc()) + case Intrinsic::vector_reduce_fadd: + if (SDFlags.hasAllowReassociation()) Res = DAG.getNode(ISD::FADD, dl, VT, Op1, - DAG.getNode(ISD::VECREDUCE_FADD, dl, VT, Op2)); + DAG.getNode(ISD::VECREDUCE_FADD, dl, VT, Op2, SDFlags), + SDFlags); else - Res = DAG.getNode(ISD::VECREDUCE_STRICT_FADD, dl, VT, Op1, Op2); + Res = DAG.getNode(ISD::VECREDUCE_SEQ_FADD, dl, VT, Op1, Op2, SDFlags); break; - case Intrinsic::experimental_vector_reduce_v2_fmul: - if (FMF.allowReassoc()) + case Intrinsic::vector_reduce_fmul: + if (SDFlags.hasAllowReassociation()) Res = DAG.getNode(ISD::FMUL, dl, VT, Op1, - DAG.getNode(ISD::VECREDUCE_FMUL, dl, VT, Op2)); + DAG.getNode(ISD::VECREDUCE_FMUL, dl, VT, Op2, SDFlags), + SDFlags); else - Res = DAG.getNode(ISD::VECREDUCE_STRICT_FMUL, dl, VT, Op1, Op2); + Res = DAG.getNode(ISD::VECREDUCE_SEQ_FMUL, dl, VT, Op1, Op2, SDFlags); break; - case Intrinsic::experimental_vector_reduce_add: + case Intrinsic::vector_reduce_add: Res = DAG.getNode(ISD::VECREDUCE_ADD, dl, VT, Op1); break; - case Intrinsic::experimental_vector_reduce_mul: + case Intrinsic::vector_reduce_mul: Res = DAG.getNode(ISD::VECREDUCE_MUL, dl, VT, Op1); break; - case Intrinsic::experimental_vector_reduce_and: + case Intrinsic::vector_reduce_and: Res = DAG.getNode(ISD::VECREDUCE_AND, dl, VT, Op1); break; - case Intrinsic::experimental_vector_reduce_or: + case Intrinsic::vector_reduce_or: Res = DAG.getNode(ISD::VECREDUCE_OR, dl, VT, Op1); break; - case Intrinsic::experimental_vector_reduce_xor: + case Intrinsic::vector_reduce_xor: Res = DAG.getNode(ISD::VECREDUCE_XOR, dl, VT, Op1); break; - case Intrinsic::experimental_vector_reduce_smax: + case Intrinsic::vector_reduce_smax: Res = DAG.getNode(ISD::VECREDUCE_SMAX, dl, VT, Op1); break; - case Intrinsic::experimental_vector_reduce_smin: + case Intrinsic::vector_reduce_smin: Res = DAG.getNode(ISD::VECREDUCE_SMIN, dl, VT, Op1); break; - case Intrinsic::experimental_vector_reduce_umax: + case Intrinsic::vector_reduce_umax: Res = DAG.getNode(ISD::VECREDUCE_UMAX, dl, VT, Op1); break; - case Intrinsic::experimental_vector_reduce_umin: + case Intrinsic::vector_reduce_umin: Res = DAG.getNode(ISD::VECREDUCE_UMIN, dl, VT, Op1); break; - case Intrinsic::experimental_vector_reduce_fmax: - Res = DAG.getNode(ISD::VECREDUCE_FMAX, dl, VT, Op1); + case Intrinsic::vector_reduce_fmax: + Res = DAG.getNode(ISD::VECREDUCE_FMAX, dl, VT, Op1, SDFlags); break; - case Intrinsic::experimental_vector_reduce_fmin: - Res = DAG.getNode(ISD::VECREDUCE_FMIN, dl, VT, Op1); + case Intrinsic::vector_reduce_fmin: + Res = DAG.getNode(ISD::VECREDUCE_FMIN, dl, VT, Op1, SDFlags); break; default: llvm_unreachable("Unhandled vector reduce intrinsic"); @@ -9119,6 +9214,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { Entry.IsSRet = true; Entry.IsNest = false; Entry.IsByVal = false; + Entry.IsByRef = false; Entry.IsReturned = false; Entry.IsSwiftSelf = false; Entry.IsSwiftError = false; @@ -9239,6 +9335,8 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { Flags.setCFGuardTarget(); if (Args[i].IsByVal) Flags.setByVal(); + if (Args[i].IsByRef) + Flags.setByRef(); if (Args[i].IsPreallocated) { Flags.setPreallocated(); // Set the byval flag for CCAssignFn callbacks that don't know about @@ -9444,11 +9542,33 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { return std::make_pair(Res, CLI.Chain); } +/// Places new result values for the node in Results (their number +/// and types must exactly match those of the original return values of +/// the node), or leaves Results empty, which indicates that the node is not +/// to be custom lowered after all. void TargetLowering::LowerOperationWrapper(SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const { - if (SDValue Res = LowerOperation(SDValue(N, 0), DAG)) + SDValue Res = LowerOperation(SDValue(N, 0), DAG); + + if (!Res.getNode()) + return; + + // If the original node has one result, take the return value from + // LowerOperation as is. It might not be result number 0. + if (N->getNumValues() == 1) { Results.push_back(Res); + return; + } + + // If the original node has multiple results, then the return node should + // have the same number of results. + assert((N->getNumValues() == Res->getNumValues()) && + "Lowering returned the wrong number of results!"); + + // Places new result values base on N result number. + for (unsigned I = 0, E = N->getNumValues(); I != E; ++I) + Results.push_back(Res.getValue(I)); } SDValue TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { @@ -9571,7 +9691,7 @@ findArgumentCopyElisionCandidates(const DataLayout &DL, // initializes the alloca. Don't elide copies from the same argument twice. const Value *Val = SI->getValueOperand()->stripPointerCasts(); const auto *Arg = dyn_cast<Argument>(Val); - if (!Arg || Arg->hasPassPointeeByValueAttr() || + if (!Arg || Arg->hasPassPointeeByValueCopyAttr() || Arg->getType()->isEmptyTy() || DL.getTypeStoreSize(Arg->getType()) != DL.getTypeAllocSize(AI->getAllocatedType()) || @@ -9752,6 +9872,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) { Flags.setSwiftError(); if (Arg.hasAttribute(Attribute::ByVal)) Flags.setByVal(); + if (Arg.hasAttribute(Attribute::ByRef)) + Flags.setByRef(); if (Arg.hasAttribute(Attribute::InAlloca)) { Flags.setInAlloca(); // Set the byval flag for CCAssignFn callbacks that don't know about @@ -9770,27 +9892,31 @@ void SelectionDAGISel::LowerArguments(const Function &F) { // preallocated handling in the various CC lowering callbacks. Flags.setByVal(); } - if (F.getCallingConv() == CallingConv::X86_INTR) { - // IA Interrupt passes frame (1st parameter) by value in the stack. - if (ArgNo == 0) - Flags.setByVal(); - } - if (Flags.isByVal() || Flags.isInAlloca() || Flags.isPreallocated()) { - Type *ElementTy = Arg.getParamByValType(); - // For ByVal, size and alignment should be passed from FE. BE will - // guess if this info is not there but there are cases it cannot get - // right. - unsigned FrameSize = DL.getTypeAllocSize(Arg.getParamByValType()); - Flags.setByValSize(FrameSize); + Type *ArgMemTy = nullptr; + if (Flags.isByVal() || Flags.isInAlloca() || Flags.isPreallocated() || + Flags.isByRef()) { + if (!ArgMemTy) + ArgMemTy = Arg.getPointeeInMemoryValueType(); - unsigned FrameAlign; - if (Arg.getParamAlignment()) - FrameAlign = Arg.getParamAlignment(); - else - FrameAlign = TLI->getByValTypeAlignment(ElementTy, DL); - Flags.setByValAlign(Align(FrameAlign)); + uint64_t MemSize = DL.getTypeAllocSize(ArgMemTy); + + // For in-memory arguments, size and alignment should be passed from FE. + // BE will guess if this info is not there but there are cases it cannot + // get right. + MaybeAlign MemAlign = Arg.getParamAlign(); + if (!MemAlign) + MemAlign = Align(TLI->getByValTypeAlignment(ArgMemTy, DL)); + + if (Flags.isByRef()) { + Flags.setByRefSize(MemSize); + Flags.setByRefAlign(*MemAlign); + } else { + Flags.setByValSize(MemSize); + Flags.setByValAlign(*MemAlign); + } } + if (Arg.hasAttribute(Attribute::Nest)) Flags.setNest(); if (NeedsRegBlock) @@ -10667,8 +10793,7 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { {PeeledSwitchMBB, First, Last, nullptr, nullptr, DefaultProb}); while (!WorkList.empty()) { - SwitchWorkListItem W = WorkList.back(); - WorkList.pop_back(); + SwitchWorkListItem W = WorkList.pop_back_val(); unsigned NumClusters = W.LastCluster - W.FirstCluster + 1; if (NumClusters > 3 && TM.getOptLevel() != CodeGenOpt::None && |