Diffstat (limited to 'lib/CodeGen/GlobalISel/IRTranslator.cpp')
-rw-r--r--   lib/CodeGen/GlobalISel/IRTranslator.cpp   1284
1 file changed, 894 insertions(+), 390 deletions(-)
diff --git a/lib/CodeGen/GlobalISel/IRTranslator.cpp b/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 95f6274aa068..6e99bdbd8264 100644
--- a/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -1,9 +1,8 @@
 //===- llvm/CodeGen/GlobalISel/IRTranslator.cpp - IRTranslator ---*- C++ -*-==//
 //
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 /// \file
@@ -16,8 +15,11 @@
 #include "llvm/ADT/ScopeExit.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
 #include "llvm/CodeGen/GlobalISel/CallLowering.h"
 #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
 #include "llvm/CodeGen/LowLevelType.h"
@@ -106,9 +108,7 @@ static void reportTranslationError(MachineFunction &MF,
   ORE.emit(R);
 }
 
-IRTranslator::IRTranslator() : MachineFunctionPass(ID) {
-  initializeIRTranslatorPass(*PassRegistry::getPassRegistry());
-}
+IRTranslator::IRTranslator() : MachineFunctionPass(ID) { }
 
 #ifndef NDEBUG
 namespace {
@@ -136,7 +136,11 @@ public:
     LLVM_DEBUG(dbgs() << "Checking DILocation from " << *CurrInst
                       << " was copied to " << MI);
 #endif
-    assert(CurrInst->getDebugLoc() == MI.getDebugLoc() &&
+    // We allow insts in the entry block to have a debug loc line of 0 because
+    // they could have originated from constants, and we don't want a jumpy
+    // debug experience.
+    assert((CurrInst->getDebugLoc() == MI.getDebugLoc() ||
+            MI.getDebugLoc().getLine() == 0) &&
            "Line info was not transferred to all instructions");
   }
 };
@@ -152,36 +156,6 @@ void IRTranslator::getAnalysisUsage(AnalysisUsage &AU) const {
   MachineFunctionPass::getAnalysisUsage(AU);
 }
 
-static void computeValueLLTs(const DataLayout &DL, Type &Ty,
-                             SmallVectorImpl<LLT> &ValueTys,
-                             SmallVectorImpl<uint64_t> *Offsets = nullptr,
-                             uint64_t StartingOffset = 0) {
-  // Given a struct type, recursively traverse the elements.
-  if (StructType *STy = dyn_cast<StructType>(&Ty)) {
-    const StructLayout *SL = DL.getStructLayout(STy);
-    for (unsigned I = 0, E = STy->getNumElements(); I != E; ++I)
-      computeValueLLTs(DL, *STy->getElementType(I), ValueTys, Offsets,
-                       StartingOffset + SL->getElementOffset(I));
-    return;
-  }
-  // Given an array type, recursively traverse the elements.
-  if (ArrayType *ATy = dyn_cast<ArrayType>(&Ty)) {
-    Type *EltTy = ATy->getElementType();
-    uint64_t EltSize = DL.getTypeAllocSize(EltTy);
-    for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i)
-      computeValueLLTs(DL, *EltTy, ValueTys, Offsets,
-                       StartingOffset + i * EltSize);
-    return;
-  }
-  // Interpret void as zero return values.
-  if (Ty.isVoidTy())
-    return;
-  // Base case: we can get an LLT for this LLVM IR type.
-  ValueTys.push_back(getLLTForType(Ty, DL));
-  if (Offsets != nullptr)
-    Offsets->push_back(StartingOffset * 8);
-}
-
 IRTranslator::ValueToVRegInfo::VRegListT &
 IRTranslator::allocateVRegs(const Value &Val) {
   assert(!VMap.contains(Val) && "Value already allocated in VMap");
@@ -195,7 +169,7 @@ IRTranslator::allocateVRegs(const Value &Val) {
   return *Regs;
 }
 
-ArrayRef<unsigned> IRTranslator::getOrCreateVRegs(const Value &Val) {
+ArrayRef<Register> IRTranslator::getOrCreateVRegs(const Value &Val) {
   auto VRegsIt = VMap.findVRegs(Val);
   if (VRegsIt != VMap.vregs_end())
     return *VRegsIt->second;
@@ -249,7 +223,7 @@ int IRTranslator::getOrCreateFrameIndex(const AllocaInst &AI) {
   if (FrameIndices.find(&AI) != FrameIndices.end())
     return FrameIndices[&AI];
 
-  unsigned ElementSize = DL->getTypeStoreSize(AI.getAllocatedType());
+  unsigned ElementSize = DL->getTypeAllocSize(AI.getAllocatedType());
   unsigned Size =
       ElementSize * cast<ConstantInt>(AI.getArraySize())->getZExtValue();
@@ -311,21 +285,20 @@ void IRTranslator::addMachineCFGPred(CFGEdge Edge, MachineBasicBlock *NewPred) {
 
 bool IRTranslator::translateBinaryOp(unsigned Opcode, const User &U,
                                      MachineIRBuilder &MIRBuilder) {
-  // FIXME: handle signed/unsigned wrapping flags.
-
   // Get or create a virtual register for each value.
   // Unless the value is a Constant => loadimm cst?
   // or inline constant each time?
   // Creation of a virtual register needs to have a size.
-  unsigned Op0 = getOrCreateVReg(*U.getOperand(0));
-  unsigned Op1 = getOrCreateVReg(*U.getOperand(1));
-  unsigned Res = getOrCreateVReg(U);
-  auto FBinOp = MIRBuilder.buildInstr(Opcode).addDef(Res).addUse(Op0).addUse(Op1);
+  Register Op0 = getOrCreateVReg(*U.getOperand(0));
+  Register Op1 = getOrCreateVReg(*U.getOperand(1));
+  Register Res = getOrCreateVReg(U);
+  uint16_t Flags = 0;
   if (isa<Instruction>(U)) {
-    MachineInstr *FBinOpMI = FBinOp.getInstr();
     const Instruction &I = cast<Instruction>(U);
-    FBinOpMI->copyIRFlags(I);
+    Flags = MachineInstr::copyFlagsFromInstruction(I);
   }
+
+  MIRBuilder.buildInstr(Opcode, {Res}, {Op0, Op1}, Flags);
 
   return true;
 }
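The frame-index hunk above switches the element size from getTypeStoreSize to getTypeAllocSize, so stack objects account for the tail padding a type needs when used as an array element. A minimal standalone sketch of that distinction (plain C++, not the LLVM API; the 10-byte/16-byte figures assume an x86_fp80-like type):

#include <cstdint>
#include <cstdio>

// Round a type's store size up to its ABI alignment; the "alloc size" is the
// stride used when the type is placed in an array or as a stack slot.
uint64_t allocSize(uint64_t StoreSize, uint64_t Align) {
  return (StoreSize + Align - 1) & ~(Align - 1);
}

int main() {
  // A 10-byte value with 16-byte alignment occupies a 16-byte slot.
  std::printf("%llu\n", (unsigned long long)allocSize(10, 16));
  return 0;
}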
@@ -333,27 +306,38 @@ bool IRTranslator::translateFSub(const User &U, MachineIRBuilder &MIRBuilder) {
   // -0.0 - X --> G_FNEG
   if (isa<Constant>(U.getOperand(0)) &&
       U.getOperand(0) == ConstantFP::getZeroValueForNegation(U.getType())) {
-    MIRBuilder.buildInstr(TargetOpcode::G_FNEG)
-        .addDef(getOrCreateVReg(U))
-        .addUse(getOrCreateVReg(*U.getOperand(1)));
+    Register Op1 = getOrCreateVReg(*U.getOperand(1));
+    Register Res = getOrCreateVReg(U);
+    uint16_t Flags = 0;
+    if (isa<Instruction>(U)) {
+      const Instruction &I = cast<Instruction>(U);
+      Flags = MachineInstr::copyFlagsFromInstruction(I);
+    }
+    // Negate the last operand of the FSUB
+    MIRBuilder.buildInstr(TargetOpcode::G_FNEG, {Res}, {Op1}, Flags);
     return true;
   }
   return translateBinaryOp(TargetOpcode::G_FSUB, U, MIRBuilder);
 }
 
 bool IRTranslator::translateFNeg(const User &U, MachineIRBuilder &MIRBuilder) {
-  MIRBuilder.buildInstr(TargetOpcode::G_FNEG)
-      .addDef(getOrCreateVReg(U))
-      .addUse(getOrCreateVReg(*U.getOperand(1)));
+  Register Op0 = getOrCreateVReg(*U.getOperand(0));
+  Register Res = getOrCreateVReg(U);
+  uint16_t Flags = 0;
+  if (isa<Instruction>(U)) {
+    const Instruction &I = cast<Instruction>(U);
+    Flags = MachineInstr::copyFlagsFromInstruction(I);
+  }
+  MIRBuilder.buildInstr(TargetOpcode::G_FNEG, {Res}, {Op0}, Flags);
   return true;
 }
 
 bool IRTranslator::translateCompare(const User &U,
                                     MachineIRBuilder &MIRBuilder) {
   const CmpInst *CI = dyn_cast<CmpInst>(&U);
-  unsigned Op0 = getOrCreateVReg(*U.getOperand(0));
-  unsigned Op1 = getOrCreateVReg(*U.getOperand(1));
-  unsigned Res = getOrCreateVReg(U);
+  Register Op0 = getOrCreateVReg(*U.getOperand(0));
+  Register Op1 = getOrCreateVReg(*U.getOperand(1));
+  Register Res = getOrCreateVReg(U);
   CmpInst::Predicate Pred = CI ? CI->getPredicate()
                                : static_cast<CmpInst::Predicate>(
                                      cast<ConstantExpr>(U).getPredicate());
@@ -366,8 +350,8 @@ bool IRTranslator::translateCompare(const User &U,
     MIRBuilder.buildCopy(
         Res, getOrCreateVReg(*Constant::getAllOnesValue(CI->getType())));
   else {
-    auto FCmp = MIRBuilder.buildFCmp(Pred, Res, Op0, Op1);
-    FCmp->copyIRFlags(*CI);
+    MIRBuilder.buildInstr(TargetOpcode::G_FCMP, {Res}, {Pred, Op0, Op1},
+                          MachineInstr::copyFlagsFromInstruction(*CI));
   }
 
   return true;
@@ -379,15 +363,20 @@ bool IRTranslator::translateRet(const User &U, MachineIRBuilder &MIRBuilder) {
   if (Ret && DL->getTypeStoreSize(Ret->getType()) == 0)
     Ret = nullptr;
 
-  ArrayRef<unsigned> VRegs;
+  ArrayRef<Register> VRegs;
   if (Ret)
     VRegs = getOrCreateVRegs(*Ret);
 
+  Register SwiftErrorVReg = 0;
+  if (CLI->supportSwiftError() && SwiftError.getFunctionArg()) {
+    SwiftErrorVReg = SwiftError.getOrCreateVRegUseAt(
+        &RI, &MIRBuilder.getMBB(), SwiftError.getFunctionArg());
+  }
+
   // The target may mess up with the insertion point, but
   // this is not important as a return is the last instruction
   // of the block anyway.
-
-  return CLI->lowerReturn(MIRBuilder, Ret, VRegs);
+  return CLI->lowerReturn(MIRBuilder, Ret, VRegs, SwiftErrorVReg);
 }
 
 bool IRTranslator::translateBr(const User &U, MachineIRBuilder &MIRBuilder) {
@@ -395,7 +384,7 @@ bool IRTranslator::translateBr(const User &U, MachineIRBuilder &MIRBuilder) {
   unsigned Succ = 0;
   if (!BrInst.isUnconditional()) {
     // We want a G_BRCOND to the true BB followed by an unconditional branch.
-    unsigned Tst = getOrCreateVReg(*BrInst.getCondition());
+    Register Tst = getOrCreateVReg(*BrInst.getCondition());
     const BasicBlock &TrueTgt = *cast<BasicBlock>(BrInst.getSuccessor(Succ++));
     MachineBasicBlock &TrueBB = getMBB(TrueTgt);
     MIRBuilder.buildBrCond(Tst, TrueBB);
@@ -415,48 +404,429 @@ bool IRTranslator::translateBr(const User &U, MachineIRBuilder &MIRBuilder) {
   return true;
 }
 
-bool IRTranslator::translateSwitch(const User &U,
-                                   MachineIRBuilder &MIRBuilder) {
-  // For now, just translate as a chain of conditional branches.
-  // FIXME: could we share most of the logic/code in
-  // SelectionDAGBuilder::visitSwitch between SelectionDAG and GlobalISel?
-  // At first sight, it seems most of the logic in there is independent of
-  // SelectionDAG-specifics and a lot of work went in to optimize switch
-  // lowering in there.
-
-  const SwitchInst &SwInst = cast<SwitchInst>(U);
-  const unsigned SwCondValue = getOrCreateVReg(*SwInst.getCondition());
-  const BasicBlock *OrigBB = SwInst.getParent();
-
-  LLT LLTi1 = getLLTForType(*Type::getInt1Ty(U.getContext()), *DL);
-  for (auto &CaseIt : SwInst.cases()) {
-    const unsigned CaseValueReg = getOrCreateVReg(*CaseIt.getCaseValue());
-    const unsigned Tst = MRI->createGenericVirtualRegister(LLTi1);
-    MIRBuilder.buildICmp(CmpInst::ICMP_EQ, Tst, CaseValueReg, SwCondValue);
-    MachineBasicBlock &CurMBB = MIRBuilder.getMBB();
-    const BasicBlock *TrueBB = CaseIt.getCaseSuccessor();
-    MachineBasicBlock &TrueMBB = getMBB(*TrueBB);
-
-    MIRBuilder.buildBrCond(Tst, TrueMBB);
-    CurMBB.addSuccessor(&TrueMBB);
-    addMachineCFGPred({OrigBB, TrueBB}, &CurMBB);
-
-    MachineBasicBlock *FalseMBB =
-        MF->CreateMachineBasicBlock(SwInst.getParent());
-    // Insert the comparison blocks one after the other.
-    MF->insert(std::next(CurMBB.getIterator()), FalseMBB);
-    MIRBuilder.buildBr(*FalseMBB);
-    CurMBB.addSuccessor(FalseMBB);
-
-    MIRBuilder.setMBB(*FalseMBB);
-  }
-  // handle default case
-  const BasicBlock *DefaultBB = SwInst.getDefaultDest();
-  MachineBasicBlock &DefaultMBB = getMBB(*DefaultBB);
-  MIRBuilder.buildBr(DefaultMBB);
-  MachineBasicBlock &CurMBB = MIRBuilder.getMBB();
-  CurMBB.addSuccessor(&DefaultMBB);
-  addMachineCFGPred({OrigBB, DefaultBB}, &CurMBB);
+void IRTranslator::addSuccessorWithProb(MachineBasicBlock *Src,
+                                        MachineBasicBlock *Dst,
+                                        BranchProbability Prob) {
+  if (!FuncInfo.BPI) {
+    Src->addSuccessorWithoutProb(Dst);
+    return;
+  }
+  if (Prob.isUnknown())
+    Prob = getEdgeProbability(Src, Dst);
+  Src->addSuccessor(Dst, Prob);
+}
+
+BranchProbability
+IRTranslator::getEdgeProbability(const MachineBasicBlock *Src,
+                                 const MachineBasicBlock *Dst) const {
+  const BasicBlock *SrcBB = Src->getBasicBlock();
+  const BasicBlock *DstBB = Dst->getBasicBlock();
+  if (!FuncInfo.BPI) {
+    // If BPI is not available, set the default probability as 1 / N, where N
+    // is the number of successors.
+    auto SuccSize = std::max<uint32_t>(succ_size(SrcBB), 1);
+    return BranchProbability(1, SuccSize);
+  }
+  return FuncInfo.BPI->getEdgeProbability(SrcBB, DstBB);
+}
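When no BranchProbabilityInfo is available, getEdgeProbability above falls back to a uniform 1/N estimate. A tiny standalone sketch of that rule (plain C++; Prob is a stand-in, not LLVM's BranchProbability):

#include <algorithm>
#include <cstdint>
#include <cstdio>

struct Prob { uint32_t Num, Den; };  // a probability stored as Num/Den

// Uniform fallback: each of N successors gets 1/N; guard against N == 0.
Prob defaultEdgeProb(uint32_t NumSuccessors) {
  uint32_t N = std::max<uint32_t>(NumSuccessors, 1);
  return {1, N};
}

int main() {
  Prob P = defaultEdgeProb(4);
  std::printf("%u/%u\n", P.Num, P.Den);  // prints 1/4
  return 0;
}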
+
+bool IRTranslator::translateSwitch(const User &U, MachineIRBuilder &MIB) {
+  using namespace SwitchCG;
+  // Extract cases from the switch.
+  const SwitchInst &SI = cast<SwitchInst>(U);
+  BranchProbabilityInfo *BPI = FuncInfo.BPI;
+  CaseClusterVector Clusters;
+  Clusters.reserve(SI.getNumCases());
+  for (auto &I : SI.cases()) {
+    MachineBasicBlock *Succ = &getMBB(*I.getCaseSuccessor());
+    assert(Succ && "Could not find successor mbb in mapping");
+    const ConstantInt *CaseVal = I.getCaseValue();
+    BranchProbability Prob =
+        BPI ? BPI->getEdgeProbability(SI.getParent(), I.getSuccessorIndex())
+            : BranchProbability(1, SI.getNumCases() + 1);
+    Clusters.push_back(CaseCluster::range(CaseVal, CaseVal, Succ, Prob));
+  }
+
+  MachineBasicBlock *DefaultMBB = &getMBB(*SI.getDefaultDest());
+
+  // Cluster adjacent cases with the same destination. We do this at all
+  // optimization levels because it's cheap to do and will make codegen faster
+  // if there are many clusters.
+  sortAndRangeify(Clusters);
+
+  MachineBasicBlock *SwitchMBB = &getMBB(*SI.getParent());
+
+  // If there is only the default destination, jump there directly.
+  if (Clusters.empty()) {
+    SwitchMBB->addSuccessor(DefaultMBB);
+    if (DefaultMBB != SwitchMBB->getNextNode())
+      MIB.buildBr(*DefaultMBB);
+    return true;
+  }
+
+  SL->findJumpTables(Clusters, &SI, DefaultMBB);
+
+  LLVM_DEBUG({
+    dbgs() << "Case clusters: ";
+    for (const CaseCluster &C : Clusters) {
+      if (C.Kind == CC_JumpTable)
+        dbgs() << "JT:";
+      if (C.Kind == CC_BitTests)
+        dbgs() << "BT:";
+
+      C.Low->getValue().print(dbgs(), true);
+      if (C.Low != C.High) {
+        dbgs() << '-';
+        C.High->getValue().print(dbgs(), true);
+      }
+      dbgs() << ' ';
+    }
+    dbgs() << '\n';
+  });
+
+  assert(!Clusters.empty());
+  SwitchWorkList WorkList;
+  CaseClusterIt First = Clusters.begin();
+  CaseClusterIt Last = Clusters.end() - 1;
+  auto DefaultProb = getEdgeProbability(SwitchMBB, DefaultMBB);
+  WorkList.push_back({SwitchMBB, First, Last, nullptr, nullptr, DefaultProb});
+
+  // FIXME: At the moment we don't do any splitting optimizations here like
+  // SelectionDAG does, so this worklist only has one entry.
+  while (!WorkList.empty()) {
+    SwitchWorkListItem W = WorkList.back();
+    WorkList.pop_back();
+    if (!lowerSwitchWorkItem(W, SI.getCondition(), SwitchMBB, DefaultMBB, MIB))
+      return false;
+  }
+  return true;
+}
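sortAndRangeify above merges sorted, adjacent case values that branch to the same destination into [Low, High] clusters. A standalone approximation of that clustering (plain C++; Cluster is a simplified stand-in for SwitchCG::CaseCluster):

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <vector>

struct Cluster { int64_t Low, High; int Succ; };  // [Low, High] -> successor

// Sort cases by value, then grow a cluster while the values stay contiguous
// and keep branching to the same successor.
std::vector<Cluster> sortAndRangeify(std::vector<Cluster> Cases) {
  std::sort(Cases.begin(), Cases.end(),
            [](const Cluster &A, const Cluster &B) { return A.Low < B.Low; });
  std::vector<Cluster> Out;
  for (const Cluster &C : Cases) {
    if (!Out.empty() && Out.back().Succ == C.Succ &&
        Out.back().High + 1 == C.Low)
      Out.back().High = C.High;  // extend the previous cluster
    else
      Out.push_back(C);
  }
  return Out;
}

int main() {
  std::vector<Cluster> Cases = {{3, 3, 1}, {1, 1, 1}, {2, 2, 1}, {7, 7, 2}};
  for (const Cluster &C : sortAndRangeify(Cases))
    std::printf("[%lld, %lld] -> bb%d\n", (long long)C.Low, (long long)C.High,
                C.Succ);
  return 0;  // prints [1, 3] -> bb1 and [7, 7] -> bb2
}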
+
+void IRTranslator::emitJumpTable(SwitchCG::JumpTable &JT,
+                                 MachineBasicBlock *MBB) {
+  // Emit the code for the jump table
+  assert(JT.Reg != -1U && "Should lower JT Header first!");
+  MachineIRBuilder MIB(*MBB->getParent());
+  MIB.setMBB(*MBB);
+  MIB.setDebugLoc(CurBuilder->getDebugLoc());
+
+  Type *PtrIRTy = Type::getInt8PtrTy(MF->getFunction().getContext());
+  const LLT PtrTy = getLLTForType(*PtrIRTy, *DL);
+
+  auto Table = MIB.buildJumpTable(PtrTy, JT.JTI);
+  MIB.buildBrJT(Table.getReg(0), JT.JTI, JT.Reg);
+}
+
+bool IRTranslator::emitJumpTableHeader(SwitchCG::JumpTable &JT,
+                                       SwitchCG::JumpTableHeader &JTH,
+                                       MachineBasicBlock *HeaderBB) {
+  MachineIRBuilder MIB(*HeaderBB->getParent());
+  MIB.setMBB(*HeaderBB);
+  MIB.setDebugLoc(CurBuilder->getDebugLoc());
+
+  const Value &SValue = *JTH.SValue;
+  // Subtract the lowest switch case value from the value being switched on.
+  const LLT SwitchTy = getLLTForType(*SValue.getType(), *DL);
+  Register SwitchOpReg = getOrCreateVReg(SValue);
+  auto FirstCst = MIB.buildConstant(SwitchTy, JTH.First);
+  auto Sub = MIB.buildSub({SwitchTy}, SwitchOpReg, FirstCst);
+
+  // This value may be smaller or larger than the target's pointer type, and
+  // therefore require extension or truncating.
+  Type *PtrIRTy = SValue.getType()->getPointerTo();
+  const LLT PtrScalarTy = LLT::scalar(DL->getTypeSizeInBits(PtrIRTy));
+  Sub = MIB.buildZExtOrTrunc(PtrScalarTy, Sub);
+
+  JT.Reg = Sub.getReg(0);
+
+  if (JTH.OmitRangeCheck) {
+    if (JT.MBB != HeaderBB->getNextNode())
+      MIB.buildBr(*JT.MBB);
+    return true;
+  }
+
+  // Emit the range check for the jump table, and branch to the default block
+  // for the switch statement if the value being switched on exceeds the
+  // largest case in the switch.
+  auto Cst = getOrCreateVReg(
+      *ConstantInt::get(SValue.getType(), JTH.Last - JTH.First));
+  Cst = MIB.buildZExtOrTrunc(PtrScalarTy, Cst).getReg(0);
+  auto Cmp = MIB.buildICmp(CmpInst::ICMP_UGT, LLT::scalar(1), Sub, Cst);
+
+  auto BrCond = MIB.buildBrCond(Cmp.getReg(0), *JT.Default);
+
+  // Avoid emitting unnecessary branches to the next block.
+  if (JT.MBB != HeaderBB->getNextNode())
+    BrCond = MIB.buildBr(*JT.MBB);
+  return true;
+}
+
+void IRTranslator::emitSwitchCase(SwitchCG::CaseBlock &CB,
+                                  MachineBasicBlock *SwitchBB,
+                                  MachineIRBuilder &MIB) {
+  Register CondLHS = getOrCreateVReg(*CB.CmpLHS);
+  Register Cond;
+  DebugLoc OldDbgLoc = MIB.getDebugLoc();
+  MIB.setDebugLoc(CB.DbgLoc);
+  MIB.setMBB(*CB.ThisBB);
+
+  if (CB.PredInfo.NoCmp) {
+    // Branch or fall through to TrueBB.
+    addSuccessorWithProb(CB.ThisBB, CB.TrueBB, CB.TrueProb);
+    addMachineCFGPred({SwitchBB->getBasicBlock(), CB.TrueBB->getBasicBlock()},
+                      CB.ThisBB);
+    CB.ThisBB->normalizeSuccProbs();
+    if (CB.TrueBB != CB.ThisBB->getNextNode())
+      MIB.buildBr(*CB.TrueBB);
+    MIB.setDebugLoc(OldDbgLoc);
+    return;
+  }
+
+  const LLT i1Ty = LLT::scalar(1);
+  // Build the compare.
+  if (!CB.CmpMHS) {
+    Register CondRHS = getOrCreateVReg(*CB.CmpRHS);
+    Cond = MIB.buildICmp(CB.PredInfo.Pred, i1Ty, CondLHS, CondRHS).getReg(0);
+  } else {
+    assert(CB.PredInfo.Pred == CmpInst::ICMP_ULE &&
+           "Can only handle ULE ranges");
+
+    const APInt& Low = cast<ConstantInt>(CB.CmpLHS)->getValue();
+    const APInt& High = cast<ConstantInt>(CB.CmpRHS)->getValue();
+
+    Register CmpOpReg = getOrCreateVReg(*CB.CmpMHS);
+    if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(true)) {
+      Register CondRHS = getOrCreateVReg(*CB.CmpRHS);
+      Cond =
+          MIB.buildICmp(CmpInst::ICMP_ULE, i1Ty, CmpOpReg, CondRHS).getReg(0);
+    } else {
+      const LLT &CmpTy = MRI->getType(CmpOpReg);
+      auto Sub = MIB.buildSub({CmpTy}, CmpOpReg, CondLHS);
+      auto Diff = MIB.buildConstant(CmpTy, High - Low);
+      Cond = MIB.buildICmp(CmpInst::ICMP_ULE, i1Ty, Sub, Diff).getReg(0);
+    }
+  }
+
+  // Update successor info
+  addSuccessorWithProb(CB.ThisBB, CB.TrueBB, CB.TrueProb);
+
+  addMachineCFGPred({SwitchBB->getBasicBlock(), CB.TrueBB->getBasicBlock()},
+                    CB.ThisBB);
+
+  // TrueBB and FalseBB are always different unless the incoming IR is
+  // degenerate. This only happens when running llc on weird IR.
+  if (CB.TrueBB != CB.FalseBB)
+    addSuccessorWithProb(CB.ThisBB, CB.FalseBB, CB.FalseProb);
+  CB.ThisBB->normalizeSuccProbs();
+
+  // if (SwitchBB->getBasicBlock() != CB.FalseBB->getBasicBlock())
+  addMachineCFGPred({SwitchBB->getBasicBlock(), CB.FalseBB->getBasicBlock()},
+                    CB.ThisBB);
+
+  // If the lhs block is the next block, invert the condition so that we can
+  // fall through to the lhs instead of the rhs block.
+  if (CB.TrueBB == CB.ThisBB->getNextNode()) {
+    std::swap(CB.TrueBB, CB.FalseBB);
+    auto True = MIB.buildConstant(i1Ty, 1);
+    Cond = MIB.buildInstr(TargetOpcode::G_XOR, {i1Ty}, {Cond, True}, None)
+               .getReg(0);
+  }
+
+  MIB.buildBrCond(Cond, *CB.TrueBB);
+  MIB.buildBr(*CB.FalseBB);
+  MIB.setDebugLoc(OldDbgLoc);
+}
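Both the jump-table header and the ULE range case above lean on the same trick: testing Low <= x <= High with a single unsigned comparison of x - Low against High - Low. A standalone sketch (plain C++):

#include <cstdint>
#include <cstdio>

// One unsigned compare replaces two signed bound checks: if x < Low the
// subtraction wraps to a huge unsigned value, so the test still fails.
bool inRange(int64_t X, int64_t Low, int64_t High) {
  return static_cast<uint64_t>(X - Low) <= static_cast<uint64_t>(High - Low);
}

int main() {
  std::printf("%d %d %d\n", inRange(5, 1, 10), inRange(0, 1, 10),
              inRange(11, 1, 10));  // prints 1 0 0
  return 0;
}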
+
+bool IRTranslator::lowerJumpTableWorkItem(SwitchCG::SwitchWorkListItem W,
+                                          MachineBasicBlock *SwitchMBB,
+                                          MachineBasicBlock *CurMBB,
+                                          MachineBasicBlock *DefaultMBB,
+                                          MachineIRBuilder &MIB,
+                                          MachineFunction::iterator BBI,
+                                          BranchProbability UnhandledProbs,
+                                          SwitchCG::CaseClusterIt I,
+                                          MachineBasicBlock *Fallthrough,
+                                          bool FallthroughUnreachable) {
+  using namespace SwitchCG;
+  MachineFunction *CurMF = SwitchMBB->getParent();
+  // FIXME: Optimize away range check based on pivot comparisons.
+  JumpTableHeader *JTH = &SL->JTCases[I->JTCasesIndex].first;
+  SwitchCG::JumpTable *JT = &SL->JTCases[I->JTCasesIndex].second;
+  BranchProbability DefaultProb = W.DefaultProb;
+
+  // The jump block hasn't been inserted yet; insert it here.
+  MachineBasicBlock *JumpMBB = JT->MBB;
+  CurMF->insert(BBI, JumpMBB);
+
+  // Since the jump table block is separate from the switch block, we need
+  // to keep track of it as a machine predecessor to the default block,
+  // otherwise we lose the phi edges.
+  addMachineCFGPred({SwitchMBB->getBasicBlock(), DefaultMBB->getBasicBlock()},
+                    CurMBB);
+  addMachineCFGPred({SwitchMBB->getBasicBlock(), DefaultMBB->getBasicBlock()},
+                    JumpMBB);
+
+  auto JumpProb = I->Prob;
+  auto FallthroughProb = UnhandledProbs;
+
+  // If the default statement is a target of the jump table, we evenly
+  // distribute the default probability to successors of CurMBB. Also
+  // update the probability on the edge from JumpMBB to Fallthrough.
+  for (MachineBasicBlock::succ_iterator SI = JumpMBB->succ_begin(),
+                                        SE = JumpMBB->succ_end();
+       SI != SE; ++SI) {
+    if (*SI == DefaultMBB) {
+      JumpProb += DefaultProb / 2;
+      FallthroughProb -= DefaultProb / 2;
+      JumpMBB->setSuccProbability(SI, DefaultProb / 2);
+      JumpMBB->normalizeSuccProbs();
+    } else {
+      // Also record edges from the jump table block to it's successors.
+      addMachineCFGPred({SwitchMBB->getBasicBlock(), (*SI)->getBasicBlock()},
+                        JumpMBB);
+    }
+  }
+
+  // Skip the range check if the fallthrough block is unreachable.
+  if (FallthroughUnreachable)
+    JTH->OmitRangeCheck = true;
+
+  if (!JTH->OmitRangeCheck)
+    addSuccessorWithProb(CurMBB, Fallthrough, FallthroughProb);
+  addSuccessorWithProb(CurMBB, JumpMBB, JumpProb);
+  CurMBB->normalizeSuccProbs();
+
+  // The jump table header will be inserted in our current block, do the
+  // range check, and fall through to our fallthrough block.
+  JTH->HeaderBB = CurMBB;
+  JT->Default = Fallthrough; // FIXME: Move Default to JumpTableHeader.
+
+  // If we're in the right place, emit the jump table header right now.
+  if (CurMBB == SwitchMBB) {
+    if (!emitJumpTableHeader(*JT, *JTH, CurMBB))
+      return false;
+    JTH->Emitted = true;
+  }
+  return true;
+}
+bool IRTranslator::lowerSwitchRangeWorkItem(SwitchCG::CaseClusterIt I,
+                                            Value *Cond,
+                                            MachineBasicBlock *Fallthrough,
+                                            bool FallthroughUnreachable,
+                                            BranchProbability UnhandledProbs,
+                                            MachineBasicBlock *CurMBB,
+                                            MachineIRBuilder &MIB,
+                                            MachineBasicBlock *SwitchMBB) {
+  using namespace SwitchCG;
+  const Value *RHS, *LHS, *MHS;
+  CmpInst::Predicate Pred;
+  if (I->Low == I->High) {
+    // Check Cond == I->Low.
+    Pred = CmpInst::ICMP_EQ;
+    LHS = Cond;
+    RHS = I->Low;
+    MHS = nullptr;
+  } else {
+    // Check I->Low <= Cond <= I->High.
+    Pred = CmpInst::ICMP_ULE;
+    LHS = I->Low;
+    MHS = Cond;
+    RHS = I->High;
+  }
+
+  // If Fallthrough is unreachable, fold away the comparison.
+  // The false probability is the sum of all unhandled cases.
+  CaseBlock CB(Pred, FallthroughUnreachable, LHS, RHS, MHS, I->MBB,
+               Fallthrough, CurMBB, MIB.getDebugLoc(), I->Prob,
+               UnhandledProbs);
+
+  emitSwitchCase(CB, SwitchMBB, MIB);
+  return true;
+}
+
+bool IRTranslator::lowerSwitchWorkItem(SwitchCG::SwitchWorkListItem W,
+                                       Value *Cond,
+                                       MachineBasicBlock *SwitchMBB,
+                                       MachineBasicBlock *DefaultMBB,
+                                       MachineIRBuilder &MIB) {
+  using namespace SwitchCG;
+  MachineFunction *CurMF = FuncInfo.MF;
+  MachineBasicBlock *NextMBB = nullptr;
+  MachineFunction::iterator BBI(W.MBB);
+  if (++BBI != FuncInfo.MF->end())
+    NextMBB = &*BBI;
+
+  if (EnableOpts) {
+    // Here, we order cases by probability so the most likely case will be
+    // checked first. However, two clusters can have the same probability in
+    // which case their relative ordering is non-deterministic. So we use Low
+    // as a tie-breaker as clusters are guaranteed to never overlap.
+    llvm::sort(W.FirstCluster, W.LastCluster + 1,
+               [](const CaseCluster &a, const CaseCluster &b) {
+                 return a.Prob != b.Prob
+                            ? a.Prob > b.Prob
+                            : a.Low->getValue().slt(b.Low->getValue());
+               });
+
+    // Rearrange the case blocks so that the last one falls through if possible
+    // without changing the order of probabilities.
+    for (CaseClusterIt I = W.LastCluster; I > W.FirstCluster;) {
+      --I;
+      if (I->Prob > W.LastCluster->Prob)
+        break;
+      if (I->Kind == CC_Range && I->MBB == NextMBB) {
+        std::swap(*I, *W.LastCluster);
+        break;
+      }
+    }
+  }
+
+  // Compute total probability.
+  BranchProbability DefaultProb = W.DefaultProb;
+  BranchProbability UnhandledProbs = DefaultProb;
+  for (CaseClusterIt I = W.FirstCluster; I <= W.LastCluster; ++I)
+    UnhandledProbs += I->Prob;
+
+  MachineBasicBlock *CurMBB = W.MBB;
+  for (CaseClusterIt I = W.FirstCluster, E = W.LastCluster; I <= E; ++I) {
+    bool FallthroughUnreachable = false;
+    MachineBasicBlock *Fallthrough;
+    if (I == W.LastCluster) {
+      // For the last cluster, fall through to the default destination.
+      Fallthrough = DefaultMBB;
+      FallthroughUnreachable = isa<UnreachableInst>(
+          DefaultMBB->getBasicBlock()->getFirstNonPHIOrDbg());
+    } else {
+      Fallthrough = CurMF->CreateMachineBasicBlock(CurMBB->getBasicBlock());
+      CurMF->insert(BBI, Fallthrough);
+    }
+    UnhandledProbs -= I->Prob;
+
+    switch (I->Kind) {
+    case CC_BitTests: {
+      LLVM_DEBUG(dbgs() << "Switch to bit test optimization unimplemented");
+      return false; // Bit tests currently unimplemented.
+    }
+    case CC_JumpTable: {
+      if (!lowerJumpTableWorkItem(W, SwitchMBB, CurMBB, DefaultMBB, MIB, BBI,
+                                  UnhandledProbs, I, Fallthrough,
+                                  FallthroughUnreachable)) {
+        LLVM_DEBUG(dbgs() << "Failed to lower jump table");
+        return false;
+      }
+      break;
+    }
+    case CC_Range: {
+      if (!lowerSwitchRangeWorkItem(I, Cond, Fallthrough,
+                                    FallthroughUnreachable, UnhandledProbs,
+                                    CurMBB, MIB, SwitchMBB)) {
+        LLVM_DEBUG(dbgs() << "Failed to lower switch range");
+        return false;
+      }
+      break;
+    }
+    }
+    CurMBB = Fallthrough;
+  }
   return true;
 }
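The loop above keeps a running UnhandledProbs: it starts at the default probability plus every cluster's probability and peels each cluster off as it is lowered, so each comparison's false edge carries exactly the probability mass that remains. A standalone sketch of that bookkeeping (plain C++ with doubles instead of BranchProbability):

#include <cstdio>
#include <vector>

int main() {
  double DefaultProb = 0.1;
  std::vector<double> ClusterProbs = {0.5, 0.3, 0.1};

  double Unhandled = DefaultProb;
  for (double P : ClusterProbs)
    Unhandled += P;  // total mass entering the first comparison

  for (double P : ClusterProbs) {
    Unhandled -= P;  // mass that falls through if this cluster doesn't match
    std::printf("case prob %.1f, fallthrough prob %.1f\n", P, Unhandled);
  }
  return 0;
}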
@@ -465,7 +835,7 @@ bool IRTranslator::translateIndirectBr(const User &U,
                                        MachineIRBuilder &MIRBuilder) {
   const IndirectBrInst &BrInst = cast<IndirectBrInst>(U);
 
-  const unsigned Tgt = getOrCreateVReg(*BrInst.getAddress());
+  const Register Tgt = getOrCreateVReg(*BrInst.getAddress());
   MIRBuilder.buildBrIndirect(Tgt);
 
   // Link successors.
@@ -476,6 +846,14 @@ bool IRTranslator::translateIndirectBr(const User &U,
   return true;
 }
 
+static bool isSwiftError(const Value *V) {
+  if (auto Arg = dyn_cast<Argument>(V))
+    return Arg->hasSwiftErrorAttr();
+  if (auto AI = dyn_cast<AllocaInst>(V))
+    return AI->isSwiftError();
+  return false;
+}
+
 bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) {
   const LoadInst &LI = cast<LoadInst>(U);
 
@@ -486,13 +864,25 @@ bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) {
   if (DL->getTypeStoreSize(LI.getType()) == 0)
     return true;
 
-  ArrayRef<unsigned> Regs = getOrCreateVRegs(LI);
+  ArrayRef<Register> Regs = getOrCreateVRegs(LI);
   ArrayRef<uint64_t> Offsets = *VMap.getOffsets(LI);
-  unsigned Base = getOrCreateVReg(*LI.getPointerOperand());
+  Register Base = getOrCreateVReg(*LI.getPointerOperand());
+
+  Type *OffsetIRTy = DL->getIntPtrType(LI.getPointerOperandType());
+  LLT OffsetTy = getLLTForType(*OffsetIRTy, *DL);
+
+  if (CLI->supportSwiftError() && isSwiftError(LI.getPointerOperand())) {
+    assert(Regs.size() == 1 && "swifterror should be single pointer");
+    Register VReg = SwiftError.getOrCreateVRegUseAt(&LI, &MIRBuilder.getMBB(),
+                                                    LI.getPointerOperand());
+    MIRBuilder.buildCopy(Regs[0], VReg);
+    return true;
+  }
 
   for (unsigned i = 0; i < Regs.size(); ++i) {
-    unsigned Addr = 0;
-    MIRBuilder.materializeGEP(Addr, Base, LLT::scalar(64), Offsets[i] / 8);
+    Register Addr;
+    MIRBuilder.materializeGEP(Addr, Base, OffsetTy, Offsets[i] / 8);
 
     MachinePointerInfo Ptr(LI.getPointerOperand(), Offsets[i] / 8);
     unsigned BaseAlign = getMemOpAlignment(LI);
@@ -515,13 +905,25 @@ bool IRTranslator::translateStore(const User &U, MachineIRBuilder &MIRBuilder) {
   if (DL->getTypeStoreSize(SI.getValueOperand()->getType()) == 0)
     return true;
 
-  ArrayRef<unsigned> Vals = getOrCreateVRegs(*SI.getValueOperand());
+  ArrayRef<Register> Vals = getOrCreateVRegs(*SI.getValueOperand());
   ArrayRef<uint64_t> Offsets = *VMap.getOffsets(*SI.getValueOperand());
-  unsigned Base = getOrCreateVReg(*SI.getPointerOperand());
+  Register Base = getOrCreateVReg(*SI.getPointerOperand());
+
+  Type *OffsetIRTy = DL->getIntPtrType(SI.getPointerOperandType());
+  LLT OffsetTy = getLLTForType(*OffsetIRTy, *DL);
+
+  if (CLI->supportSwiftError() && isSwiftError(SI.getPointerOperand())) {
+    assert(Vals.size() == 1 && "swifterror should be single pointer");
+
+    Register VReg = SwiftError.getOrCreateVRegDefAt(&SI, &MIRBuilder.getMBB(),
+                                                    SI.getPointerOperand());
+    MIRBuilder.buildCopy(VReg, Vals[0]);
+    return true;
+  }
 
   for (unsigned i = 0; i < Vals.size(); ++i) {
-    unsigned Addr = 0;
-    MIRBuilder.materializeGEP(Addr, Base, LLT::scalar(64), Offsets[i] / 8);
+    Register Addr;
+    MIRBuilder.materializeGEP(Addr, Base, OffsetTy, Offsets[i] / 8);
 
     MachinePointerInfo Ptr(SI.getPointerOperand(), Offsets[i] / 8);
     unsigned BaseAlign = getMemOpAlignment(SI);
@@ -562,10 +964,9 @@ bool IRTranslator::translateExtractValue(const User &U,
                                          MachineIRBuilder &MIRBuilder) {
   const Value *Src = U.getOperand(0);
   uint64_t Offset = getOffsetFromIndices(U, *DL);
-  ArrayRef<unsigned> SrcRegs = getOrCreateVRegs(*Src);
+  ArrayRef<Register> SrcRegs = getOrCreateVRegs(*Src);
   ArrayRef<uint64_t> Offsets = *VMap.getOffsets(*Src);
-  unsigned Idx = std::lower_bound(Offsets.begin(), Offsets.end(), Offset) -
-                 Offsets.begin();
+  unsigned Idx = llvm::lower_bound(Offsets, Offset) - Offsets.begin();
 
   auto &DstRegs = allocateVRegs(U);
   for (unsigned i = 0; i < DstRegs.size(); ++i)
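The extractvalue hunk above finds the sub-register for a byte offset with a binary search over the aggregate's sorted field offsets. A standalone sketch of that lookup (plain C++; the offsets assume a {i64, i64, i64} struct):

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
  std::vector<uint64_t> Offsets = {0, 8, 16};  // byte offset of each field
  uint64_t Offset = 8;                         // extractvalue of field #1
  unsigned Idx = std::lower_bound(Offsets.begin(), Offsets.end(), Offset) -
                 Offsets.begin();
  std::printf("%u\n", Idx);  // prints 1
  return 0;
}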
@@ -580,8 +981,8 @@ bool IRTranslator::translateInsertValue(const User &U,
   uint64_t Offset = getOffsetFromIndices(U, *DL);
   auto &DstRegs = allocateVRegs(U);
   ArrayRef<uint64_t> DstOffsets = *VMap.getOffsets(U);
-  ArrayRef<unsigned> SrcRegs = getOrCreateVRegs(*Src);
-  ArrayRef<unsigned> InsertedRegs = getOrCreateVRegs(*U.getOperand(1));
+  ArrayRef<Register> SrcRegs = getOrCreateVRegs(*Src);
+  ArrayRef<Register> InsertedRegs = getOrCreateVRegs(*U.getOperand(1));
   auto InsertedIt = InsertedRegs.begin();
 
   for (unsigned i = 0; i < DstRegs.size(); ++i) {
@@ -596,19 +997,19 @@ bool IRTranslator::translateInsertValue(const User &U,
 
 bool IRTranslator::translateSelect(const User &U,
                                    MachineIRBuilder &MIRBuilder) {
-  unsigned Tst = getOrCreateVReg(*U.getOperand(0));
-  ArrayRef<unsigned> ResRegs = getOrCreateVRegs(U);
-  ArrayRef<unsigned> Op0Regs = getOrCreateVRegs(*U.getOperand(1));
-  ArrayRef<unsigned> Op1Regs = getOrCreateVRegs(*U.getOperand(2));
+  Register Tst = getOrCreateVReg(*U.getOperand(0));
+  ArrayRef<Register> ResRegs = getOrCreateVRegs(U);
+  ArrayRef<Register> Op0Regs = getOrCreateVRegs(*U.getOperand(1));
+  ArrayRef<Register> Op1Regs = getOrCreateVRegs(*U.getOperand(2));
 
   const SelectInst &SI = cast<SelectInst>(U);
-  const CmpInst *Cmp = dyn_cast<CmpInst>(SI.getCondition());
+  uint16_t Flags = 0;
+  if (const CmpInst *Cmp = dyn_cast<CmpInst>(SI.getCondition()))
+    Flags = MachineInstr::copyFlagsFromInstruction(*Cmp);
+
   for (unsigned i = 0; i < ResRegs.size(); ++i) {
-    auto Select =
-        MIRBuilder.buildSelect(ResRegs[i], Tst, Op0Regs[i], Op1Regs[i]);
-    if (Cmp && isa<FPMathOperator>(Cmp)) {
-      Select->copyIRFlags(*Cmp);
-    }
+    MIRBuilder.buildInstr(TargetOpcode::G_SELECT, {ResRegs[i]},
+                          {Tst, Op0Regs[i], Op1Regs[i]}, Flags);
   }
 
   return true;
@@ -619,7 +1020,7 @@ bool IRTranslator::translateBitCast(const User &U,
   // If we're bitcasting to the source type, we can reuse the source vreg.
   if (getLLTForType(*U.getOperand(0)->getType(), *DL) ==
       getLLTForType(*U.getType(), *DL)) {
-    unsigned SrcReg = getOrCreateVReg(*U.getOperand(0));
+    Register SrcReg = getOrCreateVReg(*U.getOperand(0));
     auto &Regs = *VMap.getVRegs(U);
     // If we already assigned a vreg for this bitcast, we can't change that.
     // Emit a copy to satisfy the users we already emitted.
@@ -636,9 +1037,9 @@ bool IRTranslator::translateBitCast(const User &U,
 
 bool IRTranslator::translateCast(unsigned Opcode, const User &U,
                                  MachineIRBuilder &MIRBuilder) {
-  unsigned Op = getOrCreateVReg(*U.getOperand(0));
-  unsigned Res = getOrCreateVReg(U);
-  MIRBuilder.buildInstr(Opcode).addDef(Res).addUse(Op);
+  Register Op = getOrCreateVReg(*U.getOperand(0));
+  Register Res = getOrCreateVReg(U);
+  MIRBuilder.buildInstr(Opcode, {Res}, {Op});
   return true;
 }
 
@@ -649,7 +1050,7 @@ bool IRTranslator::translateGetElementPtr(const User &U,
     return false;
 
   Value &Op0 = *U.getOperand(0);
-  unsigned BaseReg = getOrCreateVReg(Op0);
+  Register BaseReg = getOrCreateVReg(Op0);
   Type *PtrIRTy = Op0.getType();
   LLT PtrTy = getLLTForType(*PtrIRTy, *DL);
   Type *OffsetIRTy = DL->getIntPtrType(PtrIRTy);
@@ -674,43 +1075,43 @@ bool IRTranslator::translateGetElementPtr(const User &U,
       }
 
       if (Offset != 0) {
-        unsigned NewBaseReg = MRI->createGenericVirtualRegister(PtrTy);
-        unsigned OffsetReg =
-            getOrCreateVReg(*ConstantInt::get(OffsetIRTy, Offset));
-        MIRBuilder.buildGEP(NewBaseReg, BaseReg, OffsetReg);
+        Register NewBaseReg = MRI->createGenericVirtualRegister(PtrTy);
+        LLT OffsetTy = getLLTForType(*OffsetIRTy, *DL);
+        auto OffsetMIB = MIRBuilder.buildConstant({OffsetTy}, Offset);
+        MIRBuilder.buildGEP(NewBaseReg, BaseReg, OffsetMIB.getReg(0));
 
         BaseReg = NewBaseReg;
         Offset = 0;
       }
 
-      unsigned IdxReg = getOrCreateVReg(*Idx);
+      Register IdxReg = getOrCreateVReg(*Idx);
       if (MRI->getType(IdxReg) != OffsetTy) {
-        unsigned NewIdxReg = MRI->createGenericVirtualRegister(OffsetTy);
+        Register NewIdxReg = MRI->createGenericVirtualRegister(OffsetTy);
         MIRBuilder.buildSExtOrTrunc(NewIdxReg, IdxReg);
         IdxReg = NewIdxReg;
       }
 
       // N = N + Idx * ElementSize;
       // Avoid doing it for ElementSize of 1.
-      unsigned GepOffsetReg;
+      Register GepOffsetReg;
       if (ElementSize != 1) {
-        unsigned ElementSizeReg =
-            getOrCreateVReg(*ConstantInt::get(OffsetIRTy, ElementSize));
-        GepOffsetReg = MRI->createGenericVirtualRegister(OffsetTy);
-        MIRBuilder.buildMul(GepOffsetReg, ElementSizeReg, IdxReg);
+        auto ElementSizeMIB = MIRBuilder.buildConstant(
+            getLLTForType(*OffsetIRTy, *DL), ElementSize);
+        MIRBuilder.buildMul(GepOffsetReg, ElementSizeMIB.getReg(0), IdxReg);
       } else
        GepOffsetReg = IdxReg;
 
-      unsigned NewBaseReg = MRI->createGenericVirtualRegister(PtrTy);
+      Register NewBaseReg = MRI->createGenericVirtualRegister(PtrTy);
       MIRBuilder.buildGEP(NewBaseReg, BaseReg, GepOffsetReg);
       BaseReg = NewBaseReg;
     }
   }
 
   if (Offset != 0) {
-    unsigned OffsetReg = getOrCreateVReg(*ConstantInt::get(OffsetIRTy, Offset));
-    MIRBuilder.buildGEP(getOrCreateVReg(U), BaseReg, OffsetReg);
+    auto OffsetMIB =
+        MIRBuilder.buildConstant(getLLTForType(*OffsetIRTy, *DL), Offset);
+    MIRBuilder.buildGEP(getOrCreateVReg(U), BaseReg, OffsetMIB.getReg(0));
     return true;
   }
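The GEP lowering above folds constant indices into a running byte Offset and, for each variable index, emits an Idx * ElementSize contribution. A standalone model of the resulting address arithmetic (plain C++, one variable index for simplicity):

#include <cstdint>
#include <cstdio>

// Constant parts are folded into ConstOffset at translate time; the variable
// index contributes Idx * ElementSize at run time.
uint64_t gepAddress(uint64_t Base, uint64_t ConstOffset, int64_t Idx,
                    uint64_t ElementSize) {
  return Base + ConstOffset + static_cast<uint64_t>(Idx) * ElementSize;
}

int main() {
  // &base[2].field, with the field at byte 8 inside a 16-byte element.
  std::printf("%#llx\n",
              (unsigned long long)gepAddress(0x1000, 8, 2, 16));  // 0x1028
  return 0;
}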
 
@@ -721,6 +1122,19 @@ bool IRTranslator::translateGetElementPtr(const User &U,
 bool IRTranslator::translateMemfunc(const CallInst &CI,
                                     MachineIRBuilder &MIRBuilder,
                                     unsigned ID) {
+
+  // If the source is undef, then just emit a nop.
+  if (isa<UndefValue>(CI.getArgOperand(1))) {
+    switch (ID) {
+    case Intrinsic::memmove:
+    case Intrinsic::memcpy:
+    case Intrinsic::memset:
+      return true;
+    default:
+      break;
+    }
+  }
+
   LLT SizeTy = getLLTForType(*CI.getArgOperand(2)->getType(), *DL);
   Type *DstTy = CI.getArgOperand(0)->getType();
   if (cast<PointerType>(DstTy)->getAddressSpace() != 0 ||
@@ -752,10 +1166,10 @@ bool IRTranslator::translateMemfunc(const CallInst &CI,
 
   return CLI->lowerCall(MIRBuilder, CI.getCallingConv(),
                         MachineOperand::CreateES(Callee),
-                        CallLowering::ArgInfo(0, CI.getType()), Args);
+                        CallLowering::ArgInfo({0}, CI.getType()), Args);
 }
 
-void IRTranslator::getStackGuard(unsigned DstReg,
+void IRTranslator::getStackGuard(Register DstReg,
                                  MachineIRBuilder &MIRBuilder) {
   const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
   MRI->setRegClass(DstReg, TRI->getPointerRegClass(*MF));
@@ -778,7 +1192,7 @@ void IRTranslator::getStackGuard(unsigned DstReg,
 
 bool IRTranslator::translateOverflowIntrinsic(const CallInst &CI, unsigned Op,
                                               MachineIRBuilder &MIRBuilder) {
-  ArrayRef<unsigned> ResRegs = getOrCreateVRegs(CI);
+  ArrayRef<Register> ResRegs = getOrCreateVRegs(CI);
   MIRBuilder.buildInstr(Op)
       .addDef(ResRegs[0])
       .addDef(ResRegs[1])
@@ -788,19 +1202,123 @@ bool IRTranslator::translateOverflowIntrinsic(const CallInst &CI, unsigned Op,
   return true;
 }
 
+unsigned IRTranslator::getSimpleIntrinsicOpcode(Intrinsic::ID ID) {
+  switch (ID) {
+  default:
+    break;
+  case Intrinsic::bswap:
+    return TargetOpcode::G_BSWAP;
+  case Intrinsic::ceil:
+    return TargetOpcode::G_FCEIL;
+  case Intrinsic::cos:
+    return TargetOpcode::G_FCOS;
+  case Intrinsic::ctpop:
+    return TargetOpcode::G_CTPOP;
+  case Intrinsic::exp:
+    return TargetOpcode::G_FEXP;
+  case Intrinsic::exp2:
+    return TargetOpcode::G_FEXP2;
+  case Intrinsic::fabs:
+    return TargetOpcode::G_FABS;
+  case Intrinsic::copysign:
+    return TargetOpcode::G_FCOPYSIGN;
+  case Intrinsic::minnum:
+    return TargetOpcode::G_FMINNUM;
+  case Intrinsic::maxnum:
+    return TargetOpcode::G_FMAXNUM;
+  case Intrinsic::minimum:
+    return TargetOpcode::G_FMINIMUM;
+  case Intrinsic::maximum:
+    return TargetOpcode::G_FMAXIMUM;
+  case Intrinsic::canonicalize:
+    return TargetOpcode::G_FCANONICALIZE;
+  case Intrinsic::floor:
+    return TargetOpcode::G_FFLOOR;
+  case Intrinsic::fma:
+    return TargetOpcode::G_FMA;
+  case Intrinsic::log:
+    return TargetOpcode::G_FLOG;
+  case Intrinsic::log2:
+    return TargetOpcode::G_FLOG2;
+  case Intrinsic::log10:
+    return TargetOpcode::G_FLOG10;
+  case Intrinsic::nearbyint:
+    return TargetOpcode::G_FNEARBYINT;
+  case Intrinsic::pow:
+    return TargetOpcode::G_FPOW;
+  case Intrinsic::rint:
+    return TargetOpcode::G_FRINT;
+  case Intrinsic::round:
+    return TargetOpcode::G_INTRINSIC_ROUND;
+  case Intrinsic::sin:
+    return TargetOpcode::G_FSIN;
+  case Intrinsic::sqrt:
+    return TargetOpcode::G_FSQRT;
+  case Intrinsic::trunc:
+    return TargetOpcode::G_INTRINSIC_TRUNC;
+  }
+  return Intrinsic::not_intrinsic;
+}
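getSimpleIntrinsicOpcode doubles as the "is this simple?" test by returning the Intrinsic::not_intrinsic sentinel for everything it doesn't know. A standalone sketch of that pattern (plain C++ with made-up IDs and opcodes):

#include <cstdio>

enum ID { NotIntrinsic = 0, Fabs, Sqrt };    // made-up intrinsic IDs
enum Opcode { NoOp = 0, G_FABS, G_FSQRT };   // made-up generic opcodes

// Mapper and predicate in one: a zero return means "take the bespoke path".
Opcode simpleOpcode(ID Id) {
  switch (Id) {
  case Fabs: return G_FABS;
  case Sqrt: return G_FSQRT;
  default:   return NoOp;  // plays the role of Intrinsic::not_intrinsic
  }
}

int main() {
  ID Ids[] = {Fabs, Sqrt, NotIntrinsic};
  for (ID Id : Ids) {
    if (Opcode Op = simpleOpcode(Id))
      std::printf("simple: id %d -> opcode %d\n", Id, Op);
    else
      std::printf("id %d needs bespoke translation\n", Id);
  }
  return 0;
}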
+
+bool IRTranslator::translateSimpleIntrinsic(const CallInst &CI,
+                                            Intrinsic::ID ID,
+                                            MachineIRBuilder &MIRBuilder) {
+
+  unsigned Op = getSimpleIntrinsicOpcode(ID);
+
+  // Is this a simple intrinsic?
+  if (Op == Intrinsic::not_intrinsic)
+    return false;
+
+  // Yes. Let's translate it.
+  SmallVector<llvm::SrcOp, 4> VRegs;
+  for (auto &Arg : CI.arg_operands())
+    VRegs.push_back(getOrCreateVReg(*Arg));
+
+  MIRBuilder.buildInstr(Op, {getOrCreateVReg(CI)}, VRegs,
+                        MachineInstr::copyFlagsFromInstruction(CI));
+  return true;
+}
+
 bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
                                            MachineIRBuilder &MIRBuilder) {
+
+  // If this is a simple intrinsic (that is, we just need to add a def of
+  // a vreg, and uses for each arg operand, then translate it.
+  if (translateSimpleIntrinsic(CI, ID, MIRBuilder))
+    return true;
+
   switch (ID) {
   default:
     break;
   case Intrinsic::lifetime_start:
-  case Intrinsic::lifetime_end:
-    // Stack coloring is not enabled in O0 (which we care about now) so we can
-    // drop these. Make sure someone notices when we start compiling at higher
-    // opts though.
-    if (MF->getTarget().getOptLevel() != CodeGenOpt::None)
-      return false;
+  case Intrinsic::lifetime_end: {
+    // No stack colouring in O0, discard region information.
+    if (MF->getTarget().getOptLevel() == CodeGenOpt::None)
+      return true;
+
+    unsigned Op = ID == Intrinsic::lifetime_start ? TargetOpcode::LIFETIME_START
+                                                  : TargetOpcode::LIFETIME_END;
+
+    // Get the underlying objects for the location passed on the lifetime
+    // marker.
+    SmallVector<const Value *, 4> Allocas;
+    GetUnderlyingObjects(CI.getArgOperand(1), Allocas, *DL);
+
+    // Iterate over each underlying object, creating lifetime markers for each
+    // static alloca. Quit if we find a non-static alloca.
+    for (const Value *V : Allocas) {
+      const AllocaInst *AI = dyn_cast<AllocaInst>(V);
+      if (!AI)
+        continue;
+
+      if (!AI->isStaticAlloca())
+        return true;
+
+      MIRBuilder.buildInstr(Op).addFrameIndex(getOrCreateFrameIndex(*AI));
+    }
    return true;
+  }
   case Intrinsic::dbg_declare: {
     const DbgDeclareInst &DI = cast<DbgDeclareInst>(CI);
     assert(DI.getVariable() && "Missing variable");
@@ -848,10 +1366,11 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
     Value *Ptr = CI.getArgOperand(0);
     unsigned ListSize = TLI.getVaListSizeInBits(*DL) / 8;
 
+    // FIXME: Get alignment
     MIRBuilder.buildInstr(TargetOpcode::G_VASTART)
         .addUse(getOrCreateVReg(*Ptr))
         .addMemOperand(MF->getMachineMemOperand(
-            MachinePointerInfo(Ptr), MachineMemOperand::MOStore, ListSize, 0));
+            MachinePointerInfo(Ptr), MachineMemOperand::MOStore, ListSize, 1));
     return true;
   }
   case Intrinsic::dbg_value: {
@@ -868,7 +1387,7 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
     } else if (const auto *CI = dyn_cast<Constant>(V)) {
       MIRBuilder.buildConstDbgValue(*CI, DI.getVariable(), DI.getExpression());
     } else {
-      unsigned Reg = getOrCreateVReg(*V);
+      Register Reg = getOrCreateVReg(*V);
       // FIXME: This does not handle register-indirect values at offset 0. The
       // direct/indirect thing shouldn't really be handled by something as
       // implicit as reg+noreg vs reg+imm in the first palce, but it seems
@@ -889,94 +1408,25 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
     return translateOverflowIntrinsic(CI, TargetOpcode::G_UMULO, MIRBuilder);
   case Intrinsic::smul_with_overflow:
     return translateOverflowIntrinsic(CI, TargetOpcode::G_SMULO, MIRBuilder);
-  case Intrinsic::pow: {
-    auto Pow = MIRBuilder.buildInstr(TargetOpcode::G_FPOW)
-                   .addDef(getOrCreateVReg(CI))
-                   .addUse(getOrCreateVReg(*CI.getArgOperand(0)))
-                   .addUse(getOrCreateVReg(*CI.getArgOperand(1)));
-    Pow->copyIRFlags(CI);
-    return true;
-  }
-  case Intrinsic::exp: {
-    auto Exp = MIRBuilder.buildInstr(TargetOpcode::G_FEXP)
-                   .addDef(getOrCreateVReg(CI))
-                   .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
-    Exp->copyIRFlags(CI);
-    return true;
-  }
-  case Intrinsic::exp2: {
-    auto Exp2 = MIRBuilder.buildInstr(TargetOpcode::G_FEXP2)
-                    .addDef(getOrCreateVReg(CI))
-                    .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
-    Exp2->copyIRFlags(CI);
-    return true;
-  }
-  case Intrinsic::log: {
-    auto Log = MIRBuilder.buildInstr(TargetOpcode::G_FLOG)
-                   .addDef(getOrCreateVReg(CI))
-                   .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
-    Log->copyIRFlags(CI);
-    return true;
-  }
-  case Intrinsic::log2: {
-    auto Log2 = MIRBuilder.buildInstr(TargetOpcode::G_FLOG2)
-                    .addDef(getOrCreateVReg(CI))
-                    .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
-    Log2->copyIRFlags(CI);
-    return true;
-  }
-  case Intrinsic::log10: {
-    auto Log10 = MIRBuilder.buildInstr(TargetOpcode::G_FLOG10)
-                     .addDef(getOrCreateVReg(CI))
-                     .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
-    Log10->copyIRFlags(CI);
-    return true;
-  }
-  case Intrinsic::fabs: {
-    auto Fabs = MIRBuilder.buildInstr(TargetOpcode::G_FABS)
-                    .addDef(getOrCreateVReg(CI))
-                    .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
-    Fabs->copyIRFlags(CI);
-    return true;
-  }
-  case Intrinsic::trunc:
-    MIRBuilder.buildInstr(TargetOpcode::G_INTRINSIC_TRUNC)
-        .addDef(getOrCreateVReg(CI))
-        .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
-    return true;
-  case Intrinsic::round:
-    MIRBuilder.buildInstr(TargetOpcode::G_INTRINSIC_ROUND)
-        .addDef(getOrCreateVReg(CI))
-        .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
-    return true;
-  case Intrinsic::fma: {
-    auto FMA = MIRBuilder.buildInstr(TargetOpcode::G_FMA)
-                   .addDef(getOrCreateVReg(CI))
-                   .addUse(getOrCreateVReg(*CI.getArgOperand(0)))
-                   .addUse(getOrCreateVReg(*CI.getArgOperand(1)))
-                   .addUse(getOrCreateVReg(*CI.getArgOperand(2)));
-    FMA->copyIRFlags(CI);
-    return true;
-  }
   case Intrinsic::fmuladd: {
     const TargetMachine &TM = MF->getTarget();
     const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering();
-    unsigned Dst = getOrCreateVReg(CI);
-    unsigned Op0 = getOrCreateVReg(*CI.getArgOperand(0));
-    unsigned Op1 = getOrCreateVReg(*CI.getArgOperand(1));
-    unsigned Op2 = getOrCreateVReg(*CI.getArgOperand(2));
+    Register Dst = getOrCreateVReg(CI);
+    Register Op0 = getOrCreateVReg(*CI.getArgOperand(0));
+    Register Op1 = getOrCreateVReg(*CI.getArgOperand(1));
+    Register Op2 = getOrCreateVReg(*CI.getArgOperand(2));
    if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict &&
         TLI.isFMAFasterThanFMulAndFAdd(TLI.getValueType(*DL, CI.getType()))) {
       // TODO: Revisit this to see if we should move this part of the
       // lowering to the combiner.
-      auto FMA = MIRBuilder.buildInstr(TargetOpcode::G_FMA, {Dst}, {Op0, Op1, Op2});
-      FMA->copyIRFlags(CI);
+      MIRBuilder.buildInstr(TargetOpcode::G_FMA, {Dst}, {Op0, Op1, Op2},
+                            MachineInstr::copyFlagsFromInstruction(CI));
     } else {
       LLT Ty = getLLTForType(*CI.getType(), *DL);
-      auto FMul = MIRBuilder.buildInstr(TargetOpcode::G_FMUL, {Ty}, {Op0, Op1});
-      FMul->copyIRFlags(CI);
-      auto FAdd = MIRBuilder.buildInstr(TargetOpcode::G_FADD, {Dst}, {FMul, Op2});
-      FAdd->copyIRFlags(CI);
+      auto FMul = MIRBuilder.buildInstr(TargetOpcode::G_FMUL, {Ty}, {Op0, Op1},
+                                        MachineInstr::copyFlagsFromInstruction(CI));
+      MIRBuilder.buildInstr(TargetOpcode::G_FADD, {Dst}, {FMul, Op2},
+                            MachineInstr::copyFlagsFromInstruction(CI));
     }
     return true;
   }
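The fmuladd case above picks between one fused G_FMA and an explicit G_FMUL + G_FADD pair depending on the target; the two forms can differ in the last bit because the fused op rounds once. A runnable illustration of that difference (plain C++17, std::fma from <cmath>):

#include <cmath>
#include <cstdio>

int main() {
  double a = 0x1.0000001p0, b = 0x1.0000001p0, c = -1.0;
  std::printf("fused:   %a\n", std::fma(a, b, c));  // single rounding
  std::printf("unfused: %a\n", a * b + c);          // two roundings
  return 0;
}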
 
@@ -986,7 +1436,7 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
     return translateMemfunc(CI, MIRBuilder, ID);
   case Intrinsic::eh_typeid_for: {
     GlobalValue *GV = ExtractTypeInfo(CI.getArgOperand(0));
-    unsigned Reg = getOrCreateVReg(CI);
+    Register Reg = getOrCreateVReg(CI);
     unsigned TypeID = MF->getTypeIDFor(GV);
     MIRBuilder.buildConstant(Reg, TypeID);
     return true;
@@ -1008,7 +1458,7 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
     return true;
   case Intrinsic::stackprotector: {
     LLT PtrTy = getLLTForType(*CI.getArgOperand(0)->getType(), *DL);
-    unsigned GuardVal = MRI->createGenericVirtualRegister(PtrTy);
+    Register GuardVal = MRI->createGenericVirtualRegister(PtrTy);
     getStackGuard(GuardVal, MIRBuilder);
 
     AllocaInst *Slot = cast<AllocaInst>(CI.getArgOperand(1));
@@ -1023,6 +1473,34 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
                                  PtrTy.getSizeInBits() / 8, 8));
     return true;
   }
+  case Intrinsic::stacksave: {
+    // Save the stack pointer to the location provided by the intrinsic.
+    Register Reg = getOrCreateVReg(CI);
+    Register StackPtr = MF->getSubtarget()
+                            .getTargetLowering()
+                            ->getStackPointerRegisterToSaveRestore();
+
+    // If the target doesn't specify a stack pointer, then fall back.
+    if (!StackPtr)
+      return false;
+
+    MIRBuilder.buildCopy(Reg, StackPtr);
+    return true;
+  }
+  case Intrinsic::stackrestore: {
+    // Restore the stack pointer from the location provided by the intrinsic.
+    Register Reg = getOrCreateVReg(*CI.getArgOperand(0));
+    Register StackPtr = MF->getSubtarget()
+                            .getTargetLowering()
+                            ->getStackPointerRegisterToSaveRestore();
+
+    // If the target doesn't specify a stack pointer, then fall back.
+    if (!StackPtr)
+      return false;
+
+    MIRBuilder.buildCopy(StackPtr, Reg);
+    return true;
+  }
   case Intrinsic::cttz:
   case Intrinsic::ctlz: {
     ConstantInt *Cst = cast<ConstantInt>(CI.getArgOperand(1));
@@ -1037,24 +1515,18 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
         .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
     return true;
   }
-  case Intrinsic::ctpop: {
-    MIRBuilder.buildInstr(TargetOpcode::G_CTPOP)
-        .addDef(getOrCreateVReg(CI))
-        .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
-    return true;
-  }
   case Intrinsic::invariant_start: {
     LLT PtrTy = getLLTForType(*CI.getArgOperand(0)->getType(), *DL);
-    unsigned Undef = MRI->createGenericVirtualRegister(PtrTy);
+    Register Undef = MRI->createGenericVirtualRegister(PtrTy);
     MIRBuilder.buildUndef(Undef);
     return true;
   }
   case Intrinsic::invariant_end:
     return true;
-  case Intrinsic::ceil:
-    MIRBuilder.buildInstr(TargetOpcode::G_FCEIL)
-        .addDef(getOrCreateVReg(CI))
-        .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
+  case Intrinsic::assume:
+  case Intrinsic::var_annotation:
+  case Intrinsic::sideeffect:
+    // Discard annotate attributes, assumptions, and artificial side-effects.
     return true;
   }
   return false;
@@ -1079,34 +1551,6 @@ bool IRTranslator::translateInlineAsm(const CallInst &CI,
   return true;
 }
 
-unsigned IRTranslator::packRegs(const Value &V,
-                                MachineIRBuilder &MIRBuilder) {
-  ArrayRef<unsigned> Regs = getOrCreateVRegs(V);
-  ArrayRef<uint64_t> Offsets = *VMap.getOffsets(V);
-  LLT BigTy = getLLTForType(*V.getType(), *DL);
-
-  if (Regs.size() == 1)
-    return Regs[0];
-
-  unsigned Dst = MRI->createGenericVirtualRegister(BigTy);
-  MIRBuilder.buildUndef(Dst);
-  for (unsigned i = 0; i < Regs.size(); ++i) {
-    unsigned NewDst = MRI->createGenericVirtualRegister(BigTy);
-    MIRBuilder.buildInsert(NewDst, Dst, Regs[i], Offsets[i]);
-    Dst = NewDst;
-  }
-  return Dst;
-}
-
-void IRTranslator::unpackRegs(const Value &V, unsigned Src,
-                              MachineIRBuilder &MIRBuilder) {
-  ArrayRef<unsigned> Regs = getOrCreateVRegs(V);
-  ArrayRef<uint64_t> Offsets = *VMap.getOffsets(V);
-
-  for (unsigned i = 0; i < Regs.size(); ++i)
-    MIRBuilder.buildExtract(Regs[i], Src, Offsets[i]);
-}
-
 bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
   const CallInst &CI = cast<CallInst>(U);
   auto TII = MF->getTarget().getIntrinsicInfo();
@@ -1126,23 +1570,32 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
     ID = static_cast<Intrinsic::ID>(TII->getIntrinsicID(F));
   }
 
-  bool IsSplitType = valueIsSplit(CI);
   if (!F || !F->isIntrinsic() || ID == Intrinsic::not_intrinsic) {
-    unsigned Res = IsSplitType ? MRI->createGenericVirtualRegister(
-                                     getLLTForType(*CI.getType(), *DL))
-                               : getOrCreateVReg(CI);
-
-    SmallVector<unsigned, 8> Args;
-    for (auto &Arg: CI.arg_operands())
-      Args.push_back(packRegs(*Arg, MIRBuilder));
+    ArrayRef<Register> Res = getOrCreateVRegs(CI);
+
+    SmallVector<ArrayRef<Register>, 8> Args;
+    Register SwiftInVReg = 0;
+    Register SwiftErrorVReg = 0;
+    for (auto &Arg: CI.arg_operands()) {
+      if (CLI->supportSwiftError() && isSwiftError(Arg)) {
+        assert(SwiftInVReg == 0 && "Expected only one swift error argument");
+        LLT Ty = getLLTForType(*Arg->getType(), *DL);
+        SwiftInVReg = MRI->createGenericVirtualRegister(Ty);
+        MIRBuilder.buildCopy(SwiftInVReg, SwiftError.getOrCreateVRegUseAt(
+                                              &CI, &MIRBuilder.getMBB(), Arg));
+        Args.emplace_back(makeArrayRef(SwiftInVReg));
+        SwiftErrorVReg =
+            SwiftError.getOrCreateVRegDefAt(&CI, &MIRBuilder.getMBB(), Arg);
+        continue;
+      }
+      Args.push_back(getOrCreateVRegs(*Arg));
+    }
 
     MF->getFrameInfo().setHasCalls(true);
-    bool Success = CLI->lowerCall(MIRBuilder, &CI, Res, Args, [&]() {
-      return getOrCreateVReg(*CI.getCalledValue());
-    });
+    bool Success =
+        CLI->lowerCall(MIRBuilder, &CI, Res, Args, SwiftErrorVReg,
+                       [&]() { return getOrCreateVReg(*CI.getCalledValue()); });
 
-    if (IsSplitType)
-      unpackRegs(CI, Res, MIRBuilder);
     return Success;
   }
 
@@ -1151,35 +1604,39 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
   if (translateKnownIntrinsic(CI, ID, MIRBuilder))
     return true;
 
-  unsigned Res = 0;
-  if (!CI.getType()->isVoidTy()) {
-    if (IsSplitType)
-      Res =
-          MRI->createGenericVirtualRegister(getLLTForType(*CI.getType(), *DL));
-    else
-      Res = getOrCreateVReg(CI);
-  }
+  ArrayRef<Register> ResultRegs;
+  if (!CI.getType()->isVoidTy())
+    ResultRegs = getOrCreateVRegs(CI);
+
+  // Ignore the callsite attributes. Backend code is most likely not expecting
+  // an intrinsic to sometimes have side effects and sometimes not.
   MachineInstrBuilder MIB =
-      MIRBuilder.buildIntrinsic(ID, Res, !CI.doesNotAccessMemory());
+      MIRBuilder.buildIntrinsic(ID, ResultRegs, !F->doesNotAccessMemory());
+  if (isa<FPMathOperator>(CI))
+    MIB->copyIRFlags(CI);
 
   for (auto &Arg : CI.arg_operands()) {
     // Some intrinsics take metadata parameters. Reject them.
     if (isa<MetadataAsValue>(Arg))
      return false;
-    MIB.addUse(packRegs(*Arg, MIRBuilder));
+    ArrayRef<Register> VRegs = getOrCreateVRegs(*Arg);
+    if (VRegs.size() > 1)
+      return false;
+    MIB.addUse(VRegs[0]);
  }
 
-  if (IsSplitType)
-    unpackRegs(CI, Res, MIRBuilder);
-
   // Add a MachineMemOperand if it is a target mem intrinsic.
   const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering();
   TargetLowering::IntrinsicInfo Info;
   // TODO: Add a GlobalISel version of getTgtMemIntrinsic.
   if (TLI.getTgtMemIntrinsic(Info, CI, *MF, ID)) {
+    unsigned Align = Info.align;
+    if (Align == 0)
+      Align =
+          DL->getABITypeAlignment(Info.memVT.getTypeForEVT(F->getContext()));
+
     uint64_t Size = Info.memVT.getStoreSize();
     MIB.addMemOperand(MF->getMachineMemOperand(MachinePointerInfo(Info.ptrVal),
-                                               Info.flags, Size, Info.align));
+                                               Info.flags, Size, Align));
   }
 
   return true;
@@ -1215,18 +1672,32 @@ bool IRTranslator::translateInvoke(const User &U,
   MCSymbol *BeginSymbol = Context.createTempSymbol();
   MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(BeginSymbol);
 
-  unsigned Res =
-      MRI->createGenericVirtualRegister(getLLTForType(*I.getType(), *DL));
-  SmallVector<unsigned, 8> Args;
-  for (auto &Arg: I.arg_operands())
-    Args.push_back(packRegs(*Arg, MIRBuilder));
+  ArrayRef<Register> Res;
+  if (!I.getType()->isVoidTy())
+    Res = getOrCreateVRegs(I);
+  SmallVector<ArrayRef<Register>, 8> Args;
+  Register SwiftErrorVReg = 0;
+  Register SwiftInVReg = 0;
+  for (auto &Arg : I.arg_operands()) {
+    if (CLI->supportSwiftError() && isSwiftError(Arg)) {
+      assert(SwiftInVReg == 0 && "Expected only one swift error argument");
+      LLT Ty = getLLTForType(*Arg->getType(), *DL);
+      SwiftInVReg = MRI->createGenericVirtualRegister(Ty);
+      MIRBuilder.buildCopy(SwiftInVReg, SwiftError.getOrCreateVRegUseAt(
+                                            &I, &MIRBuilder.getMBB(), Arg));
+      Args.push_back(makeArrayRef(SwiftInVReg));
+      SwiftErrorVReg =
+          SwiftError.getOrCreateVRegDefAt(&I, &MIRBuilder.getMBB(), Arg);
+      continue;
+    }
+
+    Args.push_back(getOrCreateVRegs(*Arg));
+  }
 
-  if (!CLI->lowerCall(MIRBuilder, &I, Res, Args,
+  if (!CLI->lowerCall(MIRBuilder, &I, Res, Args, SwiftErrorVReg,
                       [&]() { return getOrCreateVReg(*I.getCalledValue()); }))
     return false;
 
-  unpackRegs(I, Res, MIRBuilder);
-
   MCSymbol *EndSymbol = Context.createTempSymbol();
   MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(EndSymbol);
 
@@ -1241,6 +1712,12 @@ bool IRTranslator::translateInvoke(const User &U,
   return true;
 }
 
+bool IRTranslator::translateCallBr(const User &U,
+                                   MachineIRBuilder &MIRBuilder) {
+  // FIXME: Implement this.
+  return false;
+}
+
 bool IRTranslator::translateLandingPad(const User &U,
                                        MachineIRBuilder &MIRBuilder) {
   const LandingPadInst &LP = cast<LandingPadInst>(U);
@@ -1270,7 +1747,7 @@ bool IRTranslator::translateLandingPad(const User &U,
       .addSym(MF->addLandingPad(&MBB));
 
   LLT Ty = getLLTForType(*LP.getType(), *DL);
-  unsigned Undef = MRI->createGenericVirtualRegister(Ty);
+  Register Undef = MRI->createGenericVirtualRegister(Ty);
   MIRBuilder.buildUndef(Undef);
 
   SmallVector<LLT, 2> Tys;
@@ -1279,20 +1756,20 @@ bool IRTranslator::translateLandingPad(const User &U,
   assert(Tys.size() == 2 && "Only two-valued landingpads are supported");
 
   // Mark exception register as live in.
-  unsigned ExceptionReg = TLI.getExceptionPointerRegister(PersonalityFn);
+  Register ExceptionReg = TLI.getExceptionPointerRegister(PersonalityFn);
   if (!ExceptionReg)
     return false;
 
   MBB.addLiveIn(ExceptionReg);
-  ArrayRef<unsigned> ResRegs = getOrCreateVRegs(LP);
+  ArrayRef<Register> ResRegs = getOrCreateVRegs(LP);
   MIRBuilder.buildCopy(ResRegs[0], ExceptionReg);
 
-  unsigned SelectorReg = TLI.getExceptionSelectorRegister(PersonalityFn);
+  Register SelectorReg = TLI.getExceptionSelectorRegister(PersonalityFn);
   if (!SelectorReg)
     return false;
 
   MBB.addLiveIn(SelectorReg);
-  unsigned PtrVReg = MRI->createGenericVirtualRegister(Tys[0]);
+  Register PtrVReg = MRI->createGenericVirtualRegister(Tys[0]);
   MIRBuilder.buildCopy(PtrVReg, SelectorReg);
   MIRBuilder.buildCast(ResRegs[1], PtrVReg);
 
@@ -1304,10 +1781,10 @@ bool IRTranslator::translateAlloca(const User &U,
   auto &AI = cast<AllocaInst>(U);
 
   if (AI.isSwiftError())
-    return false;
+    return true;
 
   if (AI.isStaticAlloca()) {
-    unsigned Res = getOrCreateVReg(AI);
+    Register Res = getOrCreateVReg(AI);
     int FI = getOrCreateFrameIndex(AI);
     MIRBuilder.buildFrameIndex(Res, FI);
     return true;
@@ -1322,29 +1799,29 @@ bool IRTranslator::translateAlloca(const User &U,
   unsigned Align =
       std::max((unsigned)DL->getPrefTypeAlignment(Ty), AI.getAlignment());
 
-  unsigned NumElts = getOrCreateVReg(*AI.getArraySize());
+  Register NumElts = getOrCreateVReg(*AI.getArraySize());
 
   Type *IntPtrIRTy = DL->getIntPtrType(AI.getType());
   LLT IntPtrTy = getLLTForType(*IntPtrIRTy, *DL);
   if (MRI->getType(NumElts) != IntPtrTy) {
-    unsigned ExtElts = MRI->createGenericVirtualRegister(IntPtrTy);
+    Register ExtElts = MRI->createGenericVirtualRegister(IntPtrTy);
     MIRBuilder.buildZExtOrTrunc(ExtElts, NumElts);
     NumElts = ExtElts;
   }
 
-  unsigned AllocSize = MRI->createGenericVirtualRegister(IntPtrTy);
-  unsigned TySize =
+  Register AllocSize = MRI->createGenericVirtualRegister(IntPtrTy);
+  Register TySize =
       getOrCreateVReg(*ConstantInt::get(IntPtrIRTy, -DL->getTypeAllocSize(Ty)));
   MIRBuilder.buildMul(AllocSize, NumElts, TySize);
 
   LLT PtrTy = getLLTForType(*AI.getType(), *DL);
   auto &TLI = *MF->getSubtarget().getTargetLowering();
-  unsigned SPReg = TLI.getStackPointerRegisterToSaveRestore();
+  Register SPReg = TLI.getStackPointerRegisterToSaveRestore();
 
-  unsigned SPTmp = MRI->createGenericVirtualRegister(PtrTy);
+  Register SPTmp = MRI->createGenericVirtualRegister(PtrTy);
   MIRBuilder.buildCopy(SPTmp, SPReg);
 
-  unsigned AllocTmp = MRI->createGenericVirtualRegister(PtrTy);
+  Register AllocTmp = MRI->createGenericVirtualRegister(PtrTy);
   MIRBuilder.buildGEP(AllocTmp, SPTmp, AllocSize);
 
   // Handle alignment. We have to realign if the allocation granule was smaller
@@ -1357,7 +1834,7 @@ bool IRTranslator::translateAlloca(const User &U,
   // Round the size of the allocation up to the stack alignment size
   // by add SA-1 to the size. This doesn't overflow because we're computing
   // an address inside an alloca.
-    unsigned AlignedAlloc = MRI->createGenericVirtualRegister(PtrTy);
+    Register AlignedAlloc = MRI->createGenericVirtualRegister(PtrTy);
     MIRBuilder.buildPtrMask(AlignedAlloc, AllocTmp, Log2_32(Align));
     AllocTmp = AlignedAlloc;
   }
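Two details of the dynamic-alloca path above: TySize is built from the negated alloc size, so the multiply yields a negative byte count and the G_GEP moves the stack pointer down; the realignment then clears low bits of the result, which is what G_PTR_MASK does. A standalone sketch of that masking step (plain C++):

#include <cstdint>
#include <cstdio>

// Align a (downward-growing) stack address by clearing its low bits,
// the same effect as G_PTR_MASK with Log2Align mask bits.
uint64_t alignDown(uint64_t P, unsigned Log2Align) {
  return P & ~((uint64_t(1) << Log2Align) - 1);
}

int main() {
  std::printf("%#llx\n",
              (unsigned long long)alignDown(0x1003, 4));  // 16-byte: 0x1000
  return 0;
}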
@@ -1387,7 +1864,7 @@ bool IRTranslator::translateInsertElement(const User &U,
   // If it is a <1 x Ty> vector, use the scalar as it is
   // not a legal vector type in LLT.
   if (U.getType()->getVectorNumElements() == 1) {
-    unsigned Elt = getOrCreateVReg(*U.getOperand(1));
+    Register Elt = getOrCreateVReg(*U.getOperand(1));
     auto &Regs = *VMap.getVRegs(U);
     if (Regs.empty()) {
       Regs.push_back(Elt);
@@ -1398,10 +1875,10 @@ bool IRTranslator::translateInsertElement(const User &U,
     return true;
   }
 
-  unsigned Res = getOrCreateVReg(U);
-  unsigned Val = getOrCreateVReg(*U.getOperand(0));
-  unsigned Elt = getOrCreateVReg(*U.getOperand(1));
-  unsigned Idx = getOrCreateVReg(*U.getOperand(2));
+  Register Res = getOrCreateVReg(U);
+  Register Val = getOrCreateVReg(*U.getOperand(0));
+  Register Elt = getOrCreateVReg(*U.getOperand(1));
+  Register Idx = getOrCreateVReg(*U.getOperand(2));
   MIRBuilder.buildInsertVectorElement(Res, Val, Elt, Idx);
   return true;
 }
@@ -1411,7 +1888,7 @@ bool IRTranslator::translateExtractElement(const User &U,
   // If it is a <1 x Ty> vector, use the scalar as it is
   // not a legal vector type in LLT.
   if (U.getOperand(0)->getType()->getVectorNumElements() == 1) {
-    unsigned Elt = getOrCreateVReg(*U.getOperand(0));
+    Register Elt = getOrCreateVReg(*U.getOperand(0));
     auto &Regs = *VMap.getVRegs(U);
     if (Regs.empty()) {
       Regs.push_back(Elt);
@@ -1421,11 +1898,11 @@ bool IRTranslator::translateExtractElement(const User &U,
     }
     return true;
   }
-  unsigned Res = getOrCreateVReg(U);
-  unsigned Val = getOrCreateVReg(*U.getOperand(0));
+  Register Res = getOrCreateVReg(U);
+  Register Val = getOrCreateVReg(*U.getOperand(0));
   const auto &TLI = *MF->getSubtarget().getTargetLowering();
   unsigned PreferredVecIdxWidth = TLI.getVectorIdxTy(*DL).getSizeInBits();
-  unsigned Idx = 0;
+  Register Idx;
   if (auto *CI = dyn_cast<ConstantInt>(U.getOperand(1))) {
     if (CI->getBitWidth() != PreferredVecIdxWidth) {
       APInt NewIdx = CI->getValue().sextOrTrunc(PreferredVecIdxWidth);
@@ -1481,11 +1958,11 @@ bool IRTranslator::translateAtomicCmpXchg(const User &U,
   Type *ValType = ResType->Type::getStructElementType(0);
 
   auto Res = getOrCreateVRegs(I);
-  unsigned OldValRes = Res[0];
-  unsigned SuccessRes = Res[1];
-  unsigned Addr = getOrCreateVReg(*I.getPointerOperand());
-  unsigned Cmp = getOrCreateVReg(*I.getCompareOperand());
-  unsigned NewVal = getOrCreateVReg(*I.getNewValOperand());
+  Register OldValRes = Res[0];
+  Register SuccessRes = Res[1];
+  Register Addr = getOrCreateVReg(*I.getPointerOperand());
+  Register Cmp = getOrCreateVReg(*I.getCompareOperand());
+  Register NewVal = getOrCreateVReg(*I.getNewValOperand());
 
   MIRBuilder.buildAtomicCmpXchgWithSuccess(
       OldValRes, SuccessRes, Addr, Cmp, NewVal,
@@ -1507,9 +1984,9 @@ bool IRTranslator::translateAtomicRMW(const User &U,
 
   Type *ResType = I.getType();
 
-  unsigned Res = getOrCreateVReg(I);
-  unsigned Addr = getOrCreateVReg(*I.getPointerOperand());
-  unsigned Val = getOrCreateVReg(*I.getValOperand());
+  Register Res = getOrCreateVReg(I);
+  Register Addr = getOrCreateVReg(*I.getPointerOperand());
+  Register Val = getOrCreateVReg(*I.getValOperand());
 
   unsigned Opcode = 0;
   switch (I.getOperation()) {
@@ -1560,6 +2037,14 @@ bool IRTranslator::translateAtomicRMW(const User &U,
   return true;
 }
 
+bool IRTranslator::translateFence(const User &U,
+                                  MachineIRBuilder &MIRBuilder) {
+  const FenceInst &Fence = cast<FenceInst>(U);
+  MIRBuilder.buildFence(static_cast<unsigned>(Fence.getOrdering()),
+                        Fence.getSyncScopeID());
+  return true;
+}
+
 void IRTranslator::finishPendingPhis() {
 #ifndef NDEBUG
   DILocationVerifier Verifier;
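// Aside: a sketch (not itself part of the patch) of what the new
// translateFence emits. G_FENCE carries the IR ordering and sync scope as
// raw immediates, so an IR `fence seq_cst` becomes (enum names from the
// LLVM headers this file already includes):
//
//   MIRBuilder.buildFence(
//       static_cast<unsigned>(AtomicOrdering::SequentiallyConsistent),
//       SyncScope::System);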
@@ -1569,27 +2054,20 @@ void IRTranslator::finishPendingPhis() {
   for (auto &Phi : PendingPHIs) {
     const PHINode *PI = Phi.first;
     ArrayRef<MachineInstr *> ComponentPHIs = Phi.second;
+    MachineBasicBlock *PhiMBB = ComponentPHIs[0]->getParent();
     EntryBuilder->setDebugLoc(PI->getDebugLoc());
 #ifndef NDEBUG
     Verifier.setCurrentInst(PI);
 #endif // ifndef NDEBUG
 
-    // All MachineBasicBlocks exist, add them to the PHI. We assume IRTranslator
-    // won't create extra control flow here, otherwise we need to find the
-    // dominating predecessor here (or perhaps force the weirder IRTranslators
-    // to provide a simple boundary).
-    SmallSet<const BasicBlock *, 4> HandledPreds;
-
+    SmallSet<const MachineBasicBlock *, 16> SeenPreds;
     for (unsigned i = 0; i < PI->getNumIncomingValues(); ++i) {
       auto IRPred = PI->getIncomingBlock(i);
-      if (HandledPreds.count(IRPred))
-        continue;
-
-      HandledPreds.insert(IRPred);
-      ArrayRef<unsigned> ValRegs = getOrCreateVRegs(*PI->getIncomingValue(i));
+      ArrayRef<Register> ValRegs = getOrCreateVRegs(*PI->getIncomingValue(i));
       for (auto Pred : getMachinePredBBs({IRPred, PI->getParent()})) {
-        assert(Pred->isSuccessor(ComponentPHIs[0]->getParent()) &&
-               "incorrect CFG at MachineBasicBlock level");
+        if (SeenPreds.count(Pred) || !PhiMBB->isPredecessor(Pred))
+          continue;
+        SeenPreds.insert(Pred);
         for (unsigned j = 0; j < ValRegs.size(); ++j) {
           MachineInstrBuilder MIB(*MF, ComponentPHIs[j]);
           MIB.addUse(ValRegs[j]);
@@ -1611,8 +2089,15 @@ bool IRTranslator::valueIsSplit(const Value &V,
 
 bool IRTranslator::translate(const Instruction &Inst) {
   CurBuilder->setDebugLoc(Inst.getDebugLoc());
-  EntryBuilder->setDebugLoc(Inst.getDebugLoc());
-  switch(Inst.getOpcode()) {
+  // We only emit constants into the entry block from here. To prevent jumpy
+  // debug behaviour set the line to 0.
+  if (const DebugLoc &DL = Inst.getDebugLoc())
+    EntryBuilder->setDebugLoc(
+        DebugLoc::get(0, 0, DL.getScope(), DL.getInlinedAt()));
+  else
+    EntryBuilder->setDebugLoc(DebugLoc());
+
+  switch (Inst.getOpcode()) {
 #define HANDLE_INST(NUM, OPCODE, CLASS) \
   case Instruction::OPCODE: \
     return translate##OPCODE(Inst, *CurBuilder.get());
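// Aside: the DebugLoc change above in one line (sketch, not itself part of
// the patch). Constants are hoisted into the entry block, so stamping them
// with the using instruction's real line would make single-stepping jump
// around; line/column 0 keeps the scope chain but tells the debugger "no
// source position":
//
//   EntryBuilder->setDebugLoc(
//       DebugLoc::get(/*Line=*/0, /*Col=*/0, DL.getScope(), DL.getInlinedAt()));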
@@ -1622,7 +2107,7 @@ bool IRTranslator::translate(const Instruction &Inst) {
   }
 }
 
-bool IRTranslator::translate(const Constant &C, unsigned Reg) {
+bool IRTranslator::translate(const Constant &C, Register Reg) {
   if (auto CI = dyn_cast<ConstantInt>(&C))
     EntryBuilder->buildConstant(Reg, *CI);
   else if (auto CF = dyn_cast<ConstantFP>(&C))
@@ -1635,7 +2120,7 @@ bool IRTranslator::translate(const Constant &C, unsigned Reg) {
     unsigned NullSize = DL->getTypeSizeInBits(C.getType());
     auto *ZeroTy = Type::getIntNTy(C.getContext(), NullSize);
     auto *ZeroVal = ConstantInt::get(ZeroTy, 0);
-    unsigned ZeroReg = getOrCreateVReg(*ZeroVal);
+    Register ZeroReg = getOrCreateVReg(*ZeroVal);
     EntryBuilder->buildCast(Reg, ZeroReg);
   } else if (auto GV = dyn_cast<GlobalValue>(&C))
     EntryBuilder->buildGlobalValue(Reg, GV);
@@ -1645,7 +2130,7 @@ bool IRTranslator::translate(const Constant &C, unsigned Reg) {
     // Return the scalar if it is a <1 x Ty> vector.
     if (CAZ->getNumElements() == 1)
       return translate(*CAZ->getElementValue(0u), Reg);
-    SmallVector<unsigned, 4> Ops;
+    SmallVector<Register, 4> Ops;
    for (unsigned i = 0; i < CAZ->getNumElements(); ++i) {
      Constant &Elt = *CAZ->getElementValue(i);
      Ops.push_back(getOrCreateVReg(Elt));
    }
@@ -1655,7 +2140,7 @@
     // Return the scalar if it is a <1 x Ty> vector.
     if (CV->getNumElements() == 1)
       return translate(*CV->getElementAsConstant(0), Reg);
-    SmallVector<unsigned, 4> Ops;
+    SmallVector<Register, 4> Ops;
     for (unsigned i = 0; i < CV->getNumElements(); ++i) {
       Constant &Elt = *CV->getElementAsConstant(i);
       Ops.push_back(getOrCreateVReg(Elt));
@@ -1673,7 +2158,7 @@ bool IRTranslator::translate(const Constant &C, unsigned Reg) {
   } else if (auto CV = dyn_cast<ConstantVector>(&C)) {
     if (CV->getNumOperands() == 1)
       return translate(*CV->getOperand(0), Reg);
-    SmallVector<unsigned, 4> Ops;
+    SmallVector<Register, 4> Ops;
     for (unsigned i = 0; i < CV->getNumOperands(); ++i) {
       Ops.push_back(getOrCreateVReg(*CV->getOperand(i)));
     }
@@ -1686,6 +2171,17 @@ bool IRTranslator::translate(const Constant &C, unsigned Reg) {
   return true;
 }
 
+void IRTranslator::finalizeBasicBlock() {
+  for (auto &JTCase : SL->JTCases) {
+    // Emit header first, if it wasn't already emitted.
+    if (!JTCase.first.Emitted)
+      emitJumpTableHeader(JTCase.second, JTCase.first, JTCase.first.HeaderBB);
+
+    emitJumpTable(JTCase.second, JTCase.second.MBB);
+  }
+  SL->JTCases.clear();
+}
+
 void IRTranslator::finalizeFunction() {
   // Release the memory used by the different maps we
   // needed during the translation.
@@ -1698,6 +2194,7 @@ void IRTranslator::finalizeFunction() {
   // destroying it twice (in ~IRTranslator() and ~LLVMContext())
   EntryBuilder.reset();
   CurBuilder.reset();
+  FuncInfo.clear();
 }
 
 bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
@@ -1710,13 +2207,13 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
   // Set the CSEConfig and run the analysis.
   GISelCSEInfo *CSEInfo = nullptr;
   TPC = &getAnalysis<TargetPassConfig>();
-  bool IsO0 = TPC->getOptLevel() == CodeGenOpt::Level::None;
-  // Disable CSE for O0.
-  bool EnableCSE = !IsO0 && EnableCSEInIRTranslator;
+  bool EnableCSE = EnableCSEInIRTranslator.getNumOccurrences()
+                       ? EnableCSEInIRTranslator
+                       : TPC->isGISelCSEEnabled();
+
   if (EnableCSE) {
     EntryBuilder = make_unique<CSEMIRBuilder>(CurMF);
-    std::unique_ptr<CSEConfig> Config = make_unique<CSEConfig>();
-    CSEInfo = &Wrapper.get(std::move(Config));
+    CSEInfo = &Wrapper.get(TPC->getCSEConfig());
    EntryBuilder->setCSEInfo(CSEInfo);
    CurBuilder = make_unique<CSEMIRBuilder>(CurMF);
    CurBuilder->setCSEInfo(CSEInfo);
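// Aside: the EnableCSE computation above is the usual cl::opt idiom for
// "an explicit flag beats the pass-config default". A self-contained sketch
// (not itself part of the patch) with a hypothetical flag; MyPassUseCSE is
// illustrative only, not a real LLVM option:
#include "llvm/Support/CommandLine.h"
static llvm::cl::opt<bool> MyPassUseCSE("my-pass-use-cse", llvm::cl::Hidden,
                                        llvm::cl::init(true));
static bool shouldUseCSE(bool PassConfigDefault) {
  // getNumOccurrences() is non-zero only if the user passed the flag, so an
  // untouched default defers to the TargetPassConfig policy.
  return MyPassUseCSE.getNumOccurrences() ? MyPassUseCSE : PassConfigDefault;
}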
@@ -1730,6 +2227,14 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
   MRI = &MF->getRegInfo();
   DL = &F.getParent()->getDataLayout();
   ORE = llvm::make_unique<OptimizationRemarkEmitter>(&F);
+  FuncInfo.MF = MF;
+  FuncInfo.BPI = nullptr;
+  const auto &TLI = *MF->getSubtarget().getTargetLowering();
+  const TargetMachine &TM = MF->getTarget();
+  SL = make_unique<GISelSwitchLowering>(this, FuncInfo);
+  SL->init(TLI, TM, *DL);
+
+  EnableOpts = TM.getOptLevel() != CodeGenOpt::None && !skipFunction(F);
 
   assert(PendingPHIs.empty() && "stale PHIs");
 
@@ -1749,6 +2254,10 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
   MF->push_back(EntryBB);
   EntryBuilder->setMBB(*EntryBB);
 
+  DebugLoc DbgLoc = F.getEntryBlock().getFirstNonPHI()->getDebugLoc();
+  SwiftError.setFunction(CurMF);
+  SwiftError.createEntriesInEntryBlock(DbgLoc);
+
   // Create all blocks, in IR order, to preserve the layout.
   for (const BasicBlock &BB: F) {
     auto *&MBB = BBToMBB[&BB];
@@ -1764,20 +2273,25 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
   EntryBB->addSuccessor(&getMBB(F.front()));
 
   // Lower the actual args into this basic block.
-  SmallVector<unsigned, 8> VRegArgs;
+  SmallVector<ArrayRef<Register>, 8> VRegArgs;
   for (const Argument &Arg: F.args()) {
     if (DL->getTypeStoreSize(Arg.getType()) == 0)
       continue; // Don't handle zero sized types.
-    VRegArgs.push_back(
-        MRI->createGenericVirtualRegister(getLLTForType(*Arg.getType(), *DL)));
+    ArrayRef<Register> VRegs = getOrCreateVRegs(Arg);
+    VRegArgs.push_back(VRegs);
+
+    if (Arg.hasSwiftErrorAttr()) {
+      assert(VRegs.size() == 1 && "Too many vregs for Swift error");
+      SwiftError.setCurrentVReg(EntryBB, SwiftError.getFunctionArg(), VRegs[0]);
+    }
   }
 
   // We don't currently support translating swifterror or swiftself functions.
   for (auto &Arg : F.args()) {
-    if (Arg.hasSwiftErrorAttr() || Arg.hasSwiftSelfAttr()) {
+    if (Arg.hasSwiftSelfAttr()) {
       OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
                                  F.getSubprogram(), &F.getEntryBlock());
-      R << "unable to lower arguments due to swifterror/swiftself: "
+      R << "unable to lower arguments due to swiftself: "
         << ore::NV("Prototype", F.getType());
       reportTranslationError(*MF, *TPC, *ORE, R);
       return false;
@@ -1792,20 +2306,6 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
     return false;
   }
 
-  auto ArgIt = F.arg_begin();
-  for (auto &VArg : VRegArgs) {
-    // If the argument is an unsplit scalar then don't use unpackRegs to avoid
-    // creating redundant copies.
-    if (!valueIsSplit(*ArgIt, VMap.getOffsets(*ArgIt))) {
-      auto &VRegs = *VMap.getVRegs(cast<Value>(*ArgIt));
-      assert(VRegs.empty() && "VRegs already populated?");
-      VRegs.push_back(VArg);
-    } else {
-      unpackRegs(*ArgIt, VArg, *EntryBuilder.get());
-    }
-    ArgIt++;
-  }
-
   // Need to visit defs before uses when translating instructions.
   GISelObserverWrapper WrapperObserver;
   if (EnableCSE && CSEInfo)
@@ -1845,6 +2345,8 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
       reportTranslationError(*MF, *TPC, *ORE, R);
       return false;
     }
+
+    finalizeBasicBlock();
   }
 #ifndef NDEBUG
   WrapperObserver.removeObserver(&Verifier);
@@ -1853,6 +2355,8 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
 
   finishPendingPhis();
 
+  SwiftError.propagateVRegs();
+
   // Merge the argument lowering and constants block with its single
   // successor, the LLVM-IR entry block. We want the basic block to
   // be maximal.
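// Aside: most of this diff is the mechanical unsigned -> Register migration.
// llvm::Register wraps the old unsigned vreg/physreg encoding and converts
// implicitly in both directions, which is why lines like
// `Register SwiftErrorVReg = 0;` and `if (!ExceptionReg)` compile unchanged.
// A minimal sketch (assuming llvm/CodeGen/Register.h, where the class lives
// on trunk at this revision):
#include "llvm/CodeGen/Register.h"
static unsigned roundTrip(llvm::Register R) {
  if (!R)     // 0 means "no register", as in the ExceptionReg check above
    return 0;
  return R;   // implicit Register -> unsigned conversion
}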
