Diffstat (limited to 'llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp')
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp  894
1 file changed, 815 insertions(+), 79 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 8f6643b2f193..b97c369b832d 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -29,9 +29,11 @@
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/StackProtector.h"
+#include "llvm/CodeGen/SwitchLoweringUtils.h"
 #include "llvm/CodeGen/TargetFrameLowering.h"
 #include "llvm/CodeGen/TargetInstrInfo.h"
 #include "llvm/CodeGen/TargetLowering.h"
@@ -48,11 +50,13 @@
 #include "llvm/IR/Function.h"
 #include "llvm/IR/GetElementPtrTypeIterator.h"
 #include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/InstrTypes.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Metadata.h"
+#include "llvm/IR/PatternMatch.h"
 #include "llvm/IR/Type.h"
 #include "llvm/IR/User.h"
 #include "llvm/IR/Value.h"
@@ -70,6 +74,7 @@
 #include "llvm/Target/TargetMachine.h"
 #include <algorithm>
 #include <cassert>
+#include <cstddef>
 #include <cstdint>
 #include <iterator>
 #include <string>
@@ -90,6 +95,8 @@
 INITIALIZE_PASS_BEGIN(IRTranslator, DEBUG_TYPE, "IRTranslator LLVM IR -> MI",
                 false, false)
 INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
 INITIALIZE_PASS_DEPENDENCY(GISelCSEAnalysisWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(StackProtector)
 INITIALIZE_PASS_END(IRTranslator, DEBUG_TYPE, "IRTranslator LLVM IR -> MI",
                 false, false)
@@ -110,7 +117,8 @@ static void reportTranslationError(MachineFunction &MF,
   ORE.emit(R);
 }
 
-IRTranslator::IRTranslator() : MachineFunctionPass(ID) { }
+IRTranslator::IRTranslator(CodeGenOpt::Level optlevel)
+    : MachineFunctionPass(ID), OptLevel(optlevel) {}
 
 #ifndef NDEBUG
 namespace {
@@ -154,13 +162,17 @@ void IRTranslator::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.addRequired<StackProtector>();
   AU.addRequired<TargetPassConfig>();
   AU.addRequired<GISelCSEAnalysisWrapperPass>();
+  if (OptLevel != CodeGenOpt::None)
+    AU.addRequired<BranchProbabilityInfoWrapperPass>();
   getSelectionDAGFallbackAnalysisUsage(AU);
   MachineFunctionPass::getAnalysisUsage(AU);
 }
 
 IRTranslator::ValueToVRegInfo::VRegListT &
 IRTranslator::allocateVRegs(const Value &Val) {
-  assert(!VMap.contains(Val) && "Value already allocated in VMap");
+  auto VRegsIt = VMap.findVRegs(Val);
+  if (VRegsIt != VMap.vregs_end())
+    return *VRegsIt->second;
   auto *Regs = VMap.getVRegs(Val);
   auto *Offsets = VMap.getOffsets(Val);
   SmallVector<LLT, 4> SplitTys;
@@ -222,8 +234,9 @@ ArrayRef<Register> IRTranslator::getOrCreateVRegs(const Value &Val) {
 }
 
 int IRTranslator::getOrCreateFrameIndex(const AllocaInst &AI) {
-  if (FrameIndices.find(&AI) != FrameIndices.end())
-    return FrameIndices[&AI];
+  auto MapEntry = FrameIndices.find(&AI);
+  if (MapEntry != FrameIndices.end())
+    return MapEntry->second;
 
   uint64_t ElementSize = DL->getTypeAllocSize(AI.getAllocatedType());
   uint64_t Size =
@@ -293,25 +306,8 @@ bool IRTranslator::translateBinaryOp(unsigned Opcode, const User &U,
   return true;
 }
 
-bool IRTranslator::translateFSub(const User &U, MachineIRBuilder &MIRBuilder) {
-  // -0.0 - X --> G_FNEG
-  if (isa<Constant>(U.getOperand(0)) &&
-      U.getOperand(0) == ConstantFP::getZeroValueForNegation(U.getType())) {
-    Register Op1 = getOrCreateVReg(*U.getOperand(1));
-    Register Res = getOrCreateVReg(U);
-    uint16_t Flags = 0;
-    if (isa<Instruction>(U)) {
-      const Instruction &I = cast<Instruction>(U);
-      Flags = MachineInstr::copyFlagsFromInstruction(I);
-    }
-    // Negate the last operand of the FSUB
-    MIRBuilder.buildFNeg(Res, Op1, Flags);
-    return true;
-  }
-  return translateBinaryOp(TargetOpcode::G_FSUB, U, MIRBuilder);
-}
-
-bool IRTranslator::translateFNeg(const User &U, MachineIRBuilder &MIRBuilder) {
+bool IRTranslator::translateUnaryOp(unsigned Opcode, const User &U,
+                                    MachineIRBuilder &MIRBuilder) {
   Register Op0 = getOrCreateVReg(*U.getOperand(0));
   Register Res = getOrCreateVReg(U);
   uint16_t Flags = 0;
@@ -319,10 +315,14 @@ bool IRTranslator::translateFNeg(const User &U, MachineIRBuilder &MIRBuilder) {
     const Instruction &I = cast<Instruction>(U);
     Flags = MachineInstr::copyFlagsFromInstruction(I);
   }
-  MIRBuilder.buildFNeg(Res, Op0, Flags);
+  MIRBuilder.buildInstr(Opcode, {Res}, {Op0}, Flags);
   return true;
 }
 
+bool IRTranslator::translateFNeg(const User &U, MachineIRBuilder &MIRBuilder) {
+  return translateUnaryOp(TargetOpcode::G_FNEG, U, MIRBuilder);
+}
+
 bool IRTranslator::translateCompare(const User &U,
                                     MachineIRBuilder &MIRBuilder) {
   auto *CI = dyn_cast<CmpInst>(&U);
@@ -368,31 +368,289 @@ bool IRTranslator::translateRet(const User &U, MachineIRBuilder &MIRBuilder) {
   // The target may mess up with the insertion point, but
   // this is not important as a return is the last instruction
   // of the block anyway.
-  return CLI->lowerReturn(MIRBuilder, Ret, VRegs, SwiftErrorVReg);
+  return CLI->lowerReturn(MIRBuilder, Ret, VRegs, FuncInfo, SwiftErrorVReg);
+}
+
+void IRTranslator::emitBranchForMergedCondition(
+    const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
+    MachineBasicBlock *CurBB, MachineBasicBlock *SwitchBB,
+    BranchProbability TProb, BranchProbability FProb, bool InvertCond) {
+  // If the leaf of the tree is a comparison, merge the condition into
+  // the caseblock.
+  if (const CmpInst *BOp = dyn_cast<CmpInst>(Cond)) {
+    CmpInst::Predicate Condition;
+    if (const ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) {
+      Condition = InvertCond ? IC->getInversePredicate() : IC->getPredicate();
+    } else {
+      const FCmpInst *FC = cast<FCmpInst>(Cond);
+      Condition = InvertCond ? FC->getInversePredicate() : FC->getPredicate();
+    }
+
+    SwitchCG::CaseBlock CB(Condition, false, BOp->getOperand(0),
+                           BOp->getOperand(1), nullptr, TBB, FBB, CurBB,
+                           CurBuilder->getDebugLoc(), TProb, FProb);
+    SL->SwitchCases.push_back(CB);
+    return;
+  }
+
+  // Create a CaseBlock record representing this branch.
+  CmpInst::Predicate Pred = InvertCond ? CmpInst::ICMP_NE : CmpInst::ICMP_EQ;
+  SwitchCG::CaseBlock CB(
+      Pred, false, Cond, ConstantInt::getTrue(MF->getFunction().getContext()),
+      nullptr, TBB, FBB, CurBB, CurBuilder->getDebugLoc(), TProb, FProb);
+  SL->SwitchCases.push_back(CB);
+}
+
+static bool isValInBlock(const Value *V, const BasicBlock *BB) {
+  if (const Instruction *I = dyn_cast<Instruction>(V))
+    return I->getParent() == BB;
+  return true;
+}
+
+void IRTranslator::findMergedConditions(
+    const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
+    MachineBasicBlock *CurBB, MachineBasicBlock *SwitchBB,
+    Instruction::BinaryOps Opc, BranchProbability TProb,
+    BranchProbability FProb, bool InvertCond) {
+  using namespace PatternMatch;
+  assert((Opc == Instruction::And || Opc == Instruction::Or) &&
+         "Expected Opc to be AND/OR");
+  // Skip over not part of the tree and remember to invert op and operands at
+  // next level.
+  Value *NotCond;
+  if (match(Cond, m_OneUse(m_Not(m_Value(NotCond)))) &&
+      isValInBlock(NotCond, CurBB->getBasicBlock())) {
+    findMergedConditions(NotCond, TBB, FBB, CurBB, SwitchBB, Opc, TProb, FProb,
+                         !InvertCond);
+    return;
+  }
+
+  const Instruction *BOp = dyn_cast<Instruction>(Cond);
+  const Value *BOpOp0, *BOpOp1;
+  // Compute the effective opcode for Cond, taking into account whether it needs
+  // to be inverted, e.g.
+  //   and (not (or A, B)), C
+  // gets lowered as
+  //   and (and (not A, not B), C)
+  Instruction::BinaryOps BOpc = (Instruction::BinaryOps)0;
+  if (BOp) {
+    BOpc = match(BOp, m_LogicalAnd(m_Value(BOpOp0), m_Value(BOpOp1)))
+               ? Instruction::And
+               : (match(BOp, m_LogicalOr(m_Value(BOpOp0), m_Value(BOpOp1)))
+                      ? Instruction::Or
+                      : (Instruction::BinaryOps)0);
+    if (InvertCond) {
+      if (BOpc == Instruction::And)
+        BOpc = Instruction::Or;
+      else if (BOpc == Instruction::Or)
+        BOpc = Instruction::And;
+    }
+  }
+
+  // If this node is not part of the or/and tree, emit it as a branch.
+  // Note that all nodes in the tree should have same opcode.
+  bool BOpIsInOrAndTree = BOpc && BOpc == Opc && BOp->hasOneUse();
+  if (!BOpIsInOrAndTree || BOp->getParent() != CurBB->getBasicBlock() ||
+      !isValInBlock(BOpOp0, CurBB->getBasicBlock()) ||
+      !isValInBlock(BOpOp1, CurBB->getBasicBlock())) {
+    emitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB, TProb, FProb,
+                                 InvertCond);
+    return;
+  }
+
+  // Create TmpBB after CurBB.
+  MachineFunction::iterator BBI(CurBB);
+  MachineBasicBlock *TmpBB =
+      MF->CreateMachineBasicBlock(CurBB->getBasicBlock());
+  CurBB->getParent()->insert(++BBI, TmpBB);
+
+  if (Opc == Instruction::Or) {
+    // Codegen X | Y as:
+    // BB1:
+    //   jmp_if_X TBB
+    //   jmp TmpBB
+    // TmpBB:
+    //   jmp_if_Y TBB
+    //   jmp FBB
+    //
+
+    // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
+    // The requirement is that
+    //   TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB)
+    //   = TrueProb for original BB.
+    // Assuming the original probabilities are A and B, one choice is to set
+    // BB1's probabilities to A/2 and A/2+B, and set TmpBB's probabilities to
+    // A/(1+B) and 2B/(1+B). This choice assumes that
+    //   TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB.
+    // Another choice is to assume TrueProb for BB1 equals to TrueProb for
+    // TmpBB, but the math is more complicated.
+
+    auto NewTrueProb = TProb / 2;
+    auto NewFalseProb = TProb / 2 + FProb;
+    // Emit the LHS condition.
+    findMergedConditions(BOpOp0, TBB, TmpBB, CurBB, SwitchBB, Opc, NewTrueProb,
+                         NewFalseProb, InvertCond);
+
+    // Normalize A/2 and B to get A/(1+B) and 2B/(1+B).
+    SmallVector<BranchProbability, 2> Probs{TProb / 2, FProb};
+    BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end());
+    // Emit the RHS condition into TmpBB.
+    findMergedConditions(BOpOp1, TBB, FBB, TmpBB, SwitchBB, Opc, Probs[0],
+                         Probs[1], InvertCond);
+  } else {
+    assert(Opc == Instruction::And && "Unknown merge op!");
+    // Codegen X & Y as:
+    // BB1:
+    //   jmp_if_X TmpBB
+    //   jmp FBB
+    // TmpBB:
+    //   jmp_if_Y TBB
+    //   jmp FBB
+    //
+    // This requires creation of TmpBB after CurBB.
+
+    // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
+    // The requirement is that
+    //   FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB)
+    //   = FalseProb for original BB.
+    // Assuming the original probabilities are A and B, one choice is to set
+    // BB1's probabilities to A+B/2 and B/2, and set TmpBB's probabilities to
+    // 2A/(1+A) and B/(1+A). This choice assumes that FalseProb for BB1 ==
+    // TrueProb for BB1 * FalseProb for TmpBB.
+
+    auto NewTrueProb = TProb + FProb / 2;
+    auto NewFalseProb = FProb / 2;
+    // Emit the LHS condition.
+    findMergedConditions(BOpOp0, TmpBB, FBB, CurBB, SwitchBB, Opc, NewTrueProb,
+                         NewFalseProb, InvertCond);
+
+    // Normalize A and B/2 to get 2A/(1+A) and B/(1+A).
+    SmallVector<BranchProbability, 2> Probs{TProb, FProb / 2};
+    BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end());
+    // Emit the RHS condition into TmpBB.
+    findMergedConditions(BOpOp1, TBB, FBB, TmpBB, SwitchBB, Opc, Probs[0],
+                         Probs[1], InvertCond);
+  }
+}
+
+bool IRTranslator::shouldEmitAsBranches(
+    const std::vector<SwitchCG::CaseBlock> &Cases) {
+  // For multiple cases, it's better to emit as branches.
+  if (Cases.size() != 2)
+    return true;
+
+  // If this is two comparisons of the same values or'd or and'd together, they
+  // will get folded into a single comparison, so don't emit two blocks.
+  if ((Cases[0].CmpLHS == Cases[1].CmpLHS &&
+       Cases[0].CmpRHS == Cases[1].CmpRHS) ||
+      (Cases[0].CmpRHS == Cases[1].CmpLHS &&
+       Cases[0].CmpLHS == Cases[1].CmpRHS)) {
+    return false;
+  }
+
+  // Handle: (X != null) | (Y != null) --> (X|Y) != 0
+  // Handle: (X == null) & (Y == null) --> (X|Y) == 0
+  if (Cases[0].CmpRHS == Cases[1].CmpRHS &&
+      Cases[0].PredInfo.Pred == Cases[1].PredInfo.Pred &&
+      isa<Constant>(Cases[0].CmpRHS) &&
+      cast<Constant>(Cases[0].CmpRHS)->isNullValue()) {
+    if (Cases[0].PredInfo.Pred == CmpInst::ICMP_EQ &&
+        Cases[0].TrueBB == Cases[1].ThisBB)
+      return false;
+    if (Cases[0].PredInfo.Pred == CmpInst::ICMP_NE &&
+        Cases[0].FalseBB == Cases[1].ThisBB)
+      return false;
+  }
+
+  return true;
 }
 
 bool IRTranslator::translateBr(const User &U, MachineIRBuilder &MIRBuilder) {
   const BranchInst &BrInst = cast<BranchInst>(U);
-  unsigned Succ = 0;
-  if (!BrInst.isUnconditional()) {
-    // We want a G_BRCOND to the true BB followed by an unconditional branch.
-    Register Tst = getOrCreateVReg(*BrInst.getCondition());
-    const BasicBlock &TrueTgt = *cast<BasicBlock>(BrInst.getSuccessor(Succ++));
-    MachineBasicBlock &TrueBB = getMBB(TrueTgt);
-    MIRBuilder.buildBrCond(Tst, TrueBB);
+  auto &CurMBB = MIRBuilder.getMBB();
+  auto *Succ0MBB = &getMBB(*BrInst.getSuccessor(0));
+
+  if (BrInst.isUnconditional()) {
+    // If the unconditional target is the layout successor, fallthrough.
+    if (!CurMBB.isLayoutSuccessor(Succ0MBB))
+      MIRBuilder.buildBr(*Succ0MBB);
+
+    // Link successors.
+    for (const BasicBlock *Succ : successors(&BrInst))
+      CurMBB.addSuccessor(&getMBB(*Succ));
+    return true;
   }
 
-  const BasicBlock &BrTgt = *cast<BasicBlock>(BrInst.getSuccessor(Succ));
-  MachineBasicBlock &TgtBB = getMBB(BrTgt);
-  MachineBasicBlock &CurBB = MIRBuilder.getMBB();
+  // If this condition is one of the special cases we handle, do special stuff
+  // now.
+  const Value *CondVal = BrInst.getCondition();
+  MachineBasicBlock *Succ1MBB = &getMBB(*BrInst.getSuccessor(1));
 
-  // If the unconditional target is the layout successor, fallthrough.
-  if (!CurBB.isLayoutSuccessor(&TgtBB))
-    MIRBuilder.buildBr(TgtBB);
+  const auto &TLI = *MF->getSubtarget().getTargetLowering();
 
-  // Link successors.
-  for (const BasicBlock *Succ : successors(&BrInst))
-    CurBB.addSuccessor(&getMBB(*Succ));
+  // If this is a series of conditions that are or'd or and'd together, emit
+  // this as a sequence of branches instead of setcc's with and/or operations.
+  // As long as jumps are not expensive (exceptions for multi-use logic ops,
+  // unpredictable branches, and vector extracts because those jumps are likely
+  // expensive for any target), this should improve performance.
+  // For example, instead of something like:
+  //     cmp A, B
+  //     C = seteq
+  //     cmp D, E
+  //     F = setle
+  //     or C, F
+  //     jnz foo
+  // Emit:
+  //     cmp A, B
+  //     je foo
+  //     cmp D, E
+  //     jle foo
+  using namespace PatternMatch;
+  const Instruction *CondI = dyn_cast<Instruction>(CondVal);
+  if (!TLI.isJumpExpensive() && CondI && CondI->hasOneUse() &&
+      !BrInst.hasMetadata(LLVMContext::MD_unpredictable)) {
+    Instruction::BinaryOps Opcode = (Instruction::BinaryOps)0;
+    Value *Vec;
+    const Value *BOp0, *BOp1;
+    if (match(CondI, m_LogicalAnd(m_Value(BOp0), m_Value(BOp1))))
+      Opcode = Instruction::And;
+    else if (match(CondI, m_LogicalOr(m_Value(BOp0), m_Value(BOp1))))
+      Opcode = Instruction::Or;
+
+    if (Opcode && !(match(BOp0, m_ExtractElt(m_Value(Vec), m_Value())) &&
+                    match(BOp1, m_ExtractElt(m_Specific(Vec), m_Value())))) {
+      findMergedConditions(CondI, Succ0MBB, Succ1MBB, &CurMBB, &CurMBB, Opcode,
+                           getEdgeProbability(&CurMBB, Succ0MBB),
+                           getEdgeProbability(&CurMBB, Succ1MBB),
+                           /*InvertCond=*/false);
+      assert(SL->SwitchCases[0].ThisBB == &CurMBB && "Unexpected lowering!");
+
+      // Allow some cases to be rejected.
+      if (shouldEmitAsBranches(SL->SwitchCases)) {
+        // Emit the branch for this block.
+        emitSwitchCase(SL->SwitchCases[0], &CurMBB, *CurBuilder);
+        SL->SwitchCases.erase(SL->SwitchCases.begin());
+        return true;
+      }
+
+      // Okay, we decided not to do this, remove any inserted MBB's and clear
+      // SwitchCases.
+      for (unsigned I = 1, E = SL->SwitchCases.size(); I != E; ++I)
+        MF->erase(SL->SwitchCases[I].ThisBB);
+
+      SL->SwitchCases.clear();
+    }
+  }
+
+  // Create a CaseBlock record representing this branch.
+  SwitchCG::CaseBlock CB(CmpInst::ICMP_EQ, false, CondVal,
+                         ConstantInt::getTrue(MF->getFunction().getContext()),
+                         nullptr, Succ0MBB, Succ1MBB, &CurMBB,
+                         CurBuilder->getDebugLoc());
+
+  // Use emitSwitchCase to actually insert the fast branch sequence for this
+  // cond branch.
+  emitSwitchCase(CB, &CurMBB, *CurBuilder);
   return true;
 }
@@ -457,6 +715,7 @@ bool IRTranslator::translateSwitch(const User &U, MachineIRBuilder &MIB) {
   }
 
   SL->findJumpTables(Clusters, &SI, DefaultMBB, nullptr, nullptr);
+  SL->findBitTestClusters(Clusters, &SI);
 
   LLVM_DEBUG({
     dbgs() << "Case clusters: ";
@@ -577,8 +836,23 @@ void IRTranslator::emitSwitchCase(SwitchCG::CaseBlock &CB,
   const LLT i1Ty = LLT::scalar(1);
   // Build the compare.
   if (!CB.CmpMHS) {
-    Register CondRHS = getOrCreateVReg(*CB.CmpRHS);
-    Cond = MIB.buildICmp(CB.PredInfo.Pred, i1Ty, CondLHS, CondRHS).getReg(0);
+    const auto *CI = dyn_cast<ConstantInt>(CB.CmpRHS);
+    // For conditional branch lowering, we might try to do something silly like
+    // emit a G_ICMP to compare an existing G_ICMP i1 result with true. If so,
+    // just re-use the existing condition vreg.
+    if (CI && CI->getZExtValue() == 1 &&
+        MRI->getType(CondLHS).getSizeInBits() == 1 &&
+        CB.PredInfo.Pred == CmpInst::ICMP_EQ) {
+      Cond = CondLHS;
+    } else {
+      Register CondRHS = getOrCreateVReg(*CB.CmpRHS);
+      if (CmpInst::isFPPredicate(CB.PredInfo.Pred))
+        Cond =
+            MIB.buildFCmp(CB.PredInfo.Pred, i1Ty, CondLHS, CondRHS).getReg(0);
+      else
+        Cond =
+            MIB.buildICmp(CB.PredInfo.Pred, i1Ty, CondLHS, CondRHS).getReg(0);
+    }
   } else {
     assert(CB.PredInfo.Pred == CmpInst::ICMP_SLE &&
            "Can only handle SLE ranges");
@@ -611,17 +885,8 @@
   addSuccessorWithProb(CB.ThisBB, CB.FalseBB, CB.FalseProb);
   CB.ThisBB->normalizeSuccProbs();
 
-  //  if (SwitchBB->getBasicBlock() != CB.FalseBB->getBasicBlock())
-    addMachineCFGPred({SwitchBB->getBasicBlock(), CB.FalseBB->getBasicBlock()},
-                      CB.ThisBB);
-
-  // If the lhs block is the next block, invert the condition so that we can
-  // fall through to the lhs instead of the rhs block.
-  if (CB.TrueBB == CB.ThisBB->getNextNode()) {
-    std::swap(CB.TrueBB, CB.FalseBB);
-    auto True = MIB.buildConstant(i1Ty, 1);
-    Cond = MIB.buildXor(i1Ty, Cond, True).getReg(0);
-  }
+  addMachineCFGPred({SwitchBB->getBasicBlock(), CB.FalseBB->getBasicBlock()},
+                    CB.ThisBB);
 
   MIB.buildBrCond(Cond, *CB.TrueBB);
   MIB.buildBr(*CB.FalseBB);
@@ -734,6 +999,156 @@ bool IRTranslator::lowerSwitchRangeWorkItem(SwitchCG::CaseClusterIt I,
   return true;
 }
 
+void IRTranslator::emitBitTestHeader(SwitchCG::BitTestBlock &B,
+                                     MachineBasicBlock *SwitchBB) {
+  MachineIRBuilder &MIB = *CurBuilder;
+  MIB.setMBB(*SwitchBB);
+
+  // Subtract the minimum value.
+  Register SwitchOpReg = getOrCreateVReg(*B.SValue);
+
+  LLT SwitchOpTy = MRI->getType(SwitchOpReg);
+  Register MinValReg = MIB.buildConstant(SwitchOpTy, B.First).getReg(0);
+  auto RangeSub = MIB.buildSub(SwitchOpTy, SwitchOpReg, MinValReg);
+
+  // Ensure that the type will fit the mask value.
+  LLT MaskTy = SwitchOpTy;
+  for (unsigned I = 0, E = B.Cases.size(); I != E; ++I) {
+    if (!isUIntN(SwitchOpTy.getSizeInBits(), B.Cases[I].Mask)) {
+      // Switch table case ranges are encoded into a series of masks.
+      // Just use pointer type, it's guaranteed to fit.
+      MaskTy = LLT::scalar(64);
+      break;
+    }
+  }
+  Register SubReg = RangeSub.getReg(0);
+  if (SwitchOpTy != MaskTy)
+    SubReg = MIB.buildZExtOrTrunc(MaskTy, SubReg).getReg(0);
+
+  B.RegVT = getMVTForLLT(MaskTy);
+  B.Reg = SubReg;
+
+  MachineBasicBlock *MBB = B.Cases[0].ThisBB;
+
+  if (!B.OmitRangeCheck)
+    addSuccessorWithProb(SwitchBB, B.Default, B.DefaultProb);
+  addSuccessorWithProb(SwitchBB, MBB, B.Prob);
+
+  SwitchBB->normalizeSuccProbs();
+
+  if (!B.OmitRangeCheck) {
+    // Conditional branch to the default block.
+    auto RangeCst = MIB.buildConstant(SwitchOpTy, B.Range);
+    auto RangeCmp = MIB.buildICmp(CmpInst::Predicate::ICMP_UGT, LLT::scalar(1),
+                                  RangeSub, RangeCst);
+    MIB.buildBrCond(RangeCmp, *B.Default);
+  }
+
+  // Avoid emitting unnecessary branches to the next block.
+  if (MBB != SwitchBB->getNextNode())
+    MIB.buildBr(*MBB);
+}
+
+void IRTranslator::emitBitTestCase(SwitchCG::BitTestBlock &BB,
+                                   MachineBasicBlock *NextMBB,
+                                   BranchProbability BranchProbToNext,
+                                   Register Reg, SwitchCG::BitTestCase &B,
+                                   MachineBasicBlock *SwitchBB) {
+  MachineIRBuilder &MIB = *CurBuilder;
+  MIB.setMBB(*SwitchBB);
+
+  LLT SwitchTy = getLLTForMVT(BB.RegVT);
+  Register Cmp;
+  unsigned PopCount = countPopulation(B.Mask);
+  if (PopCount == 1) {
+    // Testing for a single bit; just compare the shift count with what it
+    // would need to be to shift a 1 bit in that position.
+    auto MaskTrailingZeros =
+        MIB.buildConstant(SwitchTy, countTrailingZeros(B.Mask));
+    Cmp =
+        MIB.buildICmp(ICmpInst::ICMP_EQ, LLT::scalar(1), Reg, MaskTrailingZeros)
+            .getReg(0);
+  } else if (PopCount == BB.Range) {
+    // There is only one zero bit in the range, test for it directly.
+    auto MaskTrailingOnes =
+        MIB.buildConstant(SwitchTy, countTrailingOnes(B.Mask));
+    Cmp = MIB.buildICmp(CmpInst::ICMP_NE, LLT::scalar(1), Reg, MaskTrailingOnes)
+              .getReg(0);
+  } else {
+    // Make desired shift.
+    auto CstOne = MIB.buildConstant(SwitchTy, 1);
+    auto SwitchVal = MIB.buildShl(SwitchTy, CstOne, Reg);
+
+    // Emit bit tests and jumps.
+    auto CstMask = MIB.buildConstant(SwitchTy, B.Mask);
+    auto AndOp = MIB.buildAnd(SwitchTy, SwitchVal, CstMask);
+    auto CstZero = MIB.buildConstant(SwitchTy, 0);
+    Cmp = MIB.buildICmp(CmpInst::ICMP_NE, LLT::scalar(1), AndOp, CstZero)
+              .getReg(0);
+  }
+
+  // The branch probability from SwitchBB to B.TargetBB is B.ExtraProb.
+  addSuccessorWithProb(SwitchBB, B.TargetBB, B.ExtraProb);
+  // The branch probability from SwitchBB to NextMBB is BranchProbToNext.
+  addSuccessorWithProb(SwitchBB, NextMBB, BranchProbToNext);
+  // It is not guaranteed that the sum of B.ExtraProb and BranchProbToNext is
+  // one as they are relative probabilities (and thus work more like weights),
+  // and hence we need to normalize them to let the sum of them become one.
+  SwitchBB->normalizeSuccProbs();
+
+  // Record the fact that the IR edge from the header to the bit test target
+  // will go through our new block. Needed for PHIs to have nodes added.
+  addMachineCFGPred({BB.Parent->getBasicBlock(), B.TargetBB->getBasicBlock()},
+                    SwitchBB);
+
+  MIB.buildBrCond(Cmp, *B.TargetBB);
+
+  // Avoid emitting unnecessary branches to the next block.
+  if (NextMBB != SwitchBB->getNextNode())
+    MIB.buildBr(*NextMBB);
+}
+
+bool IRTranslator::lowerBitTestWorkItem(
+    SwitchCG::SwitchWorkListItem W, MachineBasicBlock *SwitchMBB,
+    MachineBasicBlock *CurMBB, MachineBasicBlock *DefaultMBB,
+    MachineIRBuilder &MIB, MachineFunction::iterator BBI,
+    BranchProbability DefaultProb, BranchProbability UnhandledProbs,
+    SwitchCG::CaseClusterIt I, MachineBasicBlock *Fallthrough,
+    bool FallthroughUnreachable) {
+  using namespace SwitchCG;
+  MachineFunction *CurMF = SwitchMBB->getParent();
+  // FIXME: Optimize away range check based on pivot comparisons.
+  BitTestBlock *BTB = &SL->BitTestCases[I->BTCasesIndex];
+  // The bit test blocks haven't been inserted yet; insert them here.
+  for (BitTestCase &BTC : BTB->Cases)
+    CurMF->insert(BBI, BTC.ThisBB);
+
+  // Fill in fields of the BitTestBlock.
+  BTB->Parent = CurMBB;
+  BTB->Default = Fallthrough;
+
+  BTB->DefaultProb = UnhandledProbs;
+  // If the cases in bit test don't form a contiguous range, we evenly
+  // distribute the probability on the edge to Fallthrough to two
+  // successors of CurMBB.
+  if (!BTB->ContiguousRange) {
+    BTB->Prob += DefaultProb / 2;
+    BTB->DefaultProb -= DefaultProb / 2;
+  }
+
+  if (FallthroughUnreachable) {
+    // Skip the range check if the fallthrough block is unreachable.
+    BTB->OmitRangeCheck = true;
+  }
+
+  // If we're in the right place, emit the bit test header right now.
+  if (CurMBB == SwitchMBB) {
+    emitBitTestHeader(*BTB, SwitchMBB);
+    BTB->Emitted = true;
+  }
+  return true;
+}
+
 bool IRTranslator::lowerSwitchWorkItem(SwitchCG::SwitchWorkListItem W,
                                        Value *Cond,
                                        MachineBasicBlock *SwitchMBB,
@@ -794,9 +1209,15 @@ bool IRTranslator::lowerSwitchWorkItem(SwitchCG::SwitchWorkListItem W,
 
     switch (I->Kind) {
     case CC_BitTests: {
-      LLVM_DEBUG(dbgs() << "Switch to bit test optimization unimplemented");
-      return false; // Bit tests currently unimplemented.
+      if (!lowerBitTestWorkItem(W, SwitchMBB, CurMBB, DefaultMBB, MIB, BBI,
+                                DefaultProb, UnhandledProbs, I, Fallthrough,
+                                FallthroughUnreachable)) {
+        LLVM_DEBUG(dbgs() << "Failed to lower bit test for switch");
+        return false;
+      }
+      break;
     }
+
     case CC_JumpTable: {
       if (!lowerJumpTableWorkItem(W, SwitchMBB, CurMBB, DefaultMBB, MIB, BBI,
                                   UnhandledProbs, I, Fallthrough,
@@ -1137,16 +1558,33 @@ bool IRTranslator::translateGetElementPtr(const User &U,
 
 bool IRTranslator::translateMemFunc(const CallInst &CI,
                                     MachineIRBuilder &MIRBuilder,
-                                    Intrinsic::ID ID) {
+                                    unsigned Opcode) {
 
   // If the source is undef, then just emit a nop.
   if (isa<UndefValue>(CI.getArgOperand(1)))
    return true;
 
-  ArrayRef<Register> Res;
-  auto ICall = MIRBuilder.buildIntrinsic(ID, Res, true);
-  for (auto AI = CI.arg_begin(), AE = CI.arg_end(); std::next(AI) != AE; ++AI)
-    ICall.addUse(getOrCreateVReg(**AI));
+  SmallVector<Register, 3> SrcRegs;
+
+  unsigned MinPtrSize = UINT_MAX;
+  for (auto AI = CI.arg_begin(), AE = CI.arg_end(); std::next(AI) != AE; ++AI) {
+    Register SrcReg = getOrCreateVReg(**AI);
+    LLT SrcTy = MRI->getType(SrcReg);
+    if (SrcTy.isPointer())
+      MinPtrSize = std::min(SrcTy.getSizeInBits(), MinPtrSize);
+    SrcRegs.push_back(SrcReg);
+  }
+
+  LLT SizeTy = LLT::scalar(MinPtrSize);
+
+  // The size operand should be the minimum of the pointer sizes.
+  Register &SizeOpReg = SrcRegs[SrcRegs.size() - 1];
+  if (MRI->getType(SizeOpReg) != SizeTy)
+    SizeOpReg = MIRBuilder.buildZExtOrTrunc(SizeTy, SizeOpReg).getReg(0);
+
+  auto ICall = MIRBuilder.buildInstr(Opcode);
+  for (Register SrcReg : SrcRegs)
+    ICall.addUse(SrcReg);
 
   Align DstAlign;
   Align SrcAlign;
@@ -1175,7 +1613,7 @@ bool IRTranslator::translateMemFunc(const CallInst &CI,
   ICall.addMemOperand(MF->getMachineMemOperand(
       MachinePointerInfo(CI.getArgOperand(0)),
      MachineMemOperand::MOStore | VolFlag, 1, DstAlign));
-  if (ID != Intrinsic::memset)
+  if (Opcode != TargetOpcode::G_MEMSET)
     ICall.addMemOperand(MF->getMachineMemOperand(
         MachinePointerInfo(CI.getArgOperand(1)),
         MachineMemOperand::MOLoad | VolFlag, 1, SrcAlign));
@@ -1214,6 +1652,16 @@ bool IRTranslator::translateOverflowIntrinsic(const CallInst &CI, unsigned Op,
   return true;
 }
 
+bool IRTranslator::translateFixedPointIntrinsic(unsigned Op, const CallInst &CI,
+                                                MachineIRBuilder &MIRBuilder) {
+  Register Dst = getOrCreateVReg(CI);
+  Register Src0 = getOrCreateVReg(*CI.getOperand(0));
+  Register Src1 = getOrCreateVReg(*CI.getOperand(1));
+  uint64_t Scale = cast<ConstantInt>(CI.getOperand(2))->getZExtValue();
+  MIRBuilder.buildInstr(Op, {Dst}, { Src0, Src1, Scale });
+  return true;
+}
+
 unsigned IRTranslator::getSimpleIntrinsicOpcode(Intrinsic::ID ID) {
   switch (ID) {
     default:
@@ -1264,10 +1712,14 @@ unsigned IRTranslator::getSimpleIntrinsicOpcode(Intrinsic::ID ID) {
       return TargetOpcode::G_FNEARBYINT;
     case Intrinsic::pow:
       return TargetOpcode::G_FPOW;
+    case Intrinsic::powi:
+      return TargetOpcode::G_FPOWI;
    case Intrinsic::rint:
      return TargetOpcode::G_FRINT;
    case Intrinsic::round:
      return TargetOpcode::G_INTRINSIC_ROUND;
+    case Intrinsic::roundeven:
+      return TargetOpcode::G_INTRINSIC_ROUNDEVEN;
    case Intrinsic::sin:
      return TargetOpcode::G_FSIN;
    case Intrinsic::sqrt:
@@ -1278,6 +1730,31 @@ unsigned IRTranslator::getSimpleIntrinsicOpcode(Intrinsic::ID ID) {
      return TargetOpcode::G_READCYCLECOUNTER;
    case Intrinsic::ptrmask:
      return TargetOpcode::G_PTRMASK;
+    case Intrinsic::lrint:
+      return TargetOpcode::G_INTRINSIC_LRINT;
+    // FADD/FMUL require checking the FMF, so are handled elsewhere.
+    case Intrinsic::vector_reduce_fmin:
+      return TargetOpcode::G_VECREDUCE_FMIN;
+    case Intrinsic::vector_reduce_fmax:
+      return TargetOpcode::G_VECREDUCE_FMAX;
+    case Intrinsic::vector_reduce_add:
+      return TargetOpcode::G_VECREDUCE_ADD;
+    case Intrinsic::vector_reduce_mul:
+      return TargetOpcode::G_VECREDUCE_MUL;
+    case Intrinsic::vector_reduce_and:
+      return TargetOpcode::G_VECREDUCE_AND;
+    case Intrinsic::vector_reduce_or:
+      return TargetOpcode::G_VECREDUCE_OR;
+    case Intrinsic::vector_reduce_xor:
+      return TargetOpcode::G_VECREDUCE_XOR;
+    case Intrinsic::vector_reduce_smax:
+      return TargetOpcode::G_VECREDUCE_SMAX;
+    case Intrinsic::vector_reduce_smin:
+      return TargetOpcode::G_VECREDUCE_SMIN;
+    case Intrinsic::vector_reduce_umax:
+      return TargetOpcode::G_VECREDUCE_UMAX;
+    case Intrinsic::vector_reduce_umin:
+      return TargetOpcode::G_VECREDUCE_UMIN;
   }
   return Intrinsic::not_intrinsic;
 }
@@ -1370,7 +1847,7 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
     // Get the underlying objects for the location passed on the lifetime
     // marker.
     SmallVector<const Value *, 4> Allocas;
-    GetUnderlyingObjects(CI.getArgOperand(1), Allocas, *DL);
+    getUnderlyingObjects(CI.getArgOperand(1), Allocas);
 
     // Iterate over each underlying object, creating lifetime markers for each
    // static alloca. Quit if we find a non-static alloca.
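For reference, the fixed-point intrinsics routed through translateFixedPointIntrinsic above pass the scale operand as an immediate on the generic opcode. The arithmetic those opcodes model is easier to see with a concrete sketch; the following reference function is not part of the patch (names are illustrative, and the saturating variants and LangRef's unspecified rounding direction are ignored):

  #include <cstdint>

  // Reference semantics of llvm.smul.fix.i32(a, b, scale): multiply in
  // double width so the product cannot overflow, then drop Scale
  // fractional bits (an arithmetic right shift is assumed here).
  int32_t SMulFixRef(int32_t A, int32_t B, unsigned Scale) {
    int64_t Wide = static_cast<int64_t>(A) * B;
    return static_cast<int32_t>(Wide >> Scale);
  }

For example, in Q16.16 fixed point (Scale = 16), SMulFixRef(1 << 16, 3 << 16, 16) yields 3 << 16, i.e. 1.0 * 3.0 == 3.0.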
@@ -1484,6 +1961,37 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
     return translateBinaryOp(TargetOpcode::G_USUBSAT, CI, MIRBuilder);
   case Intrinsic::ssub_sat:
     return translateBinaryOp(TargetOpcode::G_SSUBSAT, CI, MIRBuilder);
+  case Intrinsic::ushl_sat:
+    return translateBinaryOp(TargetOpcode::G_USHLSAT, CI, MIRBuilder);
+  case Intrinsic::sshl_sat:
+    return translateBinaryOp(TargetOpcode::G_SSHLSAT, CI, MIRBuilder);
+  case Intrinsic::umin:
+    return translateBinaryOp(TargetOpcode::G_UMIN, CI, MIRBuilder);
+  case Intrinsic::umax:
+    return translateBinaryOp(TargetOpcode::G_UMAX, CI, MIRBuilder);
+  case Intrinsic::smin:
+    return translateBinaryOp(TargetOpcode::G_SMIN, CI, MIRBuilder);
+  case Intrinsic::smax:
+    return translateBinaryOp(TargetOpcode::G_SMAX, CI, MIRBuilder);
+  case Intrinsic::abs:
+    // TODO: Preserve "int min is poison" arg in GMIR?
+    return translateUnaryOp(TargetOpcode::G_ABS, CI, MIRBuilder);
+  case Intrinsic::smul_fix:
+    return translateFixedPointIntrinsic(TargetOpcode::G_SMULFIX, CI, MIRBuilder);
+  case Intrinsic::umul_fix:
+    return translateFixedPointIntrinsic(TargetOpcode::G_UMULFIX, CI, MIRBuilder);
+  case Intrinsic::smul_fix_sat:
+    return translateFixedPointIntrinsic(TargetOpcode::G_SMULFIXSAT, CI, MIRBuilder);
+  case Intrinsic::umul_fix_sat:
+    return translateFixedPointIntrinsic(TargetOpcode::G_UMULFIXSAT, CI, MIRBuilder);
+  case Intrinsic::sdiv_fix:
+    return translateFixedPointIntrinsic(TargetOpcode::G_SDIVFIX, CI, MIRBuilder);
+  case Intrinsic::udiv_fix:
+    return translateFixedPointIntrinsic(TargetOpcode::G_UDIVFIX, CI, MIRBuilder);
+  case Intrinsic::sdiv_fix_sat:
+    return translateFixedPointIntrinsic(TargetOpcode::G_SDIVFIXSAT, CI, MIRBuilder);
+  case Intrinsic::udiv_fix_sat:
+    return translateFixedPointIntrinsic(TargetOpcode::G_UDIVFIXSAT, CI, MIRBuilder);
   case Intrinsic::fmuladd: {
     const TargetMachine &TM = MF->getTarget();
     const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering();
@@ -1507,10 +2015,24 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
     }
     return true;
   }
+  case Intrinsic::convert_from_fp16:
+    // FIXME: This intrinsic should probably be removed from the IR.
+    MIRBuilder.buildFPExt(getOrCreateVReg(CI),
+                          getOrCreateVReg(*CI.getArgOperand(0)),
+                          MachineInstr::copyFlagsFromInstruction(CI));
+    return true;
+  case Intrinsic::convert_to_fp16:
+    // FIXME: This intrinsic should probably be removed from the IR.
+    MIRBuilder.buildFPTrunc(getOrCreateVReg(CI),
+                            getOrCreateVReg(*CI.getArgOperand(0)),
+                            MachineInstr::copyFlagsFromInstruction(CI));
+    return true;
   case Intrinsic::memcpy:
+    return translateMemFunc(CI, MIRBuilder, TargetOpcode::G_MEMCPY);
   case Intrinsic::memmove:
+    return translateMemFunc(CI, MIRBuilder, TargetOpcode::G_MEMMOVE);
   case Intrinsic::memset:
-    return translateMemFunc(CI, MIRBuilder, ID);
+    return translateMemFunc(CI, MIRBuilder, TargetOpcode::G_MEMSET);
   case Intrinsic::eh_typeid_for: {
     GlobalValue *GV = ExtractTypeInfo(CI.getArgOperand(0));
     Register Reg = getOrCreateVReg(CI);
@@ -1593,7 +2115,18 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
   }
   case Intrinsic::invariant_end:
     return true;
+  case Intrinsic::expect:
+  case Intrinsic::annotation:
+  case Intrinsic::ptr_annotation:
+  case Intrinsic::launder_invariant_group:
+  case Intrinsic::strip_invariant_group: {
+    // Drop the intrinsic, but forward the value.
+    MIRBuilder.buildCopy(getOrCreateVReg(CI),
+                         getOrCreateVReg(*CI.getArgOperand(0)));
+    return true;
+  }
   case Intrinsic::assume:
+  case Intrinsic::experimental_noalias_scope_decl:
   case Intrinsic::var_annotation:
   case Intrinsic::sideeffect:
     // Discard annotate attributes, assumptions, and artificial side-effects.
@@ -1613,6 +2146,68 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
         .addUse(getOrCreateVReg(*CI.getArgOperand(1)));
     return true;
   }
+  case Intrinsic::localescape: {
+    MachineBasicBlock &EntryMBB = MF->front();
+    StringRef EscapedName = GlobalValue::dropLLVMManglingEscape(MF->getName());
+
+    // Directly emit some LOCAL_ESCAPE machine instrs. Label assignment emission
+    // is the same on all targets.
+    for (unsigned Idx = 0, E = CI.getNumArgOperands(); Idx < E; ++Idx) {
+      Value *Arg = CI.getArgOperand(Idx)->stripPointerCasts();
+      if (isa<ConstantPointerNull>(Arg))
+        continue; // Skip null pointers. They represent a hole in index space.
+
+      int FI = getOrCreateFrameIndex(*cast<AllocaInst>(Arg));
+      MCSymbol *FrameAllocSym =
+          MF->getMMI().getContext().getOrCreateFrameAllocSymbol(EscapedName,
+                                                                Idx);
+
+      // This should be inserted at the start of the entry block.
+      auto LocalEscape =
+          MIRBuilder.buildInstrNoInsert(TargetOpcode::LOCAL_ESCAPE)
+              .addSym(FrameAllocSym)
+              .addFrameIndex(FI);
+
+      EntryMBB.insert(EntryMBB.begin(), LocalEscape);
+    }
+
+    return true;
+  }
+  case Intrinsic::vector_reduce_fadd:
+  case Intrinsic::vector_reduce_fmul: {
+    // Need to check for the reassoc flag to decide whether we want a
+    // sequential reduction opcode or not.
+    Register Dst = getOrCreateVReg(CI);
+    Register ScalarSrc = getOrCreateVReg(*CI.getArgOperand(0));
+    Register VecSrc = getOrCreateVReg(*CI.getArgOperand(1));
+    unsigned Opc = 0;
+    if (!CI.hasAllowReassoc()) {
+      // The sequential ordering case.
+      Opc = ID == Intrinsic::vector_reduce_fadd
+                ? TargetOpcode::G_VECREDUCE_SEQ_FADD
+                : TargetOpcode::G_VECREDUCE_SEQ_FMUL;
+      MIRBuilder.buildInstr(Opc, {Dst}, {ScalarSrc, VecSrc},
+                            MachineInstr::copyFlagsFromInstruction(CI));
+      return true;
+    }
+    // We split the operation into a separate G_FADD/G_FMUL + the reduce,
+    // since the associativity doesn't matter.
+    unsigned ScalarOpc;
+    if (ID == Intrinsic::vector_reduce_fadd) {
+      Opc = TargetOpcode::G_VECREDUCE_FADD;
+      ScalarOpc = TargetOpcode::G_FADD;
+    } else {
+      Opc = TargetOpcode::G_VECREDUCE_FMUL;
+      ScalarOpc = TargetOpcode::G_FMUL;
+    }
+    LLT DstTy = MRI->getType(Dst);
+    auto Rdx = MIRBuilder.buildInstr(
+        Opc, {DstTy}, {VecSrc}, MachineInstr::copyFlagsFromInstruction(CI));
+    MIRBuilder.buildInstr(ScalarOpc, {Dst}, {ScalarSrc, Rdx},
+                          MachineInstr::copyFlagsFromInstruction(CI));
+
+    return true;
+  }
 #define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC)                         \
   case Intrinsic::INTRINSIC:
 #include "llvm/IR/ConstrainedOps.def"
@@ -1722,10 +2317,6 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
   MIB->copyIRFlags(CI);
 
   for (auto &Arg : enumerate(CI.arg_operands())) {
-    // Some intrinsics take metadata parameters. Reject them.
-    if (isa<MetadataAsValue>(Arg.value()))
-      return false;
-
     // If this is required to be an immediate, don't materialize it in a
     // register.
     if (CI.paramHasAttr(Arg.index(), Attribute::ImmArg)) {
       ...
       } else {
         MIB.addFPImm(cast<ConstantFP>(Arg.value()));
       }
+    } else if (auto MD = dyn_cast<MetadataAsValue>(Arg.value())) {
+      auto *MDN = dyn_cast<MDNode>(MD->getMetadata());
+      if (!MDN) // This was probably an MDString.
+        return false;
+      MIB.addMetadata(MDN);
     } else {
       ArrayRef<Register> VRegs = getOrCreateVRegs(*Arg.value());
       if (VRegs.size() > 1)
@@ -1762,6 +2358,62 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
   return true;
 }
 
+bool IRTranslator::findUnwindDestinations(
+    const BasicBlock *EHPadBB,
+    BranchProbability Prob,
+    SmallVectorImpl<std::pair<MachineBasicBlock *, BranchProbability>>
+        &UnwindDests) {
+  EHPersonality Personality = classifyEHPersonality(
+      EHPadBB->getParent()->getFunction().getPersonalityFn());
+  bool IsMSVCCXX = Personality == EHPersonality::MSVC_CXX;
+  bool IsCoreCLR = Personality == EHPersonality::CoreCLR;
+  bool IsWasmCXX = Personality == EHPersonality::Wasm_CXX;
+  bool IsSEH = isAsynchronousEHPersonality(Personality);
+
+  if (IsWasmCXX) {
+    // Ignore this for now.
+    return false;
+  }
+
+  while (EHPadBB) {
+    const Instruction *Pad = EHPadBB->getFirstNonPHI();
+    BasicBlock *NewEHPadBB = nullptr;
+    if (isa<LandingPadInst>(Pad)) {
+      // Stop on landingpads. They are not funclets.
+      UnwindDests.emplace_back(&getMBB(*EHPadBB), Prob);
+      break;
+    }
+    if (isa<CleanupPadInst>(Pad)) {
+      // Stop on cleanup pads. Cleanups are always funclet entries for all known
+      // personalities.
+      UnwindDests.emplace_back(&getMBB(*EHPadBB), Prob);
+      UnwindDests.back().first->setIsEHScopeEntry();
+      UnwindDests.back().first->setIsEHFuncletEntry();
+      break;
+    }
+    if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) {
+      // Add the catchpad handlers to the possible destinations.
+      for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) {
+        UnwindDests.emplace_back(&getMBB(*CatchPadBB), Prob);
+        // For MSVC++ and the CLR, catchblocks are funclets and need prologues.
+        if (IsMSVCCXX || IsCoreCLR)
+          UnwindDests.back().first->setIsEHFuncletEntry();
+        if (!IsSEH)
+          UnwindDests.back().first->setIsEHScopeEntry();
+      }
+      NewEHPadBB = CatchSwitch->getUnwindDest();
+    } else {
+      continue;
+    }
+
+    BranchProbabilityInfo *BPI = FuncInfo.BPI;
+    if (BPI && NewEHPadBB)
+      Prob *= BPI->getEdgeProbability(EHPadBB, NewEHPadBB);
+    EHPadBB = NewEHPadBB;
+  }
+  return true;
+}
+
 bool IRTranslator::translateInvoke(const User &U,
                                    MachineIRBuilder &MIRBuilder) {
   const InvokeInst &I = cast<InvokeInst>(U);
@@ -1787,7 +2439,7 @@ bool IRTranslator::translateInvoke(const User &U,
     return false;
 
   // FIXME: support Windows exception handling.
-  if (!isa<LandingPadInst>(EHPadBB->front()))
+  if (!isa<LandingPadInst>(EHPadBB->getFirstNonPHI()))
     return false;
 
   // Emit the actual call, bracketed by EH_LABELs so that the MF knows about
@@ -1801,14 +2453,28 @@ bool IRTranslator::translateInvoke(const User &U,
   MCSymbol *EndSymbol = Context.createTempSymbol();
   MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(EndSymbol);
 
-  // FIXME: track probabilities.
+  SmallVector<std::pair<MachineBasicBlock *, BranchProbability>, 1> UnwindDests;
+  BranchProbabilityInfo *BPI = FuncInfo.BPI;
+  MachineBasicBlock *InvokeMBB = &MIRBuilder.getMBB();
+  BranchProbability EHPadBBProb =
+      BPI ? BPI->getEdgeProbability(InvokeMBB->getBasicBlock(), EHPadBB)
          : BranchProbability::getZero();
+
+  if (!findUnwindDestinations(EHPadBB, EHPadBBProb, UnwindDests))
+    return false;
+
   MachineBasicBlock &EHPadMBB = getMBB(*EHPadBB),
                     &ReturnMBB = getMBB(*ReturnBB);
+  // Update successor info.
+  addSuccessorWithProb(InvokeMBB, &ReturnMBB);
+  for (auto &UnwindDest : UnwindDests) {
+    UnwindDest.first->setIsEHPad();
+    addSuccessorWithProb(InvokeMBB, UnwindDest.first, UnwindDest.second);
+  }
+  InvokeMBB->normalizeSuccProbs();
+
   MF->addInvoke(&EHPadMBB, BeginSymbol, EndSymbol);
-  MIRBuilder.getMBB().addSuccessor(&ReturnMBB);
-  MIRBuilder.getMBB().addSuccessor(&EHPadMBB);
   MIRBuilder.buildBr(ReturnMBB);
-
   return true;
 }
@@ -1846,6 +2512,12 @@ bool IRTranslator::translateLandingPad(const User &U,
   MIRBuilder.buildInstr(TargetOpcode::EH_LABEL)
     .addSym(MF->addLandingPad(&MBB));
 
+  // If the unwinder does not preserve all registers, ensure that the
+  // function marks the clobbered registers as used.
+  const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo();
+  if (auto *RegMask = TRI.getCustomEHPadPreservedMask(*MF))
+    MF->getRegInfo().addPhysRegsUsedFromRegMask(RegMask);
+
   LLT Ty = getLLTForType(*LP.getType(), *DL);
   Register Undef = MRI->createGenericVirtualRegister(Ty);
   MIRBuilder.buildUndef(Undef);
@@ -2184,8 +2856,8 @@ bool IRTranslator::translate(const Instruction &Inst) {
   // We only emit constants into the entry block from here. To prevent jumpy
   // debug behaviour set the line to 0.
   if (const DebugLoc &DL = Inst.getDebugLoc())
-    EntryBuilder->setDebugLoc(
-        DebugLoc::get(0, 0, DL.getScope(), DL.getInlinedAt()));
+    EntryBuilder->setDebugLoc(DILocation::get(
+        Inst.getContext(), 0, 0, DL.getScope(), DL.getInlinedAt()));
   else
     EntryBuilder->setDebugLoc(DebugLoc());
@@ -2263,6 +2935,57 @@ bool IRTranslator::translate(const Constant &C, Register Reg) {
 }
 
 void IRTranslator::finalizeBasicBlock() {
+  for (auto &BTB : SL->BitTestCases) {
+    // Emit header first, if it wasn't already emitted.
+    if (!BTB.Emitted)
+      emitBitTestHeader(BTB, BTB.Parent);
+
+    BranchProbability UnhandledProb = BTB.Prob;
+    for (unsigned j = 0, ej = BTB.Cases.size(); j != ej; ++j) {
+      UnhandledProb -= BTB.Cases[j].ExtraProb;
+      // Set the current basic block to the mbb we wish to insert the code into
+      MachineBasicBlock *MBB = BTB.Cases[j].ThisBB;
+      // If all cases cover a contiguous range, it is not necessary to jump to
+      // the default block after the last bit test fails. This is because the
+      // range check during bit test header creation has guaranteed that every
+      // case here doesn't go outside the range. In this case, there is no need
+      // to perform the last bit test, as it will always be true. Instead, make
+      // the second-to-last bit-test fall through to the target of the last bit
+      // test, and delete the last bit test.
+
+      MachineBasicBlock *NextMBB;
+      if (BTB.ContiguousRange && j + 2 == ej) {
+        // Second-to-last bit-test with contiguous range: fall through to the
+        // target of the final bit test.
+        NextMBB = BTB.Cases[j + 1].TargetBB;
+      } else if (j + 1 == ej) {
+        // For the last bit test, fall through to Default.
+        NextMBB = BTB.Default;
+      } else {
+        // Otherwise, fall through to the next bit test.
+        NextMBB = BTB.Cases[j + 1].ThisBB;
+      }
+
+      emitBitTestCase(BTB, NextMBB, UnhandledProb, BTB.Reg, BTB.Cases[j], MBB);
+
+      // FIXME delete this block below?
+      if (BTB.ContiguousRange && j + 2 == ej) {
+        // Since we're not going to use the final bit test, remove it.
+        BTB.Cases.pop_back();
+        break;
+      }
+    }
+    // This is "default" BB. We have two jumps to it. From "header" BB and from
+    // last "case" BB, unless the latter was skipped.
+    CFGEdge HeaderToDefaultEdge = {BTB.Parent->getBasicBlock(),
+                                   BTB.Default->getBasicBlock()};
+    addMachineCFGPred(HeaderToDefaultEdge, BTB.Parent);
+    if (!BTB.ContiguousRange) {
+      addMachineCFGPred(HeaderToDefaultEdge, BTB.Cases.back().ThisBB);
+    }
+  }
+  SL->BitTestCases.clear();
+
   for (auto &JTCase : SL->JTCases) {
     // Emit header first, if it wasn't already emitted.
     if (!JTCase.first.Emitted)
@@ -2271,6 +2994,10 @@ void IRTranslator::finalizeBasicBlock() {
     emitJumpTable(JTCase.second, JTCase.second.MBB);
   }
   SL->JTCases.clear();
+
+  for (auto &SwCase : SL->SwitchCases)
+    emitSwitchCase(SwCase, &CurBuilder->getMBB(), *CurBuilder);
+  SL->SwitchCases.clear();
 }
 
 void IRTranslator::finalizeFunction() {
@@ -2332,14 +3059,23 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
   MRI = &MF->getRegInfo();
   DL = &F.getParent()->getDataLayout();
   ORE = std::make_unique<OptimizationRemarkEmitter>(&F);
+  const TargetMachine &TM = MF->getTarget();
+  TM.resetTargetOptions(F);
+  EnableOpts = OptLevel != CodeGenOpt::None && !skipFunction(F);
   FuncInfo.MF = MF;
-  FuncInfo.BPI = nullptr;
+  if (EnableOpts)
+    FuncInfo.BPI = &getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI();
+  else
+    FuncInfo.BPI = nullptr;
+
+  FuncInfo.CanLowerReturn = CLI->checkReturnTypeForCallConv(*MF);
+
   const auto &TLI = *MF->getSubtarget().getTargetLowering();
-  const TargetMachine &TM = MF->getTarget();
+
   SL = std::make_unique<GISelSwitchLowering>(this, FuncInfo);
   SL->init(TLI, TM, *DL);
-  EnableOpts = TM.getOptLevel() != CodeGenOpt::None && !skipFunction(F);
+
 
   assert(PendingPHIs.empty() && "stale PHIs");
@@ -2407,7 +3143,7 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
     }
   }
 
-  if (!CLI->lowerFormalArguments(*EntryBuilder.get(), F, VRegArgs)) {
+  if (!CLI->lowerFormalArguments(*EntryBuilder.get(), F, VRegArgs, FuncInfo)) {
     OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
                                F.getSubprogram(), &F.getEntryBlock());
     R << "unable to lower arguments: " << ore::NV("Prototype", F.getType());
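The probability bookkeeping in findMergedConditions is easier to verify with concrete numbers. The standalone sketch below is not part of the patch; it relies only on the BranchProbability operations the patch itself uses (integer division, addition, and normalizeProbabilities), and works through the OR case with A = B = 1/2:

  #include "llvm/Support/BranchProbability.h"
  #include <iterator>
  using llvm::BranchProbability;

  // For "X | Y" with original edge probabilities A (true) and B (false),
  // BB1 gets {A/2, A/2 + B} and TmpBB gets normalize({A/2, B}).
  void OrCaseExample() {
    BranchProbability A(1, 2), B(1, 2);
    BranchProbability BB1True = A / 2;      // 1/4
    BranchProbability BB1False = A / 2 + B; // 3/4
    BranchProbability Probs[] = {A / 2, B};
    BranchProbability::normalizeProbabilities(std::begin(Probs),
                                              std::end(Probs));
    // Probs[0] == 1/3 (= A/(1+B)), Probs[1] == 2/3 (= 2B/(1+B)).
    // The invariant stated in the comment block holds:
    //   BB1True + BB1False * Probs[0] == 1/4 + 3/4 * 1/3 == 1/2 == A.
  }

The AND case is symmetric: BB1 gets {A + B/2, B/2} and TmpBB gets normalize({A, B/2}), preserving FalseProb(BB1) + TrueProb(BB1) * FalseProb(TmpBB) == B.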
