| author | Dimitry Andric <dim@FreeBSD.org> | 2018-07-28 10:51:19 +0000 | 
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2018-07-28 10:51:19 +0000 | 
| commit | eb11fae6d08f479c0799db45860a98af528fa6e7 (patch) | |
| tree | 44d492a50c8c1a7eb8e2d17ea3360ec4d066f042 /lib/CodeGen/GlobalISel/IRTranslator.cpp | |
| parent | b8a2042aa938069e862750553db0e4d82d25822c (diff) | |
Diffstat (limited to 'lib/CodeGen/GlobalISel/IRTranslator.cpp')
| -rw-r--r-- | lib/CodeGen/GlobalISel/IRTranslator.cpp | 623 |
1 file changed, 472 insertions, 151 deletions
diff --git a/lib/CodeGen/GlobalISel/IRTranslator.cpp b/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 433f99b0113b..bafb7a05536d 100644
--- a/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -26,6 +26,7 @@
 #include "llvm/CodeGen/MachineMemOperand.h"
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/StackProtector.h"
 #include "llvm/CodeGen/TargetFrameLowering.h"
 #include "llvm/CodeGen/TargetLowering.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
@@ -102,37 +103,103 @@ IRTranslator::IRTranslator() : MachineFunctionPass(ID) {
 }

 void IRTranslator::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addRequired<StackProtector>();
   AU.addRequired<TargetPassConfig>();
+  getSelectionDAGFallbackAnalysisUsage(AU);
   MachineFunctionPass::getAnalysisUsage(AU);
 }

-unsigned IRTranslator::getOrCreateVReg(const Value &Val) {
-  unsigned &ValReg = ValToVReg[&Val];
+static void computeValueLLTs(const DataLayout &DL, Type &Ty,
+                             SmallVectorImpl<LLT> &ValueTys,
+                             SmallVectorImpl<uint64_t> *Offsets = nullptr,
+                             uint64_t StartingOffset = 0) {
+  // Given a struct type, recursively traverse the elements.
+  if (StructType *STy = dyn_cast<StructType>(&Ty)) {
+    const StructLayout *SL = DL.getStructLayout(STy);
+    for (unsigned I = 0, E = STy->getNumElements(); I != E; ++I)
+      computeValueLLTs(DL, *STy->getElementType(I), ValueTys, Offsets,
+                       StartingOffset + SL->getElementOffset(I));
+    return;
+  }
+  // Given an array type, recursively traverse the elements.
+  if (ArrayType *ATy = dyn_cast<ArrayType>(&Ty)) {
+    Type *EltTy = ATy->getElementType();
+    uint64_t EltSize = DL.getTypeAllocSize(EltTy);
+    for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i)
+      computeValueLLTs(DL, *EltTy, ValueTys, Offsets,
+                       StartingOffset + i * EltSize);
+    return;
+  }
+  // Interpret void as zero return values.
+  if (Ty.isVoidTy())
+    return;
+  // Base case: we can get an LLT for this LLVM IR type.
+  ValueTys.push_back(getLLTForType(Ty, DL));
+  if (Offsets != nullptr)
+    Offsets->push_back(StartingOffset * 8);
+}
+
+IRTranslator::ValueToVRegInfo::VRegListT &
+IRTranslator::allocateVRegs(const Value &Val) {
+  assert(!VMap.contains(Val) && "Value already allocated in VMap");
+  auto *Regs = VMap.getVRegs(Val);
+  auto *Offsets = VMap.getOffsets(Val);
+  SmallVector<LLT, 4> SplitTys;
+  computeValueLLTs(*DL, *Val.getType(), SplitTys,
+                   Offsets->empty() ? Offsets : nullptr);
+  for (unsigned i = 0; i < SplitTys.size(); ++i)
+    Regs->push_back(0);
+  return *Regs;
+}
+
+ArrayRef<unsigned> IRTranslator::getOrCreateVRegs(const Value &Val) {
+  auto VRegsIt = VMap.findVRegs(Val);
+  if (VRegsIt != VMap.vregs_end())
+    return *VRegsIt->second;

-  if (ValReg)
-    return ValReg;
+  if (Val.getType()->isVoidTy())
+    return *VMap.getVRegs(Val);
+
+  // Create entry for this type.
+  auto *VRegs = VMap.getVRegs(Val);
+  auto *Offsets = VMap.getOffsets(Val);

-  // Fill ValRegsSequence with the sequence of registers
-  // we need to concat together to produce the value.
   assert(Val.getType()->isSized() &&
          "Don't know how to create an empty vreg");
-  unsigned VReg =
-      MRI->createGenericVirtualRegister(getLLTForType(*Val.getType(), *DL));
-  ValReg = VReg;
-  if (auto CV = dyn_cast<Constant>(&Val)) {
-    bool Success = translate(*CV, VReg);
+
+  SmallVector<LLT, 4> SplitTys;
+  computeValueLLTs(*DL, *Val.getType(), SplitTys,
+                   Offsets->empty() ? Offsets : nullptr);
+
+  if (!isa<Constant>(Val)) {
+    for (auto Ty : SplitTys)
+      VRegs->push_back(MRI->createGenericVirtualRegister(Ty));
+    return *VRegs;
+  }
+
+  if (Val.getType()->isAggregateType()) {
+    // UndefValue, ConstantAggregateZero
+    auto &C = cast<Constant>(Val);
+    unsigned Idx = 0;
+    while (auto Elt = C.getAggregateElement(Idx++)) {
+      auto EltRegs = getOrCreateVRegs(*Elt);
+      std::copy(EltRegs.begin(), EltRegs.end(), std::back_inserter(*VRegs));
+    }
+  } else {
+    assert(SplitTys.size() == 1 && "unexpectedly split LLT");
+    VRegs->push_back(MRI->createGenericVirtualRegister(SplitTys[0]));
+    bool Success = translate(cast<Constant>(Val), VRegs->front());
     if (!Success) {
       OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
                                  MF->getFunction().getSubprogram(),
                                  &MF->getFunction().getEntryBlock());
       R << "unable to translate constant: " << ore::NV("Type", Val.getType());
       reportTranslationError(*MF, *TPC, *ORE, R);
-      return VReg;
+      return *VRegs;
     }
   }
-  return VReg;
+  return *VRegs;
 }

 int IRTranslator::getOrCreateFrameIndex(const AllocaInst &AI) {
@@ -164,6 +231,20 @@ unsigned IRTranslator::getMemOpAlignment(const Instruction &I) {
   } else if (const LoadInst *LI = dyn_cast<LoadInst>(&I)) {
     Alignment = LI->getAlignment();
     ValTy = LI->getType();
+  } else if (const AtomicCmpXchgInst *AI = dyn_cast<AtomicCmpXchgInst>(&I)) {
+    // TODO(PR27168): This instruction has no alignment attribute, but unlike
+    // the default alignment for load/store, the default here is to assume
+    // it has NATURAL alignment, not DataLayout-specified alignment.
+    const DataLayout &DL = AI->getModule()->getDataLayout();
+    Alignment = DL.getTypeStoreSize(AI->getCompareOperand()->getType());
+    ValTy = AI->getCompareOperand()->getType();
+  } else if (const AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(&I)) {
+    // TODO(PR27168): This instruction has no alignment attribute, but unlike
+    // the default alignment for load/store, the default here is to assume
+    // it has NATURAL alignment, not DataLayout-specified alignment.
+    const DataLayout &DL = AI->getModule()->getDataLayout();
+    Alignment = DL.getTypeStoreSize(AI->getValOperand()->getType());
+    ValTy = AI->getType();
   } else {
     OptimizationRemarkMissed R("gisel-irtranslator", "", &I);
     R << "unable to translate memop: " << ore::NV("Opcode", &I);
@@ -243,7 +324,11 @@ bool IRTranslator::translateRet(const User &U, MachineIRBuilder &MIRBuilder) {
   // The target may mess up with the insertion point, but
   // this is not important as a return is the last instruction
   // of the block anyway.
-  return CLI->lowerReturn(MIRBuilder, Ret, !Ret ? 0 : getOrCreateVReg(*Ret));
+
+  // FIXME: this interface should simplify when CallLowering gets adapted to
+  // multiple VRegs per Value.
+  unsigned VReg = Ret ? packRegs(*Ret, MIRBuilder) : 0;
+  return CLI->lowerReturn(MIRBuilder, Ret, VReg);
 }

 bool IRTranslator::translateBr(const User &U, MachineIRBuilder &MIRBuilder) {
@@ -342,15 +427,23 @@ bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) {
   if (DL->getTypeStoreSize(LI.getType()) == 0)
     return true;

-  unsigned Res = getOrCreateVReg(LI);
-  unsigned Addr = getOrCreateVReg(*LI.getPointerOperand());
+  ArrayRef<unsigned> Regs = getOrCreateVRegs(LI);
+  ArrayRef<uint64_t> Offsets = *VMap.getOffsets(LI);
+  unsigned Base = getOrCreateVReg(*LI.getPointerOperand());
+
+  for (unsigned i = 0; i < Regs.size(); ++i) {
+    unsigned Addr = 0;
+    MIRBuilder.materializeGEP(Addr, Base, LLT::scalar(64), Offsets[i] / 8);
+
+    MachinePointerInfo Ptr(LI.getPointerOperand(), Offsets[i] / 8);
+    unsigned BaseAlign = getMemOpAlignment(LI);
+    auto MMO = MF->getMachineMemOperand(
+        Ptr, Flags, (MRI->getType(Regs[i]).getSizeInBits() + 7) / 8,
+        MinAlign(BaseAlign, Offsets[i] / 8), AAMDNodes(), nullptr,
+        LI.getSyncScopeID(), LI.getOrdering());
+    MIRBuilder.buildLoad(Regs[i], Addr, *MMO);
+  }

-  MIRBuilder.buildLoad(
-      Res, Addr,
-      *MF->getMachineMemOperand(MachinePointerInfo(LI.getPointerOperand()),
-                                Flags, DL->getTypeStoreSize(LI.getType()),
-                                getMemOpAlignment(LI), AAMDNodes(), nullptr,
-                                LI.getSyncScopeID(), LI.getOrdering()));
   return true;
 }

@@ -363,50 +456,61 @@ bool IRTranslator::translateStore(const User &U, MachineIRBuilder &MIRBuilder) {
   if (DL->getTypeStoreSize(SI.getValueOperand()->getType()) == 0)
     return true;

-  unsigned Val = getOrCreateVReg(*SI.getValueOperand());
-  unsigned Addr = getOrCreateVReg(*SI.getPointerOperand());
-
-  MIRBuilder.buildStore(
-      Val, Addr,
-      *MF->getMachineMemOperand(
-          MachinePointerInfo(SI.getPointerOperand()), Flags,
-          DL->getTypeStoreSize(SI.getValueOperand()->getType()),
-          getMemOpAlignment(SI), AAMDNodes(), nullptr, SI.getSyncScopeID(),
-          SI.getOrdering()));
+  ArrayRef<unsigned> Vals = getOrCreateVRegs(*SI.getValueOperand());
+  ArrayRef<uint64_t> Offsets = *VMap.getOffsets(*SI.getValueOperand());
+  unsigned Base = getOrCreateVReg(*SI.getPointerOperand());
+
+  for (unsigned i = 0; i < Vals.size(); ++i) {
+    unsigned Addr = 0;
+    MIRBuilder.materializeGEP(Addr, Base, LLT::scalar(64), Offsets[i] / 8);
+
+    MachinePointerInfo Ptr(SI.getPointerOperand(), Offsets[i] / 8);
+    unsigned BaseAlign = getMemOpAlignment(SI);
+    auto MMO = MF->getMachineMemOperand(
+        Ptr, Flags, (MRI->getType(Vals[i]).getSizeInBits() + 7) / 8,
+        MinAlign(BaseAlign, Offsets[i] / 8), AAMDNodes(), nullptr,
+        SI.getSyncScopeID(), SI.getOrdering());
+    MIRBuilder.buildStore(Vals[i], Addr, *MMO);
+  }
   return true;
 }

-bool IRTranslator::translateExtractValue(const User &U,
-                                         MachineIRBuilder &MIRBuilder) {
+static uint64_t getOffsetFromIndices(const User &U, const DataLayout &DL) {
   const Value *Src = U.getOperand(0);
   Type *Int32Ty = Type::getInt32Ty(U.getContext());
-  SmallVector<Value *, 1> Indices;
-
-  // If Src is a single element ConstantStruct, translate extractvalue
-  // to that element to avoid inserting a cast instruction.
-  if (auto CS = dyn_cast<ConstantStruct>(Src))
-    if (CS->getNumOperands() == 1) {
-      unsigned Res = getOrCreateVReg(*CS->getOperand(0));
-      ValToVReg[&U] = Res;
-      return true;
-    }

   // getIndexedOffsetInType is designed for GEPs, so the first index is the
   // usual array element rather than looking into the actual aggregate.
+  SmallVector<Value *, 1> Indices;
   Indices.push_back(ConstantInt::get(Int32Ty, 0));

   if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(&U)) {
     for (auto Idx : EVI->indices())
       Indices.push_back(ConstantInt::get(Int32Ty, Idx));
+  } else if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(&U)) {
+    for (auto Idx : IVI->indices())
+      Indices.push_back(ConstantInt::get(Int32Ty, Idx));
   } else {
     for (unsigned i = 1; i < U.getNumOperands(); ++i)
       Indices.push_back(U.getOperand(i));
   }

-  uint64_t Offset = 8 * DL->getIndexedOffsetInType(Src->getType(), Indices);
+  return 8 * static_cast<uint64_t>(
+                 DL.getIndexedOffsetInType(Src->getType(), Indices));
+}

-  unsigned Res = getOrCreateVReg(U);
-  MIRBuilder.buildExtract(Res, getOrCreateVReg(*Src), Offset);
+bool IRTranslator::translateExtractValue(const User &U,
+                                         MachineIRBuilder &MIRBuilder) {
+  const Value *Src = U.getOperand(0);
+  uint64_t Offset = getOffsetFromIndices(U, *DL);
+  ArrayRef<unsigned> SrcRegs = getOrCreateVRegs(*Src);
+  ArrayRef<uint64_t> Offsets = *VMap.getOffsets(*Src);
+  unsigned Idx = std::lower_bound(Offsets.begin(), Offsets.end(), Offset) -
+                 Offsets.begin();
+  auto &DstRegs = allocateVRegs(U);
+
+  for (unsigned i = 0; i < DstRegs.size(); ++i)
+    DstRegs[i] = SrcRegs[Idx++];

   return true;
 }

@@ -414,37 +518,33 @@ bool IRTranslator::translateExtractValue(const User &U,
 bool IRTranslator::translateInsertValue(const User &U,
                                         MachineIRBuilder &MIRBuilder) {
   const Value *Src = U.getOperand(0);
-  Type *Int32Ty = Type::getInt32Ty(U.getContext());
-  SmallVector<Value *, 1> Indices;
-
-  // getIndexedOffsetInType is designed for GEPs, so the first index is the
-  // usual array element rather than looking into the actual aggregate.
-  Indices.push_back(ConstantInt::get(Int32Ty, 0));
-
-  if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(&U)) {
-    for (auto Idx : IVI->indices())
-      Indices.push_back(ConstantInt::get(Int32Ty, Idx));
-  } else {
-    for (unsigned i = 2; i < U.getNumOperands(); ++i)
-      Indices.push_back(U.getOperand(i));
+  uint64_t Offset = getOffsetFromIndices(U, *DL);
+  auto &DstRegs = allocateVRegs(U);
+  ArrayRef<uint64_t> DstOffsets = *VMap.getOffsets(U);
+  ArrayRef<unsigned> SrcRegs = getOrCreateVRegs(*Src);
+  ArrayRef<unsigned> InsertedRegs = getOrCreateVRegs(*U.getOperand(1));
+  auto InsertedIt = InsertedRegs.begin();
+
+  for (unsigned i = 0; i < DstRegs.size(); ++i) {
+    if (DstOffsets[i] >= Offset && InsertedIt != InsertedRegs.end())
+      DstRegs[i] = *InsertedIt++;
+    else
+      DstRegs[i] = SrcRegs[i];
   }

-  uint64_t Offset = 8 * DL->getIndexedOffsetInType(Src->getType(), Indices);
-
-  unsigned Res = getOrCreateVReg(U);
-  unsigned Inserted = getOrCreateVReg(*U.getOperand(1));
-  MIRBuilder.buildInsert(Res, getOrCreateVReg(*Src), Inserted, Offset);
-
   return true;
 }

 bool IRTranslator::translateSelect(const User &U,
                                    MachineIRBuilder &MIRBuilder) {
-  unsigned Res = getOrCreateVReg(U);
   unsigned Tst = getOrCreateVReg(*U.getOperand(0));
-  unsigned Op0 = getOrCreateVReg(*U.getOperand(1));
-  unsigned Op1 = getOrCreateVReg(*U.getOperand(2));
-  MIRBuilder.buildSelect(Res, Tst, Op0, Op1);
+  ArrayRef<unsigned> ResRegs = getOrCreateVRegs(U);
+  ArrayRef<unsigned> Op0Regs = getOrCreateVRegs(*U.getOperand(1));
+  ArrayRef<unsigned> Op1Regs = getOrCreateVRegs(*U.getOperand(2));
+
+  for (unsigned i = 0; i < ResRegs.size(); ++i)
+    MIRBuilder.buildSelect(ResRegs[i], Tst, Op0Regs[i], Op1Regs[i]);
+
   return true;
 }

@@ -453,15 +553,16 @@ bool IRTranslator::translateBitCast(const User &U,
   // If we're bitcasting to the source type, we can reuse the source vreg.
   if (getLLTForType(*U.getOperand(0)->getType(), *DL) ==
       getLLTForType(*U.getType(), *DL)) {
-    // Get the source vreg now, to avoid invalidating ValToVReg.
     unsigned SrcReg = getOrCreateVReg(*U.getOperand(0));
-    unsigned &Reg = ValToVReg[&U];
+    auto &Regs = *VMap.getVRegs(U);
     // If we already assigned a vreg for this bitcast, we can't change that.
     // Emit a copy to satisfy the users we already emitted.
-    if (Reg)
-      MIRBuilder.buildCopy(Reg, SrcReg);
-    else
-      Reg = SrcReg;
+    if (!Regs.empty())
+      MIRBuilder.buildCopy(Regs[0], SrcReg);
+    else {
+      Regs.push_back(SrcReg);
+      VMap.getOffsets(U)->push_back(0);
+    }
     return true;
   }
   return translateCast(TargetOpcode::G_BITCAST, U, MIRBuilder);
@@ -516,10 +617,6 @@ bool IRTranslator::translateGetElementPtr(const User &U,
         Offset = 0;
       }

-      // N = N + Idx * ElementSize;
-      unsigned ElementSizeReg =
-          getOrCreateVReg(*ConstantInt::get(OffsetIRTy, ElementSize));
-
       unsigned IdxReg = getOrCreateVReg(*Idx);
       if (MRI->getType(IdxReg) != OffsetTy) {
         unsigned NewIdxReg = MRI->createGenericVirtualRegister(OffsetTy);
@@ -527,11 +624,20 @@ bool IRTranslator::translateGetElementPtr(const User &U,
         IdxReg = NewIdxReg;
       }

-      unsigned OffsetReg = MRI->createGenericVirtualRegister(OffsetTy);
-      MIRBuilder.buildMul(OffsetReg, ElementSizeReg, IdxReg);
+      // N = N + Idx * ElementSize;
+      // Avoid doing it for ElementSize of 1.
+      unsigned GepOffsetReg;
+      if (ElementSize != 1) {
+        unsigned ElementSizeReg =
+            getOrCreateVReg(*ConstantInt::get(OffsetIRTy, ElementSize));
+
+        GepOffsetReg = MRI->createGenericVirtualRegister(OffsetTy);
+        MIRBuilder.buildMul(GepOffsetReg, ElementSizeReg, IdxReg);
+      } else
+        GepOffsetReg = IdxReg;

       unsigned NewBaseReg = MRI->createGenericVirtualRegister(PtrTy);
-      MIRBuilder.buildGEP(NewBaseReg, BaseReg, OffsetReg);
+      MIRBuilder.buildGEP(NewBaseReg, BaseReg, GepOffsetReg);
       BaseReg = NewBaseReg;
     }
   }
@@ -607,14 +713,10 @@ void IRTranslator::getStackGuard(unsigned DstReg,

 bool IRTranslator::translateOverflowIntrinsic(const CallInst &CI, unsigned Op,
                                               MachineIRBuilder &MIRBuilder) {
-  LLT Ty = getLLTForType(*CI.getOperand(0)->getType(), *DL);
-  LLT s1 = LLT::scalar(1);
-  unsigned Width = Ty.getSizeInBits();
-  unsigned Res = MRI->createGenericVirtualRegister(Ty);
-  unsigned Overflow = MRI->createGenericVirtualRegister(s1);
+  ArrayRef<unsigned> ResRegs = getOrCreateVRegs(CI);
   auto MIB = MIRBuilder.buildInstr(Op)
-                 .addDef(Res)
-                 .addDef(Overflow)
+                 .addDef(ResRegs[0])
+                 .addDef(ResRegs[1])
                  .addUse(getOrCreateVReg(*CI.getOperand(0)))
                  .addUse(getOrCreateVReg(*CI.getOperand(1)));

@@ -624,7 +726,6 @@ bool IRTranslator::translateOverflowIntrinsic(const CallInst &CI, unsigned Op,
     MIB.addUse(Zero);
   }

-  MIRBuilder.buildSequence(getOrCreateVReg(CI), {Res, Overflow}, {0, Width});
   return true;
 }

@@ -647,7 +748,7 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
     const Value *Address = DI.getAddress();
     if (!Address || isa<UndefValue>(Address)) {
-      DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
+      LLVM_DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
       return true;
     }

@@ -741,6 +842,11 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
         .addDef(getOrCreateVReg(CI))
         .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
     return true;
+  case Intrinsic::fabs:
+    MIRBuilder.buildInstr(TargetOpcode::G_FABS)
+        .addDef(getOrCreateVReg(CI))
+        .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
+    return true;
   case Intrinsic::fma:
     MIRBuilder.buildInstr(TargetOpcode::G_FMA)
         .addDef(getOrCreateVReg(CI))
@@ -748,6 +854,25 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
         .addUse(getOrCreateVReg(*CI.getArgOperand(1)))
         .addUse(getOrCreateVReg(*CI.getArgOperand(2)));
     return true;
+  case Intrinsic::fmuladd: {
+    const TargetMachine &TM = MF->getTarget();
+    const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering();
+    unsigned Dst = getOrCreateVReg(CI);
+    unsigned Op0 = getOrCreateVReg(*CI.getArgOperand(0));
+    unsigned Op1 = getOrCreateVReg(*CI.getArgOperand(1));
+    unsigned Op2 = getOrCreateVReg(*CI.getArgOperand(2));
+    if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict &&
+        TLI.isFMAFasterThanFMulAndFAdd(TLI.getValueType(*DL, CI.getType()))) {
+      // TODO: Revisit this to see if we should move this part of the
+      // lowering to the combiner.
+      MIRBuilder.buildInstr(TargetOpcode::G_FMA, Dst, Op0, Op1, Op2);
+    } else {
+      LLT Ty = getLLTForType(*CI.getType(), *DL);
+      auto FMul = MIRBuilder.buildInstr(TargetOpcode::G_FMUL, Ty, Op0, Op1);
+      MIRBuilder.buildInstr(TargetOpcode::G_FADD, Dst, FMul, Op2);
+    }
+    return true;
+  }
   case Intrinsic::memcpy:
   case Intrinsic::memmove:
   case Intrinsic::memset:
@@ -807,36 +932,86 @@ bool IRTranslator::translateInlineAsm(const CallInst &CI,
   return true;
 }

+unsigned IRTranslator::packRegs(const Value &V,
+                                  MachineIRBuilder &MIRBuilder) {
+  ArrayRef<unsigned> Regs = getOrCreateVRegs(V);
+  ArrayRef<uint64_t> Offsets = *VMap.getOffsets(V);
+  LLT BigTy = getLLTForType(*V.getType(), *DL);
+
+  if (Regs.size() == 1)
+    return Regs[0];
+
+  unsigned Dst = MRI->createGenericVirtualRegister(BigTy);
+  MIRBuilder.buildUndef(Dst);
+  for (unsigned i = 0; i < Regs.size(); ++i) {
+    unsigned NewDst = MRI->createGenericVirtualRegister(BigTy);
+    MIRBuilder.buildInsert(NewDst, Dst, Regs[i], Offsets[i]);
+    Dst = NewDst;
+  }
+  return Dst;
+}
+
+void IRTranslator::unpackRegs(const Value &V, unsigned Src,
+                                MachineIRBuilder &MIRBuilder) {
+  ArrayRef<unsigned> Regs = getOrCreateVRegs(V);
+  ArrayRef<uint64_t> Offsets = *VMap.getOffsets(V);
+
+  for (unsigned i = 0; i < Regs.size(); ++i)
+    MIRBuilder.buildExtract(Regs[i], Src, Offsets[i]);
+}
+
 bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
   const CallInst &CI = cast<CallInst>(U);
   auto TII = MF->getTarget().getIntrinsicInfo();
   const Function *F = CI.getCalledFunction();

+  // FIXME: support Windows dllimport function calls.
+  if (F && F->hasDLLImportStorageClass())
+    return false;
+
   if (CI.isInlineAsm())
     return translateInlineAsm(CI, MIRBuilder);

-  if (!F || !F->isIntrinsic()) {
-    unsigned Res = CI.getType()->isVoidTy() ? 0 : getOrCreateVReg(CI);
+  Intrinsic::ID ID = Intrinsic::not_intrinsic;
+  if (F && F->isIntrinsic()) {
+    ID = F->getIntrinsicID();
+    if (TII && ID == Intrinsic::not_intrinsic)
+      ID = static_cast<Intrinsic::ID>(TII->getIntrinsicID(F));
+  }
+
+  bool IsSplitType = valueIsSplit(CI);
+  if (!F || !F->isIntrinsic() || ID == Intrinsic::not_intrinsic) {
+    unsigned Res = IsSplitType ? MRI->createGenericVirtualRegister(
+                                     getLLTForType(*CI.getType(), *DL))
+                               : getOrCreateVReg(CI);
+
     SmallVector<unsigned, 8> Args;
     for (auto &Arg: CI.arg_operands())
-      Args.push_back(getOrCreateVReg(*Arg));
+      Args.push_back(packRegs(*Arg, MIRBuilder));

     MF->getFrameInfo().setHasCalls(true);
-    return CLI->lowerCall(MIRBuilder, &CI, Res, Args, [&]() {
+    bool Success = CLI->lowerCall(MIRBuilder, &CI, Res, Args, [&]() {
       return getOrCreateVReg(*CI.getCalledValue());
     });
-  }

-  Intrinsic::ID ID = F->getIntrinsicID();
-  if (TII && ID == Intrinsic::not_intrinsic)
-    ID = static_cast<Intrinsic::ID>(TII->getIntrinsicID(F));
+    if (IsSplitType)
+      unpackRegs(CI, Res, MIRBuilder);
+    return Success;
+  }

   assert(ID != Intrinsic::not_intrinsic && "unknown intrinsic");

   if (translateKnownIntrinsic(CI, ID, MIRBuilder))
     return true;

-  unsigned Res = CI.getType()->isVoidTy() ? 0 : getOrCreateVReg(CI);
+  unsigned Res = 0;
+  if (!CI.getType()->isVoidTy()) {
+    if (IsSplitType)
+      Res =
+          MRI->createGenericVirtualRegister(getLLTForType(*CI.getType(), *DL));
+    else
+      Res = getOrCreateVReg(CI);
+  }
   MachineInstrBuilder MIB =
       MIRBuilder.buildIntrinsic(ID, Res, !CI.doesNotAccessMemory());

@@ -844,9 +1019,12 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
     // Some intrinsics take metadata parameters. Reject them.
     if (isa<MetadataAsValue>(Arg))
       return false;
-    MIB.addUse(getOrCreateVReg(*Arg));
+    MIB.addUse(packRegs(*Arg, MIRBuilder));
   }

+  if (IsSplitType)
+    unpackRegs(CI, Res, MIRBuilder);
+
   // Add a MachineMemOperand if it is a target mem intrinsic.
   const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering();
   TargetLowering::IntrinsicInfo Info;
@@ -890,15 +1068,18 @@ bool IRTranslator::translateInvoke(const User &U,
   MCSymbol *BeginSymbol = Context.createTempSymbol();
   MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(BeginSymbol);

-  unsigned Res = I.getType()->isVoidTy() ? 0 : getOrCreateVReg(I);
+  unsigned Res =
+        MRI->createGenericVirtualRegister(getLLTForType(*I.getType(), *DL));
   SmallVector<unsigned, 8> Args;
   for (auto &Arg: I.arg_operands())
-    Args.push_back(getOrCreateVReg(*Arg));
+    Args.push_back(packRegs(*Arg, MIRBuilder));

   if (!CLI->lowerCall(MIRBuilder, &I, Res, Args,
                       [&]() { return getOrCreateVReg(*I.getCalledValue()); }))
     return false;

+  unpackRegs(I, Res, MIRBuilder);
+
   MCSymbol *EndSymbol = Context.createTempSymbol();
   MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(EndSymbol);

@@ -957,27 +1138,18 @@ bool IRTranslator::translateLandingPad(const User &U,
     return false;

   MBB.addLiveIn(ExceptionReg);
-  unsigned VReg = MRI->createGenericVirtualRegister(Tys[0]),
-           Tmp = MRI->createGenericVirtualRegister(Ty);
-  MIRBuilder.buildCopy(VReg, ExceptionReg);
-  MIRBuilder.buildInsert(Tmp, Undef, VReg, 0);
+  ArrayRef<unsigned> ResRegs = getOrCreateVRegs(LP);
+  MIRBuilder.buildCopy(ResRegs[0], ExceptionReg);

   unsigned SelectorReg = TLI.getExceptionSelectorRegister(PersonalityFn);
   if (!SelectorReg)
     return false;

   MBB.addLiveIn(SelectorReg);
-
-  // N.b. the exception selector register always has pointer type and may not
-  // match the actual IR-level type in the landingpad so an extra cast is
-  // needed.
   unsigned PtrVReg = MRI->createGenericVirtualRegister(Tys[0]);
   MIRBuilder.buildCopy(PtrVReg, SelectorReg);
+  MIRBuilder.buildCast(ResRegs[1], PtrVReg);

-  VReg = MRI->createGenericVirtualRegister(Tys[1]);
-  MIRBuilder.buildInstr(TargetOpcode::G_PTRTOINT).addDef(VReg).addUse(PtrVReg);
-  MIRBuilder.buildInsert(getOrCreateVReg(LP), Tmp, VReg,
-                         Tys[0].getSizeInBits());
   return true;
 }

@@ -985,6 +1157,9 @@ bool IRTranslator::translateAlloca(const User &U,
                                    MachineIRBuilder &MIRBuilder) {
   auto &AI = cast<AllocaInst>(U);

+  if (AI.isSwiftError())
+    return false;
+
   if (AI.isStaticAlloca()) {
     unsigned Res = getOrCreateVReg(AI);
     int FI = getOrCreateFrameIndex(AI);
@@ -992,6 +1167,10 @@ bool IRTranslator::translateAlloca(const User &U,
     return true;
   }

+  // FIXME: support stack probing for Windows.
+  if (MF->getTarget().getTargetTriple().isOSWindows())
+    return false;
+
   // Now we're in the harder dynamic case.
   Type *Ty = AI.getAllocatedType();
   unsigned Align =
@@ -1063,9 +1242,16 @@ bool IRTranslator::translateInsertElement(const User &U,
   // not a legal vector type in LLT.
   if (U.getType()->getVectorNumElements() == 1) {
     unsigned Elt = getOrCreateVReg(*U.getOperand(1));
-    ValToVReg[&U] = Elt;
+    auto &Regs = *VMap.getVRegs(U);
+    if (Regs.empty()) {
+      Regs.push_back(Elt);
+      VMap.getOffsets(U)->push_back(0);
+    } else {
+      MIRBuilder.buildCopy(Regs[0], Elt);
+    }
     return true;
   }
+
   unsigned Res = getOrCreateVReg(U);
   unsigned Val = getOrCreateVReg(*U.getOperand(0));
   unsigned Elt = getOrCreateVReg(*U.getOperand(1));
@@ -1080,7 +1266,13 @@ bool IRTranslator::translateExtractElement(const User &U,
   // not a legal vector type in LLT.
   if (U.getOperand(0)->getType()->getVectorNumElements() == 1) {
     unsigned Elt = getOrCreateVReg(*U.getOperand(0));
-    ValToVReg[&U] = Elt;
+    auto &Regs = *VMap.getVRegs(U);
+    if (Regs.empty()) {
+      Regs.push_back(Elt);
+      VMap.getOffsets(U)->push_back(0);
+    } else {
+      MIRBuilder.buildCopy(Regs[0], Elt);
+    }
     return true;
   }
   unsigned Res = getOrCreateVReg(U);
@@ -1102,17 +1294,115 @@ bool IRTranslator::translateShuffleVector(const User &U,

 bool IRTranslator::translatePHI(const User &U, MachineIRBuilder &MIRBuilder) {
   const PHINode &PI = cast<PHINode>(U);

-  auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_PHI);
-  MIB.addDef(getOrCreateVReg(PI));
-  PendingPHIs.emplace_back(&PI, MIB.getInstr());
+  SmallVector<MachineInstr *, 4> Insts;
+  for (auto Reg : getOrCreateVRegs(PI)) {
+    auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_PHI, Reg);
+    Insts.push_back(MIB.getInstr());
+  }
+
+  PendingPHIs.emplace_back(&PI, std::move(Insts));
+  return true;
+}
+
+bool IRTranslator::translateAtomicCmpXchg(const User &U,
+                                          MachineIRBuilder &MIRBuilder) {
+  const AtomicCmpXchgInst &I = cast<AtomicCmpXchgInst>(U);
+
+  if (I.isWeak())
+    return false;
+
+  auto Flags = I.isVolatile() ? MachineMemOperand::MOVolatile
+                              : MachineMemOperand::MONone;
+  Flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
+
+  Type *ResType = I.getType();
+  Type *ValType = ResType->Type::getStructElementType(0);
+
+  auto Res = getOrCreateVRegs(I);
+  unsigned OldValRes = Res[0];
+  unsigned SuccessRes = Res[1];
+  unsigned Addr = getOrCreateVReg(*I.getPointerOperand());
+  unsigned Cmp = getOrCreateVReg(*I.getCompareOperand());
+  unsigned NewVal = getOrCreateVReg(*I.getNewValOperand());
+
+  MIRBuilder.buildAtomicCmpXchgWithSuccess(
+      OldValRes, SuccessRes, Addr, Cmp, NewVal,
+      *MF->getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()),
+                                Flags, DL->getTypeStoreSize(ValType),
+                                getMemOpAlignment(I), AAMDNodes(), nullptr,
+                                I.getSyncScopeID(), I.getSuccessOrdering(),
+                                I.getFailureOrdering()));
+  return true;
+}
+
+bool IRTranslator::translateAtomicRMW(const User &U,
+                                      MachineIRBuilder &MIRBuilder) {
+  const AtomicRMWInst &I = cast<AtomicRMWInst>(U);
+
+  auto Flags = I.isVolatile() ? MachineMemOperand::MOVolatile
+                              : MachineMemOperand::MONone;
+  Flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
+
+  Type *ResType = I.getType();
+
+  unsigned Res = getOrCreateVReg(I);
+  unsigned Addr = getOrCreateVReg(*I.getPointerOperand());
+  unsigned Val = getOrCreateVReg(*I.getValOperand());
+
+  unsigned Opcode = 0;
+  switch (I.getOperation()) {
+  default:
+    llvm_unreachable("Unknown atomicrmw op");
+    return false;
+  case AtomicRMWInst::Xchg:
+    Opcode = TargetOpcode::G_ATOMICRMW_XCHG;
+    break;
+  case AtomicRMWInst::Add:
+    Opcode = TargetOpcode::G_ATOMICRMW_ADD;
+    break;
+  case AtomicRMWInst::Sub:
+    Opcode = TargetOpcode::G_ATOMICRMW_SUB;
+    break;
+  case AtomicRMWInst::And:
+    Opcode = TargetOpcode::G_ATOMICRMW_AND;
+    break;
+  case AtomicRMWInst::Nand:
+    Opcode = TargetOpcode::G_ATOMICRMW_NAND;
+    break;
+  case AtomicRMWInst::Or:
+    Opcode = TargetOpcode::G_ATOMICRMW_OR;
+    break;
+  case AtomicRMWInst::Xor:
+    Opcode = TargetOpcode::G_ATOMICRMW_XOR;
+    break;
+  case AtomicRMWInst::Max:
+    Opcode = TargetOpcode::G_ATOMICRMW_MAX;
+    break;
+  case AtomicRMWInst::Min:
+    Opcode = TargetOpcode::G_ATOMICRMW_MIN;
+    break;
+  case AtomicRMWInst::UMax:
+    Opcode = TargetOpcode::G_ATOMICRMW_UMAX;
+    break;
+  case AtomicRMWInst::UMin:
+    Opcode = TargetOpcode::G_ATOMICRMW_UMIN;
+    break;
+  }
+
+  MIRBuilder.buildAtomicRMW(
+      Opcode, Res, Addr, Val,
+      *MF->getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()),
+                                Flags, DL->getTypeStoreSize(ResType),
+                                getMemOpAlignment(I), AAMDNodes(), nullptr,
+                                I.getSyncScopeID(), I.getOrdering()));
   return true;
 }

 void IRTranslator::finishPendingPhis() {
-  for (std::pair<const PHINode *, MachineInstr *> &Phi : PendingPHIs) {
+  for (auto &Phi : PendingPHIs) {
     const PHINode *PI = Phi.first;
-    MachineInstrBuilder MIB(*MF, Phi.second);
+    ArrayRef<MachineInstr *> ComponentPHIs = Phi.second;

     // All MachineBasicBlocks exist, add them to the PHI. We assume IRTranslator
     // won't create extra control flow here, otherwise we need to find the
@@ -1126,17 +1416,27 @@ void IRTranslator::finishPendingPhis() {
         continue;

       HandledPreds.insert(IRPred);
-      unsigned ValReg = getOrCreateVReg(*PI->getIncomingValue(i));
+      ArrayRef<unsigned> ValRegs = getOrCreateVRegs(*PI->getIncomingValue(i));
       for (auto Pred : getMachinePredBBs({IRPred, PI->getParent()})) {
-        assert(Pred->isSuccessor(MIB->getParent()) &&
+        assert(Pred->isSuccessor(ComponentPHIs[0]->getParent()) &&
                "incorrect CFG at MachineBasicBlock level");
-        MIB.addUse(ValReg);
-        MIB.addMBB(Pred);
+        for (unsigned j = 0; j < ValRegs.size(); ++j) {
+          MachineInstrBuilder MIB(*MF, ComponentPHIs[j]);
+          MIB.addUse(ValRegs[j]);
+          MIB.addMBB(Pred);
+        }
       }
     }
   }
 }

+bool IRTranslator::valueIsSplit(const Value &V,
+                                SmallVectorImpl<uint64_t> *Offsets) {
+  SmallVector<LLT, 4> SplitTys;
+  computeValueLLTs(*DL, *V.getType(), SplitTys, Offsets);
+  return SplitTys.size() > 1;
+}
+
 bool IRTranslator::translate(const Instruction &Inst) {
   CurBuilder.setDebugLoc(Inst.getDebugLoc());
   switch(Inst.getOpcode()) {
@@ -1155,9 +1455,15 @@ bool IRTranslator::translate(const Constant &C, unsigned Reg) {
     EntryBuilder.buildFConstant(Reg, *CF);
   else if (isa<UndefValue>(C))
     EntryBuilder.buildUndef(Reg);
-  else if (isa<ConstantPointerNull>(C))
-    EntryBuilder.buildConstant(Reg, 0);
-  else if (auto GV = dyn_cast<GlobalValue>(&C))
+  else if (isa<ConstantPointerNull>(C)) {
+    // As we are trying to build a constant val of 0 into a pointer,
+    // insert a cast to make them correct with respect to types.
+    unsigned NullSize = DL->getTypeSizeInBits(C.getType());
+    auto *ZeroTy = Type::getIntNTy(C.getContext(), NullSize);
+    auto *ZeroVal = ConstantInt::get(ZeroTy, 0);
+    unsigned ZeroReg = getOrCreateVReg(*ZeroVal);
+    EntryBuilder.buildCast(Reg, ZeroReg);
+  } else if (auto GV = dyn_cast<GlobalValue>(&C))
     EntryBuilder.buildGlobalValue(Reg, GV);
   else if (auto CAZ = dyn_cast<ConstantAggregateZero>(&C)) {
     if (!CAZ->getType()->isVectorTy())
@@ -1189,23 +1495,6 @@ bool IRTranslator::translate(const Constant &C, unsigned Reg) {
     default:
       return false;
     }
-  } else if (auto CS = dyn_cast<ConstantStruct>(&C)) {
-    // Return the element if it is a single element ConstantStruct.
-    if (CS->getNumOperands() == 1) {
-      unsigned EltReg = getOrCreateVReg(*CS->getOperand(0));
-      EntryBuilder.buildCast(Reg, EltReg);
-      return true;
-    }
-    SmallVector<unsigned, 4> Ops;
-    SmallVector<uint64_t, 4> Indices;
-    uint64_t Offset = 0;
-    for (unsigned i = 0; i < CS->getNumOperands(); ++i) {
-      unsigned OpReg = getOrCreateVReg(*CS->getOperand(i));
-      Ops.push_back(OpReg);
-      Indices.push_back(Offset);
-      Offset += MRI->getType(OpReg).getSizeInBits();
-    }
-    EntryBuilder.buildSequence(Reg, Ops, Indices);
   } else if (auto CV = dyn_cast<ConstantVector>(&C)) {
     if (CV->getNumOperands() == 1)
       return translate(*CV->getOperand(0), Reg);
@@ -1224,7 +1513,7 @@ void IRTranslator::finalizeFunction() {
   // Release the memory used by the different maps we
   // needed during the translation.
   PendingPHIs.clear();
-  ValToVReg.clear();
+  VMap.reset();
   FrameIndices.clear();
   MachinePreds.clear();
   // MachineIRBuilder::DebugLoc can outlive the DILocation it holds. Clear it
@@ -1284,8 +1573,22 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
   for (const Argument &Arg: F.args()) {
     if (DL->getTypeStoreSize(Arg.getType()) == 0)
       continue; // Don't handle zero sized types.
-    VRegArgs.push_back(getOrCreateVReg(Arg));
+    VRegArgs.push_back(
+        MRI->createGenericVirtualRegister(getLLTForType(*Arg.getType(), *DL)));
   }
+
+  // We don't currently support translating swifterror or swiftself functions.
+  for (auto &Arg : F.args()) {
+    if (Arg.hasSwiftErrorAttr() || Arg.hasSwiftSelfAttr()) {
+      OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
+                                 F.getSubprogram(), &F.getEntryBlock());
+      R << "unable to lower arguments due to swifterror/swiftself: "
+        << ore::NV("Prototype", F.getType());
+      reportTranslationError(*MF, *TPC, *ORE, R);
+      return false;
+    }
+  }
+
   if (!CLI->lowerFormalArguments(EntryBuilder, F, VRegArgs)) {
     OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
                                F.getSubprogram(), &F.getEntryBlock());
@@ -1294,14 +1597,28 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
     return false;
   }

+  auto ArgIt = F.arg_begin();
+  for (auto &VArg : VRegArgs) {
+    // If the argument is an unsplit scalar then don't use unpackRegs to avoid
+    // creating redundant copies.
+    if (!valueIsSplit(*ArgIt, VMap.getOffsets(*ArgIt))) {
+      auto &VRegs = *VMap.getVRegs(cast<Value>(*ArgIt));
+      assert(VRegs.empty() && "VRegs already populated?");
+      VRegs.push_back(VArg);
+    } else {
+      unpackRegs(*ArgIt, VArg, EntryBuilder);
+    }
+    ArgIt++;
+  }
+
   // And translate the function!
-  for (const BasicBlock &BB: F) {
+  for (const BasicBlock &BB : F) {
     MachineBasicBlock &MBB = getMBB(BB);
     // Set the insertion point of all the following translations to
     // the end of this basic block.
     CurBuilder.setMBB(MBB);

-    for (const Instruction &Inst: BB) {
+    for (const Instruction &Inst : BB) {
       if (translate(Inst))
         continue;

@@ -1351,5 +1668,9 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
   assert(&MF->front() == &NewEntryBB &&
          "New entry wasn't next in the list of basic block!");

+  // Initialize stack protector information.
+  StackProtector &SP = getAnalysis<StackProtector>();
+  SP.copyToMachineFrameInfo(MF->getFrameInfo());
+
   return false;
 }
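The centerpiece of this diff is `computeValueLLTs`, which recursively flattens a struct or array type into one LLT per leaf scalar together with its offset in bits; `getOrCreateVRegs` then hands out one virtual register per leaf, and the load/store, extractvalue/insertvalue, select, and PHI translators all iterate over that register list. Below is a minimal standalone sketch of the same recursion, with toy stand-ins for LLVM's `Type` and `DataLayout` (every name in it is illustrative rather than LLVM API, and struct padding is deliberately ignored):

```cpp
#include <cstdint>
#include <iostream>
#include <utility>
#include <vector>

// Toy stand-in for llvm::Type; the real code walks Type/StructLayout.
struct Ty {
  enum Kind { Scalar, Struct, Array } kind;
  uint64_t scalarBytes = 0;        // used when kind == Scalar
  std::vector<const Ty *> fields;  // used when kind == Struct
  const Ty *elt = nullptr;         // used when kind == Array
  uint64_t count = 0;              // used when kind == Array

  uint64_t bytes() const {  // allocation size; no padding in this toy model
    if (kind == Scalar)
      return scalarBytes;
    if (kind == Array)
      return count * elt->bytes();
    uint64_t Sum = 0;
    for (const Ty *F : fields)
      Sum += F->bytes();
    return Sum;
  }
};

// Mirrors computeValueLLTs: one (size-in-bytes, offset-in-bits) pair per leaf
// scalar, emitted depth-first so the offsets come out sorted ascending.
static void flatten(const Ty &T, uint64_t StartingOffset,
                    std::vector<std::pair<uint64_t, uint64_t>> &Out) {
  if (T.kind == Ty::Struct) {
    uint64_t Off = StartingOffset;
    for (const Ty *F : T.fields) {
      flatten(*F, Off, Out);
      Off += F->bytes();
    }
    return;
  }
  if (T.kind == Ty::Array) {
    for (uint64_t I = 0; I < T.count; ++I)
      flatten(*T.elt, StartingOffset + I * T.elt->bytes(), Out);
    return;
  }
  Out.push_back({T.scalarBytes, StartingOffset * 8}); // base case: leaf scalar
}

int main() {
  Ty I64{Ty::Scalar, 8}, I8{Ty::Scalar, 1};
  Ty Arr{Ty::Array};
  Arr.elt = &I8;
  Arr.count = 2;
  Ty S{Ty::Struct};
  S.fields = {&I64, &Arr};

  std::vector<std::pair<uint64_t, uint64_t>> Leaves;
  flatten(S, 0, Leaves); // stand-in for { i64, [2 x i8] }
  for (const auto &L : Leaves)
    std::cout << L.first << "@" << L.second << "\n"; // 8@0, 1@64, 1@72
}
```

In the commit itself, each leaf gets its own generic virtual register and the bit offsets are recorded alongside it in `VMap`, which is what lets the rewritten `translateLoad`/`translateStore` address each piece with `materializeGEP(..., Offsets[i] / 8)`.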

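Likewise, the rewritten `translateExtractValue` no longer emits a `G_EXTRACT`: because the offsets recorded by the flattening step are sorted ascending, it can binary-search for the extracted member's first component register and then just copy register indices. A sketch of that `std::lower_bound` lookup under the same toy assumptions (names are illustrative):

```cpp
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <vector>

// offsets: sorted bit offsets of a value's component registers, as produced
// by the flattening step. Returns the index of the first register belonging
// to the member that starts at memberBitOffset.
unsigned firstComponentIndex(const std::vector<uint64_t> &offsets,
                             uint64_t memberBitOffset) {
  auto It = std::lower_bound(offsets.begin(), offsets.end(), memberBitOffset);
  assert(It != offsets.end() && *It == memberBitOffset &&
         "extractvalue must start exactly on a component boundary");
  return static_cast<unsigned>(It - offsets.begin());
}

int main() {
  // Component offsets for { i64, [2 x i8] }: bits 0, 64 and 72.
  std::vector<uint64_t> Offsets{0, 64, 72};
  // Extracting member 1 (the [2 x i8]) starts at bit 64 -> register index 1;
  // the member then occupies as many consecutive registers as it has leaves.
  return firstComponentIndex(Offsets, 64) == 1 ? 0 : 1;
}
```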