Diffstat (limited to 'lib/CodeGen/GlobalISel/LegalizerHelper.cpp')
-rw-r--r-- | lib/CodeGen/GlobalISel/LegalizerHelper.cpp | 978
1 file changed, 917 insertions(+), 61 deletions(-)
diff --git a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index f5cf7fc9bd9b..21512e543878 100644
--- a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -17,6 +17,7 @@
 #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
 #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
 #include "llvm/CodeGen/TargetInstrInfo.h"
 #include "llvm/CodeGen/TargetLowering.h"
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
@@ -171,6 +172,26 @@ bool LegalizerHelper::extractParts(Register Reg, LLT RegTy,
   return true;
 }
 
+static LLT getGCDType(LLT OrigTy, LLT TargetTy) {
+  if (OrigTy.isVector() && TargetTy.isVector()) {
+    assert(OrigTy.getElementType() == TargetTy.getElementType());
+    int GCD = greatestCommonDivisor(OrigTy.getNumElements(),
+                                    TargetTy.getNumElements());
+    return LLT::scalarOrVector(GCD, OrigTy.getElementType());
+  }
+
+  if (OrigTy.isVector() && !TargetTy.isVector()) {
+    assert(OrigTy.getElementType() == TargetTy);
+    return TargetTy;
+  }
+
+  assert(!OrigTy.isVector() && !TargetTy.isVector());
+
+  int GCD = greatestCommonDivisor(OrigTy.getSizeInBits(),
+                                  TargetTy.getSizeInBits());
+  return LLT::scalar(GCD);
+}
+
 void LegalizerHelper::insertParts(Register DstReg,
                                   LLT ResultTy, LLT PartTy,
                                   ArrayRef<Register> PartRegs,
@@ -219,11 +240,29 @@ void LegalizerHelper::insertParts(Register DstReg,
 static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
   switch (Opcode) {
   case TargetOpcode::G_SDIV:
-    assert((Size == 32 || Size == 64) && "Unsupported size");
-    return Size == 64 ? RTLIB::SDIV_I64 : RTLIB::SDIV_I32;
+    assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
+    switch (Size) {
+    case 32:
+      return RTLIB::SDIV_I32;
+    case 64:
+      return RTLIB::SDIV_I64;
+    case 128:
+      return RTLIB::SDIV_I128;
+    default:
+      llvm_unreachable("unexpected size");
+    }
   case TargetOpcode::G_UDIV:
-    assert((Size == 32 || Size == 64) && "Unsupported size");
-    return Size == 64 ? RTLIB::UDIV_I64 : RTLIB::UDIV_I32;
+    assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
+    switch (Size) {
+    case 32:
+      return RTLIB::UDIV_I32;
+    case 64:
+      return RTLIB::UDIV_I64;
+    case 128:
+      return RTLIB::UDIV_I128;
+    default:
+      llvm_unreachable("unexpected size");
+    }
   case TargetOpcode::G_SREM:
     assert((Size == 32 || Size == 64) && "Unsupported size");
     return Size == 64 ? RTLIB::SREM_I64 : RTLIB::SREM_I32;
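The new 128-bit entries matter because mainstream targets have no 128-bit divide instruction; a G_SDIV of that width becomes a runtime call. A minimal illustration (my sketch, not part of the patch; __int128 is a GCC/Clang extension, and the exact symbol is target-dependent, commonly __divti3 from compiler-rt or libgcc):

#include <cstdio>

// Illustration only: the division below has no single instruction on common
// 64-bit targets, so the compiler emits the SDIV_I128 runtime call for it.
__int128 sdiv128(__int128 A, __int128 B) { return A / B; }

int main() {
  __int128 Q = sdiv128((__int128)1 << 100, 3);
  printf("%llu\n", (unsigned long long)Q); // low 64 bits of the quotient
}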
@@ -288,6 +327,35 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
   llvm_unreachable("Unknown libcall function");
 }
 
+/// True if an instruction is in tail position in its caller. Intended for
+/// legalizing libcalls as tail calls when possible.
+static bool isLibCallInTailPosition(MachineInstr &MI) {
+  const Function &F = MI.getParent()->getParent()->getFunction();
+
+  // Conservatively require the attributes of the call to match those of
+  // the return. Ignore NoAlias and NonNull because they don't affect the
+  // call sequence.
+  AttributeList CallerAttrs = F.getAttributes();
+  if (AttrBuilder(CallerAttrs, AttributeList::ReturnIndex)
+          .removeAttribute(Attribute::NoAlias)
+          .removeAttribute(Attribute::NonNull)
+          .hasAttributes())
+    return false;
+
+  // It's not safe to eliminate the sign / zero extension of the return value.
+  if (CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::ZExt) ||
+      CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::SExt))
+    return false;
+
+  // Only tail call if the following instruction is a standard return.
+  auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
+  MachineInstr *Next = MI.getNextNode();
+  if (!Next || TII.isTailCall(*Next) || !Next->isReturn())
+    return false;
+
+  return true;
+}
+
 LegalizerHelper::LegalizeResult
 llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
                     const CallLowering::ArgInfo &Result,
@@ -296,9 +364,12 @@ llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
   auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
   const char *Name = TLI.getLibcallName(Libcall);
 
-  MIRBuilder.getMF().getFrameInfo().setHasCalls(true);
-  if (!CLI.lowerCall(MIRBuilder, TLI.getLibcallCallingConv(Libcall),
-                     MachineOperand::CreateES(Name), Result, Args))
+  CallLowering::CallLoweringInfo Info;
+  Info.CallConv = TLI.getLibcallCallingConv(Libcall);
+  Info.Callee = MachineOperand::CreateES(Name);
+  Info.OrigRet = Result;
+  std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
+  if (!CLI.lowerCall(MIRBuilder, Info))
     return LegalizerHelper::UnableToLegalize;
 
   return LegalizerHelper::Legalized;
@@ -317,6 +388,74 @@ simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
               Args);
 }
 
+LegalizerHelper::LegalizeResult
+llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
+                       MachineInstr &MI) {
+  assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
+  auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
+
+  SmallVector<CallLowering::ArgInfo, 3> Args;
+  // Add all the args, except for the last which is an imm denoting 'tail'.
+  for (unsigned i = 1; i < MI.getNumOperands() - 1; i++) {
+    Register Reg = MI.getOperand(i).getReg();
+
+    // Need to derive an IR type for call lowering.
+    LLT OpLLT = MRI.getType(Reg);
+    Type *OpTy = nullptr;
+    if (OpLLT.isPointer())
+      OpTy = Type::getInt8PtrTy(Ctx, OpLLT.getAddressSpace());
+    else
+      OpTy = IntegerType::get(Ctx, OpLLT.getSizeInBits());
+    Args.push_back({Reg, OpTy});
+  }
+
+  auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
+  auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
+  Intrinsic::ID ID = MI.getOperand(0).getIntrinsicID();
+  RTLIB::Libcall RTLibcall;
+  switch (ID) {
+  case Intrinsic::memcpy:
+    RTLibcall = RTLIB::MEMCPY;
+    break;
+  case Intrinsic::memset:
+    RTLibcall = RTLIB::MEMSET;
+    break;
+  case Intrinsic::memmove:
+    RTLibcall = RTLIB::MEMMOVE;
+    break;
+  default:
+    return LegalizerHelper::UnableToLegalize;
+  }
+  const char *Name = TLI.getLibcallName(RTLibcall);
+
+  MIRBuilder.setInstr(MI);
+
+  CallLowering::CallLoweringInfo Info;
+  Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
+  Info.Callee = MachineOperand::CreateES(Name);
+  Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx));
+  Info.IsTailCall = MI.getOperand(MI.getNumOperands() - 1).getImm() == 1 &&
+                    isLibCallInTailPosition(MI);
+
+  std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
+  if (!CLI.lowerCall(MIRBuilder, Info))
+    return LegalizerHelper::UnableToLegalize;
+
+  if (Info.LoweredTailCall) {
+    assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
+    // We must have a return following the call to get past
+    // isLibCallInTailPosition.
+    assert(MI.getNextNode() && MI.getNextNode()->isReturn() &&
+           "Expected instr following MI to be a return?");
+
+    // We lowered a tail call, so the call is now the return from the block.
+    // Delete the old return.
+    MI.getNextNode()->eraseFromParent();
+  }
+
+  return LegalizerHelper::Legalized;
+}
+
 static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
                                        Type *FromType) {
   auto ToMVT = MVT::getVT(ToType);
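For intuition on the tail-call path (my illustration, not from the patch): when the memory intrinsic is immediately followed by a return and the attribute checks above pass, the libcall can reuse the caller's return, e.g.:

#include <cstring>

// At -O2 on typical targets the memcpy libcall here can be emitted as a tail
// call ("jmp memcpy"), because nothing follows it but the return -- the
// situation isLibCallInTailPosition() tests for.
void *copy(void *Dst, const void *Src, size_t N) {
  return std::memcpy(Dst, Src, N);
}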
@@ -518,6 +657,65 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
     MI.eraseFromParent();
     return Legalized;
   }
+  case TargetOpcode::G_SEXT: {
+    if (TypeIdx != 0)
+      return UnableToLegalize;
+
+    Register SrcReg = MI.getOperand(1).getReg();
+    LLT SrcTy = MRI.getType(SrcReg);
+
+    // FIXME: support the general case where the requested NarrowTy may not be
+    // the same as the source type. E.g. s128 = sext(s32)
+    if ((SrcTy.getSizeInBits() != SizeOp0 / 2) ||
+        SrcTy.getSizeInBits() != NarrowTy.getSizeInBits()) {
+      LLVM_DEBUG(dbgs() << "Can't narrow sext to type " << NarrowTy << "\n");
+      return UnableToLegalize;
+    }
+
+    // Shift the sign bit of the low register through the high register.
+    auto ShiftAmt =
+        MIRBuilder.buildConstant(LLT::scalar(64), NarrowTy.getSizeInBits() - 1);
+    auto Shift = MIRBuilder.buildAShr(NarrowTy, SrcReg, ShiftAmt);
+    MIRBuilder.buildMerge(MI.getOperand(0).getReg(), {SrcReg, Shift.getReg(0)});
+    MI.eraseFromParent();
+    return Legalized;
+  }
+  case TargetOpcode::G_ZEXT: {
+    if (TypeIdx != 0)
+      return UnableToLegalize;
+
+    LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
+    uint64_t SizeOp1 = SrcTy.getSizeInBits();
+    if (SizeOp0 % SizeOp1 != 0)
+      return UnableToLegalize;
+
+    // Generate a merge where the bottom bits are taken from the source, and
+    // zero everything else.
+    Register ZeroReg = MIRBuilder.buildConstant(SrcTy, 0).getReg(0);
+    unsigned NumParts = SizeOp0 / SizeOp1;
+    SmallVector<Register, 4> Srcs = {MI.getOperand(1).getReg()};
+    for (unsigned Part = 1; Part < NumParts; ++Part)
+      Srcs.push_back(ZeroReg);
+    MIRBuilder.buildMerge(MI.getOperand(0).getReg(), Srcs);
+    MI.eraseFromParent();
+    return Legalized;
+  }
+  case TargetOpcode::G_TRUNC: {
+    if (TypeIdx != 1)
+      return UnableToLegalize;
+
+    uint64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
+    if (NarrowTy.getSizeInBits() * 2 != SizeOp1) {
+      LLVM_DEBUG(dbgs() << "Can't narrow trunc to type " << NarrowTy << "\n");
+      return UnableToLegalize;
+    }
+
+    auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1).getReg());
+    MIRBuilder.buildCopy(MI.getOperand(0).getReg(), Unmerge.getReg(0));
+    MI.eraseFromParent();
+    return Legalized;
+  }
+
   case TargetOpcode::G_ADD: {
     // FIXME: add support for when SizeOp0 isn't an exact multiple of
     // NarrowSize.
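The narrowed G_SEXT above keeps the low half unchanged and fills the high half with copies of the sign bit. The same arithmetic with concrete types, as a runnable sketch (names illustrative):

#include <cstdint>
#include <cstdio>

// s64 = sext(s32) narrowed to s32 parts: low = src, high = src >>s 31,
// then a merge of the two halves.
int64_t sext_i32_to_i64(int32_t Src) {
  uint32_t Lo = (uint32_t)Src;
  uint32_t Hi = (uint32_t)(Src >> 31); // arithmetic shift by width - 1
  return (int64_t)(((uint64_t)Hi << 32) | Lo);
}

int main() {
  printf("%lld\n", (long long)sext_i32_to_i64(-5)); // -5
  printf("%lld\n", (long long)sext_i32_to_i64(7));  // 7
}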
@@ -530,15 +728,17 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
     extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
     extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);
 
-    Register CarryIn = MRI.createGenericVirtualRegister(LLT::scalar(1));
-    MIRBuilder.buildConstant(CarryIn, 0);
-
+    Register CarryIn;
     for (int i = 0; i < NumParts; ++i) {
       Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
       Register CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
 
-      MIRBuilder.buildUAdde(DstReg, CarryOut, Src1Regs[i],
-                            Src2Regs[i], CarryIn);
+      if (i == 0)
+        MIRBuilder.buildUAddo(DstReg, CarryOut, Src1Regs[i], Src2Regs[i]);
+      else {
+        MIRBuilder.buildUAdde(DstReg, CarryOut, Src1Regs[i],
+                              Src2Regs[i], CarryIn);
+      }
 
       DstRegs.push_back(DstReg);
       CarryIn = CarryOut;
@@ -730,7 +930,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
       for (unsigned j = 1; j < MI.getNumOperands(); j += 2)
         MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1));
     }
-    MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
+    MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
     MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs);
     Observer.changedInstr(MI);
     MI.eraseFromParent();
@@ -763,6 +963,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
 
     CmpInst::Predicate Pred =
         static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
+    LLT ResTy = MRI.getType(MI.getOperand(0).getReg());
 
     if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE) {
       MachineInstrBuilder XorL = MIRBuilder.buildXor(NarrowTy, LHSL, RHSL);
@@ -771,18 +972,109 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
       MachineInstrBuilder Zero = MIRBuilder.buildConstant(NarrowTy, 0);
       MIRBuilder.buildICmp(Pred, MI.getOperand(0).getReg(), Or, Zero);
     } else {
-      const LLT s1 = LLT::scalar(1);
-      MachineInstrBuilder CmpH = MIRBuilder.buildICmp(Pred, s1, LHSH, RHSH);
+      MachineInstrBuilder CmpH = MIRBuilder.buildICmp(Pred, ResTy, LHSH, RHSH);
       MachineInstrBuilder CmpHEQ =
-          MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, s1, LHSH, RHSH);
+          MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy, LHSH, RHSH);
       MachineInstrBuilder CmpLU = MIRBuilder.buildICmp(
-          ICmpInst::getUnsignedPredicate(Pred), s1, LHSL, RHSL);
+          ICmpInst::getUnsignedPredicate(Pred), ResTy, LHSL, RHSL);
       MIRBuilder.buildSelect(MI.getOperand(0).getReg(), CmpHEQ, CmpLU, CmpH);
     }
     Observer.changedInstr(MI);
     MI.eraseFromParent();
     return Legalized;
   }
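The G_ADD change replaces the dummy zero carry-in with a plain G_UADDO for the first part, then chains G_UADDE for the rest. The same structure in portable C++ (a sketch; __builtin_add_overflow is a GCC/Clang builtin standing in for UADDO/UADDE):

#include <cstdint>
#include <cstdio>

int main() {
  // A 128-bit add done as two 64-bit parts.
  uint64_t ALo = ~0ULL, AHi = 1, BLo = 1, BHi = 2;

  // First part: unsigned add with carry out, no carry in (G_UADDO).
  uint64_t Lo;
  bool Carry = __builtin_add_overflow(ALo, BLo, &Lo);

  // Second part: add with carry in and carry out (G_UADDE).
  uint64_t Hi;
  bool C1 = __builtin_add_overflow(AHi, BHi, &Hi);
  bool C2 = __builtin_add_overflow(Hi, (uint64_t)Carry, &Hi);
  bool CarryOut = C1 || C2;

  printf("hi=%llu lo=%llu carry_out=%d\n", (unsigned long long)Hi,
         (unsigned long long)Lo, CarryOut); // hi=4 lo=0 carry_out=0
}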
+  case TargetOpcode::G_SEXT_INREG: {
+    if (TypeIdx != 0)
+      return UnableToLegalize;
+
+    if (!MI.getOperand(2).isImm())
+      return UnableToLegalize;
+    int64_t SizeInBits = MI.getOperand(2).getImm();
+
+    // So long as the new type has more bits than the bits we're extending we
+    // don't need to break it apart.
+    if (NarrowTy.getScalarSizeInBits() >= SizeInBits) {
+      Observer.changingInstr(MI);
+      // We don't lose any non-extension bits by truncating the src and
+      // sign-extending the dst.
+      MachineOperand &MO1 = MI.getOperand(1);
+      auto TruncMIB = MIRBuilder.buildTrunc(NarrowTy, MO1.getReg());
+      MO1.setReg(TruncMIB->getOperand(0).getReg());
+
+      MachineOperand &MO2 = MI.getOperand(0);
+      Register DstExt = MRI.createGenericVirtualRegister(NarrowTy);
+      MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
+      MIRBuilder.buildInstr(TargetOpcode::G_SEXT, {MO2.getReg()}, {DstExt});
+      MO2.setReg(DstExt);
+      Observer.changedInstr(MI);
+      return Legalized;
+    }
+
+    // Break it apart. Components below the extension point are unmodified. The
+    // component containing the extension point becomes a narrower SEXT_INREG.
+    // Components above it are ashr'd from the component containing the
+    // extension point.
+    if (SizeOp0 % NarrowSize != 0)
+      return UnableToLegalize;
+    int NumParts = SizeOp0 / NarrowSize;
+
+    // List the registers where the destination will be scattered.
+    SmallVector<Register, 2> DstRegs;
+    // List the registers where the source will be split.
+    SmallVector<Register, 2> SrcRegs;
+
+    // Create all the temporary registers.
+    for (int i = 0; i < NumParts; ++i) {
+      Register SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
+
+      SrcRegs.push_back(SrcReg);
+    }
+
+    // Explode the big arguments into smaller chunks.
+    MIRBuilder.buildUnmerge(SrcRegs, MI.getOperand(1).getReg());
+
+    Register AshrCstReg =
+        MIRBuilder.buildConstant(NarrowTy, NarrowTy.getScalarSizeInBits() - 1)
+            ->getOperand(0)
+            .getReg();
+    Register FullExtensionReg = 0;
+    Register PartialExtensionReg = 0;
+
+    // Do the operation on each small part.
+    for (int i = 0; i < NumParts; ++i) {
+      if ((i + 1) * NarrowTy.getScalarSizeInBits() < SizeInBits)
+        DstRegs.push_back(SrcRegs[i]);
+      else if (i * NarrowTy.getScalarSizeInBits() > SizeInBits) {
+        assert(PartialExtensionReg &&
+               "Expected to visit partial extension before full");
+        if (FullExtensionReg) {
+          DstRegs.push_back(FullExtensionReg);
+          continue;
+        }
+        DstRegs.push_back(MIRBuilder
+                              .buildInstr(TargetOpcode::G_ASHR, {NarrowTy},
+                                          {PartialExtensionReg, AshrCstReg})
+                              ->getOperand(0)
+                              .getReg());
+        FullExtensionReg = DstRegs.back();
+      } else {
+        DstRegs.push_back(
+            MIRBuilder
+                .buildInstr(
+                    TargetOpcode::G_SEXT_INREG, {NarrowTy},
+                    {SrcRegs[i], SizeInBits % NarrowTy.getScalarSizeInBits()})
+                ->getOperand(0)
+                .getReg());
+        PartialExtensionReg = DstRegs.back();
+      }
+    }
+
+    // Gather the destination registers into the final destination.
+    Register DstReg = MI.getOperand(0).getReg();
+    MIRBuilder.buildMerge(DstReg, DstRegs);
+    MI.eraseFromParent();
+    return Legalized;
+  }
   }
 }
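For reference, G_SEXT_INREG sign-extends the low SizeInBits bits of a value in place; the narrowing above keeps parts below the sign, shrinks the part holding it, and ashr's the parts above it. The underlying identity, sketched in C++ (arithmetic right shift of a negative signed value behaves as expected on mainstream compilers):

#include <cstdint>
#include <cstdio>

// sext_inreg(X, Bits): shift the wanted field to the top, then shift it back
// down arithmetically so the sign bit fills the upper bits.
int64_t sext_inreg(int64_t X, unsigned Bits) {
  unsigned Shift = 64 - Bits;
  return (int64_t)((uint64_t)X << Shift) >> Shift;
}

int main() {
  printf("%lld\n", (long long)sext_inreg(0xFF, 8)); // -1
  printf("%lld\n", (long long)sext_inreg(0x7F, 8)); // 127
}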
@@ -892,7 +1184,7 @@ LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
 
     auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
 
-    Register NextResult = I + 1 == NumOps && WideSize == DstSize ? DstReg :
+    Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
       MRI.createGenericVirtualRegister(WideTy);
 
     auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
@@ -903,6 +1195,8 @@ LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
 
   if (WideSize > DstSize)
     MIRBuilder.buildTrunc(DstReg, ResultReg);
+  else if (DstTy.isPointer())
+    MIRBuilder.buildIntToPtr(DstReg, ResultReg);
 
   MI.eraseFromParent();
   return Legalized;
@@ -1218,6 +1512,24 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
     Observer.changedInstr(MI);
     return Legalized;
   }
+  case TargetOpcode::G_BITREVERSE: {
+    Observer.changingInstr(MI);
+
+    Register DstReg = MI.getOperand(0).getReg();
+    LLT Ty = MRI.getType(DstReg);
+    unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
+
+    Register DstExt = MRI.createGenericVirtualRegister(WideTy);
+    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
+    MI.getOperand(0).setReg(DstExt);
+    MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
+
+    auto ShiftAmt = MIRBuilder.buildConstant(WideTy, DiffBits);
+    auto Shift = MIRBuilder.buildLShr(WideTy, DstExt, ShiftAmt);
+    MIRBuilder.buildTrunc(DstReg, Shift);
+    Observer.changedInstr(MI);
+    return Legalized;
+  }
   case TargetOpcode::G_ADD:
   case TargetOpcode::G_AND:
   case TargetOpcode::G_MUL:
@@ -1310,13 +1622,15 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
 
   case TargetOpcode::G_FPTOSI:
   case TargetOpcode::G_FPTOUI:
-    if (TypeIdx != 0)
-      return UnableToLegalize;
     Observer.changingInstr(MI);
-    widenScalarDst(MI, WideTy);
+
+    if (TypeIdx == 0)
+      widenScalarDst(MI, WideTy);
+    else
+      widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
+
     Observer.changedInstr(MI);
     return Legalized;
-
   case TargetOpcode::G_SITOFP:
     if (TypeIdx != 1)
      return UnableToLegalize;
@@ -1483,6 +1797,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
   case TargetOpcode::G_FMUL:
   case TargetOpcode::G_FSUB:
   case TargetOpcode::G_FMA:
+  case TargetOpcode::G_FMAD:
   case TargetOpcode::G_FNEG:
   case TargetOpcode::G_FABS:
   case TargetOpcode::G_FCANONICALIZE:
@@ -1553,6 +1868,15 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
     Observer.changedInstr(MI);
     return Legalized;
   }
+  case TargetOpcode::G_SEXT_INREG:
+    if (TypeIdx != 0)
+      return UnableToLegalize;
+
+    Observer.changingInstr(MI);
+    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
+    widenScalarDst(MI, WideTy, 0, TargetOpcode::G_TRUNC);
+    Observer.changedInstr(MI);
+    return Legalized;
   }
 }
 
@@ -1579,6 +1903,9 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
     MI.eraseFromParent();
     return Legalized;
   }
+  case TargetOpcode::G_SADDO:
+  case TargetOpcode::G_SSUBO:
+    return lowerSADDO_SSUBO(MI);
   case TargetOpcode::G_SMULO:
   case TargetOpcode::G_UMULO: {
     // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
@@ -1669,6 +1996,8 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
     MI.eraseFromParent();
     return Legalized;
   }
+  case TargetOpcode::G_FMAD:
+    return lowerFMad(MI);
   case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
     Register OldValRes = MI.getOperand(0).getReg();
     Register SuccessRes = MI.getOperand(1).getReg();
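The G_BITREVERSE widening works because reversing an N-bit value inside a wider register leaves the result in the top N bits; shifting right by the width difference and truncating recovers it. A self-contained check (my sketch):

#include <cstdint>
#include <cstdio>

// Reference 32-bit bit reverse, standing in for the widened G_BITREVERSE.
uint32_t bitreverse32(uint32_t X) {
  uint32_t R = 0;
  for (int I = 0; I < 32; ++I)
    R |= ((X >> I) & 1u) << (31 - I);
  return R;
}

int main() {
  // Reverse an 8-bit value via the 32-bit reverse: the result lands in the
  // top 8 bits, so shift right by DiffBits = 32 - 8 = 24 and truncate.
  uint8_t Narrow = 0x06;
  uint8_t ViaWide = (uint8_t)(bitreverse32(Narrow) >> 24);
  printf("%#x\n", ViaWide); // 0x60: 00000110 reversed within 8 bits
}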
@@ -1690,11 +2019,57 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
     LLT DstTy = MRI.getType(DstReg);
     auto &MMO = **MI.memoperands_begin();
 
-    if (DstTy.getSizeInBits() == MMO.getSize() /* in bytes */ * 8) {
-      // In the case of G_LOAD, this was a non-extending load already and we're
-      // about to lower to the same instruction.
-      if (MI.getOpcode() == TargetOpcode::G_LOAD)
+    if (DstTy.getSizeInBits() == MMO.getSizeInBits()) {
+      if (MI.getOpcode() == TargetOpcode::G_LOAD) {
+        // This load needs splitting into power of 2 sized loads.
+        if (DstTy.isVector())
           return UnableToLegalize;
+        if (isPowerOf2_32(DstTy.getSizeInBits()))
+          return UnableToLegalize; // Don't know what we're being asked to do.
+
+        // Our strategy here is to generate anyextending loads for the smaller
+        // types up to next power-2 result type, and then combine the two larger
+        // result values together, before truncating back down to the non-pow-2
+        // type.
+        // E.g. v1 = i24 load =>
+        // v2 = i32 load (2 byte)
+        // v3 = i32 load (1 byte)
+        // v4 = i32 shl v3, 16
+        // v5 = i32 or v4, v2
+        // v1 = i24 trunc v5
+        // By doing this we generate the correct truncate which should get
+        // combined away as an artifact with a matching extend.
+        uint64_t LargeSplitSize = PowerOf2Floor(DstTy.getSizeInBits());
+        uint64_t SmallSplitSize = DstTy.getSizeInBits() - LargeSplitSize;
+
+        MachineFunction &MF = MIRBuilder.getMF();
+        MachineMemOperand *LargeMMO =
+            MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
+        MachineMemOperand *SmallMMO = MF.getMachineMemOperand(
+            &MMO, LargeSplitSize / 8, SmallSplitSize / 8);
+
+        LLT PtrTy = MRI.getType(PtrReg);
+        unsigned AnyExtSize = NextPowerOf2(DstTy.getSizeInBits());
+        LLT AnyExtTy = LLT::scalar(AnyExtSize);
+        Register LargeLdReg = MRI.createGenericVirtualRegister(AnyExtTy);
+        Register SmallLdReg = MRI.createGenericVirtualRegister(AnyExtTy);
+        auto LargeLoad =
+            MIRBuilder.buildLoad(LargeLdReg, PtrReg, *LargeMMO);
+
+        auto OffsetCst =
+            MIRBuilder.buildConstant(LLT::scalar(64), LargeSplitSize / 8);
+        Register GEPReg = MRI.createGenericVirtualRegister(PtrTy);
+        auto SmallPtr = MIRBuilder.buildGEP(GEPReg, PtrReg, OffsetCst.getReg(0));
+        auto SmallLoad = MIRBuilder.buildLoad(SmallLdReg, SmallPtr.getReg(0),
+                                              *SmallMMO);
+
+        auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
+        auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);
+        auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
+        MIRBuilder.buildTrunc(DstReg, {Or.getReg(0)});
+        MI.eraseFromParent();
+        return Legalized;
+      }
       MIRBuilder.buildLoad(DstReg, PtrReg, MMO);
       MI.eraseFromParent();
       return Legalized;
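The i24 example from the comment, done with ordinary memory operations (a sketch assuming little-endian layout; names are illustrative):

#include <cstdint>
#include <cstdio>
#include <cstring>

// A 24-bit load split into a 16-bit load (PowerOf2Floor(24) = 16) plus an
// 8-bit load at offset 2, recombined with shl/or in a 32-bit register, then
// truncated back to 24 bits.
uint32_t load_i24(const uint8_t *P) {
  uint16_t Large;
  uint8_t Small;
  memcpy(&Large, P, 2);
  memcpy(&Small, P + 2, 1);
  uint32_t Wide = ((uint32_t)Small << 16) | Large; // shl + or in s32
  return Wide & 0xFFFFFF;                          // trunc to 24 bits
}

int main() {
  uint8_t Buf[3] = {0xAD, 0xDE, 0x12}; // 0x12DEAD, little-endian
  printf("%#x\n", load_i24(Buf));      // 0x12dead
}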
@@ -1723,6 +2098,51 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
 
     return UnableToLegalize;
   }
+  case TargetOpcode::G_STORE: {
+    // Lower a non-power of 2 store into multiple pow-2 stores.
+    // E.g. split an i24 store into an i16 store + i8 store.
+    // We do this by first extending the stored value to the next largest power
+    // of 2 type, and then using truncating stores to store the components.
+    // By doing this, likewise with G_LOAD, generate an extend that can be
+    // artifact-combined away instead of leaving behind extracts.
+    Register SrcReg = MI.getOperand(0).getReg();
+    Register PtrReg = MI.getOperand(1).getReg();
+    LLT SrcTy = MRI.getType(SrcReg);
+    MachineMemOperand &MMO = **MI.memoperands_begin();
+    if (SrcTy.getSizeInBits() != MMO.getSizeInBits())
+      return UnableToLegalize;
+    if (SrcTy.isVector())
+      return UnableToLegalize;
+    if (isPowerOf2_32(SrcTy.getSizeInBits()))
+      return UnableToLegalize; // Don't know what we're being asked to do.
+
+    // Extend to the next pow-2.
+    const LLT ExtendTy = LLT::scalar(NextPowerOf2(SrcTy.getSizeInBits()));
+    auto ExtVal = MIRBuilder.buildAnyExt(ExtendTy, SrcReg);
+
+    // Obtain the smaller value by shifting away the larger value.
+    uint64_t LargeSplitSize = PowerOf2Floor(SrcTy.getSizeInBits());
+    uint64_t SmallSplitSize = SrcTy.getSizeInBits() - LargeSplitSize;
+    auto ShiftAmt = MIRBuilder.buildConstant(ExtendTy, LargeSplitSize);
+    auto SmallVal = MIRBuilder.buildLShr(ExtendTy, ExtVal, ShiftAmt);
+
+    // Generate the GEP and truncating stores.
+    LLT PtrTy = MRI.getType(PtrReg);
+    auto OffsetCst =
+        MIRBuilder.buildConstant(LLT::scalar(64), LargeSplitSize / 8);
+    Register GEPReg = MRI.createGenericVirtualRegister(PtrTy);
+    auto SmallPtr = MIRBuilder.buildGEP(GEPReg, PtrReg, OffsetCst.getReg(0));
+
+    MachineFunction &MF = MIRBuilder.getMF();
+    MachineMemOperand *LargeMMO =
+        MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
+    MachineMemOperand *SmallMMO =
+        MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
+    MIRBuilder.buildStore(ExtVal.getReg(0), PtrReg, *LargeMMO);
+    MIRBuilder.buildStore(SmallVal.getReg(0), SmallPtr.getReg(0), *SmallMMO);
+    MI.eraseFromParent();
+    return Legalized;
+  }
   case TargetOpcode::G_CTLZ_ZERO_UNDEF:
   case TargetOpcode::G_CTTZ_ZERO_UNDEF:
   case TargetOpcode::G_CTLZ:
@@ -1797,6 +2217,8 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
     return lowerUITOFP(MI, TypeIdx, Ty);
   case G_SITOFP:
     return lowerSITOFP(MI, TypeIdx, Ty);
+  case G_FPTOUI:
+    return lowerFPTOUI(MI, TypeIdx, Ty);
   case G_SMIN:
   case G_SMAX:
   case G_UMIN:
@@ -1807,6 +2229,31 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
   case G_FMINNUM:
   case G_FMAXNUM:
     return lowerFMinNumMaxNum(MI);
+  case G_UNMERGE_VALUES:
+    return lowerUnmergeValues(MI);
+  case TargetOpcode::G_SEXT_INREG: {
+    assert(MI.getOperand(2).isImm() && "Expected immediate");
+    int64_t SizeInBits = MI.getOperand(2).getImm();
+
+    Register DstReg = MI.getOperand(0).getReg();
+    Register SrcReg = MI.getOperand(1).getReg();
+    LLT DstTy = MRI.getType(DstReg);
+    Register TmpRes = MRI.createGenericVirtualRegister(DstTy);
+
+    auto MIBSz = MIRBuilder.buildConstant(
+        DstTy, DstTy.getScalarSizeInBits() - SizeInBits);
+    MIRBuilder.buildInstr(TargetOpcode::G_SHL, {TmpRes},
+                          {SrcReg, MIBSz->getOperand(0).getReg()});
+    MIRBuilder.buildInstr(TargetOpcode::G_ASHR, {DstReg},
+                          {TmpRes, MIBSz->getOperand(0).getReg()});
+    MI.eraseFromParent();
+    return Legalized;
+  }
+  case G_SHUFFLE_VECTOR:
+    return lowerShuffleVector(MI);
+  case G_DYN_STACKALLOC:
+    return lowerDynStackAlloc(MI);
+  case G_EXTRACT:
+    return lowerExtract(MI);
+  case G_INSERT:
+    return lowerInsert(MI);
   }
 }
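And the store direction of the same split, as a little-endian sketch: extend, store the low power-of-2 part, shift, and store the remainder at the byte offset the GEP computes above:

#include <cstdint>
#include <cstdio>
#include <cstring>

// An i24 store split into an i16 store plus an i8 store at offset 2.
void store_i24(uint8_t *P, uint32_t V) {
  uint16_t Large = (uint16_t)V;       // truncating 16-bit store
  uint8_t Small = (uint8_t)(V >> 16); // lshr by LargeSplitSize, then store
  memcpy(P, &Large, 2);
  memcpy(P + 2, &Small, 1);
}

int main() {
  uint8_t Buf[3];
  store_i24(Buf, 0x12DEAD);
  printf("%02x %02x %02x\n", Buf[0], Buf[1], Buf[2]); // ad de 12
}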
@@ -2283,6 +2730,105 @@ LegalizerHelper::fewerElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
 }
 
 LegalizerHelper::LegalizeResult
+LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI,
+                                                  unsigned TypeIdx,
+                                                  LLT NarrowTy) {
+  if (TypeIdx != 1)
+    return UnableToLegalize;
+
+  const int NumDst = MI.getNumOperands() - 1;
+  const Register SrcReg = MI.getOperand(NumDst).getReg();
+  LLT SrcTy = MRI.getType(SrcReg);
+
+  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+
+  // TODO: Create sequence of extracts.
+  if (DstTy == NarrowTy)
+    return UnableToLegalize;
+
+  LLT GCDTy = getGCDType(SrcTy, NarrowTy);
+  if (DstTy == GCDTy) {
+    // This would just be a copy of the same unmerge.
+    // TODO: Create extracts, pad with undef and create intermediate merges.
+    return UnableToLegalize;
+  }
+
+  auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
+  const int NumUnmerge = Unmerge->getNumOperands() - 1;
+  const int PartsPerUnmerge = NumDst / NumUnmerge;
+
+  for (int I = 0; I != NumUnmerge; ++I) {
+    auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
+
+    for (int J = 0; J != PartsPerUnmerge; ++J)
+      MIB.addDef(MI.getOperand(I * PartsPerUnmerge + J).getReg());
+    MIB.addUse(Unmerge.getReg(I));
+  }
+
+  MI.eraseFromParent();
+  return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::fewerElementsVectorBuildVector(MachineInstr &MI,
+                                                unsigned TypeIdx,
+                                                LLT NarrowTy) {
+  assert(TypeIdx == 0 && "not a vector type index");
+  Register DstReg = MI.getOperand(0).getReg();
+  LLT DstTy = MRI.getType(DstReg);
+  LLT SrcTy = DstTy.getElementType();
+
+  int DstNumElts = DstTy.getNumElements();
+  int NarrowNumElts = NarrowTy.getNumElements();
+  int NumConcat = (DstNumElts + NarrowNumElts - 1) / NarrowNumElts;
+  LLT WidenedDstTy = LLT::vector(NarrowNumElts * NumConcat, SrcTy);
+
+  SmallVector<Register, 8> ConcatOps;
+  SmallVector<Register, 8> SubBuildVector;
+
+  Register UndefReg;
+  if (WidenedDstTy != DstTy)
+    UndefReg = MIRBuilder.buildUndef(SrcTy).getReg(0);
+
+  // Create a G_CONCAT_VECTORS of NarrowTy pieces, padding with undef as
+  // necessary.
+  //
+  // %3:_(<3 x s16>) = G_BUILD_VECTOR %0, %1, %2
+  //   -> <2 x s16>
+  //
+  // %4:_(s16) = G_IMPLICIT_DEF
+  // %5:_(<2 x s16>) = G_BUILD_VECTOR %0, %1
+  // %6:_(<2 x s16>) = G_BUILD_VECTOR %2, %4
+  // %7:_(<4 x s16>) = G_CONCAT_VECTORS %5, %6
+  // %3:_(<3 x s16>) = G_EXTRACT %7, 0
+  for (int I = 0; I != NumConcat; ++I) {
+    for (int J = 0; J != NarrowNumElts; ++J) {
+      int SrcIdx = NarrowNumElts * I + J;
+
+      if (SrcIdx < DstNumElts) {
+        Register SrcReg = MI.getOperand(SrcIdx + 1).getReg();
+        SubBuildVector.push_back(SrcReg);
+      } else
+        SubBuildVector.push_back(UndefReg);
+    }
+
+    auto BuildVec = MIRBuilder.buildBuildVector(NarrowTy, SubBuildVector);
+    ConcatOps.push_back(BuildVec.getReg(0));
+    SubBuildVector.clear();
+  }
+
+  if (DstTy == WidenedDstTy)
+    MIRBuilder.buildConcatVectors(DstReg, ConcatOps);
+  else {
+    auto Concat = MIRBuilder.buildConcatVectors(WidenedDstTy, ConcatOps);
+    MIRBuilder.buildExtract(DstReg, Concat, 0);
+  }
+
+  MI.eraseFromParent();
+  return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
 LegalizerHelper::reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx,
                                       LLT NarrowTy) {
   // FIXME: Don't know how to handle secondary types yet.
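The unmerge path above leans on getGCDType: with matching element types, the common piece has gcd(N1, N2) elements, and scalars fall back to the gcd of the bit widths. A one-line check using std::gcd as a stand-in for LLVM's greatestCommonDivisor (my sketch, C++17):

#include <cstdio>
#include <numeric>

int main() {
  // <6 x s16> source unmerged toward <4 x s16> pieces: common type <2 x s16>.
  printf("gcd(6, 4) = %d elements\n", std::gcd(6, 4));
  // Scalar case uses bit widths: s64 vs s48 -> s16.
  printf("gcd(64, 48) = s%d\n", std::gcd(64, 48));
}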
@@ -2395,6 +2941,7 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
   case G_FDIV:
   case G_FREM:
   case G_FMA:
+  case G_FMAD:
   case G_FPOW:
   case G_FEXP:
   case G_FEXP2:
@@ -2411,6 +2958,7 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
   case G_FSIN:
   case G_FSQRT:
   case G_BSWAP:
+  case G_BITREVERSE:
   case G_SDIV:
   case G_SMIN:
   case G_SMAX:
@@ -2453,6 +3001,10 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
     return fewerElementsVectorSelect(MI, TypeIdx, NarrowTy);
   case G_PHI:
     return fewerElementsVectorPhi(MI, TypeIdx, NarrowTy);
+  case G_UNMERGE_VALUES:
+    return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy);
+  case G_BUILD_VECTOR:
+    return fewerElementsVectorBuildVector(MI, TypeIdx, NarrowTy);
   case G_LOAD:
   case G_STORE:
     return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy);
@@ -2604,11 +3156,11 @@ LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx,
   switch (MI.getOpcode()) {
   case TargetOpcode::G_SHL: {
     // Short: ShAmt < NewBitSize
-    auto LoS = MIRBuilder.buildShl(HalfTy, InH, Amt);
+    auto LoS = MIRBuilder.buildShl(HalfTy, InL, Amt);
 
-    auto OrLHS = MIRBuilder.buildShl(HalfTy, InH, Amt);
-    auto OrRHS = MIRBuilder.buildLShr(HalfTy, InL, AmtLack);
-    auto HiS = MIRBuilder.buildOr(HalfTy, OrLHS, OrRHS);
+    auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, AmtLack);
+    auto HiOr = MIRBuilder.buildShl(HalfTy, InH, Amt);
+    auto HiS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
 
     // Long: ShAmt >= NewBitSize
     auto LoL = MIRBuilder.buildConstant(HalfTy, 0);         // Lo part is zero.
@@ -2622,41 +3174,25 @@ LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx,
     ResultRegs[1] = Hi.getReg(0);
     break;
   }
-  case TargetOpcode::G_LSHR: {
-    // Short: ShAmt < NewBitSize
-    auto HiS = MIRBuilder.buildLShr(HalfTy, InH, Amt);
-
-    auto OrLHS = MIRBuilder.buildLShr(HalfTy, InL, Amt);
-    auto OrRHS = MIRBuilder.buildShl(HalfTy, InH, AmtLack);
-    auto LoS = MIRBuilder.buildOr(HalfTy, OrLHS, OrRHS);
-
-    // Long: ShAmt >= NewBitSize
-    auto HiL = MIRBuilder.buildConstant(HalfTy, 0);          // Hi part is zero.
-    auto LoL = MIRBuilder.buildLShr(HalfTy, InH, AmtExcess); // Lo from Hi part.
-
-    auto Lo = MIRBuilder.buildSelect(
-        HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));
-    auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);
-
-    ResultRegs[0] = Lo.getReg(0);
-    ResultRegs[1] = Hi.getReg(0);
-    break;
-  }
+  case TargetOpcode::G_LSHR:
   case TargetOpcode::G_ASHR: {
     // Short: ShAmt < NewBitSize
-    auto HiS = MIRBuilder.buildAShr(HalfTy, InH, Amt);
+    auto HiS = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy}, {InH, Amt});
 
-    auto OrLHS = MIRBuilder.buildLShr(HalfTy, InL, Amt);
-    auto OrRHS = MIRBuilder.buildLShr(HalfTy, InH, AmtLack);
-    auto LoS = MIRBuilder.buildOr(HalfTy, OrLHS, OrRHS);
+    auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, Amt);
+    auto HiOr = MIRBuilder.buildShl(HalfTy, InH, AmtLack);
+    auto LoS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
 
     // Long: ShAmt >= NewBitSize
-
-    // Sign of Hi part.
-    auto HiL = MIRBuilder.buildAShr(
-        HalfTy, InH, MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1));
-
-    auto LoL = MIRBuilder.buildAShr(HalfTy, InH, AmtExcess); // Lo from Hi part.
+    MachineInstrBuilder HiL;
+    if (MI.getOpcode() == TargetOpcode::G_LSHR) {
+      HiL = MIRBuilder.buildConstant(HalfTy, 0);            // Hi part is zero.
+    } else {
+      auto ShiftAmt = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1);
+      HiL = MIRBuilder.buildAShr(HalfTy, InH, ShiftAmt);    // Sign of Hi part.
+    }
+    auto LoL = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy},
+                                     {InH, AmtExcess});     // Lo from Hi part.
 
     auto Lo = MIRBuilder.buildSelect(
         HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));
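The unified G_LSHR/G_ASHR case (and the fixed G_SHL, which previously shifted InH where it needed InL) implement the classic two-register shift. The G_SHL variant in plain C++, selecting between the short and long forms exactly as the generated selects do (a sketch with concrete 64-bit halves):

#include <cstdint>
#include <cstdio>

// 128-bit shift-left split into 64-bit halves.
void shl128(uint64_t Lo, uint64_t Hi, unsigned Amt, uint64_t &OutLo,
            uint64_t &OutHi) {
  if (Amt == 0) {          // IsZero: pass the input through
    OutLo = Lo; OutHi = Hi;
  } else if (Amt < 64) {   // short form: LoS / HiS
    OutLo = Lo << Amt;
    OutHi = (Hi << Amt) | (Lo >> (64 - Amt));
  } else {                 // long form: LoL / HiL
    OutLo = 0;
    OutHi = Lo << (Amt - 64);
  }
}

int main() {
  uint64_t Lo, Hi;
  shl128(0x8000000000000001ULL, 0, 68, Lo, Hi);
  printf("hi=%#llx lo=%#llx\n", (unsigned long long)Hi,
         (unsigned long long)Lo); // hi=0x10 lo=0
}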
@@ -2701,12 +3237,22 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
   MIRBuilder.setInstr(MI);
   unsigned Opc = MI.getOpcode();
   switch (Opc) {
-  case TargetOpcode::G_IMPLICIT_DEF: {
+  case TargetOpcode::G_IMPLICIT_DEF:
+  case TargetOpcode::G_LOAD: {
+    if (TypeIdx != 0)
+      return UnableToLegalize;
     Observer.changingInstr(MI);
     moreElementsVectorDst(MI, MoreTy, 0);
     Observer.changedInstr(MI);
     return Legalized;
   }
+  case TargetOpcode::G_STORE:
+    if (TypeIdx != 0)
+      return UnableToLegalize;
+    Observer.changingInstr(MI);
+    moreElementsVectorSrc(MI, MoreTy, 0);
+    Observer.changedInstr(MI);
+    return Legalized;
   case TargetOpcode::G_AND:
   case TargetOpcode::G_OR:
   case TargetOpcode::G_XOR:
@@ -2748,6 +3294,26 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
     moreElementsVectorDst(MI, MoreTy, 0);
     Observer.changedInstr(MI);
     return Legalized;
+  case TargetOpcode::G_UNMERGE_VALUES: {
+    if (TypeIdx != 1)
+      return UnableToLegalize;
+
+    LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+    int NumDst = MI.getNumOperands() - 1;
+    moreElementsVectorSrc(MI, MoreTy, NumDst);
+
+    auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
+    for (int I = 0; I != NumDst; ++I)
+      MIB.addDef(MI.getOperand(I).getReg());
+
+    int NewNumDst = MoreTy.getSizeInBits() / DstTy.getSizeInBits();
+    for (int I = NumDst; I != NewNumDst; ++I)
+      MIB.addDef(MRI.createGenericVirtualRegister(DstTy));
+
+    MIB.addUse(MI.getOperand(NumDst).getReg());
+    MI.eraseFromParent();
+    return Legalized;
+  }
   case TargetOpcode::G_PHI:
     return moreElementsVectorPhi(MI, TypeIdx, MoreTy);
   default:
@@ -3310,6 +3876,48 @@ LegalizerHelper::lowerSITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
   return UnableToLegalize;
 }
 
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerFPTOUI(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
+  Register Dst = MI.getOperand(0).getReg();
+  Register Src = MI.getOperand(1).getReg();
+  LLT DstTy = MRI.getType(Dst);
+  LLT SrcTy = MRI.getType(Src);
+  const LLT S64 = LLT::scalar(64);
+  const LLT S32 = LLT::scalar(32);
+
+  if (SrcTy != S64 && SrcTy != S32)
+    return UnableToLegalize;
+  if (DstTy != S32 && DstTy != S64)
+    return UnableToLegalize;
+
+  // FPTOSI gives same result as FPTOUI for positive signed integers.
+  // FPTOUI needs to deal with fp values that convert to unsigned integers
+  // greater or equal to 2^31 for float or 2^63 for double. For brevity 2^Exp.
+
+  APInt TwoPExpInt = APInt::getSignMask(DstTy.getSizeInBits());
+  APFloat TwoPExpFP(SrcTy.getSizeInBits() == 32 ? APFloat::IEEEsingle()
+                                                : APFloat::IEEEdouble(),
+                    APInt::getNullValue(SrcTy.getSizeInBits()));
+  TwoPExpFP.convertFromAPInt(TwoPExpInt, false, APFloat::rmNearestTiesToEven);
+
+  MachineInstrBuilder FPTOSI = MIRBuilder.buildFPTOSI(DstTy, Src);
+
+  MachineInstrBuilder Threshold = MIRBuilder.buildFConstant(SrcTy, TwoPExpFP);
+  // For fp Value greater or equal to Threshold(2^Exp), we use FPTOSI on
+  // (Value - 2^Exp) and add 2^Exp by setting highest bit in result to 1.
+  MachineInstrBuilder FSub = MIRBuilder.buildFSub(SrcTy, Src, Threshold);
+  MachineInstrBuilder ResLowBits = MIRBuilder.buildFPTOSI(DstTy, FSub);
+  MachineInstrBuilder ResHighBit = MIRBuilder.buildConstant(DstTy, TwoPExpInt);
+  MachineInstrBuilder Res = MIRBuilder.buildXor(DstTy, ResLowBits, ResHighBit);
+
+  MachineInstrBuilder FCMP =
+      MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, DstTy, Src, Threshold);
+  MIRBuilder.buildSelect(Dst, FCMP, FPTOSI, Res);
+
+  MI.eraseFromParent();
+  return Legalized;
+}
+
 static CmpInst::Predicate minMaxToCompare(unsigned Opc) {
   switch (Opc) {
   case TargetOpcode::G_SMIN:
@@ -3419,3 +4027,251 @@ LegalizerHelper::lowerFMinNumMaxNum(MachineInstr &MI) {
   MI.eraseFromParent();
   return Legalized;
 }
+
+LegalizerHelper::LegalizeResult LegalizerHelper::lowerFMad(MachineInstr &MI) {
+  // Expand G_FMAD a, b, c -> G_FADD (G_FMUL a, b), c
+  Register DstReg = MI.getOperand(0).getReg();
+  LLT Ty = MRI.getType(DstReg);
+  unsigned Flags = MI.getFlags();
+
+  auto Mul = MIRBuilder.buildFMul(Ty, MI.getOperand(1), MI.getOperand(2),
+                                  Flags);
+  MIRBuilder.buildFAdd(DstReg, Mul, MI.getOperand(3), Flags);
+  MI.eraseFromParent();
+  return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerUnmergeValues(MachineInstr &MI) {
+  const unsigned NumDst = MI.getNumOperands() - 1;
+  const Register SrcReg = MI.getOperand(NumDst).getReg();
+  LLT SrcTy = MRI.getType(SrcReg);
+
+  Register Dst0Reg = MI.getOperand(0).getReg();
+  LLT DstTy = MRI.getType(Dst0Reg);
+
+  // Expand scalarizing unmerge as bitcast to integer and shift.
+  if (!DstTy.isVector() && SrcTy.isVector() &&
+      SrcTy.getElementType() == DstTy) {
+    LLT IntTy = LLT::scalar(SrcTy.getSizeInBits());
+    Register Cast = MIRBuilder.buildBitcast(IntTy, SrcReg).getReg(0);
+
+    MIRBuilder.buildTrunc(Dst0Reg, Cast);
+
+    const unsigned DstSize = DstTy.getSizeInBits();
+    unsigned Offset = DstSize;
+    for (unsigned I = 1; I != NumDst; ++I, Offset += DstSize) {
+      auto ShiftAmt = MIRBuilder.buildConstant(IntTy, Offset);
+      auto Shift = MIRBuilder.buildLShr(IntTy, Cast, ShiftAmt);
+      MIRBuilder.buildTrunc(MI.getOperand(I), Shift);
+    }
+
+    MI.eraseFromParent();
+    return Legalized;
+  }
+
+  return UnableToLegalize;
+}
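A concrete version of the lowerFPTOUI recipe for f64 -> u64 (my sketch; the generated MIR computes both candidate results and selects between them, but C++ must guard the conversion that would be out of range):

#include <cstdint>
#include <cstdio>

uint64_t fptoui_f64(double Src) {
  const double Threshold = 9223372036854775808.0; // 2^63
  if (Src < Threshold)
    return (uint64_t)(int64_t)Src;  // FPTOSI is already correct here
  // For Src >= 2^63: convert (Src - 2^63), then set the 2^63 bit via XOR.
  int64_t Low = (int64_t)(Src - Threshold);
  return (uint64_t)Low ^ (1ULL << 63);
}

int main() {
  printf("%llu\n", (unsigned long long)fptoui_f64(42.0));
  printf("%llu\n", (unsigned long long)fptoui_f64(18446744073709549568.0));
}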
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerShuffleVector(MachineInstr &MI) {
+  Register DstReg = MI.getOperand(0).getReg();
+  Register Src0Reg = MI.getOperand(1).getReg();
+  Register Src1Reg = MI.getOperand(2).getReg();
+  LLT Src0Ty = MRI.getType(Src0Reg);
+  LLT DstTy = MRI.getType(DstReg);
+  LLT IdxTy = LLT::scalar(32);
+
+  const Constant *ShufMask = MI.getOperand(3).getShuffleMask();
+
+  SmallVector<int, 32> Mask;
+  ShuffleVectorInst::getShuffleMask(ShufMask, Mask);
+
+  if (DstTy.isScalar()) {
+    if (Src0Ty.isVector())
+      return UnableToLegalize;
+
+    // This is just a SELECT.
+    assert(Mask.size() == 1 && "Expected a single mask element");
+    Register Val;
+    if (Mask[0] < 0 || Mask[0] > 1)
+      Val = MIRBuilder.buildUndef(DstTy).getReg(0);
+    else
+      Val = Mask[0] == 0 ? Src0Reg : Src1Reg;
+    MIRBuilder.buildCopy(DstReg, Val);
+    MI.eraseFromParent();
+    return Legalized;
+  }
+
+  Register Undef;
+  SmallVector<Register, 32> BuildVec;
+  LLT EltTy = DstTy.getElementType();
+
+  for (int Idx : Mask) {
+    if (Idx < 0) {
+      if (!Undef.isValid())
+        Undef = MIRBuilder.buildUndef(EltTy).getReg(0);
+      BuildVec.push_back(Undef);
+      continue;
+    }
+
+    if (Src0Ty.isScalar()) {
+      BuildVec.push_back(Idx == 0 ? Src0Reg : Src1Reg);
+    } else {
+      int NumElts = Src0Ty.getNumElements();
+      Register SrcVec = Idx < NumElts ? Src0Reg : Src1Reg;
+      int ExtractIdx = Idx < NumElts ? Idx : Idx - NumElts;
+      auto IdxK = MIRBuilder.buildConstant(IdxTy, ExtractIdx);
+      auto Extract = MIRBuilder.buildExtractVectorElement(EltTy, SrcVec, IdxK);
+      BuildVec.push_back(Extract.getReg(0));
+    }
+  }
+
+  MIRBuilder.buildBuildVector(DstReg, BuildVec);
+  MI.eraseFromParent();
+  return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) {
+  Register Dst = MI.getOperand(0).getReg();
+  Register AllocSize = MI.getOperand(1).getReg();
+  unsigned Align = MI.getOperand(2).getImm();
+
+  const auto &MF = *MI.getMF();
+  const auto &TLI = *MF.getSubtarget().getTargetLowering();
+
+  LLT PtrTy = MRI.getType(Dst);
+  LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
+
+  Register SPReg = TLI.getStackPointerRegisterToSaveRestore();
+  auto SPTmp = MIRBuilder.buildCopy(PtrTy, SPReg);
+  SPTmp = MIRBuilder.buildCast(IntPtrTy, SPTmp);
+
+  // Subtract the final alloc from the SP. We use G_PTRTOINT here so we don't
+  // have to generate an extra instruction to negate the alloc and then use
+  // G_GEP to add the negative offset.
+  auto Alloc = MIRBuilder.buildSub(IntPtrTy, SPTmp, AllocSize);
+  if (Align) {
+    APInt AlignMask(IntPtrTy.getSizeInBits(), Align, true);
+    AlignMask.negate();
+    auto AlignCst = MIRBuilder.buildConstant(IntPtrTy, AlignMask);
+    Alloc = MIRBuilder.buildAnd(IntPtrTy, Alloc, AlignCst);
+  }
+
+  SPTmp = MIRBuilder.buildCast(PtrTy, Alloc);
+  MIRBuilder.buildCopy(SPReg, SPTmp);
+  MIRBuilder.buildCopy(Dst, SPTmp);
+
+  MI.eraseFromParent();
+  return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerExtract(MachineInstr &MI) {
+  Register Dst = MI.getOperand(0).getReg();
+  Register Src = MI.getOperand(1).getReg();
+  unsigned Offset = MI.getOperand(2).getImm();
+
+  LLT DstTy = MRI.getType(Dst);
+  LLT SrcTy = MRI.getType(Src);
+
+  if (DstTy.isScalar() &&
+      (SrcTy.isScalar() ||
+       (SrcTy.isVector() && DstTy == SrcTy.getElementType()))) {
+    LLT SrcIntTy = SrcTy;
+    if (!SrcTy.isScalar()) {
+      SrcIntTy = LLT::scalar(SrcTy.getSizeInBits());
+      Src = MIRBuilder.buildBitcast(SrcIntTy, Src).getReg(0);
+    }
+
+    if (Offset == 0)
+      MIRBuilder.buildTrunc(Dst, Src);
+    else {
+      auto ShiftAmt = MIRBuilder.buildConstant(SrcIntTy, Offset);
+      auto Shr = MIRBuilder.buildLShr(SrcIntTy, Src, ShiftAmt);
+      MIRBuilder.buildTrunc(Dst, Shr);
+    }
+
+    MI.eraseFromParent();
+    return Legalized;
+  }
+
+  return UnableToLegalize;
+}
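The pointer arithmetic in lowerDynStackAlloc, assuming the common downward-growing stack: subtract the size, then align down by masking with the negated alignment (a sketch with made-up numbers):

#include <cstdint>
#include <cstdio>

uintptr_t alloc_sp(uintptr_t SP, uintptr_t Size, uintptr_t Align) {
  uintptr_t P = SP - Size;
  if (Align)
    P &= ~(Align - 1); // same effect as AND with the negated align mask
  return P;
}

int main() {
  printf("%#zx\n", (size_t)alloc_sp(0x7fff1000, 24, 16)); // 0x7fff0fe0
}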
+
+LegalizerHelper::LegalizeResult LegalizerHelper::lowerInsert(MachineInstr &MI) {
+  Register Dst = MI.getOperand(0).getReg();
+  Register Src = MI.getOperand(1).getReg();
+  Register InsertSrc = MI.getOperand(2).getReg();
+  uint64_t Offset = MI.getOperand(3).getImm();
+
+  LLT DstTy = MRI.getType(Src);
+  LLT InsertTy = MRI.getType(InsertSrc);
+
+  if (InsertTy.isScalar() &&
+      (DstTy.isScalar() ||
+       (DstTy.isVector() && DstTy.getElementType() == InsertTy))) {
+    LLT IntDstTy = DstTy;
+    if (!DstTy.isScalar()) {
+      IntDstTy = LLT::scalar(DstTy.getSizeInBits());
+      Src = MIRBuilder.buildBitcast(IntDstTy, Src).getReg(0);
+    }
+
+    Register ExtInsSrc = MIRBuilder.buildZExt(IntDstTy, InsertSrc).getReg(0);
+    if (Offset != 0) {
+      auto ShiftAmt = MIRBuilder.buildConstant(IntDstTy, Offset);
+      ExtInsSrc = MIRBuilder.buildShl(IntDstTy, ExtInsSrc, ShiftAmt).getReg(0);
+    }
+
+    APInt MaskVal = ~APInt::getBitsSet(DstTy.getSizeInBits(), Offset,
+                                       InsertTy.getSizeInBits());
+
+    auto Mask = MIRBuilder.buildConstant(IntDstTy, MaskVal);
+    auto MaskedSrc = MIRBuilder.buildAnd(IntDstTy, Src, Mask);
+    auto Or = MIRBuilder.buildOr(IntDstTy, MaskedSrc, ExtInsSrc);
+
+    MIRBuilder.buildBitcast(Dst, Or);
+    MI.eraseFromParent();
+    return Legalized;
+  }
+
+  return UnableToLegalize;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerSADDO_SSUBO(MachineInstr &MI) {
+  Register Dst0 = MI.getOperand(0).getReg();
+  Register Dst1 = MI.getOperand(1).getReg();
+  Register LHS = MI.getOperand(2).getReg();
+  Register RHS = MI.getOperand(3).getReg();
+  const bool IsAdd = MI.getOpcode() == TargetOpcode::G_SADDO;
+
+  LLT Ty = MRI.getType(Dst0);
+  LLT BoolTy = MRI.getType(Dst1);
+
+  if (IsAdd)
+    MIRBuilder.buildAdd(Dst0, LHS, RHS);
+  else
+    MIRBuilder.buildSub(Dst0, LHS, RHS);
+
+  // TODO: If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
+
+  auto Zero = MIRBuilder.buildConstant(Ty, 0);
+
+  // For an addition, the result should be less than one of the operands (LHS)
+  // if and only if the other operand (RHS) is negative, otherwise there will
+  // be overflow.
+  // For a subtraction, the result should be less than one of the operands
+  // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
+  // otherwise there will be overflow.
+  auto ResultLowerThanLHS =
+      MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, Dst0, LHS);
+  auto ConditionRHS = MIRBuilder.buildICmp(
+      IsAdd ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGT, BoolTy, RHS, Zero);
+
+  MIRBuilder.buildXor(Dst1, ConditionRHS, ResultLowerThanLHS);
+  MI.eraseFromParent();
+  return Legalized;
+}
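The overflow rule in lowerSADDO_SSUBO can be sanity-checked in a few lines (my sketch; __builtin_add_overflow is the GCC/Clang reference):

#include <cstdint>
#include <cstdio>

int main() {
  int32_t LHS = 0x7fffffff, RHS = 1;
  // Wrapping add via unsigned arithmetic, as the generated G_ADD behaves.
  int32_t Res = (int32_t)((uint32_t)LHS + (uint32_t)RHS);
  bool ResultLowerThanLHS = Res < LHS;
  bool ConditionRHS = RHS < 0; // ICMP_SGT against zero for the SSUBO case
  bool Overflow = ResultLowerThanLHS ^ ConditionRHS;

  int32_t Ref;
  printf("lowered=%d builtin=%d\n", Overflow,
         __builtin_add_overflow(LHS, RHS, &Ref)); // lowered=1 builtin=1
}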