diff options
Diffstat (limited to 'llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp')
| -rw-r--r-- | llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp | 622 |
1 files changed, 492 insertions, 130 deletions
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index f0da0d88140f..37e7153be572 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -119,8 +119,7 @@ LegalizerHelper::legalizeInstrStep(MachineInstr &MI, MIRBuilder.setInstrAndDebugLoc(MI); - if (MI.getOpcode() == TargetOpcode::G_INTRINSIC || - MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS) + if (isa<GIntrinsic>(MI)) return LI.legalizeIntrinsic(*this, MI) ? Legalized : UnableToLegalize; auto Step = LI.getAction(MI, MRI); switch (Step.Action) { @@ -526,6 +525,8 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) { RTLIBCASE(EXP_F); case TargetOpcode::G_FEXP2: RTLIBCASE(EXP2_F); + case TargetOpcode::G_FEXP10: + RTLIBCASE(EXP10_F); case TargetOpcode::G_FREM: RTLIBCASE(REM_F); case TargetOpcode::G_FPOW: @@ -690,7 +691,7 @@ llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, LLT OpLLT = MRI.getType(Reg); Type *OpTy = nullptr; if (OpLLT.isPointer()) - OpTy = Type::getInt8PtrTy(Ctx, OpLLT.getAddressSpace()); + OpTy = PointerType::get(Ctx, OpLLT.getAddressSpace()); else OpTy = IntegerType::get(Ctx, OpLLT.getSizeInBits()); Args.push_back({Reg, OpTy, 0}); @@ -795,10 +796,134 @@ conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType, {{MI.getOperand(1).getReg(), FromType, 0}}); } +static RTLIB::Libcall +getStateLibraryFunctionFor(MachineInstr &MI, const TargetLowering &TLI) { + RTLIB::Libcall RTLibcall; + switch (MI.getOpcode()) { + case TargetOpcode::G_GET_FPMODE: + RTLibcall = RTLIB::FEGETMODE; + break; + case TargetOpcode::G_SET_FPMODE: + case TargetOpcode::G_RESET_FPMODE: + RTLibcall = RTLIB::FESETMODE; + break; + default: + llvm_unreachable("Unexpected opcode"); + } + return RTLibcall; +} + +// Some library functions that read FP state (fegetmode, fegetenv) write the +// state into a region in memory. IR intrinsics that do the same operations +// (get_fpmode, get_fpenv) return the state as integer value. To implement these +// intrinsics via the library functions, we need to use temporary variable, +// for example: +// +// %0:_(s32) = G_GET_FPMODE +// +// is transformed to: +// +// %1:_(p0) = G_FRAME_INDEX %stack.0 +// BL &fegetmode +// %0:_(s32) = G_LOAD % 1 +// +LegalizerHelper::LegalizeResult +LegalizerHelper::createGetStateLibcall(MachineIRBuilder &MIRBuilder, + MachineInstr &MI) { + const DataLayout &DL = MIRBuilder.getDataLayout(); + auto &MF = MIRBuilder.getMF(); + auto &MRI = *MIRBuilder.getMRI(); + auto &Ctx = MF.getFunction().getContext(); + + // Create temporary, where library function will put the read state. + Register Dst = MI.getOperand(0).getReg(); + LLT StateTy = MRI.getType(Dst); + TypeSize StateSize = StateTy.getSizeInBytes(); + Align TempAlign = getStackTemporaryAlignment(StateTy); + MachinePointerInfo TempPtrInfo; + auto Temp = createStackTemporary(StateSize, TempAlign, TempPtrInfo); + + // Create a call to library function, with the temporary as an argument. + unsigned TempAddrSpace = DL.getAllocaAddrSpace(); + Type *StatePtrTy = PointerType::get(Ctx, TempAddrSpace); + RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI); + auto Res = + createLibcall(MIRBuilder, RTLibcall, + CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0), + CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0})); + if (Res != LegalizerHelper::Legalized) + return Res; + + // Create a load from the temporary. + MachineMemOperand *MMO = MF.getMachineMemOperand( + TempPtrInfo, MachineMemOperand::MOLoad, StateTy, TempAlign); + MIRBuilder.buildLoadInstr(TargetOpcode::G_LOAD, Dst, Temp, *MMO); + + return LegalizerHelper::Legalized; +} + +// Similar to `createGetStateLibcall` the function calls a library function +// using transient space in stack. In this case the library function reads +// content of memory region. +LegalizerHelper::LegalizeResult +LegalizerHelper::createSetStateLibcall(MachineIRBuilder &MIRBuilder, + MachineInstr &MI) { + const DataLayout &DL = MIRBuilder.getDataLayout(); + auto &MF = MIRBuilder.getMF(); + auto &MRI = *MIRBuilder.getMRI(); + auto &Ctx = MF.getFunction().getContext(); + + // Create temporary, where library function will get the new state. + Register Src = MI.getOperand(0).getReg(); + LLT StateTy = MRI.getType(Src); + TypeSize StateSize = StateTy.getSizeInBytes(); + Align TempAlign = getStackTemporaryAlignment(StateTy); + MachinePointerInfo TempPtrInfo; + auto Temp = createStackTemporary(StateSize, TempAlign, TempPtrInfo); + + // Put the new state into the temporary. + MachineMemOperand *MMO = MF.getMachineMemOperand( + TempPtrInfo, MachineMemOperand::MOStore, StateTy, TempAlign); + MIRBuilder.buildStore(Src, Temp, *MMO); + + // Create a call to library function, with the temporary as an argument. + unsigned TempAddrSpace = DL.getAllocaAddrSpace(); + Type *StatePtrTy = PointerType::get(Ctx, TempAddrSpace); + RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI); + return createLibcall(MIRBuilder, RTLibcall, + CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0), + CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0})); +} + +// The function is used to legalize operations that set default environment +// state. In C library a call like `fesetmode(FE_DFL_MODE)` is used for that. +// On most targets supported in glibc FE_DFL_MODE is defined as +// `((const femode_t *) -1)`. Such assumption is used here. If for some target +// it is not true, the target must provide custom lowering. +LegalizerHelper::LegalizeResult +LegalizerHelper::createResetStateLibcall(MachineIRBuilder &MIRBuilder, + MachineInstr &MI) { + const DataLayout &DL = MIRBuilder.getDataLayout(); + auto &MF = MIRBuilder.getMF(); + auto &Ctx = MF.getFunction().getContext(); + + // Create an argument for the library function. + unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace(); + Type *StatePtrTy = PointerType::get(Ctx, AddrSpace); + unsigned PtrSize = DL.getPointerSizeInBits(AddrSpace); + LLT MemTy = LLT::pointer(AddrSpace, PtrSize); + auto DefValue = MIRBuilder.buildConstant(LLT::scalar(PtrSize), -1LL); + DstOp Dest(MRI.createGenericVirtualRegister(MemTy)); + MIRBuilder.buildIntToPtr(Dest, DefValue); + + RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI); + return createLibcall(MIRBuilder, RTLibcall, + CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0), + CallLowering::ArgInfo({ Dest.getReg(), StatePtrTy, 0})); +} + LegalizerHelper::LegalizeResult LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) { - LLT LLTy = MRI.getType(MI.getOperand(0).getReg()); - unsigned Size = LLTy.getSizeInBits(); auto &Ctx = MIRBuilder.getMF().getFunction().getContext(); switch (MI.getOpcode()) { @@ -810,6 +935,8 @@ LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) { case TargetOpcode::G_SREM: case TargetOpcode::G_UREM: case TargetOpcode::G_CTLZ_ZERO_UNDEF: { + LLT LLTy = MRI.getType(MI.getOperand(0).getReg()); + unsigned Size = LLTy.getSizeInBits(); Type *HLTy = IntegerType::get(Ctx, Size); auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy); if (Status != Legalized) @@ -831,6 +958,7 @@ LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) { case TargetOpcode::G_FLDEXP: case TargetOpcode::G_FEXP: case TargetOpcode::G_FEXP2: + case TargetOpcode::G_FEXP10: case TargetOpcode::G_FCEIL: case TargetOpcode::G_FFLOOR: case TargetOpcode::G_FMINNUM: @@ -839,6 +967,8 @@ LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) { case TargetOpcode::G_FRINT: case TargetOpcode::G_FNEARBYINT: case TargetOpcode::G_INTRINSIC_ROUNDEVEN: { + LLT LLTy = MRI.getType(MI.getOperand(0).getReg()); + unsigned Size = LLTy.getSizeInBits(); Type *HLTy = getFloatTypeForLLT(Ctx, LLTy); if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) { LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n"); @@ -901,6 +1031,24 @@ LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) { MI.eraseFromParent(); return Result; } + case TargetOpcode::G_GET_FPMODE: { + LegalizeResult Result = createGetStateLibcall(MIRBuilder, MI); + if (Result != Legalized) + return Result; + break; + } + case TargetOpcode::G_SET_FPMODE: { + LegalizeResult Result = createSetStateLibcall(MIRBuilder, MI); + if (Result != Legalized) + return Result; + break; + } + case TargetOpcode::G_RESET_FPMODE: { + LegalizeResult Result = createResetStateLibcall(MIRBuilder, MI); + if (Result != Legalized) + return Result; + break; + } } MI.eraseFromParent(); @@ -1297,7 +1445,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, // So long as the new type has more bits than the bits we're extending we // don't need to break it apart. - if (NarrowTy.getScalarSizeInBits() >= SizeInBits) { + if (NarrowTy.getScalarSizeInBits() > SizeInBits) { Observer.changingInstr(MI); // We don't lose any non-extension bits by truncating the src and // sign-extending the dst. @@ -1340,14 +1488,15 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, Register AshrCstReg = MIRBuilder.buildConstant(NarrowTy, NarrowTy.getScalarSizeInBits() - 1) .getReg(0); - Register FullExtensionReg = 0; - Register PartialExtensionReg = 0; + Register FullExtensionReg; + Register PartialExtensionReg; // Do the operation on each small part. for (int i = 0; i < NumParts; ++i) { - if ((i + 1) * NarrowTy.getScalarSizeInBits() < SizeInBits) + if ((i + 1) * NarrowTy.getScalarSizeInBits() <= SizeInBits) { DstRegs.push_back(SrcRegs[i]); - else if (i * NarrowTy.getScalarSizeInBits() > SizeInBits) { + PartialExtensionReg = DstRegs.back(); + } else if (i * NarrowTy.getScalarSizeInBits() >= SizeInBits) { assert(PartialExtensionReg && "Expected to visit partial extension before full"); if (FullExtensionReg) { @@ -1993,8 +2142,20 @@ LegalizerHelper::widenScalarMulo(MachineInstr &MI, unsigned TypeIdx, auto LeftOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {LHS}); auto RightOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {RHS}); - auto Mulo = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy, OverflowTy}, - {LeftOperand, RightOperand}); + // Multiplication cannot overflow if the WideTy is >= 2 * original width, + // so we don't need to check the overflow result of larger type Mulo. + bool WideMulCanOverflow = WideTy.getScalarSizeInBits() < 2 * SrcBitWidth; + + unsigned MulOpc = + WideMulCanOverflow ? MI.getOpcode() : (unsigned)TargetOpcode::G_MUL; + + MachineInstrBuilder Mulo; + if (WideMulCanOverflow) + Mulo = MIRBuilder.buildInstr(MulOpc, {WideTy, OverflowTy}, + {LeftOperand, RightOperand}); + else + Mulo = MIRBuilder.buildInstr(MulOpc, {WideTy}, {LeftOperand, RightOperand}); + auto Mul = Mulo->getOperand(0); MIRBuilder.buildTrunc(Result, Mul); @@ -2012,9 +2173,7 @@ LegalizerHelper::widenScalarMulo(MachineInstr &MI, unsigned TypeIdx, ExtResult = MIRBuilder.buildZExtInReg(WideTy, Mul, SrcBitWidth); } - // Multiplication cannot overflow if the WideTy is >= 2 * original width, - // so we don't need to check the overflow result of larger type Mulo. - if (WideTy.getScalarSizeInBits() < 2 * SrcBitWidth) { + if (WideMulCanOverflow) { auto Overflow = MIRBuilder.buildICmp(CmpInst::ICMP_NE, OverflowTy, Mul, ExtResult); // Finally check if the multiplication in the larger type itself overflowed. @@ -2247,6 +2406,16 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { Observer.changedInstr(MI); return Legalized; + case TargetOpcode::G_ROTR: + case TargetOpcode::G_ROTL: + if (TypeIdx != 1) + return UnableToLegalize; + + Observer.changingInstr(MI); + widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT); + Observer.changedInstr(MI); + return Legalized; + case TargetOpcode::G_SDIV: case TargetOpcode::G_SREM: case TargetOpcode::G_SMIN: @@ -2325,6 +2494,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { case TargetOpcode::G_FPTOSI: case TargetOpcode::G_FPTOUI: + case TargetOpcode::G_IS_FPCLASS: Observer.changingInstr(MI); if (TypeIdx == 0) @@ -2494,6 +2664,17 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { return Legalized; } case TargetOpcode::G_INSERT_VECTOR_ELT: { + if (TypeIdx == 0) { + Observer.changingInstr(MI); + const LLT WideEltTy = WideTy.getElementType(); + + widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT); + widenScalarSrc(MI, WideEltTy, 2, TargetOpcode::G_ANYEXT); + widenScalarDst(MI, WideTy, 0); + Observer.changedInstr(MI); + return Legalized; + } + if (TypeIdx == 1) { Observer.changingInstr(MI); @@ -2546,6 +2727,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { case TargetOpcode::G_FSQRT: case TargetOpcode::G_FEXP: case TargetOpcode::G_FEXP2: + case TargetOpcode::G_FEXP10: case TargetOpcode::G_FPOW: case TargetOpcode::G_INTRINSIC_TRUNC: case TargetOpcode::G_INTRINSIC_ROUND: @@ -2648,6 +2830,23 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { Observer.changedInstr(MI); return Legalized; } + case TargetOpcode::G_VECREDUCE_FADD: + case TargetOpcode::G_VECREDUCE_FMIN: + case TargetOpcode::G_VECREDUCE_FMAX: + case TargetOpcode::G_VECREDUCE_FMINIMUM: + case TargetOpcode::G_VECREDUCE_FMAXIMUM: + if (TypeIdx != 0) + return UnableToLegalize; + Observer.changingInstr(MI); + Register VecReg = MI.getOperand(1).getReg(); + LLT VecTy = MRI.getType(VecReg); + LLT WideVecTy = VecTy.isVector() + ? LLT::vector(VecTy.getElementCount(), WideTy) + : WideTy; + widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_FPEXT); + widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC); + Observer.changedInstr(MI); + return Legalized; } } @@ -3384,10 +3583,10 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) { return lowerFFloor(MI); case TargetOpcode::G_INTRINSIC_ROUND: return lowerIntrinsicRound(MI); - case TargetOpcode::G_INTRINSIC_ROUNDEVEN: { + case TargetOpcode::G_FRINT: { // Since round even is the assumed rounding mode for unconstrained FP // operations, rint and roundeven are the same operation. - changeOpcode(MI, TargetOpcode::G_FRINT); + changeOpcode(MI, TargetOpcode::G_INTRINSIC_ROUNDEVEN); return Legalized; } case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: { @@ -3421,12 +3620,25 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) { } case G_UADDE: { auto [Res, CarryOut, LHS, RHS, CarryIn] = MI.getFirst5Regs(); - LLT Ty = MRI.getType(Res); + const LLT CondTy = MRI.getType(CarryOut); + const LLT Ty = MRI.getType(Res); + // Initial add of the two operands. auto TmpRes = MIRBuilder.buildAdd(Ty, LHS, RHS); + + // Initial check for carry. + auto Carry = MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CondTy, TmpRes, LHS); + + // Add the sum and the carry. auto ZExtCarryIn = MIRBuilder.buildZExt(Ty, CarryIn); MIRBuilder.buildAdd(Res, TmpRes, ZExtCarryIn); - MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, LHS); + + // Second check for carry. We can only carry if the initial sum is all 1s + // and the carry is set, resulting in a new sum of 0. + auto Zero = MIRBuilder.buildConstant(Ty, 0); + auto ResEqZero = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, Res, Zero); + auto Carry2 = MIRBuilder.buildAnd(CondTy, ResEqZero, CarryIn); + MIRBuilder.buildOr(CarryOut, Carry, Carry2); MI.eraseFromParent(); return Legalized; @@ -3445,13 +3657,23 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) { const LLT CondTy = MRI.getType(BorrowOut); const LLT Ty = MRI.getType(Res); + // Initial subtract of the two operands. auto TmpRes = MIRBuilder.buildSub(Ty, LHS, RHS); + + // Initial check for borrow. + auto Borrow = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, CondTy, TmpRes, LHS); + + // Subtract the borrow from the first subtract. auto ZExtBorrowIn = MIRBuilder.buildZExt(Ty, BorrowIn); MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn); - auto LHS_EQ_RHS = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, LHS, RHS); - auto LHS_ULT_RHS = MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CondTy, LHS, RHS); - MIRBuilder.buildSelect(BorrowOut, LHS_EQ_RHS, BorrowIn, LHS_ULT_RHS); + // Second check for borrow. We can only borrow if the initial difference is + // 0 and the borrow is set, resulting in a new difference of all 1s. + auto Zero = MIRBuilder.buildConstant(Ty, 0); + auto TmpResEqZero = + MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, TmpRes, Zero); + auto Borrow2 = MIRBuilder.buildAnd(CondTy, TmpResEqZero, BorrowIn); + MIRBuilder.buildOr(BorrowOut, Borrow, Borrow2); MI.eraseFromParent(); return Legalized; @@ -3503,6 +3725,10 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) { return lowerShuffleVector(MI); case G_DYN_STACKALLOC: return lowerDynStackAlloc(MI); + case G_STACKSAVE: + return lowerStackSave(MI); + case G_STACKRESTORE: + return lowerStackRestore(MI); case G_EXTRACT: return lowerExtract(MI); case G_INSERT: @@ -3559,8 +3785,16 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) { return lowerMemCpyFamily(MI); case G_MEMCPY_INLINE: return lowerMemcpyInline(MI); + case G_ZEXT: + case G_SEXT: + case G_ANYEXT: + return lowerEXT(MI); + case G_TRUNC: + return lowerTRUNC(MI); GISEL_VECREDUCE_CASES_NONSEQ return lowerVectorReduction(MI); + case G_VAARG: + return lowerVAArg(MI); } } @@ -4168,6 +4402,7 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, case G_FPOW: case G_FEXP: case G_FEXP2: + case G_FEXP10: case G_FLOG: case G_FLOG2: case G_FLOG10: @@ -4425,73 +4660,22 @@ LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorShuffle( return Legalized; } -static unsigned getScalarOpcForReduction(unsigned Opc) { - unsigned ScalarOpc; - switch (Opc) { - case TargetOpcode::G_VECREDUCE_FADD: - ScalarOpc = TargetOpcode::G_FADD; - break; - case TargetOpcode::G_VECREDUCE_FMUL: - ScalarOpc = TargetOpcode::G_FMUL; - break; - case TargetOpcode::G_VECREDUCE_FMAX: - ScalarOpc = TargetOpcode::G_FMAXNUM; - break; - case TargetOpcode::G_VECREDUCE_FMIN: - ScalarOpc = TargetOpcode::G_FMINNUM; - break; - case TargetOpcode::G_VECREDUCE_ADD: - ScalarOpc = TargetOpcode::G_ADD; - break; - case TargetOpcode::G_VECREDUCE_MUL: - ScalarOpc = TargetOpcode::G_MUL; - break; - case TargetOpcode::G_VECREDUCE_AND: - ScalarOpc = TargetOpcode::G_AND; - break; - case TargetOpcode::G_VECREDUCE_OR: - ScalarOpc = TargetOpcode::G_OR; - break; - case TargetOpcode::G_VECREDUCE_XOR: - ScalarOpc = TargetOpcode::G_XOR; - break; - case TargetOpcode::G_VECREDUCE_SMAX: - ScalarOpc = TargetOpcode::G_SMAX; - break; - case TargetOpcode::G_VECREDUCE_SMIN: - ScalarOpc = TargetOpcode::G_SMIN; - break; - case TargetOpcode::G_VECREDUCE_UMAX: - ScalarOpc = TargetOpcode::G_UMAX; - break; - case TargetOpcode::G_VECREDUCE_UMIN: - ScalarOpc = TargetOpcode::G_UMIN; - break; - default: - llvm_unreachable("Unhandled reduction"); - } - return ScalarOpc; -} - LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorReductions( MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) { - unsigned Opc = MI.getOpcode(); - assert(Opc != TargetOpcode::G_VECREDUCE_SEQ_FADD && - Opc != TargetOpcode::G_VECREDUCE_SEQ_FMUL && - "Sequential reductions not expected"); + auto &RdxMI = cast<GVecReduce>(MI); if (TypeIdx != 1) return UnableToLegalize; // The semantics of the normal non-sequential reductions allow us to freely // re-associate the operation. - auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs(); + auto [DstReg, DstTy, SrcReg, SrcTy] = RdxMI.getFirst2RegLLTs(); if (NarrowTy.isVector() && (SrcTy.getNumElements() % NarrowTy.getNumElements() != 0)) return UnableToLegalize; - unsigned ScalarOpc = getScalarOpcForReduction(Opc); + unsigned ScalarOpc = RdxMI.getScalarOpcForReduction(); SmallVector<Register> SplitSrcs; // If NarrowTy is a scalar then we're being asked to scalarize. const unsigned NumParts = @@ -4536,10 +4720,10 @@ LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorReductions( SmallVector<Register> PartialReductions; for (unsigned Part = 0; Part < NumParts; ++Part) { PartialReductions.push_back( - MIRBuilder.buildInstr(Opc, {DstTy}, {SplitSrcs[Part]}).getReg(0)); + MIRBuilder.buildInstr(RdxMI.getOpcode(), {DstTy}, {SplitSrcs[Part]}) + .getReg(0)); } - // If the types involved are powers of 2, we can generate intermediate vector // ops, before generating a final reduction operation. if (isPowerOf2_32(SrcTy.getNumElements()) && @@ -4836,7 +5020,9 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx, case TargetOpcode::G_SUB: case TargetOpcode::G_MUL: case TargetOpcode::G_FADD: + case TargetOpcode::G_FSUB: case TargetOpcode::G_FMUL: + case TargetOpcode::G_FDIV: case TargetOpcode::G_UADDSAT: case TargetOpcode::G_USUBSAT: case TargetOpcode::G_SADDSAT: @@ -4886,6 +5072,14 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx, case TargetOpcode::G_FREEZE: case TargetOpcode::G_FNEG: case TargetOpcode::G_FABS: + case TargetOpcode::G_FSQRT: + case TargetOpcode::G_FCEIL: + case TargetOpcode::G_FFLOOR: + case TargetOpcode::G_FNEARBYINT: + case TargetOpcode::G_FRINT: + case TargetOpcode::G_INTRINSIC_ROUND: + case TargetOpcode::G_INTRINSIC_ROUNDEVEN: + case TargetOpcode::G_INTRINSIC_TRUNC: case TargetOpcode::G_BSWAP: case TargetOpcode::G_FCANONICALIZE: case TargetOpcode::G_SEXT_INREG: @@ -4943,15 +5137,13 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx, MI.eraseFromParent(); return Legalized; } - case TargetOpcode::G_TRUNC: { - Observer.changingInstr(MI); - moreElementsVectorSrc(MI, MoreTy, 1); - moreElementsVectorDst(MI, MoreTy, 0); - Observer.changedInstr(MI); - return Legalized; - } + case TargetOpcode::G_TRUNC: case TargetOpcode::G_FPTRUNC: - case TargetOpcode::G_FPEXT: { + case TargetOpcode::G_FPEXT: + case TargetOpcode::G_FPTOSI: + case TargetOpcode::G_FPTOUI: + case TargetOpcode::G_SITOFP: + case TargetOpcode::G_UITOFP: { if (TypeIdx != 0) return UnableToLegalize; Observer.changingInstr(MI); @@ -5765,8 +5957,10 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI) { MI.eraseFromParent(); return Legalized; } + Observer.changingInstr(MI); MI.setDesc(TII.get(TargetOpcode::G_CTPOP)); MI.getOperand(1).setReg(MIBTmp.getReg(0)); + Observer.changedInstr(MI); return Legalized; } case TargetOpcode::G_CTPOP: { @@ -5956,6 +6150,105 @@ LegalizerHelper::lowerFunnelShift(MachineInstr &MI) { return Result; } +LegalizerHelper::LegalizeResult LegalizerHelper::lowerEXT(MachineInstr &MI) { + auto [Dst, Src] = MI.getFirst2Regs(); + LLT DstTy = MRI.getType(Dst); + LLT SrcTy = MRI.getType(Src); + + uint32_t DstTySize = DstTy.getSizeInBits(); + uint32_t DstTyScalarSize = DstTy.getScalarSizeInBits(); + uint32_t SrcTyScalarSize = SrcTy.getScalarSizeInBits(); + + if (!isPowerOf2_32(DstTySize) || !isPowerOf2_32(DstTyScalarSize) || + !isPowerOf2_32(SrcTyScalarSize)) + return UnableToLegalize; + + // The step between extend is too large, split it by creating an intermediate + // extend instruction + if (SrcTyScalarSize * 2 < DstTyScalarSize) { + LLT MidTy = SrcTy.changeElementSize(SrcTyScalarSize * 2); + // If the destination type is illegal, split it into multiple statements + // zext x -> zext(merge(zext(unmerge), zext(unmerge))) + auto NewExt = MIRBuilder.buildInstr(MI.getOpcode(), {MidTy}, {Src}); + // Unmerge the vector + LLT EltTy = MidTy.changeElementCount( + MidTy.getElementCount().divideCoefficientBy(2)); + auto UnmergeSrc = MIRBuilder.buildUnmerge(EltTy, NewExt); + + // ZExt the vectors + LLT ZExtResTy = DstTy.changeElementCount( + DstTy.getElementCount().divideCoefficientBy(2)); + auto ZExtRes1 = MIRBuilder.buildInstr(MI.getOpcode(), {ZExtResTy}, + {UnmergeSrc.getReg(0)}); + auto ZExtRes2 = MIRBuilder.buildInstr(MI.getOpcode(), {ZExtResTy}, + {UnmergeSrc.getReg(1)}); + + // Merge the ending vectors + MIRBuilder.buildMergeLikeInstr(Dst, {ZExtRes1, ZExtRes2}); + + MI.eraseFromParent(); + return Legalized; + } + return UnableToLegalize; +} + +LegalizerHelper::LegalizeResult LegalizerHelper::lowerTRUNC(MachineInstr &MI) { + // MachineIRBuilder &MIRBuilder = Helper.MIRBuilder; + MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); + // Similar to how operand splitting is done in SelectiondDAG, we can handle + // %res(v8s8) = G_TRUNC %in(v8s32) by generating: + // %inlo(<4x s32>), %inhi(<4 x s32>) = G_UNMERGE %in(<8 x s32>) + // %lo16(<4 x s16>) = G_TRUNC %inlo + // %hi16(<4 x s16>) = G_TRUNC %inhi + // %in16(<8 x s16>) = G_CONCAT_VECTORS %lo16, %hi16 + // %res(<8 x s8>) = G_TRUNC %in16 + + assert(MI.getOpcode() == TargetOpcode::G_TRUNC); + + Register DstReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); + LLT DstTy = MRI.getType(DstReg); + LLT SrcTy = MRI.getType(SrcReg); + + if (DstTy.isVector() && isPowerOf2_32(DstTy.getNumElements()) && + isPowerOf2_32(DstTy.getScalarSizeInBits()) && + isPowerOf2_32(SrcTy.getNumElements()) && + isPowerOf2_32(SrcTy.getScalarSizeInBits())) { + // Split input type. + LLT SplitSrcTy = SrcTy.changeElementCount( + SrcTy.getElementCount().divideCoefficientBy(2)); + + // First, split the source into two smaller vectors. + SmallVector<Register, 2> SplitSrcs; + extractParts(SrcReg, SplitSrcTy, 2, SplitSrcs); + + // Truncate the splits into intermediate narrower elements. + LLT InterTy; + if (DstTy.getScalarSizeInBits() * 2 < SrcTy.getScalarSizeInBits()) + InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits() * 2); + else + InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits()); + for (unsigned I = 0; I < SplitSrcs.size(); ++I) { + SplitSrcs[I] = MIRBuilder.buildTrunc(InterTy, SplitSrcs[I]).getReg(0); + } + + // Combine the new truncates into one vector + auto Merge = MIRBuilder.buildMergeLikeInstr( + DstTy.changeElementSize(InterTy.getScalarSizeInBits()), SplitSrcs); + + // Truncate the new vector to the final result type + if (DstTy.getScalarSizeInBits() * 2 < SrcTy.getScalarSizeInBits()) + MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), Merge.getReg(0)); + else + MIRBuilder.buildCopy(MI.getOperand(0).getReg(), Merge.getReg(0)); + + MI.eraseFromParent(); + + return Legalized; + } + return UnableToLegalize; +} + LegalizerHelper::LegalizeResult LegalizerHelper::lowerRotateWithReverseRotate(MachineInstr &MI) { auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs(); @@ -6523,23 +6816,25 @@ LegalizerHelper::lowerIntrinsicRound(MachineInstr &MI) { // round(x) => // t = trunc(x); // d = fabs(x - t); - // o = copysign(1.0f, x); - // return t + (d >= 0.5 ? o : 0.0); + // o = copysign(d >= 0.5 ? 1.0 : 0.0, x); + // return t + o; auto T = MIRBuilder.buildIntrinsicTrunc(Ty, X, Flags); auto Diff = MIRBuilder.buildFSub(Ty, X, T, Flags); auto AbsDiff = MIRBuilder.buildFAbs(Ty, Diff, Flags); - auto Zero = MIRBuilder.buildFConstant(Ty, 0.0); - auto One = MIRBuilder.buildFConstant(Ty, 1.0); + auto Half = MIRBuilder.buildFConstant(Ty, 0.5); - auto SignOne = MIRBuilder.buildFCopysign(Ty, One, X); + auto Cmp = + MIRBuilder.buildFCmp(CmpInst::FCMP_OGE, CondTy, AbsDiff, Half, Flags); - auto Cmp = MIRBuilder.buildFCmp(CmpInst::FCMP_OGE, CondTy, AbsDiff, Half, - Flags); - auto Sel = MIRBuilder.buildSelect(Ty, Cmp, SignOne, Zero, Flags); + // Could emit G_UITOFP instead + auto One = MIRBuilder.buildFConstant(Ty, 1.0); + auto Zero = MIRBuilder.buildFConstant(Ty, 0.0); + auto BoolFP = MIRBuilder.buildSelect(Ty, Cmp, One, Zero); + auto SignedOffset = MIRBuilder.buildFCopysign(Ty, BoolFP, X); - MIRBuilder.buildFAdd(DstReg, T, Sel, Flags); + MIRBuilder.buildFAdd(DstReg, T, SignedOffset, Flags); MI.eraseFromParent(); return Legalized; @@ -6688,8 +6983,8 @@ LegalizerHelper::lowerExtractInsertVectorElt(MachineInstr &MI) { Align EltAlign; MachinePointerInfo PtrInfo; - auto StackTemp = createStackTemporary(TypeSize::Fixed(VecTy.getSizeInBytes()), - VecAlign, PtrInfo); + auto StackTemp = createStackTemporary( + TypeSize::getFixed(VecTy.getSizeInBytes()), VecAlign, PtrInfo); MIRBuilder.buildStore(SrcVec, StackTemp, PtrInfo, VecAlign); // Get the pointer to the element, and be sure not to hit undefined behavior @@ -6727,26 +7022,9 @@ LegalizerHelper::lowerShuffleVector(MachineInstr &MI) { LLT IdxTy = LLT::scalar(32); ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask(); - - if (DstTy.isScalar()) { - if (Src0Ty.isVector()) - return UnableToLegalize; - - // This is just a SELECT. - assert(Mask.size() == 1 && "Expected a single mask element"); - Register Val; - if (Mask[0] < 0 || Mask[0] > 1) - Val = MIRBuilder.buildUndef(DstTy).getReg(0); - else - Val = Mask[0] == 0 ? Src0Reg : Src1Reg; - MIRBuilder.buildCopy(DstReg, Val); - MI.eraseFromParent(); - return Legalized; - } - Register Undef; SmallVector<Register, 32> BuildVec; - LLT EltTy = DstTy.getElementType(); + LLT EltTy = DstTy.getScalarType(); for (int Idx : Mask) { if (Idx < 0) { @@ -6768,26 +7046,20 @@ LegalizerHelper::lowerShuffleVector(MachineInstr &MI) { } } - MIRBuilder.buildBuildVector(DstReg, BuildVec); + if (DstTy.isScalar()) + MIRBuilder.buildCopy(DstReg, BuildVec[0]); + else + MIRBuilder.buildBuildVector(DstReg, BuildVec); MI.eraseFromParent(); return Legalized; } -LegalizerHelper::LegalizeResult -LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) { - const auto &MF = *MI.getMF(); - const auto &TFI = *MF.getSubtarget().getFrameLowering(); - if (TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp) - return UnableToLegalize; - - Register Dst = MI.getOperand(0).getReg(); - Register AllocSize = MI.getOperand(1).getReg(); - Align Alignment = assumeAligned(MI.getOperand(2).getImm()); - - LLT PtrTy = MRI.getType(Dst); +Register LegalizerHelper::getDynStackAllocTargetPtr(Register SPReg, + Register AllocSize, + Align Alignment, + LLT PtrTy) { LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits()); - Register SPReg = TLI.getStackPointerRegisterToSaveRestore(); auto SPTmp = MIRBuilder.buildCopy(PtrTy, SPReg); SPTmp = MIRBuilder.buildCast(IntPtrTy, SPTmp); @@ -6802,7 +7074,25 @@ LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) { Alloc = MIRBuilder.buildAnd(IntPtrTy, Alloc, AlignCst); } - SPTmp = MIRBuilder.buildCast(PtrTy, Alloc); + return MIRBuilder.buildCast(PtrTy, Alloc).getReg(0); +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) { + const auto &MF = *MI.getMF(); + const auto &TFI = *MF.getSubtarget().getFrameLowering(); + if (TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp) + return UnableToLegalize; + + Register Dst = MI.getOperand(0).getReg(); + Register AllocSize = MI.getOperand(1).getReg(); + Align Alignment = assumeAligned(MI.getOperand(2).getImm()); + + LLT PtrTy = MRI.getType(Dst); + Register SPReg = TLI.getStackPointerRegisterToSaveRestore(); + Register SPTmp = + getDynStackAllocTargetPtr(SPReg, AllocSize, Alignment, PtrTy); + MIRBuilder.buildCopy(SPReg, SPTmp); MIRBuilder.buildCopy(Dst, SPTmp); @@ -6811,6 +7101,28 @@ LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) { } LegalizerHelper::LegalizeResult +LegalizerHelper::lowerStackSave(MachineInstr &MI) { + Register StackPtr = TLI.getStackPointerRegisterToSaveRestore(); + if (!StackPtr) + return UnableToLegalize; + + MIRBuilder.buildCopy(MI.getOperand(0), StackPtr); + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerStackRestore(MachineInstr &MI) { + Register StackPtr = TLI.getStackPointerRegisterToSaveRestore(); + if (!StackPtr) + return UnableToLegalize; + + MIRBuilder.buildCopy(StackPtr, MI.getOperand(0)); + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult LegalizerHelper::lowerExtract(MachineInstr &MI) { auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs(); unsigned Offset = MI.getOperand(2).getImm(); @@ -7577,6 +7889,56 @@ LegalizerHelper::lowerVectorReduction(MachineInstr &MI) { return UnableToLegalize; } +static Type *getTypeForLLT(LLT Ty, LLVMContext &C); + +LegalizerHelper::LegalizeResult LegalizerHelper::lowerVAArg(MachineInstr &MI) { + MachineFunction &MF = *MI.getMF(); + const DataLayout &DL = MIRBuilder.getDataLayout(); + LLVMContext &Ctx = MF.getFunction().getContext(); + Register ListPtr = MI.getOperand(1).getReg(); + LLT PtrTy = MRI.getType(ListPtr); + + // LstPtr is a pointer to the head of the list. Get the address + // of the head of the list. + Align PtrAlignment = DL.getABITypeAlign(getTypeForLLT(PtrTy, Ctx)); + MachineMemOperand *PtrLoadMMO = MF.getMachineMemOperand( + MachinePointerInfo(), MachineMemOperand::MOLoad, PtrTy, PtrAlignment); + auto VAList = MIRBuilder.buildLoad(PtrTy, ListPtr, *PtrLoadMMO).getReg(0); + + const Align A(MI.getOperand(2).getImm()); + LLT PtrTyAsScalarTy = LLT::scalar(PtrTy.getSizeInBits()); + if (A > TLI.getMinStackArgumentAlignment()) { + Register AlignAmt = + MIRBuilder.buildConstant(PtrTyAsScalarTy, A.value() - 1).getReg(0); + auto AddDst = MIRBuilder.buildPtrAdd(PtrTy, VAList, AlignAmt); + auto AndDst = MIRBuilder.buildMaskLowPtrBits(PtrTy, AddDst, Log2(A)); + VAList = AndDst.getReg(0); + } + + // Increment the pointer, VAList, to the next vaarg + // The list should be bumped by the size of element in the current head of + // list. + Register Dst = MI.getOperand(0).getReg(); + LLT LLTTy = MRI.getType(Dst); + Type *Ty = getTypeForLLT(LLTTy, Ctx); + auto IncAmt = + MIRBuilder.buildConstant(PtrTyAsScalarTy, DL.getTypeAllocSize(Ty)); + auto Succ = MIRBuilder.buildPtrAdd(PtrTy, VAList, IncAmt); + + // Store the increment VAList to the legalized pointer + MachineMemOperand *StoreMMO = MF.getMachineMemOperand( + MachinePointerInfo(), MachineMemOperand::MOStore, PtrTy, PtrAlignment); + MIRBuilder.buildStore(Succ, ListPtr, *StoreMMO); + // Load the actual argument out of the pointer VAList + Align EltAlignment = DL.getABITypeAlign(Ty); + MachineMemOperand *EltLoadMMO = MF.getMachineMemOperand( + MachinePointerInfo(), MachineMemOperand::MOLoad, LLTTy, EltAlignment); + MIRBuilder.buildLoad(Dst, VAList, *EltLoadMMO); + + MI.eraseFromParent(); + return Legalized; +} + static bool shouldLowerMemFuncForSize(const MachineFunction &MF) { // On Darwin, -Os means optimize for size without hurting performance, so // only really optimize for size when -Oz (MinSize) is used. |
