diff options
Diffstat (limited to 'lib/CodeGen/GlobalISel/LegalizerHelper.cpp')
-rw-r--r-- | lib/CodeGen/GlobalISel/LegalizerHelper.cpp | 2884 |
1 files changed, 2461 insertions, 423 deletions
diff --git a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index b3fc94cdec60..f5cf7fc9bd9b 100644 --- a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -1,9 +1,8 @@ //===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -30,6 +29,39 @@ using namespace llvm; using namespace LegalizeActions; +/// Try to break down \p OrigTy into \p NarrowTy sized pieces. +/// +/// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy, +/// with any leftover piece as type \p LeftoverTy +/// +/// Returns -1 in the first element of the pair if the breakdown is not +/// satisfiable. +static std::pair<int, int> +getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) { + assert(!LeftoverTy.isValid() && "this is an out argument"); + + unsigned Size = OrigTy.getSizeInBits(); + unsigned NarrowSize = NarrowTy.getSizeInBits(); + unsigned NumParts = Size / NarrowSize; + unsigned LeftoverSize = Size - NumParts * NarrowSize; + assert(Size > NarrowSize); + + if (LeftoverSize == 0) + return {NumParts, 0}; + + if (NarrowTy.isVector()) { + unsigned EltSize = OrigTy.getScalarSizeInBits(); + if (LeftoverSize % EltSize != 0) + return {-1, -1}; + LeftoverTy = LLT::scalarOrVector(LeftoverSize / EltSize, EltSize); + } else { + LeftoverTy = LLT::scalar(LeftoverSize); + } + + int NumLeftover = LeftoverSize / LeftoverTy.getSizeInBits(); + return std::make_pair(NumParts, NumLeftover); +} + LegalizerHelper::LegalizerHelper(MachineFunction &MF, GISelChangeObserver &Observer, MachineIRBuilder &Builder) @@ -50,6 +82,10 @@ LegalizerHelper::LegalizeResult LegalizerHelper::legalizeInstrStep(MachineInstr &MI) { LLVM_DEBUG(dbgs() << "Legalizing: "; MI.print(dbgs())); + if (MI.getOpcode() == TargetOpcode::G_INTRINSIC || + MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS) + return LI.legalizeIntrinsic(MI, MRI, MIRBuilder) ? Legalized + : UnableToLegalize; auto Step = LI.getAction(MI, MRI); switch (Step.Action) { case Legal: @@ -70,6 +106,9 @@ LegalizerHelper::legalizeInstrStep(MachineInstr &MI) { case FewerElements: LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n"); return fewerElementsVector(MI, Step.TypeIdx, Step.NewType); + case MoreElements: + LLVM_DEBUG(dbgs() << ".. Increase number of elements\n"); + return moreElementsVector(MI, Step.TypeIdx, Step.NewType); case Custom: LLVM_DEBUG(dbgs() << ".. Custom legalization\n"); return LI.legalizeCustom(MI, MRI, MIRBuilder, Observer) ? Legalized @@ -80,13 +119,103 @@ LegalizerHelper::legalizeInstrStep(MachineInstr &MI) { } } -void LegalizerHelper::extractParts(unsigned Reg, LLT Ty, int NumParts, - SmallVectorImpl<unsigned> &VRegs) { +void LegalizerHelper::extractParts(Register Reg, LLT Ty, int NumParts, + SmallVectorImpl<Register> &VRegs) { for (int i = 0; i < NumParts; ++i) VRegs.push_back(MRI.createGenericVirtualRegister(Ty)); MIRBuilder.buildUnmerge(VRegs, Reg); } +bool LegalizerHelper::extractParts(Register Reg, LLT RegTy, + LLT MainTy, LLT &LeftoverTy, + SmallVectorImpl<Register> &VRegs, + SmallVectorImpl<Register> &LeftoverRegs) { + assert(!LeftoverTy.isValid() && "this is an out argument"); + + unsigned RegSize = RegTy.getSizeInBits(); + unsigned MainSize = MainTy.getSizeInBits(); + unsigned NumParts = RegSize / MainSize; + unsigned LeftoverSize = RegSize - NumParts * MainSize; + + // Use an unmerge when possible. + if (LeftoverSize == 0) { + for (unsigned I = 0; I < NumParts; ++I) + VRegs.push_back(MRI.createGenericVirtualRegister(MainTy)); + MIRBuilder.buildUnmerge(VRegs, Reg); + return true; + } + + if (MainTy.isVector()) { + unsigned EltSize = MainTy.getScalarSizeInBits(); + if (LeftoverSize % EltSize != 0) + return false; + LeftoverTy = LLT::scalarOrVector(LeftoverSize / EltSize, EltSize); + } else { + LeftoverTy = LLT::scalar(LeftoverSize); + } + + // For irregular sizes, extract the individual parts. + for (unsigned I = 0; I != NumParts; ++I) { + Register NewReg = MRI.createGenericVirtualRegister(MainTy); + VRegs.push_back(NewReg); + MIRBuilder.buildExtract(NewReg, Reg, MainSize * I); + } + + for (unsigned Offset = MainSize * NumParts; Offset < RegSize; + Offset += LeftoverSize) { + Register NewReg = MRI.createGenericVirtualRegister(LeftoverTy); + LeftoverRegs.push_back(NewReg); + MIRBuilder.buildExtract(NewReg, Reg, Offset); + } + + return true; +} + +void LegalizerHelper::insertParts(Register DstReg, + LLT ResultTy, LLT PartTy, + ArrayRef<Register> PartRegs, + LLT LeftoverTy, + ArrayRef<Register> LeftoverRegs) { + if (!LeftoverTy.isValid()) { + assert(LeftoverRegs.empty()); + + if (!ResultTy.isVector()) { + MIRBuilder.buildMerge(DstReg, PartRegs); + return; + } + + if (PartTy.isVector()) + MIRBuilder.buildConcatVectors(DstReg, PartRegs); + else + MIRBuilder.buildBuildVector(DstReg, PartRegs); + return; + } + + unsigned PartSize = PartTy.getSizeInBits(); + unsigned LeftoverPartSize = LeftoverTy.getSizeInBits(); + + Register CurResultReg = MRI.createGenericVirtualRegister(ResultTy); + MIRBuilder.buildUndef(CurResultReg); + + unsigned Offset = 0; + for (Register PartReg : PartRegs) { + Register NewResultReg = MRI.createGenericVirtualRegister(ResultTy); + MIRBuilder.buildInsert(NewResultReg, CurResultReg, PartReg, Offset); + CurResultReg = NewResultReg; + Offset += PartSize; + } + + for (unsigned I = 0, E = LeftoverRegs.size(); I != E; ++I) { + // Use the original output register for the final insert to avoid a copy. + Register NewResultReg = (I + 1 == E) ? + DstReg : MRI.createGenericVirtualRegister(ResultTy); + + MIRBuilder.buildInsert(NewResultReg, CurResultReg, LeftoverRegs[I], Offset); + CurResultReg = NewResultReg; + Offset += LeftoverPartSize; + } +} + static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) { switch (Opcode) { case TargetOpcode::G_SDIV: @@ -116,6 +245,12 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) { case TargetOpcode::G_FDIV: assert((Size == 32 || Size == 64) && "Unsupported size"); return Size == 64 ? RTLIB::DIV_F64 : RTLIB::DIV_F32; + case TargetOpcode::G_FEXP: + assert((Size == 32 || Size == 64) && "Unsupported size"); + return Size == 64 ? RTLIB::EXP_F64 : RTLIB::EXP_F32; + case TargetOpcode::G_FEXP2: + assert((Size == 32 || Size == 64) && "Unsupported size"); + return Size == 64 ? RTLIB::EXP2_F64 : RTLIB::EXP2_F32; case TargetOpcode::G_FREM: return Size == 64 ? RTLIB::REM_F64 : RTLIB::REM_F32; case TargetOpcode::G_FPOW: @@ -123,6 +258,32 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) { case TargetOpcode::G_FMA: assert((Size == 32 || Size == 64) && "Unsupported size"); return Size == 64 ? RTLIB::FMA_F64 : RTLIB::FMA_F32; + case TargetOpcode::G_FSIN: + assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size"); + return Size == 128 ? RTLIB::SIN_F128 + : Size == 64 ? RTLIB::SIN_F64 : RTLIB::SIN_F32; + case TargetOpcode::G_FCOS: + assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size"); + return Size == 128 ? RTLIB::COS_F128 + : Size == 64 ? RTLIB::COS_F64 : RTLIB::COS_F32; + case TargetOpcode::G_FLOG10: + assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size"); + return Size == 128 ? RTLIB::LOG10_F128 + : Size == 64 ? RTLIB::LOG10_F64 : RTLIB::LOG10_F32; + case TargetOpcode::G_FLOG: + assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size"); + return Size == 128 ? RTLIB::LOG_F128 + : Size == 64 ? RTLIB::LOG_F64 : RTLIB::LOG_F32; + case TargetOpcode::G_FLOG2: + assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size"); + return Size == 128 ? RTLIB::LOG2_F128 + : Size == 64 ? RTLIB::LOG2_F64 : RTLIB::LOG2_F32; + case TargetOpcode::G_FCEIL: + assert((Size == 32 || Size == 64) && "Unsupported size"); + return Size == 64 ? RTLIB::CEIL_F64 : RTLIB::CEIL_F32; + case TargetOpcode::G_FFLOOR: + assert((Size == 32 || Size == 64) && "Unsupported size"); + return Size == 64 ? RTLIB::FLOOR_F64 : RTLIB::FLOOR_F32; } llvm_unreachable("Unknown libcall function"); } @@ -214,7 +375,20 @@ LegalizerHelper::libcall(MachineInstr &MI) { case TargetOpcode::G_FDIV: case TargetOpcode::G_FMA: case TargetOpcode::G_FPOW: - case TargetOpcode::G_FREM: { + case TargetOpcode::G_FREM: + case TargetOpcode::G_FCOS: + case TargetOpcode::G_FSIN: + case TargetOpcode::G_FLOG10: + case TargetOpcode::G_FLOG: + case TargetOpcode::G_FLOG2: + case TargetOpcode::G_FEXP: + case TargetOpcode::G_FEXP2: + case TargetOpcode::G_FCEIL: + case TargetOpcode::G_FFLOOR: { + if (Size > 64) { + LLVM_DEBUG(dbgs() << "Size " << Size << " too large to legalize.\n"); + return UnableToLegalize; + } Type *HLTy = Size == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx); auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy); if (Status != Legalized) @@ -250,10 +424,11 @@ LegalizerHelper::libcall(MachineInstr &MI) { // FIXME: Support other types unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); - if (ToSize != 32 || (FromSize != 32 && FromSize != 64)) + if ((ToSize != 32 && ToSize != 64) || (FromSize != 32 && FromSize != 64)) return UnableToLegalize; LegalizeResult Status = conversionLibcall( - MI, MIRBuilder, Type::getInt32Ty(Ctx), + MI, MIRBuilder, + ToSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx), FromSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx)); if (Status != Legalized) return Status; @@ -264,12 +439,12 @@ LegalizerHelper::libcall(MachineInstr &MI) { // FIXME: Support other types unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); - if (FromSize != 32 || (ToSize != 32 && ToSize != 64)) + if ((FromSize != 32 && FromSize != 64) || (ToSize != 32 && ToSize != 64)) return UnableToLegalize; LegalizeResult Status = conversionLibcall( MI, MIRBuilder, ToSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx), - Type::getInt32Ty(Ctx)); + FromSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx)); if (Status != Legalized) return Status; break; @@ -283,10 +458,6 @@ LegalizerHelper::libcall(MachineInstr &MI) { LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) { - // FIXME: Don't know how to handle secondary types yet. - if (TypeIdx != 0 && MI.getOpcode() != TargetOpcode::G_EXTRACT) - return UnableToLegalize; - MIRBuilder.setInstr(MI); uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); @@ -302,12 +473,12 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, return UnableToLegalize; int NumParts = SizeOp0 / NarrowSize; - SmallVector<unsigned, 2> DstRegs; + SmallVector<Register, 2> DstRegs; for (int i = 0; i < NumParts; ++i) DstRegs.push_back( MIRBuilder.buildUndef(NarrowTy)->getOperand(0).getReg()); - unsigned DstReg = MI.getOperand(0).getReg(); + Register DstReg = MI.getOperand(0).getReg(); if(MRI.getType(DstReg).isVector()) MIRBuilder.buildBuildVector(DstReg, DstRegs); else @@ -315,6 +486,38 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, MI.eraseFromParent(); return Legalized; } + case TargetOpcode::G_CONSTANT: { + LLT Ty = MRI.getType(MI.getOperand(0).getReg()); + const APInt &Val = MI.getOperand(1).getCImm()->getValue(); + unsigned TotalSize = Ty.getSizeInBits(); + unsigned NarrowSize = NarrowTy.getSizeInBits(); + int NumParts = TotalSize / NarrowSize; + + SmallVector<Register, 4> PartRegs; + for (int I = 0; I != NumParts; ++I) { + unsigned Offset = I * NarrowSize; + auto K = MIRBuilder.buildConstant(NarrowTy, + Val.lshr(Offset).trunc(NarrowSize)); + PartRegs.push_back(K.getReg(0)); + } + + LLT LeftoverTy; + unsigned LeftoverBits = TotalSize - NumParts * NarrowSize; + SmallVector<Register, 1> LeftoverRegs; + if (LeftoverBits != 0) { + LeftoverTy = LLT::scalar(LeftoverBits); + auto K = MIRBuilder.buildConstant( + LeftoverTy, + Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits)); + LeftoverRegs.push_back(K.getReg(0)); + } + + insertParts(MI.getOperand(0).getReg(), + Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs); + + MI.eraseFromParent(); + return Legalized; + } case TargetOpcode::G_ADD: { // FIXME: add support for when SizeOp0 isn't an exact multiple of // NarrowSize. @@ -323,16 +526,16 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, // Expand in terms of carry-setting/consuming G_ADDE instructions. int NumParts = SizeOp0 / NarrowTy.getSizeInBits(); - SmallVector<unsigned, 2> Src1Regs, Src2Regs, DstRegs; + SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs; extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs); extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs); - unsigned CarryIn = MRI.createGenericVirtualRegister(LLT::scalar(1)); + Register CarryIn = MRI.createGenericVirtualRegister(LLT::scalar(1)); MIRBuilder.buildConstant(CarryIn, 0); for (int i = 0; i < NumParts; ++i) { - unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy); - unsigned CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1)); + Register DstReg = MRI.createGenericVirtualRegister(NarrowTy); + Register CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1)); MIRBuilder.buildUAdde(DstReg, CarryOut, Src1Regs[i], Src2Regs[i], CarryIn); @@ -340,67 +543,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, DstRegs.push_back(DstReg); CarryIn = CarryOut; } - unsigned DstReg = MI.getOperand(0).getReg(); - if(MRI.getType(DstReg).isVector()) - MIRBuilder.buildBuildVector(DstReg, DstRegs); - else - MIRBuilder.buildMerge(DstReg, DstRegs); - MI.eraseFromParent(); - return Legalized; - } - case TargetOpcode::G_EXTRACT: { - if (TypeIdx != 1) - return UnableToLegalize; - - int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); - // FIXME: add support for when SizeOp1 isn't an exact multiple of - // NarrowSize. - if (SizeOp1 % NarrowSize != 0) - return UnableToLegalize; - int NumParts = SizeOp1 / NarrowSize; - - SmallVector<unsigned, 2> SrcRegs, DstRegs; - SmallVector<uint64_t, 2> Indexes; - extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs); - - unsigned OpReg = MI.getOperand(0).getReg(); - uint64_t OpStart = MI.getOperand(2).getImm(); - uint64_t OpSize = MRI.getType(OpReg).getSizeInBits(); - for (int i = 0; i < NumParts; ++i) { - unsigned SrcStart = i * NarrowSize; - - if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) { - // No part of the extract uses this subregister, ignore it. - continue; - } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) { - // The entire subregister is extracted, forward the value. - DstRegs.push_back(SrcRegs[i]); - continue; - } - - // OpSegStart is where this destination segment would start in OpReg if it - // extended infinitely in both directions. - int64_t ExtractOffset; - uint64_t SegSize; - if (OpStart < SrcStart) { - ExtractOffset = 0; - SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart); - } else { - ExtractOffset = OpStart - SrcStart; - SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize); - } - - unsigned SegReg = SrcRegs[i]; - if (ExtractOffset != 0 || SegSize != NarrowSize) { - // A genuine extract is needed. - SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize)); - MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset); - } - - DstRegs.push_back(SegReg); - } - - unsigned DstReg = MI.getOperand(0).getReg(); + Register DstReg = MI.getOperand(0).getReg(); if(MRI.getType(DstReg).isVector()) MIRBuilder.buildBuildVector(DstReg, DstRegs); else @@ -408,178 +551,117 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, MI.eraseFromParent(); return Legalized; } - case TargetOpcode::G_INSERT: { + case TargetOpcode::G_SUB: { // FIXME: add support for when SizeOp0 isn't an exact multiple of // NarrowSize. if (SizeOp0 % NarrowSize != 0) return UnableToLegalize; - int NumParts = SizeOp0 / NarrowSize; - - SmallVector<unsigned, 2> SrcRegs, DstRegs; - SmallVector<uint64_t, 2> Indexes; - extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs); + int NumParts = SizeOp0 / NarrowTy.getSizeInBits(); - unsigned OpReg = MI.getOperand(2).getReg(); - uint64_t OpStart = MI.getOperand(3).getImm(); - uint64_t OpSize = MRI.getType(OpReg).getSizeInBits(); - for (int i = 0; i < NumParts; ++i) { - unsigned DstStart = i * NarrowSize; - - if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) { - // No part of the insert affects this subregister, forward the original. - DstRegs.push_back(SrcRegs[i]); - continue; - } else if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) { - // The entire subregister is defined by this insert, forward the new - // value. - DstRegs.push_back(OpReg); - continue; - } + SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs; + extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs); + extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs); - // OpSegStart is where this destination segment would start in OpReg if it - // extended infinitely in both directions. - int64_t ExtractOffset, InsertOffset; - uint64_t SegSize; - if (OpStart < DstStart) { - InsertOffset = 0; - ExtractOffset = DstStart - OpStart; - SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart); - } else { - InsertOffset = OpStart - DstStart; - ExtractOffset = 0; - SegSize = - std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart); - } + Register DstReg = MRI.createGenericVirtualRegister(NarrowTy); + Register BorrowOut = MRI.createGenericVirtualRegister(LLT::scalar(1)); + MIRBuilder.buildInstr(TargetOpcode::G_USUBO, {DstReg, BorrowOut}, + {Src1Regs[0], Src2Regs[0]}); + DstRegs.push_back(DstReg); + Register BorrowIn = BorrowOut; + for (int i = 1; i < NumParts; ++i) { + DstReg = MRI.createGenericVirtualRegister(NarrowTy); + BorrowOut = MRI.createGenericVirtualRegister(LLT::scalar(1)); - unsigned SegReg = OpReg; - if (ExtractOffset != 0 || SegSize != OpSize) { - // A genuine extract is needed. - SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize)); - MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset); - } + MIRBuilder.buildInstr(TargetOpcode::G_USUBE, {DstReg, BorrowOut}, + {Src1Regs[i], Src2Regs[i], BorrowIn}); - unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy); - MIRBuilder.buildInsert(DstReg, SrcRegs[i], SegReg, InsertOffset); DstRegs.push_back(DstReg); + BorrowIn = BorrowOut; } - - assert(DstRegs.size() == (unsigned)NumParts && "not all parts covered"); - unsigned DstReg = MI.getOperand(0).getReg(); - if(MRI.getType(DstReg).isVector()) - MIRBuilder.buildBuildVector(DstReg, DstRegs); - else - MIRBuilder.buildMerge(DstReg, DstRegs); + MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs); MI.eraseFromParent(); return Legalized; } + case TargetOpcode::G_MUL: + case TargetOpcode::G_UMULH: + return narrowScalarMul(MI, NarrowTy); + case TargetOpcode::G_EXTRACT: + return narrowScalarExtract(MI, TypeIdx, NarrowTy); + case TargetOpcode::G_INSERT: + return narrowScalarInsert(MI, TypeIdx, NarrowTy); case TargetOpcode::G_LOAD: { - // FIXME: add support for when SizeOp0 isn't an exact multiple of - // NarrowSize. - if (SizeOp0 % NarrowSize != 0) - return UnableToLegalize; - const auto &MMO = **MI.memoperands_begin(); - // This implementation doesn't work for atomics. Give up instead of doing - // something invalid. - if (MMO.getOrdering() != AtomicOrdering::NotAtomic || - MMO.getFailureOrdering() != AtomicOrdering::NotAtomic) + Register DstReg = MI.getOperand(0).getReg(); + LLT DstTy = MRI.getType(DstReg); + if (DstTy.isVector()) return UnableToLegalize; - int NumParts = SizeOp0 / NarrowSize; - LLT OffsetTy = LLT::scalar( - MRI.getType(MI.getOperand(1).getReg()).getScalarSizeInBits()); - - SmallVector<unsigned, 2> DstRegs; - for (int i = 0; i < NumParts; ++i) { - unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy); - unsigned SrcReg = 0; - unsigned Adjustment = i * NarrowSize / 8; - unsigned Alignment = MinAlign(MMO.getAlignment(), Adjustment); - - MachineMemOperand *SplitMMO = MIRBuilder.getMF().getMachineMemOperand( - MMO.getPointerInfo().getWithOffset(Adjustment), MMO.getFlags(), - NarrowSize / 8, Alignment, MMO.getAAInfo(), MMO.getRanges(), - MMO.getSyncScopeID(), MMO.getOrdering(), MMO.getFailureOrdering()); - - MIRBuilder.materializeGEP(SrcReg, MI.getOperand(1).getReg(), OffsetTy, - Adjustment); + if (8 * MMO.getSize() != DstTy.getSizeInBits()) { + Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy); + auto &MMO = **MI.memoperands_begin(); + MIRBuilder.buildLoad(TmpReg, MI.getOperand(1).getReg(), MMO); + MIRBuilder.buildAnyExt(DstReg, TmpReg); + MI.eraseFromParent(); + return Legalized; + } - MIRBuilder.buildLoad(DstReg, SrcReg, *SplitMMO); + return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy); + } + case TargetOpcode::G_ZEXTLOAD: + case TargetOpcode::G_SEXTLOAD: { + bool ZExt = MI.getOpcode() == TargetOpcode::G_ZEXTLOAD; + Register DstReg = MI.getOperand(0).getReg(); + Register PtrReg = MI.getOperand(1).getReg(); - DstRegs.push_back(DstReg); + Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy); + auto &MMO = **MI.memoperands_begin(); + if (MMO.getSizeInBits() == NarrowSize) { + MIRBuilder.buildLoad(TmpReg, PtrReg, MMO); + } else { + unsigned ExtLoad = ZExt ? TargetOpcode::G_ZEXTLOAD + : TargetOpcode::G_SEXTLOAD; + MIRBuilder.buildInstr(ExtLoad) + .addDef(TmpReg) + .addUse(PtrReg) + .addMemOperand(&MMO); } - unsigned DstReg = MI.getOperand(0).getReg(); - if(MRI.getType(DstReg).isVector()) - MIRBuilder.buildBuildVector(DstReg, DstRegs); + + if (ZExt) + MIRBuilder.buildZExt(DstReg, TmpReg); else - MIRBuilder.buildMerge(DstReg, DstRegs); + MIRBuilder.buildSExt(DstReg, TmpReg); + MI.eraseFromParent(); return Legalized; } case TargetOpcode::G_STORE: { - // FIXME: add support for when SizeOp0 isn't an exact multiple of - // NarrowSize. - if (SizeOp0 % NarrowSize != 0) - return UnableToLegalize; - const auto &MMO = **MI.memoperands_begin(); - // This implementation doesn't work for atomics. Give up instead of doing - // something invalid. - if (MMO.getOrdering() != AtomicOrdering::NotAtomic || - MMO.getFailureOrdering() != AtomicOrdering::NotAtomic) + + Register SrcReg = MI.getOperand(0).getReg(); + LLT SrcTy = MRI.getType(SrcReg); + if (SrcTy.isVector()) return UnableToLegalize; int NumParts = SizeOp0 / NarrowSize; - LLT OffsetTy = LLT::scalar( - MRI.getType(MI.getOperand(1).getReg()).getScalarSizeInBits()); - - SmallVector<unsigned, 2> SrcRegs; - extractParts(MI.getOperand(0).getReg(), NarrowTy, NumParts, SrcRegs); - - for (int i = 0; i < NumParts; ++i) { - unsigned DstReg = 0; - unsigned Adjustment = i * NarrowSize / 8; - unsigned Alignment = MinAlign(MMO.getAlignment(), Adjustment); - - MachineMemOperand *SplitMMO = MIRBuilder.getMF().getMachineMemOperand( - MMO.getPointerInfo().getWithOffset(Adjustment), MMO.getFlags(), - NarrowSize / 8, Alignment, MMO.getAAInfo(), MMO.getRanges(), - MMO.getSyncScopeID(), MMO.getOrdering(), MMO.getFailureOrdering()); - - MIRBuilder.materializeGEP(DstReg, MI.getOperand(1).getReg(), OffsetTy, - Adjustment); - - MIRBuilder.buildStore(SrcRegs[i], DstReg, *SplitMMO); - } - MI.eraseFromParent(); - return Legalized; - } - case TargetOpcode::G_CONSTANT: { - // FIXME: add support for when SizeOp0 isn't an exact multiple of - // NarrowSize. - if (SizeOp0 % NarrowSize != 0) + unsigned HandledSize = NumParts * NarrowTy.getSizeInBits(); + unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize; + if (SrcTy.isVector() && LeftoverBits != 0) return UnableToLegalize; - int NumParts = SizeOp0 / NarrowSize; - const APInt &Cst = MI.getOperand(1).getCImm()->getValue(); - LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext(); - SmallVector<unsigned, 2> DstRegs; - for (int i = 0; i < NumParts; ++i) { - unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy); - ConstantInt *CI = - ConstantInt::get(Ctx, Cst.lshr(NarrowSize * i).trunc(NarrowSize)); - MIRBuilder.buildConstant(DstReg, *CI); - DstRegs.push_back(DstReg); + if (8 * MMO.getSize() != SrcTy.getSizeInBits()) { + Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy); + auto &MMO = **MI.memoperands_begin(); + MIRBuilder.buildTrunc(TmpReg, SrcReg); + MIRBuilder.buildStore(TmpReg, MI.getOperand(1).getReg(), MMO); + MI.eraseFromParent(); + return Legalized; } - unsigned DstReg = MI.getOperand(0).getReg(); - if(MRI.getType(DstReg).isVector()) - MIRBuilder.buildBuildVector(DstReg, DstRegs); - else - MIRBuilder.buildMerge(DstReg, DstRegs); - MI.eraseFromParent(); - return Legalized; + + return reduceLoadStoreWidth(MI, 0, NarrowTy); } + case TargetOpcode::G_SELECT: + return narrowScalarSelect(MI, TypeIdx, NarrowTy); case TargetOpcode::G_AND: case TargetOpcode::G_OR: case TargetOpcode::G_XOR: { @@ -592,44 +674,112 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, // ... // AN = BinOp<Ty/N> BN, CN // A = G_MERGE_VALUES A1, ..., AN + return narrowScalarBasic(MI, TypeIdx, NarrowTy); + } + case TargetOpcode::G_SHL: + case TargetOpcode::G_LSHR: + case TargetOpcode::G_ASHR: + return narrowScalarShift(MI, TypeIdx, NarrowTy); + case TargetOpcode::G_CTLZ: + case TargetOpcode::G_CTLZ_ZERO_UNDEF: + case TargetOpcode::G_CTTZ: + case TargetOpcode::G_CTTZ_ZERO_UNDEF: + case TargetOpcode::G_CTPOP: + if (TypeIdx != 0) + return UnableToLegalize; // TODO - // FIXME: add support for when SizeOp0 isn't an exact multiple of - // NarrowSize. - if (SizeOp0 % NarrowSize != 0) + Observer.changingInstr(MI); + narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT); + Observer.changedInstr(MI); + return Legalized; + case TargetOpcode::G_INTTOPTR: + if (TypeIdx != 1) return UnableToLegalize; - int NumParts = SizeOp0 / NarrowSize; - // List the registers where the destination will be scattered. - SmallVector<unsigned, 2> DstRegs; - // List the registers where the first argument will be split. - SmallVector<unsigned, 2> SrcsReg1; - // List the registers where the second argument will be split. - SmallVector<unsigned, 2> SrcsReg2; - // Create all the temporary registers. - for (int i = 0; i < NumParts; ++i) { - unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy); - unsigned SrcReg1 = MRI.createGenericVirtualRegister(NarrowTy); - unsigned SrcReg2 = MRI.createGenericVirtualRegister(NarrowTy); + Observer.changingInstr(MI); + narrowScalarSrc(MI, NarrowTy, 1); + Observer.changedInstr(MI); + return Legalized; + case TargetOpcode::G_PTRTOINT: + if (TypeIdx != 0) + return UnableToLegalize; - DstRegs.push_back(DstReg); - SrcsReg1.push_back(SrcReg1); - SrcsReg2.push_back(SrcReg2); + Observer.changingInstr(MI); + narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT); + Observer.changedInstr(MI); + return Legalized; + case TargetOpcode::G_PHI: { + unsigned NumParts = SizeOp0 / NarrowSize; + SmallVector<Register, 2> DstRegs; + SmallVector<SmallVector<Register, 2>, 2> SrcRegs; + DstRegs.resize(NumParts); + SrcRegs.resize(MI.getNumOperands() / 2); + Observer.changingInstr(MI); + for (unsigned i = 1; i < MI.getNumOperands(); i += 2) { + MachineBasicBlock &OpMBB = *MI.getOperand(i + 1).getMBB(); + MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator()); + extractParts(MI.getOperand(i).getReg(), NarrowTy, NumParts, + SrcRegs[i / 2]); } - // Explode the big arguments into smaller chunks. - MIRBuilder.buildUnmerge(SrcsReg1, MI.getOperand(1).getReg()); - MIRBuilder.buildUnmerge(SrcsReg2, MI.getOperand(2).getReg()); + MachineBasicBlock &MBB = *MI.getParent(); + MIRBuilder.setInsertPt(MBB, MI); + for (unsigned i = 0; i < NumParts; ++i) { + DstRegs[i] = MRI.createGenericVirtualRegister(NarrowTy); + MachineInstrBuilder MIB = + MIRBuilder.buildInstr(TargetOpcode::G_PHI).addDef(DstRegs[i]); + for (unsigned j = 1; j < MI.getNumOperands(); j += 2) + MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1)); + } + MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI()); + MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs); + Observer.changedInstr(MI); + MI.eraseFromParent(); + return Legalized; + } + case TargetOpcode::G_EXTRACT_VECTOR_ELT: + case TargetOpcode::G_INSERT_VECTOR_ELT: { + if (TypeIdx != 2) + return UnableToLegalize; - // Do the operation on each small part. - for (int i = 0; i < NumParts; ++i) - MIRBuilder.buildInstr(MI.getOpcode(), {DstRegs[i]}, - {SrcsReg1[i], SrcsReg2[i]}); + int OpIdx = MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3; + Observer.changingInstr(MI); + narrowScalarSrc(MI, NarrowTy, OpIdx); + Observer.changedInstr(MI); + return Legalized; + } + case TargetOpcode::G_ICMP: { + uint64_t SrcSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits(); + if (NarrowSize * 2 != SrcSize) + return UnableToLegalize; - // Gather the destination registers into the final destination. - unsigned DstReg = MI.getOperand(0).getReg(); - if(MRI.getType(DstReg).isVector()) - MIRBuilder.buildBuildVector(DstReg, DstRegs); - else - MIRBuilder.buildMerge(DstReg, DstRegs); + Observer.changingInstr(MI); + Register LHSL = MRI.createGenericVirtualRegister(NarrowTy); + Register LHSH = MRI.createGenericVirtualRegister(NarrowTy); + MIRBuilder.buildUnmerge({LHSL, LHSH}, MI.getOperand(2).getReg()); + + Register RHSL = MRI.createGenericVirtualRegister(NarrowTy); + Register RHSH = MRI.createGenericVirtualRegister(NarrowTy); + MIRBuilder.buildUnmerge({RHSL, RHSH}, MI.getOperand(3).getReg()); + + CmpInst::Predicate Pred = + static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate()); + + if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE) { + MachineInstrBuilder XorL = MIRBuilder.buildXor(NarrowTy, LHSL, RHSL); + MachineInstrBuilder XorH = MIRBuilder.buildXor(NarrowTy, LHSH, RHSH); + MachineInstrBuilder Or = MIRBuilder.buildOr(NarrowTy, XorL, XorH); + MachineInstrBuilder Zero = MIRBuilder.buildConstant(NarrowTy, 0); + MIRBuilder.buildICmp(Pred, MI.getOperand(0).getReg(), Or, Zero); + } else { + const LLT s1 = LLT::scalar(1); + MachineInstrBuilder CmpH = MIRBuilder.buildICmp(Pred, s1, LHSH, RHSH); + MachineInstrBuilder CmpHEQ = + MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, s1, LHSH, RHSH); + MachineInstrBuilder CmpLU = MIRBuilder.buildICmp( + ICmpInst::getUnsignedPredicate(Pred), s1, LHSL, RHSL); + MIRBuilder.buildSelect(MI.getOperand(0).getReg(), CmpHEQ, CmpLU, CmpH); + } + Observer.changedInstr(MI); MI.eraseFromParent(); return Legalized; } @@ -643,15 +793,322 @@ void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy, MO.setReg(ExtB->getOperand(0).getReg()); } +void LegalizerHelper::narrowScalarSrc(MachineInstr &MI, LLT NarrowTy, + unsigned OpIdx) { + MachineOperand &MO = MI.getOperand(OpIdx); + auto ExtB = MIRBuilder.buildInstr(TargetOpcode::G_TRUNC, {NarrowTy}, + {MO.getReg()}); + MO.setReg(ExtB->getOperand(0).getReg()); +} + void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy, unsigned OpIdx, unsigned TruncOpcode) { MachineOperand &MO = MI.getOperand(OpIdx); - unsigned DstExt = MRI.createGenericVirtualRegister(WideTy); + Register DstExt = MRI.createGenericVirtualRegister(WideTy); MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt()); MIRBuilder.buildInstr(TruncOpcode, {MO.getReg()}, {DstExt}); MO.setReg(DstExt); } +void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy, + unsigned OpIdx, unsigned ExtOpcode) { + MachineOperand &MO = MI.getOperand(OpIdx); + Register DstTrunc = MRI.createGenericVirtualRegister(NarrowTy); + MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt()); + MIRBuilder.buildInstr(ExtOpcode, {MO.getReg()}, {DstTrunc}); + MO.setReg(DstTrunc); +} + +void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy, + unsigned OpIdx) { + MachineOperand &MO = MI.getOperand(OpIdx); + Register DstExt = MRI.createGenericVirtualRegister(WideTy); + MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt()); + MIRBuilder.buildExtract(MO.getReg(), DstExt, 0); + MO.setReg(DstExt); +} + +void LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy, + unsigned OpIdx) { + MachineOperand &MO = MI.getOperand(OpIdx); + + LLT OldTy = MRI.getType(MO.getReg()); + unsigned OldElts = OldTy.getNumElements(); + unsigned NewElts = MoreTy.getNumElements(); + + unsigned NumParts = NewElts / OldElts; + + // Use concat_vectors if the result is a multiple of the number of elements. + if (NumParts * OldElts == NewElts) { + SmallVector<Register, 8> Parts; + Parts.push_back(MO.getReg()); + + Register ImpDef = MIRBuilder.buildUndef(OldTy).getReg(0); + for (unsigned I = 1; I != NumParts; ++I) + Parts.push_back(ImpDef); + + auto Concat = MIRBuilder.buildConcatVectors(MoreTy, Parts); + MO.setReg(Concat.getReg(0)); + return; + } + + Register MoreReg = MRI.createGenericVirtualRegister(MoreTy); + Register ImpDef = MIRBuilder.buildUndef(MoreTy).getReg(0); + MIRBuilder.buildInsert(MoreReg, ImpDef, MO.getReg(), 0); + MO.setReg(MoreReg); +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx, + LLT WideTy) { + if (TypeIdx != 1) + return UnableToLegalize; + + Register DstReg = MI.getOperand(0).getReg(); + LLT DstTy = MRI.getType(DstReg); + if (DstTy.isVector()) + return UnableToLegalize; + + Register Src1 = MI.getOperand(1).getReg(); + LLT SrcTy = MRI.getType(Src1); + const int DstSize = DstTy.getSizeInBits(); + const int SrcSize = SrcTy.getSizeInBits(); + const int WideSize = WideTy.getSizeInBits(); + const int NumMerge = (DstSize + WideSize - 1) / WideSize; + + unsigned NumOps = MI.getNumOperands(); + unsigned NumSrc = MI.getNumOperands() - 1; + unsigned PartSize = DstTy.getSizeInBits() / NumSrc; + + if (WideSize >= DstSize) { + // Directly pack the bits in the target type. + Register ResultReg = MIRBuilder.buildZExt(WideTy, Src1).getReg(0); + + for (unsigned I = 2; I != NumOps; ++I) { + const unsigned Offset = (I - 1) * PartSize; + + Register SrcReg = MI.getOperand(I).getReg(); + assert(MRI.getType(SrcReg) == LLT::scalar(PartSize)); + + auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg); + + Register NextResult = I + 1 == NumOps && WideSize == DstSize ? DstReg : + MRI.createGenericVirtualRegister(WideTy); + + auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset); + auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt); + MIRBuilder.buildOr(NextResult, ResultReg, Shl); + ResultReg = NextResult; + } + + if (WideSize > DstSize) + MIRBuilder.buildTrunc(DstReg, ResultReg); + + MI.eraseFromParent(); + return Legalized; + } + + // Unmerge the original values to the GCD type, and recombine to the next + // multiple greater than the original type. + // + // %3:_(s12) = G_MERGE_VALUES %0:_(s4), %1:_(s4), %2:_(s4) -> s6 + // %4:_(s2), %5:_(s2) = G_UNMERGE_VALUES %0 + // %6:_(s2), %7:_(s2) = G_UNMERGE_VALUES %1 + // %8:_(s2), %9:_(s2) = G_UNMERGE_VALUES %2 + // %10:_(s6) = G_MERGE_VALUES %4, %5, %6 + // %11:_(s6) = G_MERGE_VALUES %7, %8, %9 + // %12:_(s12) = G_MERGE_VALUES %10, %11 + // + // Padding with undef if necessary: + // + // %2:_(s8) = G_MERGE_VALUES %0:_(s4), %1:_(s4) -> s6 + // %3:_(s2), %4:_(s2) = G_UNMERGE_VALUES %0 + // %5:_(s2), %6:_(s2) = G_UNMERGE_VALUES %1 + // %7:_(s2) = G_IMPLICIT_DEF + // %8:_(s6) = G_MERGE_VALUES %3, %4, %5 + // %9:_(s6) = G_MERGE_VALUES %6, %7, %7 + // %10:_(s12) = G_MERGE_VALUES %8, %9 + + const int GCD = greatestCommonDivisor(SrcSize, WideSize); + LLT GCDTy = LLT::scalar(GCD); + + SmallVector<Register, 8> Parts; + SmallVector<Register, 8> NewMergeRegs; + SmallVector<Register, 8> Unmerges; + LLT WideDstTy = LLT::scalar(NumMerge * WideSize); + + // Decompose the original operands if they don't evenly divide. + for (int I = 1, E = MI.getNumOperands(); I != E; ++I) { + Register SrcReg = MI.getOperand(I).getReg(); + if (GCD == SrcSize) { + Unmerges.push_back(SrcReg); + } else { + auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg); + for (int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J) + Unmerges.push_back(Unmerge.getReg(J)); + } + } + + // Pad with undef to the next size that is a multiple of the requested size. + if (static_cast<int>(Unmerges.size()) != NumMerge * WideSize) { + Register UndefReg = MIRBuilder.buildUndef(GCDTy).getReg(0); + for (int I = Unmerges.size(); I != NumMerge * WideSize; ++I) + Unmerges.push_back(UndefReg); + } + + const int PartsPerGCD = WideSize / GCD; + + // Build merges of each piece. + ArrayRef<Register> Slicer(Unmerges); + for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(PartsPerGCD)) { + auto Merge = MIRBuilder.buildMerge(WideTy, Slicer.take_front(PartsPerGCD)); + NewMergeRegs.push_back(Merge.getReg(0)); + } + + // A truncate may be necessary if the requested type doesn't evenly divide the + // original result type. + if (DstTy.getSizeInBits() == WideDstTy.getSizeInBits()) { + MIRBuilder.buildMerge(DstReg, NewMergeRegs); + } else { + auto FinalMerge = MIRBuilder.buildMerge(WideDstTy, NewMergeRegs); + MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0)); + } + + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx, + LLT WideTy) { + if (TypeIdx != 0) + return UnableToLegalize; + + unsigned NumDst = MI.getNumOperands() - 1; + Register SrcReg = MI.getOperand(NumDst).getReg(); + LLT SrcTy = MRI.getType(SrcReg); + if (!SrcTy.isScalar()) + return UnableToLegalize; + + Register Dst0Reg = MI.getOperand(0).getReg(); + LLT DstTy = MRI.getType(Dst0Reg); + if (!DstTy.isScalar()) + return UnableToLegalize; + + unsigned NewSrcSize = NumDst * WideTy.getSizeInBits(); + LLT NewSrcTy = LLT::scalar(NewSrcSize); + unsigned SizeDiff = WideTy.getSizeInBits() - DstTy.getSizeInBits(); + + auto WideSrc = MIRBuilder.buildZExt(NewSrcTy, SrcReg); + + for (unsigned I = 1; I != NumDst; ++I) { + auto ShiftAmt = MIRBuilder.buildConstant(NewSrcTy, SizeDiff * I); + auto Shl = MIRBuilder.buildShl(NewSrcTy, WideSrc, ShiftAmt); + WideSrc = MIRBuilder.buildOr(NewSrcTy, WideSrc, Shl); + } + + Observer.changingInstr(MI); + + MI.getOperand(NumDst).setReg(WideSrc->getOperand(0).getReg()); + for (unsigned I = 0; I != NumDst; ++I) + widenScalarDst(MI, WideTy, I); + + Observer.changedInstr(MI); + + return Legalized; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx, + LLT WideTy) { + Register DstReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); + LLT SrcTy = MRI.getType(SrcReg); + + LLT DstTy = MRI.getType(DstReg); + unsigned Offset = MI.getOperand(2).getImm(); + + if (TypeIdx == 0) { + if (SrcTy.isVector() || DstTy.isVector()) + return UnableToLegalize; + + SrcOp Src(SrcReg); + if (SrcTy.isPointer()) { + // Extracts from pointers can be handled only if they are really just + // simple integers. + const DataLayout &DL = MIRBuilder.getDataLayout(); + if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace())) + return UnableToLegalize; + + LLT SrcAsIntTy = LLT::scalar(SrcTy.getSizeInBits()); + Src = MIRBuilder.buildPtrToInt(SrcAsIntTy, Src); + SrcTy = SrcAsIntTy; + } + + if (DstTy.isPointer()) + return UnableToLegalize; + + if (Offset == 0) { + // Avoid a shift in the degenerate case. + MIRBuilder.buildTrunc(DstReg, + MIRBuilder.buildAnyExtOrTrunc(WideTy, Src)); + MI.eraseFromParent(); + return Legalized; + } + + // Do a shift in the source type. + LLT ShiftTy = SrcTy; + if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) { + Src = MIRBuilder.buildAnyExt(WideTy, Src); + ShiftTy = WideTy; + } else if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) + return UnableToLegalize; + + auto LShr = MIRBuilder.buildLShr( + ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset)); + MIRBuilder.buildTrunc(DstReg, LShr); + MI.eraseFromParent(); + return Legalized; + } + + if (SrcTy.isScalar()) { + Observer.changingInstr(MI); + widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT); + Observer.changedInstr(MI); + return Legalized; + } + + if (!SrcTy.isVector()) + return UnableToLegalize; + + if (DstTy != SrcTy.getElementType()) + return UnableToLegalize; + + if (Offset % SrcTy.getScalarSizeInBits() != 0) + return UnableToLegalize; + + Observer.changingInstr(MI); + widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT); + + MI.getOperand(2).setImm((WideTy.getSizeInBits() / SrcTy.getSizeInBits()) * + Offset); + widenScalarDst(MI, WideTy.getScalarType(), 0); + Observer.changedInstr(MI); + return Legalized; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx, + LLT WideTy) { + if (TypeIdx != 0) + return UnableToLegalize; + Observer.changingInstr(MI); + widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT); + widenScalarDst(MI, WideTy); + Observer.changedInstr(MI); + return Legalized; +} + LegalizerHelper::LegalizeResult LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { MIRBuilder.setInstr(MI); @@ -659,6 +1116,14 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { switch (MI.getOpcode()) { default: return UnableToLegalize; + case TargetOpcode::G_EXTRACT: + return widenScalarExtract(MI, TypeIdx, WideTy); + case TargetOpcode::G_INSERT: + return widenScalarInsert(MI, TypeIdx, WideTy); + case TargetOpcode::G_MERGE_VALUES: + return widenScalarMergeValues(MI, TypeIdx, WideTy); + case TargetOpcode::G_UNMERGE_VALUES: + return widenScalarUnmergeValues(MI, TypeIdx, WideTy); case TargetOpcode::G_UADDO: case TargetOpcode::G_USUBO: { if (TypeIdx == 1) @@ -690,19 +1155,28 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { case TargetOpcode::G_CTLZ: case TargetOpcode::G_CTLZ_ZERO_UNDEF: case TargetOpcode::G_CTPOP: { + if (TypeIdx == 0) { + Observer.changingInstr(MI); + widenScalarDst(MI, WideTy, 0); + Observer.changedInstr(MI); + return Legalized; + } + + Register SrcReg = MI.getOperand(1).getReg(); + // First ZEXT the input. - auto MIBSrc = MIRBuilder.buildZExt(WideTy, MI.getOperand(1).getReg()); - LLT CurTy = MRI.getType(MI.getOperand(0).getReg()); + auto MIBSrc = MIRBuilder.buildZExt(WideTy, SrcReg); + LLT CurTy = MRI.getType(SrcReg); if (MI.getOpcode() == TargetOpcode::G_CTTZ) { // The count is the same in the larger type except if the original // value was zero. This can be handled by setting the bit just off // the top of the original type. auto TopBit = APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits()); - MIBSrc = MIRBuilder.buildInstr( - TargetOpcode::G_OR, {WideTy}, - {MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit.getSExtValue())}); + MIBSrc = MIRBuilder.buildOr( + WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit)); } + // Perform the operation at the larger size. auto MIBNewOp = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy}, {MIBSrc}); // This is already the correct result for CTPOP and CTTZs @@ -714,22 +1188,43 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { TargetOpcode::G_SUB, {WideTy}, {MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff)}); } - auto &TII = *MI.getMF()->getSubtarget().getInstrInfo(); - // Make the original instruction a trunc now, and update its source. + + MIRBuilder.buildZExtOrTrunc(MI.getOperand(0), MIBNewOp); + MI.eraseFromParent(); + return Legalized; + } + case TargetOpcode::G_BSWAP: { Observer.changingInstr(MI); - MI.setDesc(TII.get(TargetOpcode::G_TRUNC)); - MI.getOperand(1).setReg(MIBNewOp->getOperand(0).getReg()); + Register DstReg = MI.getOperand(0).getReg(); + + Register ShrReg = MRI.createGenericVirtualRegister(WideTy); + Register DstExt = MRI.createGenericVirtualRegister(WideTy); + Register ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy); + widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT); + + MI.getOperand(0).setReg(DstExt); + + MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt()); + + LLT Ty = MRI.getType(DstReg); + unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits(); + MIRBuilder.buildConstant(ShiftAmtReg, DiffBits); + MIRBuilder.buildInstr(TargetOpcode::G_LSHR) + .addDef(ShrReg) + .addUse(DstExt) + .addUse(ShiftAmtReg); + + MIRBuilder.buildTrunc(DstReg, ShrReg); Observer.changedInstr(MI); return Legalized; } - case TargetOpcode::G_ADD: case TargetOpcode::G_AND: case TargetOpcode::G_MUL: case TargetOpcode::G_OR: case TargetOpcode::G_XOR: case TargetOpcode::G_SUB: - // Perform operation at larger width (any extension is fine here, high bits + // Perform operation at larger width (any extension is fines here, high bits // don't affect the result) and then truncate the result back to the // original type. Observer.changingInstr(MI); @@ -741,16 +1236,24 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { case TargetOpcode::G_SHL: Observer.changingInstr(MI); - widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT); - // The "number of bits to shift" operand must preserve its value as an - // unsigned integer: - widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT); - widenScalarDst(MI, WideTy); + + if (TypeIdx == 0) { + widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT); + widenScalarDst(MI, WideTy); + } else { + assert(TypeIdx == 1); + // The "number of bits to shift" operand must preserve its value as an + // unsigned integer: + widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT); + } + Observer.changedInstr(MI); return Legalized; case TargetOpcode::G_SDIV: case TargetOpcode::G_SREM: + case TargetOpcode::G_SMIN: + case TargetOpcode::G_SMAX: Observer.changingInstr(MI); widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT); widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT); @@ -759,18 +1262,28 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { return Legalized; case TargetOpcode::G_ASHR: + case TargetOpcode::G_LSHR: Observer.changingInstr(MI); - widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT); - // The "number of bits to shift" operand must preserve its value as an - // unsigned integer: - widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT); - widenScalarDst(MI, WideTy); + + if (TypeIdx == 0) { + unsigned CvtOp = MI.getOpcode() == TargetOpcode::G_ASHR ? + TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT; + + widenScalarSrc(MI, WideTy, 1, CvtOp); + widenScalarDst(MI, WideTy); + } else { + assert(TypeIdx == 1); + // The "number of bits to shift" operand must preserve its value as an + // unsigned integer: + widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT); + } + Observer.changedInstr(MI); return Legalized; - case TargetOpcode::G_UDIV: case TargetOpcode::G_UREM: - case TargetOpcode::G_LSHR: + case TargetOpcode::G_UMIN: + case TargetOpcode::G_UMAX: Observer.changingInstr(MI); widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT); widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT); @@ -788,8 +1301,9 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT); widenScalarDst(MI, WideTy); } else { + bool IsVec = MRI.getType(MI.getOperand(1).getReg()).isVector(); // Explicit extension is required here since high bits affect the result. - widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT); + widenScalarSrc(MI, WideTy, 1, MIRBuilder.getBoolExtOp(IsVec, false)); } Observer.changedInstr(MI); return Legalized; @@ -819,23 +1333,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { Observer.changedInstr(MI); return Legalized; - case TargetOpcode::G_INSERT: - if (TypeIdx != 0) - return UnableToLegalize; - Observer.changingInstr(MI); - widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT); - widenScalarDst(MI, WideTy); - Observer.changedInstr(MI); - return Legalized; - case TargetOpcode::G_LOAD: - // For some types like i24, we might try to widen to i32. To properly handle - // this we should be using a dedicated extending load, until then avoid - // trying to legalize. - if (alignTo(MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(), 8) != - WideTy.getSizeInBits()) - return UnableToLegalize; - LLVM_FALLTHROUGH; case TargetOpcode::G_SEXTLOAD: case TargetOpcode::G_ZEXTLOAD: Observer.changingInstr(MI); @@ -844,12 +1342,19 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { return Legalized; case TargetOpcode::G_STORE: { - if (MRI.getType(MI.getOperand(0).getReg()) != LLT::scalar(1) || - WideTy != LLT::scalar(8)) + if (TypeIdx != 0) + return UnableToLegalize; + + LLT Ty = MRI.getType(MI.getOperand(0).getReg()); + if (!isPowerOf2_32(Ty.getSizeInBits())) return UnableToLegalize; Observer.changingInstr(MI); - widenScalarSrc(MI, WideTy, 0, TargetOpcode::G_ZEXT); + + unsigned ExtType = Ty.getScalarSizeInBits() == 1 ? + TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT; + widenScalarSrc(MI, WideTy, 0, ExtType); + Observer.changedInstr(MI); return Legalized; } @@ -871,14 +1376,19 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { bool LosesInfo; switch (WideTy.getSizeInBits()) { case 32: - Val.convert(APFloat::IEEEsingle(), APFloat::rmTowardZero, &LosesInfo); + Val.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, + &LosesInfo); break; case 64: - Val.convert(APFloat::IEEEdouble(), APFloat::rmTowardZero, &LosesInfo); + Val.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, + &LosesInfo); break; default: - llvm_unreachable("Unhandled fp widen type"); + return UnableToLegalize; } + + assert(!LosesInfo && "extend should always be lossless"); + Observer.changingInstr(MI); SrcMO.setFPImm(ConstantFP::get(Ctx, Val)); @@ -894,7 +1404,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { } case TargetOpcode::G_BRCOND: Observer.changingInstr(MI); - widenScalarSrc(MI, WideTy, 0, TargetOpcode::G_ANYEXT); + widenScalarSrc(MI, WideTy, 0, MIRBuilder.getBoolExtOp(false, false)); Observer.changedInstr(MI); return Legalized; @@ -947,23 +1457,103 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { Observer.changedInstr(MI); return Legalized; } - case TargetOpcode::G_EXTRACT_VECTOR_ELT: + case TargetOpcode::G_EXTRACT_VECTOR_ELT: { + if (TypeIdx == 0) { + Register VecReg = MI.getOperand(1).getReg(); + LLT VecTy = MRI.getType(VecReg); + Observer.changingInstr(MI); + + widenScalarSrc(MI, LLT::vector(VecTy.getNumElements(), + WideTy.getSizeInBits()), + 1, TargetOpcode::G_SEXT); + + widenScalarDst(MI, WideTy, 0); + Observer.changedInstr(MI); + return Legalized; + } + if (TypeIdx != 2) return UnableToLegalize; Observer.changingInstr(MI); widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT); Observer.changedInstr(MI); return Legalized; - + } + case TargetOpcode::G_FADD: + case TargetOpcode::G_FMUL: + case TargetOpcode::G_FSUB: + case TargetOpcode::G_FMA: + case TargetOpcode::G_FNEG: + case TargetOpcode::G_FABS: + case TargetOpcode::G_FCANONICALIZE: + case TargetOpcode::G_FMINNUM: + case TargetOpcode::G_FMAXNUM: + case TargetOpcode::G_FMINNUM_IEEE: + case TargetOpcode::G_FMAXNUM_IEEE: + case TargetOpcode::G_FMINIMUM: + case TargetOpcode::G_FMAXIMUM: + case TargetOpcode::G_FDIV: + case TargetOpcode::G_FREM: case TargetOpcode::G_FCEIL: + case TargetOpcode::G_FFLOOR: + case TargetOpcode::G_FCOS: + case TargetOpcode::G_FSIN: + case TargetOpcode::G_FLOG10: + case TargetOpcode::G_FLOG: + case TargetOpcode::G_FLOG2: + case TargetOpcode::G_FRINT: + case TargetOpcode::G_FNEARBYINT: + case TargetOpcode::G_FSQRT: + case TargetOpcode::G_FEXP: + case TargetOpcode::G_FEXP2: + case TargetOpcode::G_FPOW: + case TargetOpcode::G_INTRINSIC_TRUNC: + case TargetOpcode::G_INTRINSIC_ROUND: + assert(TypeIdx == 0); + Observer.changingInstr(MI); + + for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) + widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT); + + widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC); + Observer.changedInstr(MI); + return Legalized; + case TargetOpcode::G_INTTOPTR: + if (TypeIdx != 1) + return UnableToLegalize; + + Observer.changingInstr(MI); + widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT); + Observer.changedInstr(MI); + return Legalized; + case TargetOpcode::G_PTRTOINT: if (TypeIdx != 0) return UnableToLegalize; + Observer.changingInstr(MI); - widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT); - widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC); + widenScalarDst(MI, WideTy, 0); + Observer.changedInstr(MI); + return Legalized; + case TargetOpcode::G_BUILD_VECTOR: { + Observer.changingInstr(MI); + + const LLT WideEltTy = TypeIdx == 1 ? WideTy : WideTy.getElementType(); + for (int I = 1, E = MI.getNumOperands(); I != E; ++I) + widenScalarSrc(MI, WideEltTy, I, TargetOpcode::G_ANYEXT); + + // Avoid changing the result vector type if the source element type was + // requested. + if (TypeIdx == 1) { + auto &TII = *MI.getMF()->getSubtarget().getInstrInfo(); + MI.setDesc(TII.get(TargetOpcode::G_BUILD_VECTOR_TRUNC)); + } else { + widenScalarDst(MI, WideTy, 0); + } + Observer.changedInstr(MI); return Legalized; } + } } LegalizerHelper::LegalizeResult @@ -976,13 +1566,13 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { return UnableToLegalize; case TargetOpcode::G_SREM: case TargetOpcode::G_UREM: { - unsigned QuotReg = MRI.createGenericVirtualRegister(Ty); + Register QuotReg = MRI.createGenericVirtualRegister(Ty); MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV) .addDef(QuotReg) .addUse(MI.getOperand(1).getReg()) .addUse(MI.getOperand(2).getReg()); - unsigned ProdReg = MRI.createGenericVirtualRegister(Ty); + Register ProdReg = MRI.createGenericVirtualRegister(Ty); MIRBuilder.buildMul(ProdReg, QuotReg, MI.getOperand(2).getReg()); MIRBuilder.buildSub(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(), ProdReg); @@ -993,10 +1583,10 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { case TargetOpcode::G_UMULO: { // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the // result. - unsigned Res = MI.getOperand(0).getReg(); - unsigned Overflow = MI.getOperand(1).getReg(); - unsigned LHS = MI.getOperand(2).getReg(); - unsigned RHS = MI.getOperand(3).getReg(); + Register Res = MI.getOperand(0).getReg(); + Register Overflow = MI.getOperand(1).getReg(); + Register LHS = MI.getOperand(2).getReg(); + Register RHS = MI.getOperand(3).getReg(); MIRBuilder.buildMul(Res, LHS, RHS); @@ -1004,20 +1594,20 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { ? TargetOpcode::G_SMULH : TargetOpcode::G_UMULH; - unsigned HiPart = MRI.createGenericVirtualRegister(Ty); + Register HiPart = MRI.createGenericVirtualRegister(Ty); MIRBuilder.buildInstr(Opcode) .addDef(HiPart) .addUse(LHS) .addUse(RHS); - unsigned Zero = MRI.createGenericVirtualRegister(Ty); + Register Zero = MRI.createGenericVirtualRegister(Ty); MIRBuilder.buildConstant(Zero, 0); // For *signed* multiply, overflow is detected by checking: // (hi != (lo >> bitwidth-1)) if (Opcode == TargetOpcode::G_SMULH) { - unsigned Shifted = MRI.createGenericVirtualRegister(Ty); - unsigned ShiftAmt = MRI.createGenericVirtualRegister(Ty); + Register Shifted = MRI.createGenericVirtualRegister(Ty); + Register ShiftAmt = MRI.createGenericVirtualRegister(Ty); MIRBuilder.buildConstant(ShiftAmt, Ty.getSizeInBits() - 1); MIRBuilder.buildInstr(TargetOpcode::G_ASHR) .addDef(Shifted) @@ -1035,7 +1625,7 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { // represent them. if (Ty.isVector()) return UnableToLegalize; - unsigned Res = MI.getOperand(0).getReg(); + Register Res = MI.getOperand(0).getReg(); Type *ZeroTy; LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext(); switch (Ty.getSizeInBits()) { @@ -1057,10 +1647,10 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { ConstantFP &ZeroForNegation = *cast<ConstantFP>(ConstantFP::getZeroValueForNegation(ZeroTy)); auto Zero = MIRBuilder.buildFConstant(Ty, ZeroForNegation); - MIRBuilder.buildInstr(TargetOpcode::G_FSUB) - .addDef(Res) - .addUse(Zero->getOperand(0).getReg()) - .addUse(MI.getOperand(1).getReg()); + Register SubByReg = MI.getOperand(1).getReg(); + Register ZeroReg = Zero->getOperand(0).getReg(); + MIRBuilder.buildInstr(TargetOpcode::G_FSUB, {Res}, {ZeroReg, SubByReg}, + MI.getFlags()); MI.eraseFromParent(); return Legalized; } @@ -1070,24 +1660,21 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { // end up with an infinite loop as G_FSUB is used to legalize G_FNEG. if (LI.getAction({G_FNEG, {Ty}}).Action == Lower) return UnableToLegalize; - unsigned Res = MI.getOperand(0).getReg(); - unsigned LHS = MI.getOperand(1).getReg(); - unsigned RHS = MI.getOperand(2).getReg(); - unsigned Neg = MRI.createGenericVirtualRegister(Ty); + Register Res = MI.getOperand(0).getReg(); + Register LHS = MI.getOperand(1).getReg(); + Register RHS = MI.getOperand(2).getReg(); + Register Neg = MRI.createGenericVirtualRegister(Ty); MIRBuilder.buildInstr(TargetOpcode::G_FNEG).addDef(Neg).addUse(RHS); - MIRBuilder.buildInstr(TargetOpcode::G_FADD) - .addDef(Res) - .addUse(LHS) - .addUse(Neg); + MIRBuilder.buildInstr(TargetOpcode::G_FADD, {Res}, {LHS, Neg}, MI.getFlags()); MI.eraseFromParent(); return Legalized; } case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: { - unsigned OldValRes = MI.getOperand(0).getReg(); - unsigned SuccessRes = MI.getOperand(1).getReg(); - unsigned Addr = MI.getOperand(2).getReg(); - unsigned CmpVal = MI.getOperand(3).getReg(); - unsigned NewVal = MI.getOperand(4).getReg(); + Register OldValRes = MI.getOperand(0).getReg(); + Register SuccessRes = MI.getOperand(1).getReg(); + Register Addr = MI.getOperand(2).getReg(); + Register CmpVal = MI.getOperand(3).getReg(); + Register NewVal = MI.getOperand(4).getReg(); MIRBuilder.buildAtomicCmpXchg(OldValRes, Addr, CmpVal, NewVal, **MI.memoperands_begin()); MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, OldValRes, CmpVal); @@ -1098,8 +1685,8 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { case TargetOpcode::G_SEXTLOAD: case TargetOpcode::G_ZEXTLOAD: { // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT - unsigned DstReg = MI.getOperand(0).getReg(); - unsigned PtrReg = MI.getOperand(1).getReg(); + Register DstReg = MI.getOperand(0).getReg(); + Register PtrReg = MI.getOperand(1).getReg(); LLT DstTy = MRI.getType(DstReg); auto &MMO = **MI.memoperands_begin(); @@ -1114,8 +1701,8 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { } if (DstTy.isScalar()) { - unsigned TmpReg = MRI.createGenericVirtualRegister( - LLT::scalar(MMO.getSize() /* in bytes */ * 8)); + Register TmpReg = + MRI.createGenericVirtualRegister(LLT::scalar(MMO.getSizeInBits())); MIRBuilder.buildLoad(TmpReg, PtrReg, MMO); switch (MI.getOpcode()) { default: @@ -1142,15 +1729,27 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { case TargetOpcode::G_CTTZ: case TargetOpcode::G_CTPOP: return lowerBitCount(MI, TypeIdx, Ty); + case G_UADDO: { + Register Res = MI.getOperand(0).getReg(); + Register CarryOut = MI.getOperand(1).getReg(); + Register LHS = MI.getOperand(2).getReg(); + Register RHS = MI.getOperand(3).getReg(); + + MIRBuilder.buildAdd(Res, LHS, RHS); + MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, RHS); + + MI.eraseFromParent(); + return Legalized; + } case G_UADDE: { - unsigned Res = MI.getOperand(0).getReg(); - unsigned CarryOut = MI.getOperand(1).getReg(); - unsigned LHS = MI.getOperand(2).getReg(); - unsigned RHS = MI.getOperand(3).getReg(); - unsigned CarryIn = MI.getOperand(4).getReg(); + Register Res = MI.getOperand(0).getReg(); + Register CarryOut = MI.getOperand(1).getReg(); + Register LHS = MI.getOperand(2).getReg(); + Register RHS = MI.getOperand(3).getReg(); + Register CarryIn = MI.getOperand(4).getReg(); - unsigned TmpRes = MRI.createGenericVirtualRegister(Ty); - unsigned ZExtCarryIn = MRI.createGenericVirtualRegister(Ty); + Register TmpRes = MRI.createGenericVirtualRegister(Ty); + Register ZExtCarryIn = MRI.createGenericVirtualRegister(Ty); MIRBuilder.buildAdd(TmpRes, LHS, RHS); MIRBuilder.buildZExt(ZExtCarryIn, CarryIn); @@ -1160,113 +1759,1325 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { MI.eraseFromParent(); return Legalized; } + case G_USUBO: { + Register Res = MI.getOperand(0).getReg(); + Register BorrowOut = MI.getOperand(1).getReg(); + Register LHS = MI.getOperand(2).getReg(); + Register RHS = MI.getOperand(3).getReg(); + + MIRBuilder.buildSub(Res, LHS, RHS); + MIRBuilder.buildICmp(CmpInst::ICMP_ULT, BorrowOut, LHS, RHS); + + MI.eraseFromParent(); + return Legalized; + } + case G_USUBE: { + Register Res = MI.getOperand(0).getReg(); + Register BorrowOut = MI.getOperand(1).getReg(); + Register LHS = MI.getOperand(2).getReg(); + Register RHS = MI.getOperand(3).getReg(); + Register BorrowIn = MI.getOperand(4).getReg(); + + Register TmpRes = MRI.createGenericVirtualRegister(Ty); + Register ZExtBorrowIn = MRI.createGenericVirtualRegister(Ty); + Register LHS_EQ_RHS = MRI.createGenericVirtualRegister(LLT::scalar(1)); + Register LHS_ULT_RHS = MRI.createGenericVirtualRegister(LLT::scalar(1)); + + MIRBuilder.buildSub(TmpRes, LHS, RHS); + MIRBuilder.buildZExt(ZExtBorrowIn, BorrowIn); + MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn); + MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LHS_EQ_RHS, LHS, RHS); + MIRBuilder.buildICmp(CmpInst::ICMP_ULT, LHS_ULT_RHS, LHS, RHS); + MIRBuilder.buildSelect(BorrowOut, LHS_EQ_RHS, BorrowIn, LHS_ULT_RHS); + + MI.eraseFromParent(); + return Legalized; + } + case G_UITOFP: + return lowerUITOFP(MI, TypeIdx, Ty); + case G_SITOFP: + return lowerSITOFP(MI, TypeIdx, Ty); + case G_SMIN: + case G_SMAX: + case G_UMIN: + case G_UMAX: + return lowerMinMax(MI, TypeIdx, Ty); + case G_FCOPYSIGN: + return lowerFCopySign(MI, TypeIdx, Ty); + case G_FMINNUM: + case G_FMAXNUM: + return lowerFMinNumMaxNum(MI); } } +LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorImplicitDef( + MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) { + SmallVector<Register, 2> DstRegs; + + unsigned NarrowSize = NarrowTy.getSizeInBits(); + Register DstReg = MI.getOperand(0).getReg(); + unsigned Size = MRI.getType(DstReg).getSizeInBits(); + int NumParts = Size / NarrowSize; + // FIXME: Don't know how to handle the situation where the small vectors + // aren't all the same size yet. + if (Size % NarrowSize != 0) + return UnableToLegalize; + + for (int i = 0; i < NumParts; ++i) { + Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy); + MIRBuilder.buildUndef(TmpReg); + DstRegs.push_back(TmpReg); + } + + if (NarrowTy.isVector()) + MIRBuilder.buildConcatVectors(DstReg, DstRegs); + else + MIRBuilder.buildBuildVector(DstReg, DstRegs); + + MI.eraseFromParent(); + return Legalized; +} + LegalizerHelper::LegalizeResult -LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, - LLT NarrowTy) { - // FIXME: Don't know how to handle secondary types yet. +LegalizerHelper::fewerElementsVectorBasic(MachineInstr &MI, unsigned TypeIdx, + LLT NarrowTy) { + const unsigned Opc = MI.getOpcode(); + const unsigned NumOps = MI.getNumOperands() - 1; + const unsigned NarrowSize = NarrowTy.getSizeInBits(); + const Register DstReg = MI.getOperand(0).getReg(); + const unsigned Flags = MI.getFlags(); + const LLT DstTy = MRI.getType(DstReg); + const unsigned Size = DstTy.getSizeInBits(); + const int NumParts = Size / NarrowSize; + const LLT EltTy = DstTy.getElementType(); + const unsigned EltSize = EltTy.getSizeInBits(); + const unsigned BitsForNumParts = NarrowSize * NumParts; + + // Check if we have any leftovers. If we do, then only handle the case where + // the leftover is one element. + if (BitsForNumParts != Size && BitsForNumParts + EltSize != Size) + return UnableToLegalize; + + if (BitsForNumParts != Size) { + Register AccumDstReg = MRI.createGenericVirtualRegister(DstTy); + MIRBuilder.buildUndef(AccumDstReg); + + // Handle the pieces which evenly divide into the requested type with + // extract/op/insert sequence. + for (unsigned Offset = 0; Offset < BitsForNumParts; Offset += NarrowSize) { + SmallVector<SrcOp, 4> SrcOps; + for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) { + Register PartOpReg = MRI.createGenericVirtualRegister(NarrowTy); + MIRBuilder.buildExtract(PartOpReg, MI.getOperand(I).getReg(), Offset); + SrcOps.push_back(PartOpReg); + } + + Register PartDstReg = MRI.createGenericVirtualRegister(NarrowTy); + MIRBuilder.buildInstr(Opc, {PartDstReg}, SrcOps, Flags); + + Register PartInsertReg = MRI.createGenericVirtualRegister(DstTy); + MIRBuilder.buildInsert(PartInsertReg, AccumDstReg, PartDstReg, Offset); + AccumDstReg = PartInsertReg; + } + + // Handle the remaining element sized leftover piece. + SmallVector<SrcOp, 4> SrcOps; + for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) { + Register PartOpReg = MRI.createGenericVirtualRegister(EltTy); + MIRBuilder.buildExtract(PartOpReg, MI.getOperand(I).getReg(), + BitsForNumParts); + SrcOps.push_back(PartOpReg); + } + + Register PartDstReg = MRI.createGenericVirtualRegister(EltTy); + MIRBuilder.buildInstr(Opc, {PartDstReg}, SrcOps, Flags); + MIRBuilder.buildInsert(DstReg, AccumDstReg, PartDstReg, BitsForNumParts); + MI.eraseFromParent(); + + return Legalized; + } + + SmallVector<Register, 2> DstRegs, Src0Regs, Src1Regs, Src2Regs; + + extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src0Regs); + + if (NumOps >= 2) + extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src1Regs); + + if (NumOps >= 3) + extractParts(MI.getOperand(3).getReg(), NarrowTy, NumParts, Src2Regs); + + for (int i = 0; i < NumParts; ++i) { + Register DstReg = MRI.createGenericVirtualRegister(NarrowTy); + + if (NumOps == 1) + MIRBuilder.buildInstr(Opc, {DstReg}, {Src0Regs[i]}, Flags); + else if (NumOps == 2) { + MIRBuilder.buildInstr(Opc, {DstReg}, {Src0Regs[i], Src1Regs[i]}, Flags); + } else if (NumOps == 3) { + MIRBuilder.buildInstr(Opc, {DstReg}, + {Src0Regs[i], Src1Regs[i], Src2Regs[i]}, Flags); + } + + DstRegs.push_back(DstReg); + } + + if (NarrowTy.isVector()) + MIRBuilder.buildConcatVectors(DstReg, DstRegs); + else + MIRBuilder.buildBuildVector(DstReg, DstRegs); + + MI.eraseFromParent(); + return Legalized; +} + +// Handle splitting vector operations which need to have the same number of +// elements in each type index, but each type index may have a different element +// type. +// +// e.g. <4 x s64> = G_SHL <4 x s64>, <4 x s32> -> +// <2 x s64> = G_SHL <2 x s64>, <2 x s32> +// <2 x s64> = G_SHL <2 x s64>, <2 x s32> +// +// Also handles some irregular breakdown cases, e.g. +// e.g. <3 x s64> = G_SHL <3 x s64>, <3 x s32> -> +// <2 x s64> = G_SHL <2 x s64>, <2 x s32> +// s64 = G_SHL s64, s32 +LegalizerHelper::LegalizeResult +LegalizerHelper::fewerElementsVectorMultiEltType( + MachineInstr &MI, unsigned TypeIdx, LLT NarrowTyArg) { if (TypeIdx != 0) return UnableToLegalize; - MIRBuilder.setInstr(MI); - switch (MI.getOpcode()) { - default: + const LLT NarrowTy0 = NarrowTyArg; + const unsigned NewNumElts = + NarrowTy0.isVector() ? NarrowTy0.getNumElements() : 1; + + const Register DstReg = MI.getOperand(0).getReg(); + LLT DstTy = MRI.getType(DstReg); + LLT LeftoverTy0; + + // All of the operands need to have the same number of elements, so if we can + // determine a type breakdown for the result type, we can for all of the + // source types. + int NumParts = getNarrowTypeBreakDown(DstTy, NarrowTy0, LeftoverTy0).first; + if (NumParts < 0) return UnableToLegalize; - case TargetOpcode::G_IMPLICIT_DEF: { - SmallVector<unsigned, 2> DstRegs; - unsigned NarrowSize = NarrowTy.getSizeInBits(); - unsigned DstReg = MI.getOperand(0).getReg(); - unsigned Size = MRI.getType(DstReg).getSizeInBits(); - int NumParts = Size / NarrowSize; - // FIXME: Don't know how to handle the situation where the small vectors - // aren't all the same size yet. - if (Size % NarrowSize != 0) + SmallVector<MachineInstrBuilder, 4> NewInsts; + + SmallVector<Register, 4> DstRegs, LeftoverDstRegs; + SmallVector<Register, 4> PartRegs, LeftoverRegs; + + for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) { + LLT LeftoverTy; + Register SrcReg = MI.getOperand(I).getReg(); + LLT SrcTyI = MRI.getType(SrcReg); + LLT NarrowTyI = LLT::scalarOrVector(NewNumElts, SrcTyI.getScalarType()); + LLT LeftoverTyI; + + // Split this operand into the requested typed registers, and any leftover + // required to reproduce the original type. + if (!extractParts(SrcReg, SrcTyI, NarrowTyI, LeftoverTyI, PartRegs, + LeftoverRegs)) return UnableToLegalize; - for (int i = 0; i < NumParts; ++i) { - unsigned TmpReg = MRI.createGenericVirtualRegister(NarrowTy); - MIRBuilder.buildUndef(TmpReg); - DstRegs.push_back(TmpReg); + if (I == 1) { + // For the first operand, create an instruction for each part and setup + // the result. + for (Register PartReg : PartRegs) { + Register PartDstReg = MRI.createGenericVirtualRegister(NarrowTy0); + NewInsts.push_back(MIRBuilder.buildInstrNoInsert(MI.getOpcode()) + .addDef(PartDstReg) + .addUse(PartReg)); + DstRegs.push_back(PartDstReg); + } + + for (Register LeftoverReg : LeftoverRegs) { + Register PartDstReg = MRI.createGenericVirtualRegister(LeftoverTy0); + NewInsts.push_back(MIRBuilder.buildInstrNoInsert(MI.getOpcode()) + .addDef(PartDstReg) + .addUse(LeftoverReg)); + LeftoverDstRegs.push_back(PartDstReg); + } + } else { + assert(NewInsts.size() == PartRegs.size() + LeftoverRegs.size()); + + // Add the newly created operand splits to the existing instructions. The + // odd-sized pieces are ordered after the requested NarrowTyArg sized + // pieces. + unsigned InstCount = 0; + for (unsigned J = 0, JE = PartRegs.size(); J != JE; ++J) + NewInsts[InstCount++].addUse(PartRegs[J]); + for (unsigned J = 0, JE = LeftoverRegs.size(); J != JE; ++J) + NewInsts[InstCount++].addUse(LeftoverRegs[J]); } - if (NarrowTy.isVector()) - MIRBuilder.buildConcatVectors(DstReg, DstRegs); - else - MIRBuilder.buildBuildVector(DstReg, DstRegs); + PartRegs.clear(); + LeftoverRegs.clear(); + } - MI.eraseFromParent(); - return Legalized; + // Insert the newly built operations and rebuild the result register. + for (auto &MIB : NewInsts) + MIRBuilder.insertInstr(MIB); + + insertParts(DstReg, DstTy, NarrowTy0, DstRegs, LeftoverTy0, LeftoverDstRegs); + + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::fewerElementsVectorCasts(MachineInstr &MI, unsigned TypeIdx, + LLT NarrowTy) { + if (TypeIdx != 0) + return UnableToLegalize; + + Register DstReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); + LLT DstTy = MRI.getType(DstReg); + LLT SrcTy = MRI.getType(SrcReg); + + LLT NarrowTy0 = NarrowTy; + LLT NarrowTy1; + unsigned NumParts; + + if (NarrowTy.isVector()) { + // Uneven breakdown not handled. + NumParts = DstTy.getNumElements() / NarrowTy.getNumElements(); + if (NumParts * NarrowTy.getNumElements() != DstTy.getNumElements()) + return UnableToLegalize; + + NarrowTy1 = LLT::vector(NumParts, SrcTy.getElementType().getSizeInBits()); + } else { + NumParts = DstTy.getNumElements(); + NarrowTy1 = SrcTy.getElementType(); } - case TargetOpcode::G_ADD: { - unsigned NarrowSize = NarrowTy.getSizeInBits(); - unsigned DstReg = MI.getOperand(0).getReg(); - unsigned Size = MRI.getType(DstReg).getSizeInBits(); - int NumParts = Size / NarrowSize; + + SmallVector<Register, 4> SrcRegs, DstRegs; + extractParts(SrcReg, NarrowTy1, NumParts, SrcRegs); + + for (unsigned I = 0; I < NumParts; ++I) { + Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0); + MachineInstr *NewInst = MIRBuilder.buildInstr(MI.getOpcode()) + .addDef(DstReg) + .addUse(SrcRegs[I]); + + NewInst->setFlags(MI.getFlags()); + DstRegs.push_back(DstReg); + } + + if (NarrowTy.isVector()) + MIRBuilder.buildConcatVectors(DstReg, DstRegs); + else + MIRBuilder.buildBuildVector(DstReg, DstRegs); + + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::fewerElementsVectorCmp(MachineInstr &MI, unsigned TypeIdx, + LLT NarrowTy) { + Register DstReg = MI.getOperand(0).getReg(); + Register Src0Reg = MI.getOperand(2).getReg(); + LLT DstTy = MRI.getType(DstReg); + LLT SrcTy = MRI.getType(Src0Reg); + + unsigned NumParts; + LLT NarrowTy0, NarrowTy1; + + if (TypeIdx == 0) { + unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1; + unsigned OldElts = DstTy.getNumElements(); + + NarrowTy0 = NarrowTy; + NumParts = NarrowTy.isVector() ? (OldElts / NewElts) : DstTy.getNumElements(); + NarrowTy1 = NarrowTy.isVector() ? + LLT::vector(NarrowTy.getNumElements(), SrcTy.getScalarSizeInBits()) : + SrcTy.getElementType(); + + } else { + unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1; + unsigned OldElts = SrcTy.getNumElements(); + + NumParts = NarrowTy.isVector() ? (OldElts / NewElts) : + NarrowTy.getNumElements(); + NarrowTy0 = LLT::vector(NarrowTy.getNumElements(), + DstTy.getScalarSizeInBits()); + NarrowTy1 = NarrowTy; + } + + // FIXME: Don't know how to handle the situation where the small vectors + // aren't all the same size yet. + if (NarrowTy1.isVector() && + NarrowTy1.getNumElements() * NumParts != DstTy.getNumElements()) + return UnableToLegalize; + + CmpInst::Predicate Pred + = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate()); + + SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs; + extractParts(MI.getOperand(2).getReg(), NarrowTy1, NumParts, Src1Regs); + extractParts(MI.getOperand(3).getReg(), NarrowTy1, NumParts, Src2Regs); + + for (unsigned I = 0; I < NumParts; ++I) { + Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0); + DstRegs.push_back(DstReg); + + if (MI.getOpcode() == TargetOpcode::G_ICMP) + MIRBuilder.buildICmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]); + else { + MachineInstr *NewCmp + = MIRBuilder.buildFCmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]); + NewCmp->setFlags(MI.getFlags()); + } + } + + if (NarrowTy1.isVector()) + MIRBuilder.buildConcatVectors(DstReg, DstRegs); + else + MIRBuilder.buildBuildVector(DstReg, DstRegs); + + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::fewerElementsVectorSelect(MachineInstr &MI, unsigned TypeIdx, + LLT NarrowTy) { + Register DstReg = MI.getOperand(0).getReg(); + Register CondReg = MI.getOperand(1).getReg(); + + unsigned NumParts = 0; + LLT NarrowTy0, NarrowTy1; + + LLT DstTy = MRI.getType(DstReg); + LLT CondTy = MRI.getType(CondReg); + unsigned Size = DstTy.getSizeInBits(); + + assert(TypeIdx == 0 || CondTy.isVector()); + + if (TypeIdx == 0) { + NarrowTy0 = NarrowTy; + NarrowTy1 = CondTy; + + unsigned NarrowSize = NarrowTy0.getSizeInBits(); // FIXME: Don't know how to handle the situation where the small vectors // aren't all the same size yet. if (Size % NarrowSize != 0) return UnableToLegalize; - SmallVector<unsigned, 2> Src1Regs, Src2Regs, DstRegs; - extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs); - extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs); + NumParts = Size / NarrowSize; - for (int i = 0; i < NumParts; ++i) { - unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy); - MIRBuilder.buildAdd(DstReg, Src1Regs[i], Src2Regs[i]); - DstRegs.push_back(DstReg); + // Need to break down the condition type + if (CondTy.isVector()) { + if (CondTy.getNumElements() == NumParts) + NarrowTy1 = CondTy.getElementType(); + else + NarrowTy1 = LLT::vector(CondTy.getNumElements() / NumParts, + CondTy.getScalarSizeInBits()); + } + } else { + NumParts = CondTy.getNumElements(); + if (NarrowTy.isVector()) { + // TODO: Handle uneven breakdown. + if (NumParts * NarrowTy.getNumElements() != CondTy.getNumElements()) + return UnableToLegalize; + + return UnableToLegalize; + } else { + NarrowTy0 = DstTy.getElementType(); + NarrowTy1 = NarrowTy; } + } + + SmallVector<Register, 2> DstRegs, Src0Regs, Src1Regs, Src2Regs; + if (CondTy.isVector()) + extractParts(MI.getOperand(1).getReg(), NarrowTy1, NumParts, Src0Regs); + + extractParts(MI.getOperand(2).getReg(), NarrowTy0, NumParts, Src1Regs); + extractParts(MI.getOperand(3).getReg(), NarrowTy0, NumParts, Src2Regs); + for (unsigned i = 0; i < NumParts; ++i) { + Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0); + MIRBuilder.buildSelect(DstReg, CondTy.isVector() ? Src0Regs[i] : CondReg, + Src1Regs[i], Src2Regs[i]); + DstRegs.push_back(DstReg); + } + + if (NarrowTy0.isVector()) MIRBuilder.buildConcatVectors(DstReg, DstRegs); - MI.eraseFromParent(); - return Legalized; + else + MIRBuilder.buildBuildVector(DstReg, DstRegs); + + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::fewerElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx, + LLT NarrowTy) { + const Register DstReg = MI.getOperand(0).getReg(); + LLT PhiTy = MRI.getType(DstReg); + LLT LeftoverTy; + + // All of the operands need to have the same number of elements, so if we can + // determine a type breakdown for the result type, we can for all of the + // source types. + int NumParts, NumLeftover; + std::tie(NumParts, NumLeftover) + = getNarrowTypeBreakDown(PhiTy, NarrowTy, LeftoverTy); + if (NumParts < 0) + return UnableToLegalize; + + SmallVector<Register, 4> DstRegs, LeftoverDstRegs; + SmallVector<MachineInstrBuilder, 4> NewInsts; + + const int TotalNumParts = NumParts + NumLeftover; + + // Insert the new phis in the result block first. + for (int I = 0; I != TotalNumParts; ++I) { + LLT Ty = I < NumParts ? NarrowTy : LeftoverTy; + Register PartDstReg = MRI.createGenericVirtualRegister(Ty); + NewInsts.push_back(MIRBuilder.buildInstr(TargetOpcode::G_PHI) + .addDef(PartDstReg)); + if (I < NumParts) + DstRegs.push_back(PartDstReg); + else + LeftoverDstRegs.push_back(PartDstReg); } - case TargetOpcode::G_LOAD: - case TargetOpcode::G_STORE: { - bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD; - unsigned ValReg = MI.getOperand(0).getReg(); - unsigned AddrReg = MI.getOperand(1).getReg(); - unsigned NarrowSize = NarrowTy.getSizeInBits(); - unsigned Size = MRI.getType(ValReg).getSizeInBits(); - unsigned NumParts = Size / NarrowSize; - - SmallVector<unsigned, 8> NarrowRegs; - if (!IsLoad) - extractParts(ValReg, NarrowTy, NumParts, NarrowRegs); - - const LLT OffsetTy = - LLT::scalar(MRI.getType(AddrReg).getScalarSizeInBits()); - MachineFunction &MF = *MI.getMF(); - MachineMemOperand *MMO = *MI.memoperands_begin(); - for (unsigned Idx = 0; Idx < NumParts; ++Idx) { - unsigned Adjustment = Idx * NarrowTy.getSizeInBits() / 8; - unsigned Alignment = MinAlign(MMO->getAlignment(), Adjustment); - unsigned NewAddrReg = 0; - MIRBuilder.materializeGEP(NewAddrReg, AddrReg, OffsetTy, Adjustment); - MachineMemOperand &NewMMO = *MF.getMachineMemOperand( - MMO->getPointerInfo().getWithOffset(Adjustment), MMO->getFlags(), - NarrowTy.getSizeInBits() / 8, Alignment); + + MachineBasicBlock *MBB = MI.getParent(); + MIRBuilder.setInsertPt(*MBB, MBB->getFirstNonPHI()); + insertParts(DstReg, PhiTy, NarrowTy, DstRegs, LeftoverTy, LeftoverDstRegs); + + SmallVector<Register, 4> PartRegs, LeftoverRegs; + + // Insert code to extract the incoming values in each predecessor block. + for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) { + PartRegs.clear(); + LeftoverRegs.clear(); + + Register SrcReg = MI.getOperand(I).getReg(); + MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB(); + MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator()); + + LLT Unused; + if (!extractParts(SrcReg, PhiTy, NarrowTy, Unused, PartRegs, + LeftoverRegs)) + return UnableToLegalize; + + // Add the newly created operand splits to the existing instructions. The + // odd-sized pieces are ordered after the requested NarrowTyArg sized + // pieces. + for (int J = 0; J != TotalNumParts; ++J) { + MachineInstrBuilder MIB = NewInsts[J]; + MIB.addUse(J < NumParts ? PartRegs[J] : LeftoverRegs[J - NumParts]); + MIB.addMBB(&OpMBB); + } + } + + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx, + LLT NarrowTy) { + // FIXME: Don't know how to handle secondary types yet. + if (TypeIdx != 0) + return UnableToLegalize; + + MachineMemOperand *MMO = *MI.memoperands_begin(); + + // This implementation doesn't work for atomics. Give up instead of doing + // something invalid. + if (MMO->getOrdering() != AtomicOrdering::NotAtomic || + MMO->getFailureOrdering() != AtomicOrdering::NotAtomic) + return UnableToLegalize; + + bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD; + Register ValReg = MI.getOperand(0).getReg(); + Register AddrReg = MI.getOperand(1).getReg(); + LLT ValTy = MRI.getType(ValReg); + + int NumParts = -1; + int NumLeftover = -1; + LLT LeftoverTy; + SmallVector<Register, 8> NarrowRegs, NarrowLeftoverRegs; + if (IsLoad) { + std::tie(NumParts, NumLeftover) = getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy); + } else { + if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs, + NarrowLeftoverRegs)) { + NumParts = NarrowRegs.size(); + NumLeftover = NarrowLeftoverRegs.size(); + } + } + + if (NumParts == -1) + return UnableToLegalize; + + const LLT OffsetTy = LLT::scalar(MRI.getType(AddrReg).getScalarSizeInBits()); + + unsigned TotalSize = ValTy.getSizeInBits(); + + // Split the load/store into PartTy sized pieces starting at Offset. If this + // is a load, return the new registers in ValRegs. For a store, each elements + // of ValRegs should be PartTy. Returns the next offset that needs to be + // handled. + auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<Register> &ValRegs, + unsigned Offset) -> unsigned { + MachineFunction &MF = MIRBuilder.getMF(); + unsigned PartSize = PartTy.getSizeInBits(); + for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize; + Offset += PartSize, ++Idx) { + unsigned ByteSize = PartSize / 8; + unsigned ByteOffset = Offset / 8; + Register NewAddrReg; + + MIRBuilder.materializeGEP(NewAddrReg, AddrReg, OffsetTy, ByteOffset); + + MachineMemOperand *NewMMO = + MF.getMachineMemOperand(MMO, ByteOffset, ByteSize); + if (IsLoad) { - unsigned Dst = MRI.createGenericVirtualRegister(NarrowTy); - NarrowRegs.push_back(Dst); - MIRBuilder.buildLoad(Dst, NewAddrReg, NewMMO); + Register Dst = MRI.createGenericVirtualRegister(PartTy); + ValRegs.push_back(Dst); + MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO); } else { - MIRBuilder.buildStore(NarrowRegs[Idx], NewAddrReg, NewMMO); + MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO); } } - if (IsLoad) { - if (NarrowTy.isVector()) - MIRBuilder.buildConcatVectors(ValReg, NarrowRegs); - else - MIRBuilder.buildBuildVector(ValReg, NarrowRegs); - } + + return Offset; + }; + + unsigned HandledOffset = splitTypePieces(NarrowTy, NarrowRegs, 0); + + // Handle the rest of the register if this isn't an even type breakdown. + if (LeftoverTy.isValid()) + splitTypePieces(LeftoverTy, NarrowLeftoverRegs, HandledOffset); + + if (IsLoad) { + insertParts(ValReg, ValTy, NarrowTy, NarrowRegs, + LeftoverTy, NarrowLeftoverRegs); + } + + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, + LLT NarrowTy) { + using namespace TargetOpcode; + + MIRBuilder.setInstr(MI); + switch (MI.getOpcode()) { + case G_IMPLICIT_DEF: + return fewerElementsVectorImplicitDef(MI, TypeIdx, NarrowTy); + case G_AND: + case G_OR: + case G_XOR: + case G_ADD: + case G_SUB: + case G_MUL: + case G_SMULH: + case G_UMULH: + case G_FADD: + case G_FMUL: + case G_FSUB: + case G_FNEG: + case G_FABS: + case G_FCANONICALIZE: + case G_FDIV: + case G_FREM: + case G_FMA: + case G_FPOW: + case G_FEXP: + case G_FEXP2: + case G_FLOG: + case G_FLOG2: + case G_FLOG10: + case G_FNEARBYINT: + case G_FCEIL: + case G_FFLOOR: + case G_FRINT: + case G_INTRINSIC_ROUND: + case G_INTRINSIC_TRUNC: + case G_FCOS: + case G_FSIN: + case G_FSQRT: + case G_BSWAP: + case G_SDIV: + case G_SMIN: + case G_SMAX: + case G_UMIN: + case G_UMAX: + case G_FMINNUM: + case G_FMAXNUM: + case G_FMINNUM_IEEE: + case G_FMAXNUM_IEEE: + case G_FMINIMUM: + case G_FMAXIMUM: + return fewerElementsVectorBasic(MI, TypeIdx, NarrowTy); + case G_SHL: + case G_LSHR: + case G_ASHR: + case G_CTLZ: + case G_CTLZ_ZERO_UNDEF: + case G_CTTZ: + case G_CTTZ_ZERO_UNDEF: + case G_CTPOP: + case G_FCOPYSIGN: + return fewerElementsVectorMultiEltType(MI, TypeIdx, NarrowTy); + case G_ZEXT: + case G_SEXT: + case G_ANYEXT: + case G_FPEXT: + case G_FPTRUNC: + case G_SITOFP: + case G_UITOFP: + case G_FPTOSI: + case G_FPTOUI: + case G_INTTOPTR: + case G_PTRTOINT: + case G_ADDRSPACE_CAST: + return fewerElementsVectorCasts(MI, TypeIdx, NarrowTy); + case G_ICMP: + case G_FCMP: + return fewerElementsVectorCmp(MI, TypeIdx, NarrowTy); + case G_SELECT: + return fewerElementsVectorSelect(MI, TypeIdx, NarrowTy); + case G_PHI: + return fewerElementsVectorPhi(MI, TypeIdx, NarrowTy); + case G_LOAD: + case G_STORE: + return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy); + default: + return UnableToLegalize; + } +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt, + const LLT HalfTy, const LLT AmtTy) { + + Register InL = MRI.createGenericVirtualRegister(HalfTy); + Register InH = MRI.createGenericVirtualRegister(HalfTy); + MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1).getReg()); + + if (Amt.isNullValue()) { + MIRBuilder.buildMerge(MI.getOperand(0).getReg(), {InL, InH}); MI.eraseFromParent(); return Legalized; } + + LLT NVT = HalfTy; + unsigned NVTBits = HalfTy.getSizeInBits(); + unsigned VTBits = 2 * NVTBits; + + SrcOp Lo(Register(0)), Hi(Register(0)); + if (MI.getOpcode() == TargetOpcode::G_SHL) { + if (Amt.ugt(VTBits)) { + Lo = Hi = MIRBuilder.buildConstant(NVT, 0); + } else if (Amt.ugt(NVTBits)) { + Lo = MIRBuilder.buildConstant(NVT, 0); + Hi = MIRBuilder.buildShl(NVT, InL, + MIRBuilder.buildConstant(AmtTy, Amt - NVTBits)); + } else if (Amt == NVTBits) { + Lo = MIRBuilder.buildConstant(NVT, 0); + Hi = InL; + } else { + Lo = MIRBuilder.buildShl(NVT, InL, MIRBuilder.buildConstant(AmtTy, Amt)); + auto OrLHS = + MIRBuilder.buildShl(NVT, InH, MIRBuilder.buildConstant(AmtTy, Amt)); + auto OrRHS = MIRBuilder.buildLShr( + NVT, InL, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits)); + Hi = MIRBuilder.buildOr(NVT, OrLHS, OrRHS); + } + } else if (MI.getOpcode() == TargetOpcode::G_LSHR) { + if (Amt.ugt(VTBits)) { + Lo = Hi = MIRBuilder.buildConstant(NVT, 0); + } else if (Amt.ugt(NVTBits)) { + Lo = MIRBuilder.buildLShr(NVT, InH, + MIRBuilder.buildConstant(AmtTy, Amt - NVTBits)); + Hi = MIRBuilder.buildConstant(NVT, 0); + } else if (Amt == NVTBits) { + Lo = InH; + Hi = MIRBuilder.buildConstant(NVT, 0); + } else { + auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt); + + auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst); + auto OrRHS = MIRBuilder.buildShl( + NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits)); + + Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS); + Hi = MIRBuilder.buildLShr(NVT, InH, ShiftAmtConst); + } + } else { + if (Amt.ugt(VTBits)) { + Hi = Lo = MIRBuilder.buildAShr( + NVT, InH, MIRBuilder.buildConstant(AmtTy, NVTBits - 1)); + } else if (Amt.ugt(NVTBits)) { + Lo = MIRBuilder.buildAShr(NVT, InH, + MIRBuilder.buildConstant(AmtTy, Amt - NVTBits)); + Hi = MIRBuilder.buildAShr(NVT, InH, + MIRBuilder.buildConstant(AmtTy, NVTBits - 1)); + } else if (Amt == NVTBits) { + Lo = InH; + Hi = MIRBuilder.buildAShr(NVT, InH, + MIRBuilder.buildConstant(AmtTy, NVTBits - 1)); + } else { + auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt); + + auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst); + auto OrRHS = MIRBuilder.buildShl( + NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits)); + + Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS); + Hi = MIRBuilder.buildAShr(NVT, InH, ShiftAmtConst); + } } + + MIRBuilder.buildMerge(MI.getOperand(0).getReg(), {Lo.getReg(), Hi.getReg()}); + MI.eraseFromParent(); + + return Legalized; +} + +// TODO: Optimize if constant shift amount. +LegalizerHelper::LegalizeResult +LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx, + LLT RequestedTy) { + if (TypeIdx == 1) { + Observer.changingInstr(MI); + narrowScalarSrc(MI, RequestedTy, 2); + Observer.changedInstr(MI); + return Legalized; + } + + Register DstReg = MI.getOperand(0).getReg(); + LLT DstTy = MRI.getType(DstReg); + if (DstTy.isVector()) + return UnableToLegalize; + + Register Amt = MI.getOperand(2).getReg(); + LLT ShiftAmtTy = MRI.getType(Amt); + const unsigned DstEltSize = DstTy.getScalarSizeInBits(); + if (DstEltSize % 2 != 0) + return UnableToLegalize; + + // Ignore the input type. We can only go to exactly half the size of the + // input. If that isn't small enough, the resulting pieces will be further + // legalized. + const unsigned NewBitSize = DstEltSize / 2; + const LLT HalfTy = LLT::scalar(NewBitSize); + const LLT CondTy = LLT::scalar(1); + + if (const MachineInstr *KShiftAmt = + getOpcodeDef(TargetOpcode::G_CONSTANT, Amt, MRI)) { + return narrowScalarShiftByConstant( + MI, KShiftAmt->getOperand(1).getCImm()->getValue(), HalfTy, ShiftAmtTy); + } + + // TODO: Expand with known bits. + + // Handle the fully general expansion by an unknown amount. + auto NewBits = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize); + + Register InL = MRI.createGenericVirtualRegister(HalfTy); + Register InH = MRI.createGenericVirtualRegister(HalfTy); + MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1).getReg()); + + auto AmtExcess = MIRBuilder.buildSub(ShiftAmtTy, Amt, NewBits); + auto AmtLack = MIRBuilder.buildSub(ShiftAmtTy, NewBits, Amt); + + auto Zero = MIRBuilder.buildConstant(ShiftAmtTy, 0); + auto IsShort = MIRBuilder.buildICmp(ICmpInst::ICMP_ULT, CondTy, Amt, NewBits); + auto IsZero = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, CondTy, Amt, Zero); + + Register ResultRegs[2]; + switch (MI.getOpcode()) { + case TargetOpcode::G_SHL: { + // Short: ShAmt < NewBitSize + auto LoS = MIRBuilder.buildShl(HalfTy, InH, Amt); + + auto OrLHS = MIRBuilder.buildShl(HalfTy, InH, Amt); + auto OrRHS = MIRBuilder.buildLShr(HalfTy, InL, AmtLack); + auto HiS = MIRBuilder.buildOr(HalfTy, OrLHS, OrRHS); + + // Long: ShAmt >= NewBitSize + auto LoL = MIRBuilder.buildConstant(HalfTy, 0); // Lo part is zero. + auto HiL = MIRBuilder.buildShl(HalfTy, InL, AmtExcess); // Hi from Lo part. + + auto Lo = MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL); + auto Hi = MIRBuilder.buildSelect( + HalfTy, IsZero, InH, MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL)); + + ResultRegs[0] = Lo.getReg(0); + ResultRegs[1] = Hi.getReg(0); + break; + } + case TargetOpcode::G_LSHR: { + // Short: ShAmt < NewBitSize + auto HiS = MIRBuilder.buildLShr(HalfTy, InH, Amt); + + auto OrLHS = MIRBuilder.buildLShr(HalfTy, InL, Amt); + auto OrRHS = MIRBuilder.buildShl(HalfTy, InH, AmtLack); + auto LoS = MIRBuilder.buildOr(HalfTy, OrLHS, OrRHS); + + // Long: ShAmt >= NewBitSize + auto HiL = MIRBuilder.buildConstant(HalfTy, 0); // Hi part is zero. + auto LoL = MIRBuilder.buildLShr(HalfTy, InH, AmtExcess); // Lo from Hi part. + + auto Lo = MIRBuilder.buildSelect( + HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL)); + auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL); + + ResultRegs[0] = Lo.getReg(0); + ResultRegs[1] = Hi.getReg(0); + break; + } + case TargetOpcode::G_ASHR: { + // Short: ShAmt < NewBitSize + auto HiS = MIRBuilder.buildAShr(HalfTy, InH, Amt); + + auto OrLHS = MIRBuilder.buildLShr(HalfTy, InL, Amt); + auto OrRHS = MIRBuilder.buildLShr(HalfTy, InH, AmtLack); + auto LoS = MIRBuilder.buildOr(HalfTy, OrLHS, OrRHS); + + // Long: ShAmt >= NewBitSize + + // Sign of Hi part. + auto HiL = MIRBuilder.buildAShr( + HalfTy, InH, MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1)); + + auto LoL = MIRBuilder.buildAShr(HalfTy, InH, AmtExcess); // Lo from Hi part. + + auto Lo = MIRBuilder.buildSelect( + HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL)); + + auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL); + + ResultRegs[0] = Lo.getReg(0); + ResultRegs[1] = Hi.getReg(0); + break; + } + default: + llvm_unreachable("not a shift"); + } + + MIRBuilder.buildMerge(DstReg, ResultRegs); + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx, + LLT MoreTy) { + assert(TypeIdx == 0 && "Expecting only Idx 0"); + + Observer.changingInstr(MI); + for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) { + MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB(); + MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator()); + moreElementsVectorSrc(MI, MoreTy, I); + } + + MachineBasicBlock &MBB = *MI.getParent(); + MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI()); + moreElementsVectorDst(MI, MoreTy, 0); + Observer.changedInstr(MI); + return Legalized; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx, + LLT MoreTy) { + MIRBuilder.setInstr(MI); + unsigned Opc = MI.getOpcode(); + switch (Opc) { + case TargetOpcode::G_IMPLICIT_DEF: { + Observer.changingInstr(MI); + moreElementsVectorDst(MI, MoreTy, 0); + Observer.changedInstr(MI); + return Legalized; + } + case TargetOpcode::G_AND: + case TargetOpcode::G_OR: + case TargetOpcode::G_XOR: + case TargetOpcode::G_SMIN: + case TargetOpcode::G_SMAX: + case TargetOpcode::G_UMIN: + case TargetOpcode::G_UMAX: { + Observer.changingInstr(MI); + moreElementsVectorSrc(MI, MoreTy, 1); + moreElementsVectorSrc(MI, MoreTy, 2); + moreElementsVectorDst(MI, MoreTy, 0); + Observer.changedInstr(MI); + return Legalized; + } + case TargetOpcode::G_EXTRACT: + if (TypeIdx != 1) + return UnableToLegalize; + Observer.changingInstr(MI); + moreElementsVectorSrc(MI, MoreTy, 1); + Observer.changedInstr(MI); + return Legalized; + case TargetOpcode::G_INSERT: + if (TypeIdx != 0) + return UnableToLegalize; + Observer.changingInstr(MI); + moreElementsVectorSrc(MI, MoreTy, 1); + moreElementsVectorDst(MI, MoreTy, 0); + Observer.changedInstr(MI); + return Legalized; + case TargetOpcode::G_SELECT: + if (TypeIdx != 0) + return UnableToLegalize; + if (MRI.getType(MI.getOperand(1).getReg()).isVector()) + return UnableToLegalize; + + Observer.changingInstr(MI); + moreElementsVectorSrc(MI, MoreTy, 2); + moreElementsVectorSrc(MI, MoreTy, 3); + moreElementsVectorDst(MI, MoreTy, 0); + Observer.changedInstr(MI); + return Legalized; + case TargetOpcode::G_PHI: + return moreElementsVectorPhi(MI, TypeIdx, MoreTy); + default: + return UnableToLegalize; + } +} + +void LegalizerHelper::multiplyRegisters(SmallVectorImpl<Register> &DstRegs, + ArrayRef<Register> Src1Regs, + ArrayRef<Register> Src2Regs, + LLT NarrowTy) { + MachineIRBuilder &B = MIRBuilder; + unsigned SrcParts = Src1Regs.size(); + unsigned DstParts = DstRegs.size(); + + unsigned DstIdx = 0; // Low bits of the result. + Register FactorSum = + B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0); + DstRegs[DstIdx] = FactorSum; + + unsigned CarrySumPrevDstIdx; + SmallVector<Register, 4> Factors; + + for (DstIdx = 1; DstIdx < DstParts; DstIdx++) { + // Collect low parts of muls for DstIdx. + for (unsigned i = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1; + i <= std::min(DstIdx, SrcParts - 1); ++i) { + MachineInstrBuilder Mul = + B.buildMul(NarrowTy, Src1Regs[DstIdx - i], Src2Regs[i]); + Factors.push_back(Mul.getReg(0)); + } + // Collect high parts of muls from previous DstIdx. + for (unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts; + i <= std::min(DstIdx - 1, SrcParts - 1); ++i) { + MachineInstrBuilder Umulh = + B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]); + Factors.push_back(Umulh.getReg(0)); + } + // Add CarrySum from additons calculated for previous DstIdx. + if (DstIdx != 1) { + Factors.push_back(CarrySumPrevDstIdx); + } + + Register CarrySum; + // Add all factors and accumulate all carries into CarrySum. + if (DstIdx != DstParts - 1) { + MachineInstrBuilder Uaddo = + B.buildUAddo(NarrowTy, LLT::scalar(1), Factors[0], Factors[1]); + FactorSum = Uaddo.getReg(0); + CarrySum = B.buildZExt(NarrowTy, Uaddo.getReg(1)).getReg(0); + for (unsigned i = 2; i < Factors.size(); ++i) { + MachineInstrBuilder Uaddo = + B.buildUAddo(NarrowTy, LLT::scalar(1), FactorSum, Factors[i]); + FactorSum = Uaddo.getReg(0); + MachineInstrBuilder Carry = B.buildZExt(NarrowTy, Uaddo.getReg(1)); + CarrySum = B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0); + } + } else { + // Since value for the next index is not calculated, neither is CarrySum. + FactorSum = B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0); + for (unsigned i = 2; i < Factors.size(); ++i) + FactorSum = B.buildAdd(NarrowTy, FactorSum, Factors[i]).getReg(0); + } + + CarrySumPrevDstIdx = CarrySum; + DstRegs[DstIdx] = FactorSum; + Factors.clear(); + } +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) { + Register DstReg = MI.getOperand(0).getReg(); + Register Src1 = MI.getOperand(1).getReg(); + Register Src2 = MI.getOperand(2).getReg(); + + LLT Ty = MRI.getType(DstReg); + if (Ty.isVector()) + return UnableToLegalize; + + unsigned SrcSize = MRI.getType(Src1).getSizeInBits(); + unsigned DstSize = Ty.getSizeInBits(); + unsigned NarrowSize = NarrowTy.getSizeInBits(); + if (DstSize % NarrowSize != 0 || SrcSize % NarrowSize != 0) + return UnableToLegalize; + + unsigned NumDstParts = DstSize / NarrowSize; + unsigned NumSrcParts = SrcSize / NarrowSize; + bool IsMulHigh = MI.getOpcode() == TargetOpcode::G_UMULH; + unsigned DstTmpParts = NumDstParts * (IsMulHigh ? 2 : 1); + + SmallVector<Register, 2> Src1Parts, Src2Parts, DstTmpRegs; + extractParts(Src1, NarrowTy, NumSrcParts, Src1Parts); + extractParts(Src2, NarrowTy, NumSrcParts, Src2Parts); + DstTmpRegs.resize(DstTmpParts); + multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy); + + // Take only high half of registers if this is high mul. + ArrayRef<Register> DstRegs( + IsMulHigh ? &DstTmpRegs[DstTmpParts / 2] : &DstTmpRegs[0], NumDstParts); + MIRBuilder.buildMerge(DstReg, DstRegs); + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx, + LLT NarrowTy) { + if (TypeIdx != 1) + return UnableToLegalize; + + uint64_t NarrowSize = NarrowTy.getSizeInBits(); + + int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); + // FIXME: add support for when SizeOp1 isn't an exact multiple of + // NarrowSize. + if (SizeOp1 % NarrowSize != 0) + return UnableToLegalize; + int NumParts = SizeOp1 / NarrowSize; + + SmallVector<Register, 2> SrcRegs, DstRegs; + SmallVector<uint64_t, 2> Indexes; + extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs); + + Register OpReg = MI.getOperand(0).getReg(); + uint64_t OpStart = MI.getOperand(2).getImm(); + uint64_t OpSize = MRI.getType(OpReg).getSizeInBits(); + for (int i = 0; i < NumParts; ++i) { + unsigned SrcStart = i * NarrowSize; + + if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) { + // No part of the extract uses this subregister, ignore it. + continue; + } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) { + // The entire subregister is extracted, forward the value. + DstRegs.push_back(SrcRegs[i]); + continue; + } + + // OpSegStart is where this destination segment would start in OpReg if it + // extended infinitely in both directions. + int64_t ExtractOffset; + uint64_t SegSize; + if (OpStart < SrcStart) { + ExtractOffset = 0; + SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart); + } else { + ExtractOffset = OpStart - SrcStart; + SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize); + } + + Register SegReg = SrcRegs[i]; + if (ExtractOffset != 0 || SegSize != NarrowSize) { + // A genuine extract is needed. + SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize)); + MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset); + } + + DstRegs.push_back(SegReg); + } + + Register DstReg = MI.getOperand(0).getReg(); + if(MRI.getType(DstReg).isVector()) + MIRBuilder.buildBuildVector(DstReg, DstRegs); + else + MIRBuilder.buildMerge(DstReg, DstRegs); + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx, + LLT NarrowTy) { + // FIXME: Don't know how to handle secondary types yet. + if (TypeIdx != 0) + return UnableToLegalize; + + uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); + uint64_t NarrowSize = NarrowTy.getSizeInBits(); + + // FIXME: add support for when SizeOp0 isn't an exact multiple of + // NarrowSize. + if (SizeOp0 % NarrowSize != 0) + return UnableToLegalize; + + int NumParts = SizeOp0 / NarrowSize; + + SmallVector<Register, 2> SrcRegs, DstRegs; + SmallVector<uint64_t, 2> Indexes; + extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs); + + Register OpReg = MI.getOperand(2).getReg(); + uint64_t OpStart = MI.getOperand(3).getImm(); + uint64_t OpSize = MRI.getType(OpReg).getSizeInBits(); + for (int i = 0; i < NumParts; ++i) { + unsigned DstStart = i * NarrowSize; + + if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) { + // No part of the insert affects this subregister, forward the original. + DstRegs.push_back(SrcRegs[i]); + continue; + } else if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) { + // The entire subregister is defined by this insert, forward the new + // value. + DstRegs.push_back(OpReg); + continue; + } + + // OpSegStart is where this destination segment would start in OpReg if it + // extended infinitely in both directions. + int64_t ExtractOffset, InsertOffset; + uint64_t SegSize; + if (OpStart < DstStart) { + InsertOffset = 0; + ExtractOffset = DstStart - OpStart; + SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart); + } else { + InsertOffset = OpStart - DstStart; + ExtractOffset = 0; + SegSize = + std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart); + } + + Register SegReg = OpReg; + if (ExtractOffset != 0 || SegSize != OpSize) { + // A genuine extract is needed. + SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize)); + MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset); + } + + Register DstReg = MRI.createGenericVirtualRegister(NarrowTy); + MIRBuilder.buildInsert(DstReg, SrcRegs[i], SegReg, InsertOffset); + DstRegs.push_back(DstReg); + } + + assert(DstRegs.size() == (unsigned)NumParts && "not all parts covered"); + Register DstReg = MI.getOperand(0).getReg(); + if(MRI.getType(DstReg).isVector()) + MIRBuilder.buildBuildVector(DstReg, DstRegs); + else + MIRBuilder.buildMerge(DstReg, DstRegs); + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx, + LLT NarrowTy) { + Register DstReg = MI.getOperand(0).getReg(); + LLT DstTy = MRI.getType(DstReg); + + assert(MI.getNumOperands() == 3 && TypeIdx == 0); + + SmallVector<Register, 4> DstRegs, DstLeftoverRegs; + SmallVector<Register, 4> Src0Regs, Src0LeftoverRegs; + SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs; + LLT LeftoverTy; + if (!extractParts(MI.getOperand(1).getReg(), DstTy, NarrowTy, LeftoverTy, + Src0Regs, Src0LeftoverRegs)) + return UnableToLegalize; + + LLT Unused; + if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, Unused, + Src1Regs, Src1LeftoverRegs)) + llvm_unreachable("inconsistent extractParts result"); + + for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) { + auto Inst = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy}, + {Src0Regs[I], Src1Regs[I]}); + DstRegs.push_back(Inst->getOperand(0).getReg()); + } + + for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) { + auto Inst = MIRBuilder.buildInstr( + MI.getOpcode(), + {LeftoverTy}, {Src0LeftoverRegs[I], Src1LeftoverRegs[I]}); + DstLeftoverRegs.push_back(Inst->getOperand(0).getReg()); + } + + insertParts(DstReg, DstTy, NarrowTy, DstRegs, + LeftoverTy, DstLeftoverRegs); + + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx, + LLT NarrowTy) { + if (TypeIdx != 0) + return UnableToLegalize; + + Register CondReg = MI.getOperand(1).getReg(); + LLT CondTy = MRI.getType(CondReg); + if (CondTy.isVector()) // TODO: Handle vselect + return UnableToLegalize; + + Register DstReg = MI.getOperand(0).getReg(); + LLT DstTy = MRI.getType(DstReg); + + SmallVector<Register, 4> DstRegs, DstLeftoverRegs; + SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs; + SmallVector<Register, 4> Src2Regs, Src2LeftoverRegs; + LLT LeftoverTy; + if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy, + Src1Regs, Src1LeftoverRegs)) + return UnableToLegalize; + + LLT Unused; + if (!extractParts(MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused, + Src2Regs, Src2LeftoverRegs)) + llvm_unreachable("inconsistent extractParts result"); + + for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) { + auto Select = MIRBuilder.buildSelect(NarrowTy, + CondReg, Src1Regs[I], Src2Regs[I]); + DstRegs.push_back(Select->getOperand(0).getReg()); + } + + for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) { + auto Select = MIRBuilder.buildSelect( + LeftoverTy, CondReg, Src1LeftoverRegs[I], Src2LeftoverRegs[I]); + DstLeftoverRegs.push_back(Select->getOperand(0).getReg()); + } + + insertParts(DstReg, DstTy, NarrowTy, DstRegs, + LeftoverTy, DstLeftoverRegs); + + MI.eraseFromParent(); + return Legalized; } LegalizerHelper::LegalizeResult @@ -1288,9 +3099,9 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { return Legalized; } case TargetOpcode::G_CTLZ: { - unsigned SrcReg = MI.getOperand(1).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); unsigned Len = Ty.getSizeInBits(); - if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {Ty}})) { + if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {Ty, Ty}})) { // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero. auto MIBCtlzZU = MIRBuilder.buildInstr(TargetOpcode::G_CTLZ_ZERO_UNDEF, {Ty}, {SrcReg}); @@ -1314,7 +3125,7 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { // return Len - popcount(x); // // Ref: "Hacker's Delight" by Henry Warren - unsigned Op = SrcReg; + Register Op = SrcReg; unsigned NewLen = PowerOf2Ceil(Len); for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) { auto MIBShiftAmt = MIRBuilder.buildConstant(Ty, 1ULL << i); @@ -1338,9 +3149,9 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { return Legalized; } case TargetOpcode::G_CTTZ: { - unsigned SrcReg = MI.getOperand(1).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); unsigned Len = Ty.getSizeInBits(); - if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {Ty}})) { + if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {Ty, Ty}})) { // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with // zero. auto MIBCttzZU = MIRBuilder.buildInstr(TargetOpcode::G_CTTZ_ZERO_UNDEF, @@ -1365,8 +3176,8 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { TargetOpcode::G_AND, {Ty}, {MIBNot, MIRBuilder.buildInstr(TargetOpcode::G_ADD, {Ty}, {SrcReg, MIBCstNeg1})}); - if (!isSupported({TargetOpcode::G_CTPOP, {Ty}}) && - isSupported({TargetOpcode::G_CTLZ, {Ty}})) { + if (!isSupported({TargetOpcode::G_CTPOP, {Ty, Ty}}) && + isSupported({TargetOpcode::G_CTLZ, {Ty, Ty}})) { auto MIBCstLen = MIRBuilder.buildConstant(Ty, Len); MIRBuilder.buildInstr( TargetOpcode::G_SUB, {MI.getOperand(0).getReg()}, @@ -1381,3 +3192,230 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { } } } + +// Expand s32 = G_UITOFP s64 using bit operations to an IEEE float +// representation. +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerU64ToF32BitOps(MachineInstr &MI) { + Register Dst = MI.getOperand(0).getReg(); + Register Src = MI.getOperand(1).getReg(); + const LLT S64 = LLT::scalar(64); + const LLT S32 = LLT::scalar(32); + const LLT S1 = LLT::scalar(1); + + assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32); + + // unsigned cul2f(ulong u) { + // uint lz = clz(u); + // uint e = (u != 0) ? 127U + 63U - lz : 0; + // u = (u << lz) & 0x7fffffffffffffffUL; + // ulong t = u & 0xffffffffffUL; + // uint v = (e << 23) | (uint)(u >> 40); + // uint r = t > 0x8000000000UL ? 1U : (t == 0x8000000000UL ? v & 1U : 0U); + // return as_float(v + r); + // } + + auto Zero32 = MIRBuilder.buildConstant(S32, 0); + auto Zero64 = MIRBuilder.buildConstant(S64, 0); + + auto LZ = MIRBuilder.buildCTLZ_ZERO_UNDEF(S32, Src); + + auto K = MIRBuilder.buildConstant(S32, 127U + 63U); + auto Sub = MIRBuilder.buildSub(S32, K, LZ); + + auto NotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, Src, Zero64); + auto E = MIRBuilder.buildSelect(S32, NotZero, Sub, Zero32); + + auto Mask0 = MIRBuilder.buildConstant(S64, (-1ULL) >> 1); + auto ShlLZ = MIRBuilder.buildShl(S64, Src, LZ); + + auto U = MIRBuilder.buildAnd(S64, ShlLZ, Mask0); + + auto Mask1 = MIRBuilder.buildConstant(S64, 0xffffffffffULL); + auto T = MIRBuilder.buildAnd(S64, U, Mask1); + + auto UShl = MIRBuilder.buildLShr(S64, U, MIRBuilder.buildConstant(S64, 40)); + auto ShlE = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 23)); + auto V = MIRBuilder.buildOr(S32, ShlE, MIRBuilder.buildTrunc(S32, UShl)); + + auto C = MIRBuilder.buildConstant(S64, 0x8000000000ULL); + auto RCmp = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, S1, T, C); + auto TCmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, T, C); + auto One = MIRBuilder.buildConstant(S32, 1); + + auto VTrunc1 = MIRBuilder.buildAnd(S32, V, One); + auto Select0 = MIRBuilder.buildSelect(S32, TCmp, VTrunc1, Zero32); + auto R = MIRBuilder.buildSelect(S32, RCmp, One, Select0); + MIRBuilder.buildAdd(Dst, V, R); + + return Legalized; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerUITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { + Register Dst = MI.getOperand(0).getReg(); + Register Src = MI.getOperand(1).getReg(); + LLT DstTy = MRI.getType(Dst); + LLT SrcTy = MRI.getType(Src); + + if (SrcTy != LLT::scalar(64)) + return UnableToLegalize; + + if (DstTy == LLT::scalar(32)) { + // TODO: SelectionDAG has several alternative expansions to port which may + // be more reasonble depending on the available instructions. If a target + // has sitofp, does not have CTLZ, or can efficiently use f64 as an + // intermediate type, this is probably worse. + return lowerU64ToF32BitOps(MI); + } + + return UnableToLegalize; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerSITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { + Register Dst = MI.getOperand(0).getReg(); + Register Src = MI.getOperand(1).getReg(); + LLT DstTy = MRI.getType(Dst); + LLT SrcTy = MRI.getType(Src); + + const LLT S64 = LLT::scalar(64); + const LLT S32 = LLT::scalar(32); + const LLT S1 = LLT::scalar(1); + + if (SrcTy != S64) + return UnableToLegalize; + + if (DstTy == S32) { + // signed cl2f(long l) { + // long s = l >> 63; + // float r = cul2f((l + s) ^ s); + // return s ? -r : r; + // } + Register L = Src; + auto SignBit = MIRBuilder.buildConstant(S64, 63); + auto S = MIRBuilder.buildAShr(S64, L, SignBit); + + auto LPlusS = MIRBuilder.buildAdd(S64, L, S); + auto Xor = MIRBuilder.buildXor(S64, LPlusS, S); + auto R = MIRBuilder.buildUITOFP(S32, Xor); + + auto RNeg = MIRBuilder.buildFNeg(S32, R); + auto SignNotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, S, + MIRBuilder.buildConstant(S64, 0)); + MIRBuilder.buildSelect(Dst, SignNotZero, RNeg, R); + return Legalized; + } + + return UnableToLegalize; +} + +static CmpInst::Predicate minMaxToCompare(unsigned Opc) { + switch (Opc) { + case TargetOpcode::G_SMIN: + return CmpInst::ICMP_SLT; + case TargetOpcode::G_SMAX: + return CmpInst::ICMP_SGT; + case TargetOpcode::G_UMIN: + return CmpInst::ICMP_ULT; + case TargetOpcode::G_UMAX: + return CmpInst::ICMP_UGT; + default: + llvm_unreachable("not in integer min/max"); + } +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerMinMax(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { + Register Dst = MI.getOperand(0).getReg(); + Register Src0 = MI.getOperand(1).getReg(); + Register Src1 = MI.getOperand(2).getReg(); + + const CmpInst::Predicate Pred = minMaxToCompare(MI.getOpcode()); + LLT CmpType = MRI.getType(Dst).changeElementSize(1); + + auto Cmp = MIRBuilder.buildICmp(Pred, CmpType, Src0, Src1); + MIRBuilder.buildSelect(Dst, Cmp, Src0, Src1); + + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerFCopySign(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { + Register Dst = MI.getOperand(0).getReg(); + Register Src0 = MI.getOperand(1).getReg(); + Register Src1 = MI.getOperand(2).getReg(); + + const LLT Src0Ty = MRI.getType(Src0); + const LLT Src1Ty = MRI.getType(Src1); + + const int Src0Size = Src0Ty.getScalarSizeInBits(); + const int Src1Size = Src1Ty.getScalarSizeInBits(); + + auto SignBitMask = MIRBuilder.buildConstant( + Src0Ty, APInt::getSignMask(Src0Size)); + + auto NotSignBitMask = MIRBuilder.buildConstant( + Src0Ty, APInt::getLowBitsSet(Src0Size, Src0Size - 1)); + + auto And0 = MIRBuilder.buildAnd(Src0Ty, Src0, NotSignBitMask); + MachineInstr *Or; + + if (Src0Ty == Src1Ty) { + auto And1 = MIRBuilder.buildAnd(Src1Ty, Src0, SignBitMask); + Or = MIRBuilder.buildOr(Dst, And0, And1); + } else if (Src0Size > Src1Size) { + auto ShiftAmt = MIRBuilder.buildConstant(Src0Ty, Src0Size - Src1Size); + auto Zext = MIRBuilder.buildZExt(Src0Ty, Src1); + auto Shift = MIRBuilder.buildShl(Src0Ty, Zext, ShiftAmt); + auto And1 = MIRBuilder.buildAnd(Src0Ty, Shift, SignBitMask); + Or = MIRBuilder.buildOr(Dst, And0, And1); + } else { + auto ShiftAmt = MIRBuilder.buildConstant(Src1Ty, Src1Size - Src0Size); + auto Shift = MIRBuilder.buildLShr(Src1Ty, Src1, ShiftAmt); + auto Trunc = MIRBuilder.buildTrunc(Src0Ty, Shift); + auto And1 = MIRBuilder.buildAnd(Src0Ty, Trunc, SignBitMask); + Or = MIRBuilder.buildOr(Dst, And0, And1); + } + + // Be careful about setting nsz/nnan/ninf on every instruction, since the + // constants are a nan and -0.0, but the final result should preserve + // everything. + if (unsigned Flags = MI.getFlags()) + Or->setFlags(Flags); + + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerFMinNumMaxNum(MachineInstr &MI) { + unsigned NewOp = MI.getOpcode() == TargetOpcode::G_FMINNUM ? + TargetOpcode::G_FMINNUM_IEEE : TargetOpcode::G_FMAXNUM_IEEE; + + Register Dst = MI.getOperand(0).getReg(); + Register Src0 = MI.getOperand(1).getReg(); + Register Src1 = MI.getOperand(2).getReg(); + LLT Ty = MRI.getType(Dst); + + if (!MI.getFlag(MachineInstr::FmNoNans)) { + // Insert canonicalizes if it's possible we need to quiet to get correct + // sNaN behavior. + + // Note this must be done here, and not as an optimization combine in the + // absence of a dedicate quiet-snan instruction as we're using an + // omni-purpose G_FCANONICALIZE. + if (!isKnownNeverSNaN(Src0, MRI)) + Src0 = MIRBuilder.buildFCanonicalize(Ty, Src0, MI.getFlags()).getReg(0); + + if (!isKnownNeverSNaN(Src1, MRI)) + Src1 = MIRBuilder.buildFCanonicalize(Ty, Src1, MI.getFlags()).getReg(0); + } + + // If there are no nans, it's safe to simply replace this with the non-IEEE + // version. + MIRBuilder.buildInstr(NewOp, {Dst}, {Src0, Src1}, MI.getFlags()); + MI.eraseFromParent(); + return Legalized; +} |