diff options
Diffstat (limited to 'llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp')
| -rw-r--r-- | llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp | 970 |
1 files changed, 683 insertions, 287 deletions
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp index 6f77428ae721..cfaafc7b53d2 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -37,6 +37,7 @@ namespace RISCV { #define GET_RISCVVSETable_IMPL #define GET_RISCVVLXTable_IMPL #define GET_RISCVVSXTable_IMPL +#define GET_RISCVMaskedPseudosTable_IMPL #include "RISCVGenSearchableTables.inc" } // namespace RISCV } // namespace llvm @@ -47,17 +48,36 @@ void RISCVDAGToDAGISel::PreprocessISelDAG() { I != E;) { SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues. + // Convert integer SPLAT_VECTOR to VMV_V_X_VL and floating-point + // SPLAT_VECTOR to VFMV_V_F_VL to reduce isel burden. + if (N->getOpcode() == ISD::SPLAT_VECTOR) { + MVT VT = N->getSimpleValueType(0); + unsigned Opc = + VT.isInteger() ? RISCVISD::VMV_V_X_VL : RISCVISD::VFMV_V_F_VL; + SDLoc DL(N); + SDValue VL = CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT()); + SDValue Result = CurDAG->getNode(Opc, DL, VT, CurDAG->getUNDEF(VT), + N->getOperand(0), VL); + + --I; + CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result); + ++I; + CurDAG->DeleteNode(N); + continue; + } + // Lower SPLAT_VECTOR_SPLIT_I64 to two scalar stores and a stride 0 vector // load. Done after lowering and combining so that we have a chance to // optimize this to VMV_V_X_VL when the upper bits aren't needed. if (N->getOpcode() != RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) continue; - assert(N->getNumOperands() == 3 && "Unexpected number of operands"); + assert(N->getNumOperands() == 4 && "Unexpected number of operands"); MVT VT = N->getSimpleValueType(0); - SDValue Lo = N->getOperand(0); - SDValue Hi = N->getOperand(1); - SDValue VL = N->getOperand(2); + SDValue Passthru = N->getOperand(0); + SDValue Lo = N->getOperand(1); + SDValue Hi = N->getOperand(2); + SDValue VL = N->getOperand(3); assert(VT.getVectorElementType() == MVT::i64 && VT.isScalableVector() && Lo.getValueType() == MVT::i32 && Hi.getValueType() == MVT::i32 && "Unexpected VTs!"); @@ -88,7 +108,7 @@ void RISCVDAGToDAGISel::PreprocessISelDAG() { CurDAG->getTargetConstant(Intrinsic::riscv_vlse, DL, MVT::i64); SDValue Ops[] = {Chain, IntID, - CurDAG->getUNDEF(VT), + Passthru, StackSlot, CurDAG->getRegister(RISCV::X0, MVT::i64), VL}; @@ -112,6 +132,7 @@ void RISCVDAGToDAGISel::PreprocessISelDAG() { } void RISCVDAGToDAGISel::PostprocessISelDAG() { + HandleSDNode Dummy(CurDAG->getRoot()); SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end(); bool MadeChange = false; @@ -123,57 +144,70 @@ void RISCVDAGToDAGISel::PostprocessISelDAG() { MadeChange |= doPeepholeSExtW(N); MadeChange |= doPeepholeLoadStoreADDI(N); + MadeChange |= doPeepholeMaskedRVV(N); } + CurDAG->setRoot(Dummy.getValue()); + if (MadeChange) CurDAG->RemoveDeadNodes(); } -static SDNode *selectImmWithConstantPool(SelectionDAG *CurDAG, const SDLoc &DL, - const MVT VT, int64_t Imm, - const RISCVSubtarget &Subtarget) { - assert(VT == MVT::i64 && "Expecting MVT::i64"); - const RISCVTargetLowering *TLI = Subtarget.getTargetLowering(); - ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(CurDAG->getConstantPool( - ConstantInt::get(EVT(VT).getTypeForEVT(*CurDAG->getContext()), Imm), VT)); - SDValue Addr = TLI->getAddr(CP, *CurDAG); - SDValue Offset = CurDAG->getTargetConstant(0, DL, VT); - // Since there is no data race, the chain can be the entry node. - SDNode *Load = CurDAG->getMachineNode(RISCV::LD, DL, VT, Addr, Offset, - CurDAG->getEntryNode()); - MachineFunction &MF = CurDAG->getMachineFunction(); - MachineMemOperand *MemOp = MF.getMachineMemOperand( - MachinePointerInfo::getConstantPool(MF), MachineMemOperand::MOLoad, - LLT(VT), CP->getAlign()); - CurDAG->setNodeMemRefs(cast<MachineSDNode>(Load), {MemOp}); - return Load; -} - -static SDNode *selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT, - int64_t Imm, const RISCVSubtarget &Subtarget) { - MVT XLenVT = Subtarget.getXLenVT(); - RISCVMatInt::InstSeq Seq = - RISCVMatInt::generateInstSeq(Imm, Subtarget.getFeatureBits()); +// Returns true if N is a MachineSDNode that has a reg and simm12 memory +// operand. The indices of the base pointer and offset are returned in BaseOpIdx +// and OffsetOpIdx. +static bool hasMemOffset(SDNode *N, unsigned &BaseOpIdx, + unsigned &OffsetOpIdx) { + switch (N->getMachineOpcode()) { + case RISCV::LB: + case RISCV::LH: + case RISCV::LW: + case RISCV::LBU: + case RISCV::LHU: + case RISCV::LWU: + case RISCV::LD: + case RISCV::FLH: + case RISCV::FLW: + case RISCV::FLD: + BaseOpIdx = 0; + OffsetOpIdx = 1; + return true; + case RISCV::SB: + case RISCV::SH: + case RISCV::SW: + case RISCV::SD: + case RISCV::FSH: + case RISCV::FSW: + case RISCV::FSD: + BaseOpIdx = 1; + OffsetOpIdx = 2; + return true; + } - // If Imm is expensive to build, then we put it into constant pool. - if (Subtarget.useConstantPoolForLargeInts() && - Seq.size() > Subtarget.getMaxBuildIntsCost()) - return selectImmWithConstantPool(CurDAG, DL, VT, Imm, Subtarget); + return false; +} +static SDNode *selectImmSeq(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT, + RISCVMatInt::InstSeq &Seq) { SDNode *Result = nullptr; - SDValue SrcReg = CurDAG->getRegister(RISCV::X0, XLenVT); + SDValue SrcReg = CurDAG->getRegister(RISCV::X0, VT); for (RISCVMatInt::Inst &Inst : Seq) { - SDValue SDImm = CurDAG->getTargetConstant(Inst.Imm, DL, XLenVT); - if (Inst.Opc == RISCV::LUI) - Result = CurDAG->getMachineNode(RISCV::LUI, DL, XLenVT, SDImm); - else if (Inst.Opc == RISCV::ADD_UW) - Result = CurDAG->getMachineNode(RISCV::ADD_UW, DL, XLenVT, SrcReg, - CurDAG->getRegister(RISCV::X0, XLenVT)); - else if (Inst.Opc == RISCV::SH1ADD || Inst.Opc == RISCV::SH2ADD || - Inst.Opc == RISCV::SH3ADD) - Result = CurDAG->getMachineNode(Inst.Opc, DL, XLenVT, SrcReg, SrcReg); - else - Result = CurDAG->getMachineNode(Inst.Opc, DL, XLenVT, SrcReg, SDImm); + SDValue SDImm = CurDAG->getTargetConstant(Inst.Imm, DL, VT); + switch (Inst.getOpndKind()) { + case RISCVMatInt::Imm: + Result = CurDAG->getMachineNode(Inst.Opc, DL, VT, SDImm); + break; + case RISCVMatInt::RegX0: + Result = CurDAG->getMachineNode(Inst.Opc, DL, VT, SrcReg, + CurDAG->getRegister(RISCV::X0, VT)); + break; + case RISCVMatInt::RegReg: + Result = CurDAG->getMachineNode(Inst.Opc, DL, VT, SrcReg, SrcReg); + break; + case RISCVMatInt::RegImm: + Result = CurDAG->getMachineNode(Inst.Opc, DL, VT, SrcReg, SDImm); + break; + } // Only the first instruction has X0 as its source. SrcReg = SDValue(Result, 0); @@ -182,51 +216,28 @@ static SDNode *selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT, return Result; } -static SDValue createTupleImpl(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs, - unsigned RegClassID, unsigned SubReg0) { - assert(Regs.size() >= 2 && Regs.size() <= 8); - - SDLoc DL(Regs[0]); - SmallVector<SDValue, 8> Ops; - - Ops.push_back(CurDAG.getTargetConstant(RegClassID, DL, MVT::i32)); +static SDNode *selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT, + int64_t Imm, const RISCVSubtarget &Subtarget) { + RISCVMatInt::InstSeq Seq = + RISCVMatInt::generateInstSeq(Imm, Subtarget.getFeatureBits()); - for (unsigned I = 0; I < Regs.size(); ++I) { - Ops.push_back(Regs[I]); - Ops.push_back(CurDAG.getTargetConstant(SubReg0 + I, DL, MVT::i32)); - } - SDNode *N = - CurDAG.getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops); - return SDValue(N, 0); + return selectImmSeq(CurDAG, DL, VT, Seq); } -static SDValue createM1Tuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs, - unsigned NF) { - static const unsigned RegClassIDs[] = { +static SDValue createTuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs, + unsigned NF, RISCVII::VLMUL LMUL) { + static const unsigned M1TupleRegClassIDs[] = { RISCV::VRN2M1RegClassID, RISCV::VRN3M1RegClassID, RISCV::VRN4M1RegClassID, RISCV::VRN5M1RegClassID, RISCV::VRN6M1RegClassID, RISCV::VRN7M1RegClassID, RISCV::VRN8M1RegClassID}; + static const unsigned M2TupleRegClassIDs[] = {RISCV::VRN2M2RegClassID, + RISCV::VRN3M2RegClassID, + RISCV::VRN4M2RegClassID}; - return createTupleImpl(CurDAG, Regs, RegClassIDs[NF - 2], RISCV::sub_vrm1_0); -} - -static SDValue createM2Tuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs, - unsigned NF) { - static const unsigned RegClassIDs[] = {RISCV::VRN2M2RegClassID, - RISCV::VRN3M2RegClassID, - RISCV::VRN4M2RegClassID}; - - return createTupleImpl(CurDAG, Regs, RegClassIDs[NF - 2], RISCV::sub_vrm2_0); -} - -static SDValue createM4Tuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs, - unsigned NF) { - return createTupleImpl(CurDAG, Regs, RISCV::VRN2M4RegClassID, - RISCV::sub_vrm4_0); -} + assert(Regs.size() >= 2 && Regs.size() <= 8); -static SDValue createTuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs, - unsigned NF, RISCVII::VLMUL LMUL) { + unsigned RegClassID; + unsigned SubReg0; switch (LMUL) { default: llvm_unreachable("Invalid LMUL."); @@ -234,12 +245,37 @@ static SDValue createTuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs, case RISCVII::VLMUL::LMUL_F4: case RISCVII::VLMUL::LMUL_F2: case RISCVII::VLMUL::LMUL_1: - return createM1Tuple(CurDAG, Regs, NF); + static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7, + "Unexpected subreg numbering"); + SubReg0 = RISCV::sub_vrm1_0; + RegClassID = M1TupleRegClassIDs[NF - 2]; + break; case RISCVII::VLMUL::LMUL_2: - return createM2Tuple(CurDAG, Regs, NF); + static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3, + "Unexpected subreg numbering"); + SubReg0 = RISCV::sub_vrm2_0; + RegClassID = M2TupleRegClassIDs[NF - 2]; + break; case RISCVII::VLMUL::LMUL_4: - return createM4Tuple(CurDAG, Regs, NF); + static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1, + "Unexpected subreg numbering"); + SubReg0 = RISCV::sub_vrm4_0; + RegClassID = RISCV::VRN2M4RegClassID; + break; + } + + SDLoc DL(Regs[0]); + SmallVector<SDValue, 8> Ops; + + Ops.push_back(CurDAG.getTargetConstant(RegClassID, DL, MVT::i32)); + + for (unsigned I = 0; I < Regs.size(); ++I) { + Ops.push_back(Regs[I]); + Ops.push_back(CurDAG.getTargetConstant(SubReg0 + I, DL, MVT::i32)); } + SDNode *N = + CurDAG.getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops); + return SDValue(N, 0); } void RISCVDAGToDAGISel::addVectorLoadStoreOperands( @@ -287,6 +323,10 @@ void RISCVDAGToDAGISel::addVectorLoadStoreOperands( Operands.push_back(Glue); } +static bool isAllUndef(ArrayRef<SDValue> Values) { + return llvm::all_of(Values, [](SDValue V) { return V->isUndef(); }); +} + void RISCVDAGToDAGISel::selectVLSEG(SDNode *Node, bool IsMasked, bool IsStrided) { SDLoc DL(Node); @@ -297,19 +337,21 @@ void RISCVDAGToDAGISel::selectVLSEG(SDNode *Node, bool IsMasked, unsigned CurOp = 2; SmallVector<SDValue, 8> Operands; - if (IsMasked) { - SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp, - Node->op_begin() + CurOp + NF); - SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL); - Operands.push_back(MaskedOff); - CurOp += NF; + + SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp, + Node->op_begin() + CurOp + NF); + bool IsTU = IsMasked || !isAllUndef(Regs); + if (IsTU) { + SDValue Merge = createTuple(*CurDAG, Regs, NF, LMUL); + Operands.push_back(Merge); } + CurOp += NF; addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided, Operands, /*IsLoad=*/true); const RISCV::VLSEGPseudo *P = - RISCV::getVLSEGPseudo(NF, IsMasked, IsStrided, /*FF*/ false, Log2SEW, + RISCV::getVLSEGPseudo(NF, IsMasked, IsTU, IsStrided, /*FF*/ false, Log2SEW, static_cast<unsigned>(LMUL)); MachineSDNode *Load = CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands); @@ -338,25 +380,25 @@ void RISCVDAGToDAGISel::selectVLSEGFF(SDNode *Node, bool IsMasked) { unsigned CurOp = 2; SmallVector<SDValue, 7> Operands; - if (IsMasked) { - SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp, - Node->op_begin() + CurOp + NF); + + SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp, + Node->op_begin() + CurOp + NF); + bool IsTU = IsMasked || !isAllUndef(Regs); + if (IsTU) { SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL); Operands.push_back(MaskedOff); - CurOp += NF; } + CurOp += NF; addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, /*IsStridedOrIndexed*/ false, Operands, /*IsLoad=*/true); const RISCV::VLSEGPseudo *P = - RISCV::getVLSEGPseudo(NF, IsMasked, /*Strided*/ false, /*FF*/ true, + RISCV::getVLSEGPseudo(NF, IsMasked, IsTU, /*Strided*/ false, /*FF*/ true, Log2SEW, static_cast<unsigned>(LMUL)); MachineSDNode *Load = CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, - MVT::Other, MVT::Glue, Operands); - SDNode *ReadVL = CurDAG->getMachineNode(RISCV::PseudoReadVL, DL, XLenVT, - /*Glue*/ SDValue(Load, 2)); + XLenVT, MVT::Other, Operands); if (auto *MemOp = dyn_cast<MemSDNode>(Node)) CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()}); @@ -368,8 +410,8 @@ void RISCVDAGToDAGISel::selectVLSEGFF(SDNode *Node, bool IsMasked) { CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg)); } - ReplaceUses(SDValue(Node, NF), SDValue(ReadVL, 0)); // VL - ReplaceUses(SDValue(Node, NF + 1), SDValue(Load, 1)); // Chain + ReplaceUses(SDValue(Node, NF), SDValue(Load, 1)); // VL + ReplaceUses(SDValue(Node, NF + 1), SDValue(Load, 2)); // Chain CurDAG->RemoveDeadNode(Node); } @@ -383,13 +425,15 @@ void RISCVDAGToDAGISel::selectVLXSEG(SDNode *Node, bool IsMasked, unsigned CurOp = 2; SmallVector<SDValue, 8> Operands; - if (IsMasked) { - SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp, - Node->op_begin() + CurOp + NF); + + SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp, + Node->op_begin() + CurOp + NF); + bool IsTU = IsMasked || !isAllUndef(Regs); + if (IsTU) { SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL); Operands.push_back(MaskedOff); - CurOp += NF; } + CurOp += NF; MVT IndexVT; addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, @@ -406,7 +450,7 @@ void RISCVDAGToDAGISel::selectVLXSEG(SDNode *Node, bool IsMasked, "values when XLEN=32"); } const RISCV::VLXSEGPseudo *P = RISCV::getVLXSEGPseudo( - NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL), + NF, IsMasked, IsTU, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL), static_cast<unsigned>(IndexLMUL)); MachineSDNode *Load = CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands); @@ -596,32 +640,125 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { int64_t Imm = ConstNode->getSExtValue(); // If the upper XLen-16 bits are not used, try to convert this to a simm12 // by sign extending bit 15. - if (isUInt<16>(Imm) && isInt<12>(SignExtend64(Imm, 16)) && + if (isUInt<16>(Imm) && isInt<12>(SignExtend64<16>(Imm)) && hasAllHUsers(Node)) - Imm = SignExtend64(Imm, 16); + Imm = SignExtend64<16>(Imm); // If the upper 32-bits are not used try to convert this into a simm32 by // sign extending bit 32. if (!isInt<32>(Imm) && isUInt<32>(Imm) && hasAllWUsers(Node)) - Imm = SignExtend64(Imm, 32); + Imm = SignExtend64<32>(Imm); ReplaceNode(Node, selectImm(CurDAG, DL, VT, Imm, *Subtarget)); return; } - case ISD::FrameIndex: { - SDValue Imm = CurDAG->getTargetConstant(0, DL, XLenVT); - int FI = cast<FrameIndexSDNode>(Node)->getIndex(); - SDValue TFI = CurDAG->getTargetFrameIndex(FI, VT); - ReplaceNode(Node, CurDAG->getMachineNode(RISCV::ADDI, DL, VT, TFI, Imm)); + case ISD::ADD: { + // Try to select ADD + immediate used as memory addresses to + // (ADDI (ADD X, Imm-Lo12), Lo12) if it will allow the ADDI to be removed by + // doPeepholeLoadStoreADDI. + + // LHS should be an immediate. + auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1)); + if (!N1C) + break; + + int64_t Offset = N1C->getSExtValue(); + int64_t Lo12 = SignExtend64<12>(Offset); + + // Don't do this if the lower 12 bits are 0 or we could use ADDI directly. + if (Lo12 == 0 || isInt<12>(Offset)) + break; + + // Don't do this if we can use a pair of ADDIs. + if (isInt<12>(Offset / 2) && isInt<12>(Offset - Offset / 2)) + break; + + RISCVMatInt::InstSeq Seq = + RISCVMatInt::generateInstSeq(Offset, Subtarget->getFeatureBits()); + + Offset -= Lo12; + // Restore sign bits for RV32. + if (!Subtarget->is64Bit()) + Offset = SignExtend64<32>(Offset); + + // We can fold if the last operation is an ADDI or its an ADDIW that could + // be treated as an ADDI. + if (Seq.back().Opc != RISCV::ADDI && + !(Seq.back().Opc == RISCV::ADDIW && isInt<32>(Offset))) + break; + assert(Seq.back().Imm == Lo12 && "Expected immediate to match Lo12"); + // Drop the last operation. + Seq.pop_back(); + assert(!Seq.empty() && "Expected more instructions in sequence"); + + bool AllPointerUses = true; + for (auto UI = Node->use_begin(), UE = Node->use_end(); UI != UE; ++UI) { + SDNode *User = *UI; + + // Is this user a memory instruction that uses a register and immediate + // that has this ADD as its pointer. + unsigned BaseOpIdx, OffsetOpIdx; + if (!User->isMachineOpcode() || + !hasMemOffset(User, BaseOpIdx, OffsetOpIdx) || + UI.getOperandNo() != BaseOpIdx) { + AllPointerUses = false; + break; + } + + // If the memory instruction already has an offset, make sure the combined + // offset is foldable. + int64_t MemOffs = + cast<ConstantSDNode>(User->getOperand(OffsetOpIdx))->getSExtValue(); + MemOffs += Lo12; + if (!isInt<12>(MemOffs)) { + AllPointerUses = false; + break; + } + } + + if (!AllPointerUses) + break; + + // Emit (ADDI (ADD X, Hi), Lo) + SDNode *Imm = selectImmSeq(CurDAG, DL, VT, Seq); + SDNode *ADD = CurDAG->getMachineNode(RISCV::ADD, DL, VT, + Node->getOperand(0), SDValue(Imm, 0)); + SDNode *ADDI = + CurDAG->getMachineNode(RISCV::ADDI, DL, VT, SDValue(ADD, 0), + CurDAG->getTargetConstant(Lo12, DL, VT)); + ReplaceNode(Node, ADDI); return; } + case ISD::SHL: { + auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1)); + if (!N1C) + break; + SDValue N0 = Node->getOperand(0); + if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() || + !isa<ConstantSDNode>(N0.getOperand(1))) + break; + unsigned ShAmt = N1C->getZExtValue(); + uint64_t Mask = N0.getConstantOperandVal(1); + + // Optimize (shl (and X, C2), C) -> (slli (srliw X, C3), C3+C) where C2 has + // 32 leading zeros and C3 trailing zeros. + if (ShAmt <= 32 && isShiftedMask_64(Mask)) { + unsigned XLen = Subtarget->getXLen(); + unsigned LeadingZeros = XLen - (64 - countLeadingZeros(Mask)); + unsigned TrailingZeros = countTrailingZeros(Mask); + if (TrailingZeros > 0 && LeadingZeros == 32) { + SDNode *SRLIW = CurDAG->getMachineNode( + RISCV::SRLIW, DL, VT, N0->getOperand(0), + CurDAG->getTargetConstant(TrailingZeros, DL, VT)); + SDNode *SLLI = CurDAG->getMachineNode( + RISCV::SLLI, DL, VT, SDValue(SRLIW, 0), + CurDAG->getTargetConstant(TrailingZeros + ShAmt, DL, VT)); + ReplaceNode(Node, SLLI); + return; + } + } + break; + } case ISD::SRL: { - // Optimize (srl (and X, C2), C) -> - // (srli (slli X, (XLen-C3), (XLen-C3) + C) - // Where C2 is a mask with C3 trailing ones. - // Taking into account that the C2 may have had lower bits unset by - // SimplifyDemandedBits. This avoids materializing the C2 immediate. - // This pattern occurs when type legalizing right shifts for types with - // less than XLen bits. auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1)); if (!N1C) break; @@ -631,6 +768,32 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { break; unsigned ShAmt = N1C->getZExtValue(); uint64_t Mask = N0.getConstantOperandVal(1); + + // Optimize (srl (and X, C2), C) -> (slli (srliw X, C3), C3-C) where C2 has + // 32 leading zeros and C3 trailing zeros. + if (isShiftedMask_64(Mask)) { + unsigned XLen = Subtarget->getXLen(); + unsigned LeadingZeros = XLen - (64 - countLeadingZeros(Mask)); + unsigned TrailingZeros = countTrailingZeros(Mask); + if (LeadingZeros == 32 && TrailingZeros > ShAmt) { + SDNode *SRLIW = CurDAG->getMachineNode( + RISCV::SRLIW, DL, VT, N0->getOperand(0), + CurDAG->getTargetConstant(TrailingZeros, DL, VT)); + SDNode *SLLI = CurDAG->getMachineNode( + RISCV::SLLI, DL, VT, SDValue(SRLIW, 0), + CurDAG->getTargetConstant(TrailingZeros - ShAmt, DL, VT)); + ReplaceNode(Node, SLLI); + return; + } + } + + // Optimize (srl (and X, C2), C) -> + // (srli (slli X, (XLen-C3), (XLen-C3) + C) + // Where C2 is a mask with C3 trailing ones. + // Taking into account that the C2 may have had lower bits unset by + // SimplifyDemandedBits. This avoids materializing the C2 immediate. + // This pattern occurs when type legalizing right shifts for types with + // less than XLen bits. Mask |= maskTrailingOnes<uint64_t>(ShAmt); if (!isMask_64(Mask)) break; @@ -700,13 +863,12 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { uint64_t C1 = N1C->getZExtValue(); - // Keep track of whether this is a andi, zext.h, or zext.w. - bool ZExtOrANDI = isInt<12>(N1C->getSExtValue()); - if (C1 == UINT64_C(0xFFFF) && - (Subtarget->hasStdExtZbb() || Subtarget->hasStdExtZbp())) - ZExtOrANDI = true; - if (C1 == UINT64_C(0xFFFFFFFF) && Subtarget->hasStdExtZba()) - ZExtOrANDI = true; + // Keep track of whether this is a c.andi. If we can't use c.andi, the + // shift pair might offer more compression opportunities. + // TODO: We could check for C extension here, but we don't have many lit + // tests with the C extension enabled so not checking gets better coverage. + // TODO: What if ANDI faster than shift? + bool IsCANDI = isInt<6>(N1C->getSExtValue()); // Clear irrelevant bits in the mask. if (LeftShift) @@ -727,9 +889,8 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { if (C2 < C3) { // If the number of leading zeros is C2+32 this can be SRLIW. if (C2 + 32 == C3) { - SDNode *SRLIW = - CurDAG->getMachineNode(RISCV::SRLIW, DL, XLenVT, X, - CurDAG->getTargetConstant(C2, DL, XLenVT)); + SDNode *SRLIW = CurDAG->getMachineNode( + RISCV::SRLIW, DL, VT, X, CurDAG->getTargetConstant(C2, DL, VT)); ReplaceNode(Node, SRLIW); return; } @@ -739,27 +900,33 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { // // This pattern occurs when (i32 (srl (sra 31), c3 - 32)) is type // legalized and goes through DAG combine. - SDValue Y; if (C2 >= 32 && (C3 - C2) == 1 && N0.hasOneUse() && - selectSExti32(X, Y)) { + X.getOpcode() == ISD::SIGN_EXTEND_INREG && + cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32) { SDNode *SRAIW = - CurDAG->getMachineNode(RISCV::SRAIW, DL, XLenVT, Y, - CurDAG->getTargetConstant(31, DL, XLenVT)); + CurDAG->getMachineNode(RISCV::SRAIW, DL, VT, X.getOperand(0), + CurDAG->getTargetConstant(31, DL, VT)); SDNode *SRLIW = CurDAG->getMachineNode( - RISCV::SRLIW, DL, XLenVT, SDValue(SRAIW, 0), - CurDAG->getTargetConstant(C3 - 32, DL, XLenVT)); + RISCV::SRLIW, DL, VT, SDValue(SRAIW, 0), + CurDAG->getTargetConstant(C3 - 32, DL, VT)); ReplaceNode(Node, SRLIW); return; } // (srli (slli x, c3-c2), c3). - if (OneUseOrZExtW && !ZExtOrANDI) { + // Skip if we could use (zext.w (sraiw X, C2)). + bool Skip = Subtarget->hasStdExtZba() && C3 == 32 && + X.getOpcode() == ISD::SIGN_EXTEND_INREG && + cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32; + // Also Skip if we can use bexti. + Skip |= Subtarget->hasStdExtZbs() && C3 == XLen - 1; + if (OneUseOrZExtW && !Skip) { SDNode *SLLI = CurDAG->getMachineNode( - RISCV::SLLI, DL, XLenVT, X, - CurDAG->getTargetConstant(C3 - C2, DL, XLenVT)); + RISCV::SLLI, DL, VT, X, + CurDAG->getTargetConstant(C3 - C2, DL, VT)); SDNode *SRLI = - CurDAG->getMachineNode(RISCV::SRLI, DL, XLenVT, SDValue(SLLI, 0), - CurDAG->getTargetConstant(C3, DL, XLenVT)); + CurDAG->getMachineNode(RISCV::SRLI, DL, VT, SDValue(SLLI, 0), + CurDAG->getTargetConstant(C3, DL, VT)); ReplaceNode(Node, SRLI); return; } @@ -775,21 +942,20 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { C1 == (maskTrailingOnes<uint64_t>(XLen - (C2 + C3)) << C2)) { // Use slli.uw when possible. if ((XLen - (C2 + C3)) == 32 && Subtarget->hasStdExtZba()) { - SDNode *SLLI_UW = - CurDAG->getMachineNode(RISCV::SLLI_UW, DL, XLenVT, X, - CurDAG->getTargetConstant(C2, DL, XLenVT)); + SDNode *SLLI_UW = CurDAG->getMachineNode( + RISCV::SLLI_UW, DL, VT, X, CurDAG->getTargetConstant(C2, DL, VT)); ReplaceNode(Node, SLLI_UW); return; } // (srli (slli c2+c3), c3) - if (OneUseOrZExtW && !ZExtOrANDI) { + if (OneUseOrZExtW && !IsCANDI) { SDNode *SLLI = CurDAG->getMachineNode( - RISCV::SLLI, DL, XLenVT, X, - CurDAG->getTargetConstant(C2 + C3, DL, XLenVT)); + RISCV::SLLI, DL, VT, X, + CurDAG->getTargetConstant(C2 + C3, DL, VT)); SDNode *SRLI = - CurDAG->getMachineNode(RISCV::SRLI, DL, XLenVT, SDValue(SLLI, 0), - CurDAG->getTargetConstant(C3, DL, XLenVT)); + CurDAG->getMachineNode(RISCV::SRLI, DL, VT, SDValue(SLLI, 0), + CurDAG->getTargetConstant(C3, DL, VT)); ReplaceNode(Node, SRLI); return; } @@ -801,25 +967,31 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { if (!LeftShift && isShiftedMask_64(C1)) { uint64_t Leading = XLen - (64 - countLeadingZeros(C1)); uint64_t C3 = countTrailingZeros(C1); - if (Leading == C2 && C2 + C3 < XLen && OneUseOrZExtW && !ZExtOrANDI) { + if (Leading == C2 && C2 + C3 < XLen && OneUseOrZExtW && !IsCANDI) { + unsigned SrliOpc = RISCV::SRLI; + // If the input is zexti32 we should use SRLIW. + if (X.getOpcode() == ISD::AND && isa<ConstantSDNode>(X.getOperand(1)) && + X.getConstantOperandVal(1) == UINT64_C(0xFFFFFFFF)) { + SrliOpc = RISCV::SRLIW; + X = X.getOperand(0); + } SDNode *SRLI = CurDAG->getMachineNode( - RISCV::SRLI, DL, XLenVT, X, - CurDAG->getTargetConstant(C2 + C3, DL, XLenVT)); + SrliOpc, DL, VT, X, CurDAG->getTargetConstant(C2 + C3, DL, VT)); SDNode *SLLI = - CurDAG->getMachineNode(RISCV::SLLI, DL, XLenVT, SDValue(SRLI, 0), - CurDAG->getTargetConstant(C3, DL, XLenVT)); + CurDAG->getMachineNode(RISCV::SLLI, DL, VT, SDValue(SRLI, 0), + CurDAG->getTargetConstant(C3, DL, VT)); ReplaceNode(Node, SLLI); return; } // If the leading zero count is C2+32, we can use SRLIW instead of SRLI. if (Leading > 32 && (Leading - 32) == C2 && C2 + C3 < 32 && - OneUseOrZExtW && !ZExtOrANDI) { - SDNode *SRLIW = CurDAG->getMachineNode( - RISCV::SRLIW, DL, XLenVT, X, - CurDAG->getTargetConstant(C2 + C3, DL, XLenVT)); + OneUseOrZExtW && !IsCANDI) { + SDNode *SRLIW = + CurDAG->getMachineNode(RISCV::SRLIW, DL, VT, X, + CurDAG->getTargetConstant(C2 + C3, DL, VT)); SDNode *SLLI = - CurDAG->getMachineNode(RISCV::SLLI, DL, XLenVT, SDValue(SRLIW, 0), - CurDAG->getTargetConstant(C3, DL, XLenVT)); + CurDAG->getMachineNode(RISCV::SLLI, DL, VT, SDValue(SRLIW, 0), + CurDAG->getTargetConstant(C3, DL, VT)); ReplaceNode(Node, SLLI); return; } @@ -830,24 +1002,23 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { if (LeftShift && isShiftedMask_64(C1)) { uint64_t Leading = XLen - (64 - countLeadingZeros(C1)); uint64_t C3 = countTrailingZeros(C1); - if (Leading == 0 && C2 < C3 && OneUseOrZExtW && !ZExtOrANDI) { + if (Leading == 0 && C2 < C3 && OneUseOrZExtW && !IsCANDI) { SDNode *SRLI = CurDAG->getMachineNode( - RISCV::SRLI, DL, XLenVT, X, - CurDAG->getTargetConstant(C3 - C2, DL, XLenVT)); + RISCV::SRLI, DL, VT, X, CurDAG->getTargetConstant(C3 - C2, DL, VT)); SDNode *SLLI = - CurDAG->getMachineNode(RISCV::SLLI, DL, XLenVT, SDValue(SRLI, 0), - CurDAG->getTargetConstant(C3, DL, XLenVT)); + CurDAG->getMachineNode(RISCV::SLLI, DL, VT, SDValue(SRLI, 0), + CurDAG->getTargetConstant(C3, DL, VT)); ReplaceNode(Node, SLLI); return; } // If we have (32-C2) leading zeros, we can use SRLIW instead of SRLI. - if (C2 < C3 && Leading + C2 == 32 && OneUseOrZExtW && !ZExtOrANDI) { - SDNode *SRLIW = CurDAG->getMachineNode( - RISCV::SRLIW, DL, XLenVT, X, - CurDAG->getTargetConstant(C3 - C2, DL, XLenVT)); + if (C2 < C3 && Leading + C2 == 32 && OneUseOrZExtW && !IsCANDI) { + SDNode *SRLIW = + CurDAG->getMachineNode(RISCV::SRLIW, DL, VT, X, + CurDAG->getTargetConstant(C3 - C2, DL, VT)); SDNode *SLLI = - CurDAG->getMachineNode(RISCV::SLLI, DL, XLenVT, SDValue(SRLIW, 0), - CurDAG->getTargetConstant(C3, DL, XLenVT)); + CurDAG->getMachineNode(RISCV::SLLI, DL, VT, SDValue(SRLIW, 0), + CurDAG->getTargetConstant(C3, DL, VT)); ReplaceNode(Node, SLLI); return; } @@ -908,7 +1079,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { uint64_t ShiftedC1 = C1 << ConstantShift; // If this RV32, we need to sign extend the constant. if (XLen == 32) - ShiftedC1 = SignExtend64(ShiftedC1, 32); + ShiftedC1 = SignExtend64<32>(ShiftedC1); // Create (mulhu (slli X, lzcnt(C2)), C1 << (XLen - lzcnt(C2))). SDNode *Imm = selectImm(CurDAG, DL, VT, ShiftedC1, *Subtarget); @@ -1005,45 +1176,44 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { } MVT Src1VT = Src1.getSimpleValueType(); unsigned VMSLTOpcode, VMSLTMaskOpcode, VMXOROpcode, VMANDNOpcode, - VMSetOpcode, VMANDOpcode; + VMOROpcode; switch (RISCVTargetLowering::getLMUL(Src1VT)) { default: llvm_unreachable("Unexpected LMUL!"); -#define CASE_VMSLT_VMSET_OPCODES(lmulenum, suffix, suffix_b) \ +#define CASE_VMSLT_OPCODES(lmulenum, suffix, suffix_b) \ case RISCVII::VLMUL::lmulenum: \ VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \ : RISCV::PseudoVMSLT_VX_##suffix; \ VMSLTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix##_MASK \ : RISCV::PseudoVMSLT_VX_##suffix##_MASK; \ - VMSetOpcode = RISCV::PseudoVMSET_M_##suffix_b; \ break; - CASE_VMSLT_VMSET_OPCODES(LMUL_F8, MF8, B1) - CASE_VMSLT_VMSET_OPCODES(LMUL_F4, MF4, B2) - CASE_VMSLT_VMSET_OPCODES(LMUL_F2, MF2, B4) - CASE_VMSLT_VMSET_OPCODES(LMUL_1, M1, B8) - CASE_VMSLT_VMSET_OPCODES(LMUL_2, M2, B16) - CASE_VMSLT_VMSET_OPCODES(LMUL_4, M4, B32) - CASE_VMSLT_VMSET_OPCODES(LMUL_8, M8, B64) -#undef CASE_VMSLT_VMSET_OPCODES + CASE_VMSLT_OPCODES(LMUL_F8, MF8, B1) + CASE_VMSLT_OPCODES(LMUL_F4, MF4, B2) + CASE_VMSLT_OPCODES(LMUL_F2, MF2, B4) + CASE_VMSLT_OPCODES(LMUL_1, M1, B8) + CASE_VMSLT_OPCODES(LMUL_2, M2, B16) + CASE_VMSLT_OPCODES(LMUL_4, M4, B32) + CASE_VMSLT_OPCODES(LMUL_8, M8, B64) +#undef CASE_VMSLT_OPCODES } // Mask operations use the LMUL from the mask type. switch (RISCVTargetLowering::getLMUL(VT)) { default: llvm_unreachable("Unexpected LMUL!"); -#define CASE_VMXOR_VMANDN_VMAND_OPCODES(lmulenum, suffix) \ +#define CASE_VMXOR_VMANDN_VMOR_OPCODES(lmulenum, suffix) \ case RISCVII::VLMUL::lmulenum: \ VMXOROpcode = RISCV::PseudoVMXOR_MM_##suffix; \ VMANDNOpcode = RISCV::PseudoVMANDN_MM_##suffix; \ - VMANDOpcode = RISCV::PseudoVMAND_MM_##suffix; \ + VMOROpcode = RISCV::PseudoVMOR_MM_##suffix; \ break; - CASE_VMXOR_VMANDN_VMAND_OPCODES(LMUL_F8, MF8) - CASE_VMXOR_VMANDN_VMAND_OPCODES(LMUL_F4, MF4) - CASE_VMXOR_VMANDN_VMAND_OPCODES(LMUL_F2, MF2) - CASE_VMXOR_VMANDN_VMAND_OPCODES(LMUL_1, M1) - CASE_VMXOR_VMANDN_VMAND_OPCODES(LMUL_2, M2) - CASE_VMXOR_VMANDN_VMAND_OPCODES(LMUL_4, M4) - CASE_VMXOR_VMANDN_VMAND_OPCODES(LMUL_8, M8) -#undef CASE_VMXOR_VMANDN_VMAND_OPCODES + CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F8, MF8) + CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F4, MF4) + CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F2, MF2) + CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_1, M1) + CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_2, M2) + CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_4, M4) + CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_8, M8) +#undef CASE_VMXOR_VMANDN_VMOR_OPCODES } SDValue SEW = CurDAG->getTargetConstant( Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT); @@ -1053,12 +1223,17 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { SDValue MaskedOff = Node->getOperand(1); SDValue Mask = Node->getOperand(4); - // If vmsgeu_mask with 0 immediate, expand it to {vmset, vmand}. + // If vmsgeu_mask with 0 immediate, expand it to vmor mask, maskedoff. if (IsCmpUnsignedZero) { - SDValue VMSet = - SDValue(CurDAG->getMachineNode(VMSetOpcode, DL, VT, VL, SEW), 0); - ReplaceNode(Node, CurDAG->getMachineNode(VMANDOpcode, DL, VT, - {Mask, VMSet, VL, MaskSEW})); + // We don't need vmor if the MaskedOff and the Mask are the same + // value. + if (Mask == MaskedOff) { + ReplaceUses(Node, Mask.getNode()); + return; + } + ReplaceNode(Node, + CurDAG->getMachineNode(VMOROpcode, DL, VT, + {Mask, MaskedOff, VL, MaskSEW})); return; } @@ -1082,10 +1257,14 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { // Otherwise use // vmslt{u}.vx vd, va, x, v0.t; vmxor.mm vd, vd, v0 + // The result is mask undisturbed. + // We use the same instructions to emulate mask agnostic behavior, because + // the agnostic result can be either undisturbed or all 1. SDValue Cmp = SDValue( CurDAG->getMachineNode(VMSLTMaskOpcode, DL, VT, {MaskedOff, Src1, Src2, V0, VL, SEW, Glue}), 0); + // vmxor.mm vd, vd, v0 is used to update active value. ReplaceNode(Node, CurDAG->getMachineNode(VMXOROpcode, DL, VT, {Cmp, Mask, VL, MaskSEW})); return; @@ -1215,7 +1394,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { unsigned CurOp = 2; // Masked intrinsic only have TU version pseduo instructions. - bool IsTU = IsMasked || (!IsMasked && !Node->getOperand(CurOp).isUndef()); + bool IsTU = IsMasked || !Node->getOperand(CurOp).isUndef(); SmallVector<SDValue, 8> Operands; if (IsTU) Operands.push_back(Node->getOperand(CurOp++)); @@ -1267,9 +1446,8 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { // The riscv_vlm intrinsic are always tail agnostic and no passthru operand. bool HasPassthruOperand = IntNo != Intrinsic::riscv_vlm; // Masked intrinsic only have TU version pseduo instructions. - bool IsTU = - HasPassthruOperand && - ((!IsMasked && !Node->getOperand(CurOp).isUndef()) || IsMasked); + bool IsTU = HasPassthruOperand && + (IsMasked || !Node->getOperand(CurOp).isUndef()); SmallVector<SDValue, 8> Operands; if (IsTU) Operands.push_back(Node->getOperand(CurOp++)); @@ -1302,7 +1480,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { unsigned CurOp = 2; // Masked intrinsic only have TU version pseduo instructions. - bool IsTU = IsMasked || (!IsMasked && !Node->getOperand(CurOp).isUndef()); + bool IsTU = IsMasked || !Node->getOperand(CurOp).isUndef(); SmallVector<SDValue, 7> Operands; if (IsTU) Operands.push_back(Node->getOperand(CurOp++)); @@ -1318,19 +1496,12 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { const RISCV::VLEPseudo *P = RISCV::getVLEPseudo(IsMasked, IsTU, /*Strided*/ false, /*FF*/ true, Log2SEW, static_cast<unsigned>(LMUL)); - MachineSDNode *Load = - CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0), - MVT::Other, MVT::Glue, Operands); - SDNode *ReadVL = CurDAG->getMachineNode(RISCV::PseudoReadVL, DL, XLenVT, - /*Glue*/ SDValue(Load, 2)); - + MachineSDNode *Load = CurDAG->getMachineNode( + P->Pseudo, DL, Node->getVTList(), Operands); if (auto *MemOp = dyn_cast<MemSDNode>(Node)) CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()}); - ReplaceUses(SDValue(Node, 0), SDValue(Load, 0)); - ReplaceUses(SDValue(Node, 1), SDValue(ReadVL, 0)); // VL - ReplaceUses(SDValue(Node, 2), SDValue(Load, 1)); // Chain - CurDAG->RemoveDeadNode(Node); + ReplaceNode(Node, Load); return; } } @@ -1610,9 +1781,10 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { // Try to match splat of a scalar load to a strided load with stride of x0. bool IsScalarMove = Node->getOpcode() == RISCVISD::VMV_S_X_VL || Node->getOpcode() == RISCVISD::VFMV_S_F_VL; - if (IsScalarMove && !Node->getOperand(0).isUndef()) + bool HasPassthruOperand = Node->getOpcode() != ISD::SPLAT_VECTOR; + if (HasPassthruOperand && !Node->getOperand(0).isUndef()) break; - SDValue Src = IsScalarMove ? Node->getOperand(1) : Node->getOperand(0); + SDValue Src = HasPassthruOperand ? Node->getOperand(1) : Node->getOperand(0); auto *Ld = dyn_cast<LoadSDNode>(Src); if (!Ld) break; @@ -1634,7 +1806,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { break; selectVLOp(Node->getOperand(2), VL); } else - selectVLOp(Node->getOperand(1), VL); + selectVLOp(Node->getOperand(2), VL); unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); SDValue SEW = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT); @@ -1650,8 +1822,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { MachineSDNode *Load = CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands); - if (auto *MemOp = dyn_cast<MemSDNode>(Node)) - CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()}); + CurDAG->setNodeMemRefs(Load, {Ld->getMemOperand()}); ReplaceNode(Node, Load); return; @@ -1680,11 +1851,37 @@ bool RISCVDAGToDAGISel::SelectInlineAsmMemoryOperand( return true; } -bool RISCVDAGToDAGISel::SelectAddrFI(SDValue Addr, SDValue &Base) { +bool RISCVDAGToDAGISel::SelectAddrFrameIndex(SDValue Addr, SDValue &Base, + SDValue &Offset) { if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), Subtarget->getXLenVT()); + Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), Subtarget->getXLenVT()); return true; } + + return false; +} + +// Select a frame index and an optional immediate offset from an ADD or OR. +bool RISCVDAGToDAGISel::SelectFrameAddrRegImm(SDValue Addr, SDValue &Base, + SDValue &Offset) { + if (SelectAddrFrameIndex(Addr, Base, Offset)) + return true; + + if (!CurDAG->isBaseWithConstantOffset(Addr)) + return false; + + if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr.getOperand(0))) { + int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue(); + if (isInt<12>(CVal)) { + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), + Subtarget->getXLenVT()); + Offset = CurDAG->getTargetConstant(CVal, SDLoc(Addr), + Subtarget->getXLenVT()); + return true; + } + } + return false; } @@ -1698,6 +1895,76 @@ bool RISCVDAGToDAGISel::SelectBaseAddr(SDValue Addr, SDValue &Base) { return true; } +bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base, + SDValue &Offset) { + if (SelectAddrFrameIndex(Addr, Base, Offset)) + return true; + + SDLoc DL(Addr); + MVT VT = Addr.getSimpleValueType(); + + if (Addr.getOpcode() == RISCVISD::ADD_LO) { + Base = Addr.getOperand(0); + Offset = Addr.getOperand(1); + return true; + } + + if (CurDAG->isBaseWithConstantOffset(Addr)) { + int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue(); + if (isInt<12>(CVal)) { + Base = Addr.getOperand(0); + if (Base.getOpcode() == RISCVISD::ADD_LO) { + SDValue LoOperand = Base.getOperand(1); + if (auto *GA = dyn_cast<GlobalAddressSDNode>(LoOperand)) { + // If the Lo in (ADD_LO hi, lo) is a global variable's address + // (its low part, really), then we can rely on the alignment of that + // variable to provide a margin of safety before low part can overflow + // the 12 bits of the load/store offset. Check if CVal falls within + // that margin; if so (low part + CVal) can't overflow. + const DataLayout &DL = CurDAG->getDataLayout(); + Align Alignment = commonAlignment( + GA->getGlobal()->getPointerAlignment(DL), GA->getOffset()); + if (CVal == 0 || Alignment > CVal) { + int64_t CombinedOffset = CVal + GA->getOffset(); + Base = Base.getOperand(0); + Offset = CurDAG->getTargetGlobalAddress( + GA->getGlobal(), SDLoc(LoOperand), LoOperand.getValueType(), + CombinedOffset, GA->getTargetFlags()); + return true; + } + } + } + + if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base)) + Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT); + Offset = CurDAG->getTargetConstant(CVal, DL, VT); + return true; + } + } + + // Handle ADD with large immediates. + if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) { + int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue(); + assert(!isInt<12>(CVal) && "simm12 not already handled?"); + + if (isInt<12>(CVal / 2) && isInt<12>(CVal - CVal / 2)) { + // We can use an ADDI for part of the offset and fold the rest into the + // load/store. This mirrors the AddiPair PatFrag in RISCVInstrInfo.td. + int64_t Adj = CVal < 0 ? -2048 : 2047; + Base = SDValue( + CurDAG->getMachineNode(RISCV::ADDI, DL, VT, Addr.getOperand(0), + CurDAG->getTargetConstant(Adj, DL, VT)), + 0); + Offset = CurDAG->getTargetConstant(CVal - Adj, DL, VT); + return true; + } + } + + Base = Addr; + Offset = CurDAG->getTargetConstant(0, DL, VT); + return true; +} + bool RISCVDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth, SDValue &ShAmt) { // Shift instructions on RISCV only read the lower 5 or 6 bits of the shift @@ -1723,6 +1990,21 @@ bool RISCVDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth, ShAmt = N.getOperand(0); return true; } + } else if (N.getOpcode() == ISD::SUB && + isa<ConstantSDNode>(N.getOperand(0))) { + uint64_t Imm = N.getConstantOperandVal(0); + // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to + // generate a NEG instead of a SUB of a constant. + if (Imm != 0 && Imm % ShiftWidth == 0) { + SDLoc DL(N); + EVT VT = N.getValueType(); + SDValue Zero = CurDAG->getRegister(RISCV::X0, VT); + unsigned NegOpc = VT == MVT::i64 ? RISCV::SUBW : RISCV::SUB; + MachineSDNode *Neg = CurDAG->getMachineNode(NegOpc, DL, VT, Zero, + N.getOperand(1)); + ShAmt = SDValue(Neg, 0); + return true; + } } ShAmt = N; @@ -1778,6 +2060,8 @@ bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits) const { Node->getOpcode() == ISD::MUL || Node->getOpcode() == ISD::SHL || Node->getOpcode() == ISD::SRL || Node->getOpcode() == ISD::SIGN_EXTEND_INREG || + Node->getOpcode() == RISCVISD::GREV || + Node->getOpcode() == RISCVISD::GORC || isa<ConstantSDNode>(Node)) && "Unexpected opcode"); @@ -1812,6 +2096,7 @@ bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits) const { case RISCV::CTZW: case RISCV::CPOPW: case RISCV::SLLI_UW: + case RISCV::FMV_W_X: case RISCV::FCVT_H_W: case RISCV::FCVT_H_WU: case RISCV::FCVT_S_W: @@ -1835,6 +2120,7 @@ bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits) const { return false; break; case RISCV::SEXT_H: + case RISCV::FMV_H_X: case RISCV::ZEXT_H_RV32: case RISCV::ZEXT_H_RV64: if (Bits < 16) @@ -1871,22 +2157,32 @@ bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits) const { // allows us to choose betwen VSETIVLI or VSETVLI later. bool RISCVDAGToDAGISel::selectVLOp(SDValue N, SDValue &VL) { auto *C = dyn_cast<ConstantSDNode>(N); - if (C && (isUInt<5>(C->getZExtValue()) || - C->getSExtValue() == RISCV::VLMaxSentinel)) + if (C && isUInt<5>(C->getZExtValue())) { VL = CurDAG->getTargetConstant(C->getZExtValue(), SDLoc(N), N->getValueType(0)); - else + } else if (C && C->isAllOnesValue()) { + // Treat all ones as VLMax. + VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, SDLoc(N), + N->getValueType(0)); + } else if (isa<RegisterSDNode>(N) && + cast<RegisterSDNode>(N)->getReg() == RISCV::X0) { + // All our VL operands use an operand that allows GPRNoX0 or an immediate + // as the register class. Convert X0 to a special immediate to pass the + // MachineVerifier. This is recognized specially by the vsetvli insertion + // pass. + VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, SDLoc(N), + N->getValueType(0)); + } else { VL = N; + } return true; } bool RISCVDAGToDAGISel::selectVSplat(SDValue N, SDValue &SplatVal) { - if (N.getOpcode() != ISD::SPLAT_VECTOR && - N.getOpcode() != RISCVISD::SPLAT_VECTOR_I64 && - N.getOpcode() != RISCVISD::VMV_V_X_VL) + if (N.getOpcode() != RISCVISD::VMV_V_X_VL || !N.getOperand(0).isUndef()) return false; - SplatVal = N.getOperand(0); + SplatVal = N.getOperand(1); return true; } @@ -1896,23 +2192,22 @@ static bool selectVSplatSimmHelper(SDValue N, SDValue &SplatVal, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, ValidateFn ValidateImm) { - if ((N.getOpcode() != ISD::SPLAT_VECTOR && - N.getOpcode() != RISCVISD::SPLAT_VECTOR_I64 && - N.getOpcode() != RISCVISD::VMV_V_X_VL) || - !isa<ConstantSDNode>(N.getOperand(0))) + if (N.getOpcode() != RISCVISD::VMV_V_X_VL || !N.getOperand(0).isUndef() || + !isa<ConstantSDNode>(N.getOperand(1))) return false; - int64_t SplatImm = cast<ConstantSDNode>(N.getOperand(0))->getSExtValue(); + int64_t SplatImm = + cast<ConstantSDNode>(N.getOperand(1))->getSExtValue(); - // ISD::SPLAT_VECTOR, RISCVISD::SPLAT_VECTOR_I64 and RISCVISD::VMV_V_X_VL - // share semantics when the operand type is wider than the resulting vector - // element type: an implicit truncation first takes place. Therefore, perform - // a manual truncation/sign-extension in order to ignore any truncated bits - // and catch any zero-extended immediate. + // The semantics of RISCVISD::VMV_V_X_VL is that when the operand + // type is wider than the resulting vector element type: an implicit + // truncation first takes place. Therefore, perform a manual + // truncation/sign-extension in order to ignore any truncated bits and catch + // any zero-extended immediate. // For example, we wish to match (i8 -1) -> (XLenVT 255) as a simm5 by first // sign-extending to (XLenVT -1). MVT XLenVT = Subtarget.getXLenVT(); - assert(XLenVT == N.getOperand(0).getSimpleValueType() && + assert(XLenVT == N.getOperand(1).getSimpleValueType() && "Unexpected splat operand type"); MVT EltVT = N.getSimpleValueType().getVectorElementType(); if (EltVT.bitsLT(XLenVT)) @@ -1945,13 +2240,12 @@ bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1NonZero(SDValue N, } bool RISCVDAGToDAGISel::selectVSplatUimm5(SDValue N, SDValue &SplatVal) { - if ((N.getOpcode() != ISD::SPLAT_VECTOR && - N.getOpcode() != RISCVISD::SPLAT_VECTOR_I64 && - N.getOpcode() != RISCVISD::VMV_V_X_VL) || - !isa<ConstantSDNode>(N.getOperand(0))) + if (N.getOpcode() != RISCVISD::VMV_V_X_VL || !N.getOperand(0).isUndef() || + !isa<ConstantSDNode>(N.getOperand(1))) return false; - int64_t SplatImm = cast<ConstantSDNode>(N.getOperand(0))->getSExtValue(); + int64_t SplatImm = + cast<ConstantSDNode>(N.getOperand(1))->getSExtValue(); if (!isUInt<5>(SplatImm)) return false; @@ -1980,49 +2274,42 @@ bool RISCVDAGToDAGISel::selectRVVSimm5(SDValue N, unsigned Width, // Merge an ADDI into the offset of a load/store instruction where possible. // (load (addi base, off1), off2) -> (load base, off1+off2) // (store val, (addi base, off1), off2) -> (store val, base, off1+off2) +// (load (add base, (addi src, off1)), off2) +// -> (load (add base, src), off1+off2) +// (store val, (add base, (addi src, off1)), off2) +// -> (store val, (add base, src), off1+off2) // This is possible when off1+off2 fits a 12-bit immediate. bool RISCVDAGToDAGISel::doPeepholeLoadStoreADDI(SDNode *N) { - int OffsetOpIdx; - int BaseOpIdx; - - // Only attempt this optimisation for I-type loads and S-type stores. - switch (N->getMachineOpcode()) { - default: + unsigned OffsetOpIdx, BaseOpIdx; + if (!hasMemOffset(N, BaseOpIdx, OffsetOpIdx)) return false; - case RISCV::LB: - case RISCV::LH: - case RISCV::LW: - case RISCV::LBU: - case RISCV::LHU: - case RISCV::LWU: - case RISCV::LD: - case RISCV::FLH: - case RISCV::FLW: - case RISCV::FLD: - BaseOpIdx = 0; - OffsetOpIdx = 1; - break; - case RISCV::SB: - case RISCV::SH: - case RISCV::SW: - case RISCV::SD: - case RISCV::FSH: - case RISCV::FSW: - case RISCV::FSD: - BaseOpIdx = 1; - OffsetOpIdx = 2; - break; - } if (!isa<ConstantSDNode>(N->getOperand(OffsetOpIdx))) return false; SDValue Base = N->getOperand(BaseOpIdx); - // If the base is an ADDI, we can merge it in to the load/store. - if (!Base.isMachineOpcode() || Base.getMachineOpcode() != RISCV::ADDI) + if (!Base.isMachineOpcode()) return false; + if (Base.getMachineOpcode() == RISCV::ADDI) { + // If the base is an ADDI, we can merge it in to the load/store. + } else if (Base.getMachineOpcode() == RISCV::ADDIW && + isa<ConstantSDNode>(Base.getOperand(1)) && + Base.getOperand(0).isMachineOpcode() && + Base.getOperand(0).getMachineOpcode() == RISCV::LUI && + isa<ConstantSDNode>(Base.getOperand(0).getOperand(0))) { + // ADDIW can be merged if it's part of LUI+ADDIW constant materialization + // and LUI+ADDI would have produced the same result. This is true for all + // simm32 values except 0x7ffff800-0x7fffffff. + int64_t Offset = + SignExtend64<32>(Base.getOperand(0).getConstantOperandVal(0) << 12); + Offset += cast<ConstantSDNode>(Base.getOperand(1))->getSExtValue(); + if (!isInt<32>(Offset)) + return false; + } else + return false; + SDValue ImmOperand = Base.getOperand(1); uint64_t Offset2 = N->getConstantOperandVal(OffsetOpIdx); @@ -2039,7 +2326,8 @@ bool RISCVDAGToDAGISel::doPeepholeLoadStoreADDI(SDNode *N) { // to provide a margin of safety before off1 can overflow the 12 bits. // Check if off2 falls within that margin; if so off1+off2 can't overflow. const DataLayout &DL = CurDAG->getDataLayout(); - Align Alignment = GA->getGlobal()->getPointerAlignment(DL); + Align Alignment = commonAlignment(GA->getGlobal()->getPointerAlignment(DL), + GA->getOffset()); if (Offset2 != 0 && Alignment <= Offset2) return false; int64_t Offset1 = GA->getOffset(); @@ -2049,7 +2337,7 @@ bool RISCVDAGToDAGISel::doPeepholeLoadStoreADDI(SDNode *N) { CombinedOffset, GA->getTargetFlags()); } else if (auto *CP = dyn_cast<ConstantPoolSDNode>(ImmOperand)) { // Ditto. - Align Alignment = CP->getAlign(); + Align Alignment = commonAlignment(CP->getAlign(), CP->getOffset()); if (Offset2 != 0 && Alignment <= Offset2) return false; int64_t Offset1 = CP->getOffset(); @@ -2068,12 +2356,13 @@ bool RISCVDAGToDAGISel::doPeepholeLoadStoreADDI(SDNode *N) { LLVM_DEBUG(dbgs() << "\n"); // Modify the offset operand of the load/store. - if (BaseOpIdx == 0) // Load - CurDAG->UpdateNodeOperands(N, Base.getOperand(0), ImmOperand, - N->getOperand(2)); - else // Store - CurDAG->UpdateNodeOperands(N, N->getOperand(0), Base.getOperand(0), - ImmOperand, N->getOperand(3)); + if (BaseOpIdx == 0) { // Load + N = CurDAG->UpdateNodeOperands(N, Base.getOperand(0), ImmOperand, + N->getOperand(2)); + } else { // Store + N = CurDAG->UpdateNodeOperands(N, N->getOperand(0), Base.getOperand(0), + ImmOperand, N->getOperand(3)); + } return true; } @@ -2130,6 +2419,8 @@ bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) { case RISCV::SUBW: case RISCV::MULW: case RISCV::SLLIW: + case RISCV::GREVIW: + case RISCV::GORCIW: // Result is already sign extended just remove the sext.w. // NOTE: We only handle the nodes that are selected with hasAllWUsers. ReplaceUses(N, N0.getNode()); @@ -2139,8 +2430,113 @@ bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) { return false; } +// Optimize masked RVV pseudo instructions with a known all-ones mask to their +// corresponding "unmasked" pseudo versions. The mask we're interested in will +// take the form of a V0 physical register operand, with a glued +// register-setting instruction. +bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(SDNode *N) { + const RISCV::RISCVMaskedPseudoInfo *I = + RISCV::getMaskedPseudoInfo(N->getMachineOpcode()); + if (!I) + return false; + + unsigned MaskOpIdx = I->MaskOpIdx; + + // Check that we're using V0 as a mask register. + if (!isa<RegisterSDNode>(N->getOperand(MaskOpIdx)) || + cast<RegisterSDNode>(N->getOperand(MaskOpIdx))->getReg() != RISCV::V0) + return false; + + // The glued user defines V0. + const auto *Glued = N->getGluedNode(); + + if (!Glued || Glued->getOpcode() != ISD::CopyToReg) + return false; + + // Check that we're defining V0 as a mask register. + if (!isa<RegisterSDNode>(Glued->getOperand(1)) || + cast<RegisterSDNode>(Glued->getOperand(1))->getReg() != RISCV::V0) + return false; + + // Check the instruction defining V0; it needs to be a VMSET pseudo. + SDValue MaskSetter = Glued->getOperand(2); + + const auto IsVMSet = [](unsigned Opc) { + return Opc == RISCV::PseudoVMSET_M_B1 || Opc == RISCV::PseudoVMSET_M_B16 || + Opc == RISCV::PseudoVMSET_M_B2 || Opc == RISCV::PseudoVMSET_M_B32 || + Opc == RISCV::PseudoVMSET_M_B4 || Opc == RISCV::PseudoVMSET_M_B64 || + Opc == RISCV::PseudoVMSET_M_B8; + }; + + // TODO: Check that the VMSET is the expected bitwidth? The pseudo has + // undefined behaviour if it's the wrong bitwidth, so we could choose to + // assume that it's all-ones? Same applies to its VL. + if (!MaskSetter->isMachineOpcode() || !IsVMSet(MaskSetter.getMachineOpcode())) + return false; + + // Retrieve the tail policy operand index, if any. + Optional<unsigned> TailPolicyOpIdx; + const RISCVInstrInfo &TII = *Subtarget->getInstrInfo(); + const MCInstrDesc &MaskedMCID = TII.get(N->getMachineOpcode()); + + bool IsTA = true; + if (RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags)) { + // The last operand of the pseudo is the policy op, but we might have a + // Glue operand last. We might also have a chain. + TailPolicyOpIdx = N->getNumOperands() - 1; + if (N->getOperand(*TailPolicyOpIdx).getValueType() == MVT::Glue) + (*TailPolicyOpIdx)--; + if (N->getOperand(*TailPolicyOpIdx).getValueType() == MVT::Other) + (*TailPolicyOpIdx)--; + + if (!(N->getConstantOperandVal(*TailPolicyOpIdx) & + RISCVII::TAIL_AGNOSTIC)) { + // Keep the true-masked instruction when there is no unmasked TU + // instruction + if (I->UnmaskedTUPseudo == I->MaskedPseudo && !N->getOperand(0).isUndef()) + return false; + // We can't use TA if the tie-operand is not IMPLICIT_DEF + if (!N->getOperand(0).isUndef()) + IsTA = false; + } + } + + unsigned Opc = IsTA ? I->UnmaskedPseudo : I->UnmaskedTUPseudo; + + // Check that we're dropping the mask operand and any policy operand + // when we transform to this unmasked pseudo. Additionally, if this insturtion + // is tail agnostic, the unmasked instruction should not have a merge op. + uint64_t TSFlags = TII.get(Opc).TSFlags; + assert((IsTA != RISCVII::hasMergeOp(TSFlags)) && + RISCVII::hasDummyMaskOp(TSFlags) && + !RISCVII::hasVecPolicyOp(TSFlags) && + "Unexpected pseudo to transform to"); + (void)TSFlags; + + SmallVector<SDValue, 8> Ops; + // Skip the merge operand at index 0 if IsTA + for (unsigned I = IsTA, E = N->getNumOperands(); I != E; I++) { + // Skip the mask, the policy, and the Glue. + SDValue Op = N->getOperand(I); + if (I == MaskOpIdx || I == TailPolicyOpIdx || + Op.getValueType() == MVT::Glue) + continue; + Ops.push_back(Op); + } + + // Transitively apply any node glued to our new node. + if (auto *TGlued = Glued->getGluedNode()) + Ops.push_back(SDValue(TGlued, TGlued->getNumValues() - 1)); + + SDNode *Result = CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops); + ReplaceUses(N, Result); + + return true; +} + // This pass converts a legalized DAG into a RISCV-specific DAG, ready // for instruction scheduling. -FunctionPass *llvm::createRISCVISelDag(RISCVTargetMachine &TM) { - return new RISCVDAGToDAGISel(TM); +FunctionPass *llvm::createRISCVISelDag(RISCVTargetMachine &TM, + CodeGenOpt::Level OptLevel) { + return new RISCVDAGToDAGISel(TM, OptLevel); } |
