diff options
| author | Dimitry Andric <dim@FreeBSD.org> | 2021-12-02 21:02:54 +0000 |
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2021-12-02 21:02:54 +0000 |
| commit | f65dcba83ce5035ab88a85fe17628b447eb56e1b (patch) | |
| tree | 35f37bb72b3cfc6060193e66c76ee7c9478969b0 /llvm/lib/Target/RISCV | |
| parent | 846a2208a8ab099f595fe7e8b2e6d54a7b5e67fb (diff) | |
Diffstat (limited to 'llvm/lib/Target/RISCV')
| -rw-r--r-- | llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp | 8 | ||||
| -rw-r--r-- | llvm/lib/Target/RISCV/RISCVFrameLowering.cpp | 16 | ||||
| -rw-r--r-- | llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 137 | ||||
| -rw-r--r-- | llvm/lib/Target/RISCV/RISCVISelLowering.h | 2 | ||||
| -rw-r--r-- | llvm/lib/Target/RISCV/RISCVInstrInfo.td | 1 | ||||
| -rw-r--r-- | llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp | 28 | ||||
| -rw-r--r-- | llvm/lib/Target/RISCV/RISCVRegisterInfo.h | 3 | ||||
| -rw-r--r-- | llvm/lib/Target/RISCV/RISCVRegisterInfo.td | 2 | ||||
| -rw-r--r-- | llvm/lib/Target/RISCV/RISCVSystemOperands.td | 2 |
9 files changed, 186 insertions, 13 deletions
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp index d1979b5456ce..f1c3810f4ee5 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp @@ -170,6 +170,14 @@ void RISCVInstPrinter::printAtomicMemOp(const MCInst *MI, unsigned OpNo, void RISCVInstPrinter::printVTypeI(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O) { unsigned Imm = MI->getOperand(OpNo).getImm(); + // Print the raw immediate for reserved values: vlmul[2:0]=4, vsew[2:0]=0b1xx, + // or non-zero bits 8/9/10. + if (RISCVVType::getVLMUL(Imm) == RISCVII::VLMUL::LMUL_RESERVED || + RISCVVType::getSEW(Imm) > 64 || (Imm & 0x700) != 0) { + O << Imm; + return; + } + // Print the text form. RISCVVType::printVType(Imm, O); } diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp index 595c3cdfbb1d..f5d491938050 100644 --- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp @@ -237,7 +237,13 @@ bool RISCVFrameLowering::hasBP(const MachineFunction &MF) const { const MachineFrameInfo &MFI = MF.getFrameInfo(); const TargetRegisterInfo *TRI = STI.getRegisterInfo(); - return MFI.hasVarSizedObjects() && TRI->hasStackRealignment(MF); + // If we do not reserve stack space for outgoing arguments in prologue, + // we will adjust the stack pointer before call instruction. After the + // adjustment, we can not use SP to access the stack objects for the + // arguments. Instead, use BP to access these stack objects. + return (MFI.hasVarSizedObjects() || + (!hasReservedCallFrame(MF) && MFI.getMaxCallFrameSize() != 0)) && + TRI->hasStackRealignment(MF); } // Determines the size of the frame and maximum call frame size. @@ -1065,10 +1071,14 @@ bool RISCVFrameLowering::restoreCalleeSavedRegisters( if (MI != MBB.end() && !MI->isDebugInstr()) DL = MI->getDebugLoc(); - // Manually restore values not restored by libcall. Insert in reverse order. + // Manually restore values not restored by libcall. + // Keep the same order as in the prologue. There is no need to reverse the + // order in the epilogue. In addition, the return address will be restored + // first in the epilogue. It increases the opportunity to avoid the + // load-to-use data hazard between loading RA and return by RA. // loadRegFromStackSlot can insert multiple instructions. const auto &NonLibcallCSI = getNonLibcallCSI(*MF, CSI); - for (auto &CS : reverse(NonLibcallCSI)) { + for (auto &CS : NonLibcallCSI) { Register Reg = CS.getReg(); const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); TII.loadRegFromStackSlot(MBB, MI, Reg, CS.getFrameIdx(), RC, TRI); diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 0f1a6e5f9154..f3331571fc55 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -335,17 +335,29 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction(ISD::SELECT_CC, MVT::f16, Expand); setOperationAction(ISD::SELECT, MVT::f16, Custom); setOperationAction(ISD::BR_CC, MVT::f16, Expand); - for (auto Op : FPOpToExpand) - setOperationAction(Op, MVT::f16, Expand); setOperationAction(ISD::FREM, MVT::f16, Promote); - setOperationAction(ISD::FCEIL, MVT::f16, Promote); - setOperationAction(ISD::FFLOOR, MVT::f16, Promote); - setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote); - setOperationAction(ISD::FRINT, MVT::f16, Promote); - setOperationAction(ISD::FROUND, MVT::f16, Promote); - setOperationAction(ISD::FROUNDEVEN, MVT::f16, Promote); - setOperationAction(ISD::FTRUNC, MVT::f16, Promote); + setOperationAction(ISD::FCEIL, MVT::f16, Promote); + setOperationAction(ISD::FFLOOR, MVT::f16, Promote); + setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote); + setOperationAction(ISD::FRINT, MVT::f16, Promote); + setOperationAction(ISD::FROUND, MVT::f16, Promote); + setOperationAction(ISD::FROUNDEVEN, MVT::f16, Promote); + setOperationAction(ISD::FTRUNC, MVT::f16, Promote); + setOperationAction(ISD::FPOW, MVT::f16, Promote); + setOperationAction(ISD::FPOWI, MVT::f16, Promote); + setOperationAction(ISD::FCOS, MVT::f16, Promote); + setOperationAction(ISD::FSIN, MVT::f16, Promote); + setOperationAction(ISD::FSINCOS, MVT::f16, Promote); + setOperationAction(ISD::FEXP, MVT::f16, Promote); + setOperationAction(ISD::FEXP2, MVT::f16, Promote); + setOperationAction(ISD::FLOG, MVT::f16, Promote); + setOperationAction(ISD::FLOG2, MVT::f16, Promote); + setOperationAction(ISD::FLOG10, MVT::f16, Promote); + + // We need to custom promote this. + if (Subtarget.is64Bit()) + setOperationAction(ISD::FPOWI, MVT::i32, Custom); } if (Subtarget.hasStdExtF()) { @@ -676,6 +688,10 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FMINNUM, VT, Legal); setOperationAction(ISD::FMAXNUM, VT, Legal); + setOperationAction(ISD::FTRUNC, VT, Custom); + setOperationAction(ISD::FCEIL, VT, Custom); + setOperationAction(ISD::FFLOOR, VT, Custom); + setOperationAction(ISD::VECREDUCE_FADD, VT, Custom); setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom); setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom); @@ -924,6 +940,10 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FP_ROUND, VT, Custom); setOperationAction(ISD::FP_EXTEND, VT, Custom); + setOperationAction(ISD::FTRUNC, VT, Custom); + setOperationAction(ISD::FCEIL, VT, Custom); + setOperationAction(ISD::FFLOOR, VT, Custom); + for (auto CC : VFPCCToExpand) setCondCodeAction(CC, VT, Expand); @@ -1165,6 +1185,10 @@ bool RISCVTargetLowering::shouldSinkOperands( case Instruction::Shl: case Instruction::LShr: case Instruction::AShr: + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::URem: + case Instruction::SRem: return Operand == 1; case Instruction::Call: if (auto *II = dyn_cast<IntrinsicInst>(I)) { @@ -1631,6 +1655,66 @@ static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) { return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO); } +// Expand vector FTRUNC, FCEIL, and FFLOOR by converting to the integer domain +// and back. Taking care to avoid converting values that are nan or already +// correct. +// TODO: Floor and ceil could be shorter by changing rounding mode, but we don't +// have FRM dependencies modeled yet. +static SDValue lowerFTRUNC_FCEIL_FFLOOR(SDValue Op, SelectionDAG &DAG) { + MVT VT = Op.getSimpleValueType(); + assert(VT.isVector() && "Unexpected type"); + + SDLoc DL(Op); + + // Freeze the source since we are increasing the number of uses. + SDValue Src = DAG.getNode(ISD::FREEZE, DL, VT, Op.getOperand(0)); + + // Truncate to integer and convert back to FP. + MVT IntVT = VT.changeVectorElementTypeToInteger(); + SDValue Truncated = DAG.getNode(ISD::FP_TO_SINT, DL, IntVT, Src); + Truncated = DAG.getNode(ISD::SINT_TO_FP, DL, VT, Truncated); + + MVT SetccVT = MVT::getVectorVT(MVT::i1, VT.getVectorElementCount()); + + if (Op.getOpcode() == ISD::FCEIL) { + // If the truncated value is the greater than or equal to the original + // value, we've computed the ceil. Otherwise, we went the wrong way and + // need to increase by 1. + // FIXME: This should use a masked operation. Handle here or in isel? + SDValue Adjust = DAG.getNode(ISD::FADD, DL, VT, Truncated, + DAG.getConstantFP(1.0, DL, VT)); + SDValue NeedAdjust = DAG.getSetCC(DL, SetccVT, Truncated, Src, ISD::SETOLT); + Truncated = DAG.getSelect(DL, VT, NeedAdjust, Adjust, Truncated); + } else if (Op.getOpcode() == ISD::FFLOOR) { + // If the truncated value is the less than or equal to the original value, + // we've computed the floor. Otherwise, we went the wrong way and need to + // decrease by 1. + // FIXME: This should use a masked operation. Handle here or in isel? + SDValue Adjust = DAG.getNode(ISD::FSUB, DL, VT, Truncated, + DAG.getConstantFP(1.0, DL, VT)); + SDValue NeedAdjust = DAG.getSetCC(DL, SetccVT, Truncated, Src, ISD::SETOGT); + Truncated = DAG.getSelect(DL, VT, NeedAdjust, Adjust, Truncated); + } + + // Restore the original sign so that -0.0 is preserved. + Truncated = DAG.getNode(ISD::FCOPYSIGN, DL, VT, Truncated, Src); + + // Determine the largest integer that can be represented exactly. This and + // values larger than it don't have any fractional bits so don't need to + // be converted. + const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT); + unsigned Precision = APFloat::semanticsPrecision(FltSem); + APFloat MaxVal = APFloat(FltSem); + MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1), + /*IsSigned*/ false, APFloat::rmNearestTiesToEven); + SDValue MaxValNode = DAG.getConstantFP(MaxVal, DL, VT); + + // If abs(Src) was larger than MaxVal or nan, keep it. + SDValue Abs = DAG.getNode(ISD::FABS, DL, VT, Src); + SDValue Setcc = DAG.getSetCC(DL, SetccVT, Abs, MaxValNode, ISD::SETOLT); + return DAG.getSelect(DL, VT, Setcc, Truncated, Src); +} + static SDValue lowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget) { MVT VT = Op.getSimpleValueType(); @@ -2670,6 +2754,20 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, DAG.getConstant(3, DL, VT)); return DAG.getNode(ISD::MUL, DL, VT, VScale, Op.getOperand(0)); } + case ISD::FPOWI: { + // Custom promote f16 powi with illegal i32 integer type on RV64. Once + // promoted this will be legalized into a libcall by LegalizeIntegerTypes. + if (Op.getValueType() == MVT::f16 && Subtarget.is64Bit() && + Op.getOperand(1).getValueType() == MVT::i32) { + SDLoc DL(Op); + SDValue Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0)); + SDValue Powi = + DAG.getNode(ISD::FPOWI, DL, MVT::f32, Op0, Op.getOperand(1)); + return DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, Powi, + DAG.getIntPtrConstant(0, DL)); + } + return SDValue(); + } case ISD::FP_EXTEND: { // RVV can only do fp_extend to types double the size as the source. We // custom-lower f16->f64 extensions to two hops of ISD::FP_EXTEND, going @@ -2858,6 +2956,10 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, case ISD::FP_TO_SINT_SAT: case ISD::FP_TO_UINT_SAT: return lowerFP_TO_INT_SAT(Op, DAG); + case ISD::FTRUNC: + case ISD::FCEIL: + case ISD::FFLOOR: + return lowerFTRUNC_FCEIL_FFLOOR(Op, DAG); case ISD::VECREDUCE_ADD: case ISD::VECREDUCE_UMAX: case ISD::VECREDUCE_SMAX: @@ -9834,6 +9936,23 @@ bool RISCVTargetLowering::shouldRemoveExtendFromGSIndex(EVT VT) const { return false; } +bool RISCVTargetLowering::shouldConvertFpToSat(unsigned Op, EVT FPVT, + EVT VT) const { + if (!isOperationLegalOrCustom(Op, VT) || !FPVT.isSimple()) + return false; + + switch (FPVT.getSimpleVT().SimpleTy) { + case MVT::f16: + return Subtarget.hasStdExtZfh(); + case MVT::f32: + return Subtarget.hasStdExtF(); + case MVT::f64: + return Subtarget.hasStdExtD(); + default: + return false; + } +} + bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const { VT = VT.getScalarType(); diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h index 8e3d716ae919..849928eb46ae 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -514,6 +514,8 @@ public: bool isLegalElementTypeForRVV(Type *ScalarTy) const; + bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override; + private: /// RISCVCCAssignFn - This target-specific function extends the default /// CCValAssign with additional information used to lower RISC-V calling diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td index b653928ccea9..6f9cde966132 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td @@ -705,6 +705,7 @@ def PseudoLD : PseudoLoad<"ld">; def PseudoSD : PseudoStore<"sd">; } // Predicates = [IsRV64] +def : InstAlias<"li $rd, $imm", (ADDI GPR:$rd, X0, simm12:$imm)>; def : InstAlias<"mv $rd, $rs", (ADDI GPR:$rd, GPR:$rs, 0)>; def : InstAlias<"not $rd, $rs", (XORI GPR:$rd, GPR:$rs, -1)>; def : InstAlias<"neg $rd, $rs", (SUB GPR:$rd, X0, GPR:$rs)>; diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp index 388cce00bdf3..798532d5bc44 100644 --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp @@ -20,6 +20,7 @@ #include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/IR/DebugInfoMetadata.h" #include "llvm/Support/ErrorHandling.h" #define GET_REGINFO_TARGET_DESC @@ -320,3 +321,30 @@ RISCVRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC, return &RISCV::VRRegClass; return RC; } + +void RISCVRegisterInfo::getOffsetOpcodes(const StackOffset &Offset, + SmallVectorImpl<uint64_t> &Ops) const { + // VLENB is the length of a vector register in bytes. We use <vscale x 8 x i8> + // to represent one vector register. The dwarf offset is + // VLENB * scalable_offset / 8. + assert(Offset.getScalable() % 8 == 0 && "Invalid frame offset"); + + // Add fixed-sized offset using existing DIExpression interface. + DIExpression::appendOffset(Ops, Offset.getFixed()); + + unsigned VLENB = getDwarfRegNum(RISCV::VLENB, true); + int64_t VLENBSized = Offset.getScalable() / 8; + if (VLENBSized > 0) { + Ops.push_back(dwarf::DW_OP_constu); + Ops.push_back(VLENBSized); + Ops.append({dwarf::DW_OP_bregx, VLENB, 0ULL}); + Ops.push_back(dwarf::DW_OP_mul); + Ops.push_back(dwarf::DW_OP_plus); + } else if (VLENBSized < 0) { + Ops.push_back(dwarf::DW_OP_constu); + Ops.push_back(-VLENBSized); + Ops.append({dwarf::DW_OP_bregx, VLENB, 0ULL}); + Ops.push_back(dwarf::DW_OP_mul); + Ops.push_back(dwarf::DW_OP_minus); + } +} diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.h b/llvm/lib/Target/RISCV/RISCVRegisterInfo.h index 74a5b83ff6f3..2b2bbdfbdf32 100644 --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.h +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.h @@ -63,6 +63,9 @@ struct RISCVRegisterInfo : public RISCVGenRegisterInfo { const TargetRegisterClass * getLargestLegalSuperClass(const TargetRegisterClass *RC, const MachineFunction &) const override; + + void getOffsetOpcodes(const StackOffset &Offset, + SmallVectorImpl<uint64_t> &Ops) const override; }; } diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td index a915a572f3b7..a56f992d320e 100644 --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td @@ -480,6 +480,8 @@ let RegAltNameIndices = [ABIRegAltName] in { def VL : RISCVReg<0, "vl", ["vl"]>; def VXSAT : RISCVReg<0, "vxsat", ["vxsat"]>; def VXRM : RISCVReg<0, "vxrm", ["vxrm"]>; + def VLENB : RISCVReg<0, "vlenb", ["vlenb"]>, + DwarfRegNum<[!add(4096, SysRegVLENB.Encoding)]>; } foreach m = [1, 2, 4] in { diff --git a/llvm/lib/Target/RISCV/RISCVSystemOperands.td b/llvm/lib/Target/RISCV/RISCVSystemOperands.td index 41599dd8bb3f..5a4c579dd708 100644 --- a/llvm/lib/Target/RISCV/RISCVSystemOperands.td +++ b/llvm/lib/Target/RISCV/RISCVSystemOperands.td @@ -388,4 +388,4 @@ def : SysReg<"vxrm", 0x00A>; def : SysReg<"vcsr", 0x00F>; def : SysReg<"vl", 0xC20>; def : SysReg<"vtype", 0xC21>; -def : SysReg<"vlenb", 0xC22>; +def SysRegVLENB: SysReg<"vlenb", 0xC22>; |
