author    | Dimitry Andric <dim@FreeBSD.org> | 2019-08-20 20:50:12 +0000
committer | Dimitry Andric <dim@FreeBSD.org> | 2019-08-20 20:50:12 +0000
commit    | e6d1592492a3a379186bfb02bd0f4eda0669c0d5 (patch)
tree      | 599ab169a01f1c86eda9adc774edaedde2f2db5b /lib/Target/RISCV/RISCVISelLowering.cpp
parent    | 1a56a5ead7a2e84bee8240f5f6b033b5f1707154 (diff)
Vendor import of stripped llvm trunk r366426 (just before the release_90
branch point):
https://llvm.org/svn/llvm-project/llvm/trunk@366426
Notes:
svn path=/vendor/llvm/dist/; revision=351278
svn path=/vendor/llvm/llvm-trunk-r366426/; revision=351279; tag=vendor/llvm/llvm-trunk-r366426
Diffstat (limited to 'lib/Target/RISCV/RISCVISelLowering.cpp')
-rw-r--r-- | lib/Target/RISCV/RISCVISelLowering.cpp | 1185
1 file changed, 1003 insertions(+), 182 deletions(-)
diff --git a/lib/Target/RISCV/RISCVISelLowering.cpp b/lib/Target/RISCV/RISCVISelLowering.cpp index 508dcbd009ed..ce7b85911ab6 100644 --- a/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/lib/Target/RISCV/RISCVISelLowering.cpp @@ -1,9 +1,8 @@ //===-- RISCVISelLowering.cpp - RISCV DAG Lowering Implementation --------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -18,6 +17,8 @@ #include "RISCVRegisterInfo.h" #include "RISCVSubtarget.h" #include "RISCVTargetMachine.h" +#include "Utils/RISCVMatInt.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -43,6 +44,24 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, const RISCVSubtarget &STI) : TargetLowering(TM), Subtarget(STI) { + if (Subtarget.isRV32E()) + report_fatal_error("Codegen not yet implemented for RV32E"); + + RISCVABI::ABI ABI = Subtarget.getTargetABI(); + assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI"); + + switch (ABI) { + default: + report_fatal_error("Don't know how to lower this ABI"); + case RISCVABI::ABI_ILP32: + case RISCVABI::ABI_ILP32F: + case RISCVABI::ABI_ILP32D: + case RISCVABI::ABI_LP64: + case RISCVABI::ABI_LP64F: + case RISCVABI::ABI_LP64D: + break; + } + MVT XLenVT = Subtarget.getXLenVT(); // Set up the register classes. @@ -81,10 +100,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand); if (Subtarget.is64Bit()) { - setTargetDAGCombine(ISD::SHL); - setTargetDAGCombine(ISD::SRL); - setTargetDAGCombine(ISD::SRA); - setTargetDAGCombine(ISD::ANY_EXTEND); + setOperationAction(ISD::SHL, MVT::i32, Custom); + setOperationAction(ISD::SRA, MVT::i32, Custom); + setOperationAction(ISD::SRL, MVT::i32, Custom); } if (!Subtarget.hasStdExtM()) { @@ -97,14 +115,20 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction(ISD::UREM, XLenVT, Expand); } + if (Subtarget.is64Bit() && Subtarget.hasStdExtM()) { + setOperationAction(ISD::SDIV, MVT::i32, Custom); + setOperationAction(ISD::UDIV, MVT::i32, Custom); + setOperationAction(ISD::UREM, MVT::i32, Custom); + } + setOperationAction(ISD::SDIVREM, XLenVT, Expand); setOperationAction(ISD::UDIVREM, XLenVT, Expand); setOperationAction(ISD::SMUL_LOHI, XLenVT, Expand); setOperationAction(ISD::UMUL_LOHI, XLenVT, Expand); - setOperationAction(ISD::SHL_PARTS, XLenVT, Expand); - setOperationAction(ISD::SRL_PARTS, XLenVT, Expand); - setOperationAction(ISD::SRA_PARTS, XLenVT, Expand); + setOperationAction(ISD::SHL_PARTS, XLenVT, Custom); + setOperationAction(ISD::SRL_PARTS, XLenVT, Custom); + setOperationAction(ISD::SRA_PARTS, XLenVT, Custom); setOperationAction(ISD::ROTL, XLenVT, Expand); setOperationAction(ISD::ROTR, XLenVT, Expand); @@ -114,9 +138,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction(ISD::CTPOP, XLenVT, Expand); ISD::CondCode FPCCToExtend[] = { - ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETO, ISD::SETUEQ, - ISD::SETUGT, ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, - ISD::SETGT, 
ISD::SETGE, ISD::SETNE}; + ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT, + ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, ISD::SETGT, + ISD::SETGE, ISD::SETNE}; ISD::NodeType FPOpToExtend[] = { ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM}; @@ -133,6 +157,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction(Op, MVT::f32, Expand); } + if (Subtarget.hasStdExtF() && Subtarget.is64Bit()) + setOperationAction(ISD::BITCAST, MVT::i32, Custom); + if (Subtarget.hasStdExtD()) { setOperationAction(ISD::FMINNUM, MVT::f64, Legal); setOperationAction(ISD::FMAXNUM, MVT::f64, Legal); @@ -151,6 +178,13 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction(ISD::BlockAddress, XLenVT, Custom); setOperationAction(ISD::ConstantPool, XLenVT, Custom); + setOperationAction(ISD::GlobalTLSAddress, XLenVT, Custom); + + // TODO: On M-mode only targets, the cycle[h] CSR may not be present. + // Unfortunately this can't be determined just from the ISA naming string. + setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, + Subtarget.is64Bit() ? Legal : Custom); + if (Subtarget.hasStdExtA()) { setMaxAtomicSizeInBitsSupported(Subtarget.getXLen()); setMinCmpXchgSizeInBits(32); @@ -276,6 +310,11 @@ bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const { return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64; } +bool RISCVTargetLowering::hasBitPreservingFPLogic(EVT VT) const { + return (VT == MVT::f32 && Subtarget.hasStdExtF()) || + (VT == MVT::f64 && Subtarget.hasStdExtD()); +} + // Changes the condition code and swaps operands if necessary, so the SetCC // operation matches one of the comparisons supported directly in the RISC-V // ISA. @@ -326,6 +365,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, return lowerBlockAddress(Op, DAG); case ISD::ConstantPool: return lowerConstantPool(Op, DAG); + case ISD::GlobalTLSAddress: + return lowerGlobalTLSAddress(Op, DAG); case ISD::SELECT: return lowerSELECT(Op, DAG); case ISD::VASTART: @@ -334,6 +375,81 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, return lowerFRAMEADDR(Op, DAG); case ISD::RETURNADDR: return lowerRETURNADDR(Op, DAG); + case ISD::SHL_PARTS: + return lowerShiftLeftParts(Op, DAG); + case ISD::SRA_PARTS: + return lowerShiftRightParts(Op, DAG, true); + case ISD::SRL_PARTS: + return lowerShiftRightParts(Op, DAG, false); + case ISD::BITCAST: { + assert(Subtarget.is64Bit() && Subtarget.hasStdExtF() && + "Unexpected custom legalisation"); + SDLoc DL(Op); + SDValue Op0 = Op.getOperand(0); + if (Op.getValueType() != MVT::f32 || Op0.getValueType() != MVT::i32) + return SDValue(); + SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0); + SDValue FPConv = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0); + return FPConv; + } + } +} + +static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty, + SelectionDAG &DAG, unsigned Flags) { + return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags); +} + +static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty, + SelectionDAG &DAG, unsigned Flags) { + return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(), + Flags); +} + +static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty, + SelectionDAG &DAG, unsigned Flags) { + return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlignment(), + N->getOffset(), Flags); +} + +template <class NodeTy> +SDValue RISCVTargetLowering::getAddr(NodeTy 
*N, SelectionDAG &DAG, + bool IsLocal) const { + SDLoc DL(N); + EVT Ty = getPointerTy(DAG.getDataLayout()); + + if (isPositionIndependent()) { + SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0); + if (IsLocal) + // Use PC-relative addressing to access the symbol. This generates the + // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym)) + // %pcrel_lo(auipc)). + return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0); + + // Use PC-relative addressing to access the GOT for this symbol, then load + // the address from the GOT. This generates the pattern (PseudoLA sym), + // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))). + return SDValue(DAG.getMachineNode(RISCV::PseudoLA, DL, Ty, Addr), 0); + } + + switch (getTargetMachine().getCodeModel()) { + default: + report_fatal_error("Unsupported code model for lowering"); + case CodeModel::Small: { + // Generate a sequence for accessing addresses within the first 2 GiB of + // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)). + SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI); + SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO); + SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0); + return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, AddrLo), 0); + } + case CodeModel::Medium: { + // Generate a sequence for accessing addresses within any 2GiB range within + // the address space. This generates the pattern (PseudoLLA sym), which + // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)). + SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0); + return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0); + } } } @@ -342,67 +458,145 @@ SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op, SDLoc DL(Op); EVT Ty = Op.getValueType(); GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op); - const GlobalValue *GV = N->getGlobal(); int64_t Offset = N->getOffset(); MVT XLenVT = Subtarget.getXLenVT(); - if (isPositionIndependent()) - report_fatal_error("Unable to lowerGlobalAddress"); + const GlobalValue *GV = N->getGlobal(); + bool IsLocal = getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV); + SDValue Addr = getAddr(N, DAG, IsLocal); + // In order to maximise the opportunity for common subexpression elimination, // emit a separate ADD node for the global address offset instead of folding // it in the global address node. Later peephole optimisations may choose to // fold it back in when profitable. 
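
Aside (not part of the patch): the CodeModel::Small sequence above relies on the standard RISC-V %hi/%lo split, where the high part absorbs a carry because ADDI sign-extends its 12-bit immediate. A minimal host-side sketch of that arithmetic, assuming nothing beyond standard C++:

    #include <cassert>
    #include <cstdint>

    // The carry-absorbing split behind (addi (lui %hi(sym)) %lo(sym)).
    static uint32_t hi20(uint32_t Addr) { return (Addr + 0x800) >> 12; }
    static int32_t lo12(uint32_t Addr) {
      int32_t Lo = Addr & 0xFFF;
      return Lo >= 0x800 ? Lo - 0x1000 : Lo; // ADDI sign-extends bits 11:0
    }

    int main() {
      for (uint32_t Addr : {0x12345678u, 0x00000800u, 0xFFFFF800u})
        // lui places hi20 << 12 in rd; addi then adds the signed low part.
        assert((hi20(Addr) << 12) + (uint32_t)lo12(Addr) == Addr);
    }
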
- SDValue GAHi = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_HI); - SDValue GALo = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_LO); - SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, GAHi), 0); - SDValue MNLo = - SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, GALo), 0); if (Offset != 0) - return DAG.getNode(ISD::ADD, DL, Ty, MNLo, + return DAG.getNode(ISD::ADD, DL, Ty, Addr, DAG.getConstant(Offset, DL, XLenVT)); - return MNLo; + return Addr; } SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op, SelectionDAG &DAG) const { - SDLoc DL(Op); - EVT Ty = Op.getValueType(); BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op); - const BlockAddress *BA = N->getBlockAddress(); - int64_t Offset = N->getOffset(); - - if (isPositionIndependent()) - report_fatal_error("Unable to lowerBlockAddress"); - SDValue BAHi = DAG.getTargetBlockAddress(BA, Ty, Offset, RISCVII::MO_HI); - SDValue BALo = DAG.getTargetBlockAddress(BA, Ty, Offset, RISCVII::MO_LO); - SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, BAHi), 0); - SDValue MNLo = - SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, BALo), 0); - return MNLo; + return getAddr(N, DAG); } SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op, SelectionDAG &DAG) const { + ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op); + + return getAddr(N, DAG); +} + +SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N, + SelectionDAG &DAG, + bool UseGOT) const { + SDLoc DL(N); + EVT Ty = getPointerTy(DAG.getDataLayout()); + const GlobalValue *GV = N->getGlobal(); + MVT XLenVT = Subtarget.getXLenVT(); + + if (UseGOT) { + // Use PC-relative addressing to access the GOT for this TLS symbol, then + // load the address from the GOT and add the thread pointer. This generates + // the pattern (PseudoLA_TLS_IE sym), which expands to + // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)). + SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0); + SDValue Load = + SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0); + + // Add the thread pointer. + SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT); + return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg); + } + + // Generate a sequence for accessing the address relative to the thread + // pointer, with the appropriate adjustment for the thread pointer offset. + // This generates the pattern + // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym)) + SDValue AddrHi = + DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI); + SDValue AddrAdd = + DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD); + SDValue AddrLo = + DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO); + + SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0); + SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT); + SDValue MNAdd = SDValue( + DAG.getMachineNode(RISCV::PseudoAddTPRel, DL, Ty, MNHi, TPReg, AddrAdd), + 0); + return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNAdd, AddrLo), 0); +} + +SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N, + SelectionDAG &DAG) const { + SDLoc DL(N); + EVT Ty = getPointerTy(DAG.getDataLayout()); + IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits()); + const GlobalValue *GV = N->getGlobal(); + + // Use a PC-relative addressing mode to access the global dynamic GOT address. + // This generates the pattern (PseudoLA_TLS_GD sym), which expands to + // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)). 
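
Aside (not part of the patch): a compact sketch of the model-to-sequence mapping that lowerGlobalTLSAddress below implements; the names here are illustrative, not LLVM API:

    // Illustrative only; mirrors the switch in lowerGlobalTLSAddress.
    enum class Model { LocalExec, InitialExec, LocalDynamic, GeneralDynamic };
    enum class Sequence { TpRelAddi, GotLoadPlusTp, TlsGetAddrCall };

    Sequence selectTLSSequence(bool PositionIndependent, Model M) {
      if (!PositionIndependent)
        M = Model::LocalExec; // non-PIC TLS always uses LocalExec
      switch (M) {
      case Model::LocalExec:      return Sequence::TpRelAddi;      // lui/add tp/addi
      case Model::InitialExec:    return Sequence::GotLoadPlusTp;  // PseudoLA_TLS_IE
      case Model::LocalDynamic:
      case Model::GeneralDynamic: return Sequence::TlsGetAddrCall; // PseudoLA_TLS_GD
      }
      return Sequence::TlsGetAddrCall; // unreachable; silences warnings
    }
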
+ SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0); + SDValue Load = + SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0); + + // Prepare argument list to generate call. + ArgListTy Args; + ArgListEntry Entry; + Entry.Node = Load; + Entry.Ty = CallTy; + Args.push_back(Entry); + + // Setup call to __tls_get_addr. + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(DL) + .setChain(DAG.getEntryNode()) + .setLibCallee(CallingConv::C, CallTy, + DAG.getExternalSymbol("__tls_get_addr", Ty), + std::move(Args)); + + return LowerCallTo(CLI).first; +} + +SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op, + SelectionDAG &DAG) const { SDLoc DL(Op); EVT Ty = Op.getValueType(); - ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op); - const Constant *CPA = N->getConstVal(); + GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op); int64_t Offset = N->getOffset(); - unsigned Alignment = N->getAlignment(); - - if (!isPositionIndependent()) { - SDValue CPAHi = - DAG.getTargetConstantPool(CPA, Ty, Alignment, Offset, RISCVII::MO_HI); - SDValue CPALo = - DAG.getTargetConstantPool(CPA, Ty, Alignment, Offset, RISCVII::MO_LO); - SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, CPAHi), 0); - SDValue MNLo = - SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, CPALo), 0); - return MNLo; - } else { - report_fatal_error("Unable to lowerConstantPool"); + MVT XLenVT = Subtarget.getXLenVT(); + + // Non-PIC TLS lowering should always use the LocalExec model. + TLSModel::Model Model = isPositionIndependent() + ? getTargetMachine().getTLSModel(N->getGlobal()) + : TLSModel::LocalExec; + + SDValue Addr; + switch (Model) { + case TLSModel::LocalExec: + Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false); + break; + case TLSModel::InitialExec: + Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true); + break; + case TLSModel::LocalDynamic: + case TLSModel::GeneralDynamic: + Addr = getDynamicTLSAddr(N, DAG); + break; } + + // In order to maximise the opportunity for common subexpression elimination, + // emit a separate ADD node for the global address offset instead of folding + // it in the global address node. Later peephole optimisations may choose to + // fold it back in when profitable. + if (Offset != 0) + return DAG.getNode(ISD::ADD, DL, Ty, Addr, + DAG.getConstant(Offset, DL, XLenVT)); + return Addr; } SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const { @@ -513,29 +707,184 @@ SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op, return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT); } -// Return true if the given node is a shift with a non-constant shift amount. 
-static bool isVariableShift(SDValue Val) { - switch (Val.getOpcode()) { +SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + SDValue Lo = Op.getOperand(0); + SDValue Hi = Op.getOperand(1); + SDValue Shamt = Op.getOperand(2); + EVT VT = Lo.getValueType(); + + // if Shamt-XLEN < 0: // Shamt < XLEN + // Lo = Lo << Shamt + // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt)) + // else: + // Lo = 0 + // Hi = Lo << (Shamt-XLEN) + + SDValue Zero = DAG.getConstant(0, DL, VT); + SDValue One = DAG.getConstant(1, DL, VT); + SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT); + SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT); + SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen); + SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt); + + SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt); + SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One); + SDValue ShiftRightLo = + DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt); + SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt); + SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo); + SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen); + + SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT); + + Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero); + Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse); + + SDValue Parts[2] = {Lo, Hi}; + return DAG.getMergeValues(Parts, DL); +} + +SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG, + bool IsSRA) const { + SDLoc DL(Op); + SDValue Lo = Op.getOperand(0); + SDValue Hi = Op.getOperand(1); + SDValue Shamt = Op.getOperand(2); + EVT VT = Lo.getValueType(); + + // SRA expansion: + // if Shamt-XLEN < 0: // Shamt < XLEN + // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt)) + // Hi = Hi >>s Shamt + // else: + // Lo = Hi >>s (Shamt-XLEN); + // Hi = Hi >>s (XLEN-1) + // + // SRL expansion: + // if Shamt-XLEN < 0: // Shamt < XLEN + // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt)) + // Hi = Hi >>u Shamt + // else: + // Lo = Hi >>u (Shamt-XLEN); + // Hi = 0; + + unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL; + + SDValue Zero = DAG.getConstant(0, DL, VT); + SDValue One = DAG.getConstant(1, DL, VT); + SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT); + SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT); + SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen); + SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt); + + SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt); + SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One); + SDValue ShiftLeftHi = + DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt); + SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi); + SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt); + SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen); + SDValue HiFalse = + IsSRA ? 
DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero; + + SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT); + + Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse); + Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse); + + SDValue Parts[2] = {Lo, Hi}; + return DAG.getMergeValues(Parts, DL); +} + +// Returns the opcode of the target-specific SDNode that implements the 32-bit +// form of the given Opcode. +static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) { + switch (Opcode) { default: - return false; + llvm_unreachable("Unexpected opcode"); case ISD::SHL: + return RISCVISD::SLLW; case ISD::SRA: + return RISCVISD::SRAW; case ISD::SRL: - return Val.getOperand(1).getOpcode() != ISD::Constant; + return RISCVISD::SRLW; + case ISD::SDIV: + return RISCVISD::DIVW; + case ISD::UDIV: + return RISCVISD::DIVUW; + case ISD::UREM: + return RISCVISD::REMUW; } } -// Returns true if the given node is an sdiv, udiv, or urem with non-constant -// operands. -static bool isVariableSDivUDivURem(SDValue Val) { - switch (Val.getOpcode()) { +// Converts the given 32-bit operation to a target-specific SelectionDAG node. +// Because i32 isn't a legal type for RV64, these operations would otherwise +// be promoted to i64, making it difficult to select the SLLW/DIVUW/.../*W +// later one because the fact the operation was originally of type i32 is +// lost. +static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG) { + SDLoc DL(N); + RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode()); + SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); + SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); + SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1); + // ReplaceNodeResults requires we maintain the same type for the return value. 
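
Aside (not part of the patch): the SHL_PARTS expansion commented in lowerShiftLeftParts above can be checked on the host. A sketch for XLEN = 32, assuming only standard C++; the (Lo >>u 1) >>u (XLEN-1 - Shamt) shape exists to keep every shift amount below 32:

    #include <cassert>
    #include <cstdint>

    // Shift a 64-bit value held as {Lo, Hi} left by 0 <= Shamt < 64
    // using only sub-32-bit shift amounts, as the expansion above does.
    static uint64_t shlParts(uint32_t Lo, uint32_t Hi, unsigned Shamt) {
      uint32_t ResLo, ResHi;
      if (Shamt < 32) { // Shamt - XLEN < 0
        ResLo = Lo << Shamt;
        // (Lo >>u 1) >>u (31 - Shamt) avoids an undefined >> 32 at Shamt == 0.
        ResHi = (Hi << Shamt) | ((Lo >> 1) >> (31 - Shamt));
      } else {
        ResLo = 0;
        ResHi = Lo << (Shamt - 32);
      }
      return ((uint64_t)ResHi << 32) | ResLo;
    }

    int main() {
      uint64_t V = 0x8000000123456789ULL;
      for (unsigned Shamt = 0; Shamt < 64; ++Shamt)
        assert(shlParts((uint32_t)V, (uint32_t)(V >> 32), Shamt) == V << Shamt);
    }
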
+ return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes); +} + +void RISCVTargetLowering::ReplaceNodeResults(SDNode *N, + SmallVectorImpl<SDValue> &Results, + SelectionDAG &DAG) const { + SDLoc DL(N); + switch (N->getOpcode()) { default: - return false; + llvm_unreachable("Don't know how to custom type legalize this operation!"); + case ISD::READCYCLECOUNTER: { + assert(!Subtarget.is64Bit() && + "READCYCLECOUNTER only has custom type legalization on riscv32"); + + SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other); + SDValue RCW = + DAG.getNode(RISCVISD::READ_CYCLE_WIDE, DL, VTs, N->getOperand(0)); + + Results.push_back(RCW); + Results.push_back(RCW.getValue(1)); + Results.push_back(RCW.getValue(2)); + break; + } + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: + assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && + "Unexpected custom legalisation"); + if (N->getOperand(1).getOpcode() == ISD::Constant) + return; + Results.push_back(customLegalizeToWOp(N, DAG)); + break; case ISD::SDIV: case ISD::UDIV: case ISD::UREM: - return Val.getOperand(0).getOpcode() != ISD::Constant && - Val.getOperand(1).getOpcode() != ISD::Constant; + assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && + Subtarget.hasStdExtM() && "Unexpected custom legalisation"); + if (N->getOperand(0).getOpcode() == ISD::Constant || + N->getOperand(1).getOpcode() == ISD::Constant) + return; + Results.push_back(customLegalizeToWOp(N, DAG)); + break; + case ISD::BITCAST: { + assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && + Subtarget.hasStdExtF() && "Unexpected custom legalisation"); + SDLoc DL(N); + SDValue Op0 = N->getOperand(0); + if (Op0.getValueType() != MVT::f32) + return; + SDValue FPConv = + DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0); + Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv)); + break; + } } } @@ -546,51 +895,225 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, switch (N->getOpcode()) { default: break; - case ISD::SHL: - case ISD::SRL: - case ISD::SRA: { - assert(Subtarget.getXLen() == 64 && "Combine should be 64-bit only"); - if (!DCI.isBeforeLegalize()) - break; - SDValue RHS = N->getOperand(1); - if (N->getValueType(0) != MVT::i32 || RHS->getOpcode() == ISD::Constant || - (RHS->getOpcode() == ISD::AssertZext && - cast<VTSDNode>(RHS->getOperand(1))->getVT().getSizeInBits() <= 5)) - break; - SDValue LHS = N->getOperand(0); - SDLoc DL(N); - SDValue NewRHS = - DAG.getNode(ISD::AssertZext, DL, RHS.getValueType(), RHS, - DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), 5))); - return DCI.CombineTo( - N, DAG.getNode(N->getOpcode(), DL, LHS.getValueType(), LHS, NewRHS)); - } - case ISD::ANY_EXTEND: { - // If any-extending an i32 variable-length shift or sdiv/udiv/urem to i64, - // then instead sign-extend in order to increase the chance of being able - // to select the sllw/srlw/sraw/divw/divuw/remuw instructions. - SDValue Src = N->getOperand(0); - if (N->getValueType(0) != MVT::i64 || Src.getValueType() != MVT::i32) - break; - if (!isVariableShift(Src) && - !(Subtarget.hasStdExtM() && isVariableSDivUDivURem(Src))) - break; - SDLoc DL(N); - return DCI.CombineTo(N, DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Src)); - } case RISCVISD::SplitF64: { + SDValue Op0 = N->getOperand(0); // If the input to SplitF64 is just BuildPairF64 then the operation is // redundant. Instead, use BuildPairF64's operands directly. 
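
Aside (not part of the patch): the *W legalisation above works because RV64's 32-bit instructions read only the low halves of their inputs and sign-extend their results. A host-side model of SLLW, which also motivates the SimplifyDemandedBits masks and the 33-sign-bit answer later in this patch:

    #include <cstdint>

    // RV64 SLLW semantics as modelled by RISCVISD::SLLW: only the low 32
    // bits of rs1 and the low 5 bits of rs2 are read, and the 32-bit
    // result is sign-extended to 64 bits.
    int64_t emulateSLLW(int64_t Rs1, int64_t Rs2) {
      uint32_t Shifted = (uint32_t)Rs1 << (Rs2 & 31);
      return (int64_t)(int32_t)Shifted; // sign-extend the 32-bit result
    }
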
+ if (Op0->getOpcode() == RISCVISD::BuildPairF64) + return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1)); + + SDLoc DL(N); + + // It's cheaper to materialise two 32-bit integers than to load a double + // from the constant pool and transfer it to integer registers through the + // stack. + if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) { + APInt V = C->getValueAPF().bitcastToAPInt(); + SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32); + SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32); + return DCI.CombineTo(N, Lo, Hi); + } + + // This is a target-specific version of a DAGCombine performed in + // DAGCombiner::visitBITCAST. It performs the equivalent of: + // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) + // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit)) + if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) || + !Op0.getNode()->hasOneUse()) + break; + SDValue NewSplitF64 = + DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), + Op0.getOperand(0)); + SDValue Lo = NewSplitF64.getValue(0); + SDValue Hi = NewSplitF64.getValue(1); + APInt SignBit = APInt::getSignMask(32); + if (Op0.getOpcode() == ISD::FNEG) { + SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi, + DAG.getConstant(SignBit, DL, MVT::i32)); + return DCI.CombineTo(N, Lo, NewHi); + } + assert(Op0.getOpcode() == ISD::FABS); + SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi, + DAG.getConstant(~SignBit, DL, MVT::i32)); + return DCI.CombineTo(N, Lo, NewHi); + } + case RISCVISD::SLLW: + case RISCVISD::SRAW: + case RISCVISD::SRLW: { + // Only the lower 32 bits of LHS and lower 5 bits of RHS are read. + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + APInt LHSMask = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 32); + APInt RHSMask = APInt::getLowBitsSet(RHS.getValueSizeInBits(), 5); + if ((SimplifyDemandedBits(N->getOperand(0), LHSMask, DCI)) || + (SimplifyDemandedBits(N->getOperand(1), RHSMask, DCI))) + return SDValue(); + break; + } + case RISCVISD::FMV_X_ANYEXTW_RV64: { + SDLoc DL(N); SDValue Op0 = N->getOperand(0); - if (Op0->getOpcode() != RISCVISD::BuildPairF64) + // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the + // conversion is unnecessary and can be replaced with an ANY_EXTEND + // of the FMV_W_X_RV64 operand. + if (Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) { + SDValue AExtOp = + DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0.getOperand(0)); + return DCI.CombineTo(N, AExtOp); + } + + // This is a target-specific version of a DAGCombine performed in + // DAGCombiner::visitBITCAST. 
It performs the equivalent of: + // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) + // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit)) + if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) || + !Op0.getNode()->hasOneUse()) break; - return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1)); + SDValue NewFMV = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, + Op0.getOperand(0)); + APInt SignBit = APInt::getSignMask(32).sext(64); + if (Op0.getOpcode() == ISD::FNEG) { + return DCI.CombineTo(N, + DAG.getNode(ISD::XOR, DL, MVT::i64, NewFMV, + DAG.getConstant(SignBit, DL, MVT::i64))); + } + assert(Op0.getOpcode() == ISD::FABS); + return DCI.CombineTo(N, + DAG.getNode(ISD::AND, DL, MVT::i64, NewFMV, + DAG.getConstant(~SignBit, DL, MVT::i64))); } } return SDValue(); } +bool RISCVTargetLowering::isDesirableToCommuteWithShift( + const SDNode *N, CombineLevel Level) const { + // The following folds are only desirable if `(OP _, c1 << c2)` can be + // materialised in fewer instructions than `(OP _, c1)`: + // + // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2) + // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2) + SDValue N0 = N->getOperand(0); + EVT Ty = N0.getValueType(); + if (Ty.isScalarInteger() && + (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) { + auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1)); + auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)); + if (C1 && C2) { + APInt C1Int = C1->getAPIntValue(); + APInt ShiftedC1Int = C1Int << C2->getAPIntValue(); + + // We can materialise `c1 << c2` into an add immediate, so it's "free", + // and the combine should happen, to potentially allow further combines + // later. + if (isLegalAddImmediate(ShiftedC1Int.getSExtValue())) + return true; + + // We can materialise `c1` in an add immediate, so it's "free", and the + // combine should be prevented. + if (isLegalAddImmediate(C1Int.getSExtValue())) + return false; + + // Neither constant will fit into an immediate, so find materialisation + // costs. + int C1Cost = RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(), + Subtarget.is64Bit()); + int ShiftedC1Cost = RISCVMatInt::getIntMatCost( + ShiftedC1Int, Ty.getSizeInBits(), Subtarget.is64Bit()); + + // Materialising `c1` is cheaper than materialising `c1 << c2`, so the + // combine should be prevented. + if (C1Cost < ShiftedC1Cost) + return false; + } + } + return true; +} + +unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode( + SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, + unsigned Depth) const { + switch (Op.getOpcode()) { + default: + break; + case RISCVISD::SLLW: + case RISCVISD::SRAW: + case RISCVISD::SRLW: + case RISCVISD::DIVW: + case RISCVISD::DIVUW: + case RISCVISD::REMUW: + // TODO: As the result is sign-extended, this is conservatively correct. A + // more precise answer could be calculated for SRAW depending on known + // bits in the shift amount. + return 33; + } + + return 1; +} + +MachineBasicBlock *emitReadCycleWidePseudo(MachineInstr &MI, + MachineBasicBlock *BB) { + assert(MI.getOpcode() == RISCV::ReadCycleWide && "Unexpected instruction"); + + // To read the 64-bit cycle CSR on a 32-bit target, we read the two halves. + // Should the count have wrapped while it was being read, we need to try + // again. + // ... 
+ // read: + // rdcycleh x3 # load high word of cycle + // rdcycle x2 # load low word of cycle + // rdcycleh x4 # load high word of cycle + // bne x3, x4, read # check if high word reads match, otherwise try again + // ... + + MachineFunction &MF = *BB->getParent(); + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + MachineFunction::iterator It = ++BB->getIterator(); + + MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVM_BB); + MF.insert(It, LoopMBB); + + MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVM_BB); + MF.insert(It, DoneMBB); + + // Transfer the remainder of BB and its successor edges to DoneMBB. + DoneMBB->splice(DoneMBB->begin(), BB, + std::next(MachineBasicBlock::iterator(MI)), BB->end()); + DoneMBB->transferSuccessorsAndUpdatePHIs(BB); + + BB->addSuccessor(LoopMBB); + + MachineRegisterInfo &RegInfo = MF.getRegInfo(); + unsigned ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass); + unsigned LoReg = MI.getOperand(0).getReg(); + unsigned HiReg = MI.getOperand(1).getReg(); + DebugLoc DL = MI.getDebugLoc(); + + const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); + BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg) + .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding) + .addReg(RISCV::X0); + BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg) + .addImm(RISCVSysReg::lookupSysRegByName("CYCLE")->Encoding) + .addReg(RISCV::X0); + BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg) + .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding) + .addReg(RISCV::X0); + + BuildMI(LoopMBB, DL, TII->get(RISCV::BNE)) + .addReg(HiReg) + .addReg(ReadAgainReg) + .addMBB(LoopMBB); + + LoopMBB->addSuccessor(LoopMBB); + LoopMBB->addSuccessor(DoneMBB); + + MI.eraseFromParent(); + + return DoneMBB; +} + static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB) { assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction"); @@ -655,24 +1178,21 @@ static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI, return BB; } -MachineBasicBlock * -RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, - MachineBasicBlock *BB) const { +static bool isSelectPseudo(MachineInstr &MI) { switch (MI.getOpcode()) { default: - llvm_unreachable("Unexpected instr type to insert"); + return false; case RISCV::Select_GPR_Using_CC_GPR: case RISCV::Select_FPR32_Using_CC_GPR: case RISCV::Select_FPR64_Using_CC_GPR: - break; - case RISCV::BuildPairF64Pseudo: - return emitBuildPairF64Pseudo(MI, BB); - case RISCV::SplitF64Pseudo: - return emitSplitF64Pseudo(MI, BB); + return true; } +} - // To "insert" a SELECT instruction, we actually have to insert the triangle - // control-flow pattern. The incoming instruction knows the destination vreg +static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI, + MachineBasicBlock *BB) { + // To "insert" Select_* instructions, we actually have to insert the triangle + // control-flow pattern. The incoming instructions know the destination vreg // to set, the condition code register to branch on, the true/false values to // select between, and the condcode to use to select the appropriate branch. // @@ -682,6 +1202,54 @@ RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, // | IfFalseMBB // | / // TailMBB + // + // When we find a sequence of selects we attempt to optimize their emission + // by sharing the control flow. Currently we only handle cases where we have + // multiple selects with the exact same condition (same LHS, RHS and CC). 
+ // The selects may be interleaved with other instructions if the other + // instructions meet some requirements we deem safe: + // - They are debug instructions. Otherwise, + // - They do not have side-effects, do not access memory and their inputs do + // not depend on the results of the select pseudo-instructions. + // The TrueV/FalseV operands of the selects cannot depend on the result of + // previous selects in the sequence. + // These conditions could be further relaxed. See the X86 target for a + // related approach and more information. + unsigned LHS = MI.getOperand(1).getReg(); + unsigned RHS = MI.getOperand(2).getReg(); + auto CC = static_cast<ISD::CondCode>(MI.getOperand(3).getImm()); + + SmallVector<MachineInstr *, 4> SelectDebugValues; + SmallSet<unsigned, 4> SelectDests; + SelectDests.insert(MI.getOperand(0).getReg()); + + MachineInstr *LastSelectPseudo = &MI; + + for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI); + SequenceMBBI != E; ++SequenceMBBI) { + if (SequenceMBBI->isDebugInstr()) + continue; + else if (isSelectPseudo(*SequenceMBBI)) { + if (SequenceMBBI->getOperand(1).getReg() != LHS || + SequenceMBBI->getOperand(2).getReg() != RHS || + SequenceMBBI->getOperand(3).getImm() != CC || + SelectDests.count(SequenceMBBI->getOperand(4).getReg()) || + SelectDests.count(SequenceMBBI->getOperand(5).getReg())) + break; + LastSelectPseudo = &*SequenceMBBI; + SequenceMBBI->collectDebugValues(SelectDebugValues); + SelectDests.insert(SequenceMBBI->getOperand(0).getReg()); + } else { + if (SequenceMBBI->hasUnmodeledSideEffects() || + SequenceMBBI->mayLoadOrStore()) + break; + if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) { + return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg()); + })) + break; + } + } + const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo(); const BasicBlock *LLVM_BB = BB->getBasicBlock(); DebugLoc DL = MI.getDebugLoc(); @@ -694,20 +1262,23 @@ RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, F->insert(I, IfFalseMBB); F->insert(I, TailMBB); - // Move all remaining instructions to TailMBB. - TailMBB->splice(TailMBB->begin(), HeadMBB, - std::next(MachineBasicBlock::iterator(MI)), HeadMBB->end()); + + // Transfer debug instructions associated with the selects to TailMBB. + for (MachineInstr *DebugInstr : SelectDebugValues) { + TailMBB->push_back(DebugInstr->removeFromParent()); + } + + // Move all instructions after the sequence to TailMBB. + TailMBB->splice(TailMBB->end(), HeadMBB, + std::next(LastSelectPseudo->getIterator()), HeadMBB->end()); // Update machine-CFG edges by transferring all successors of the current - // block to the new block which will contain the Phi node for the select. + // block to the new block which will contain the Phi nodes for the selects. TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB); // Set the successors for HeadMBB. HeadMBB->addSuccessor(IfFalseMBB); HeadMBB->addSuccessor(TailMBB); // Insert appropriate branch. - unsigned LHS = MI.getOperand(1).getReg(); - unsigned RHS = MI.getOperand(2).getReg(); - auto CC = static_cast<ISD::CondCode>(MI.getOperand(3).getImm()); unsigned Opcode = getBranchOpcodeForIntCondCode(CC); BuildMI(HeadMBB, DL, TII.get(Opcode)) @@ -718,18 +1289,50 @@ RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, // IfFalseMBB just falls through to TailMBB. 
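
Aside (not part of the patch): hypothetical source that benefits from the select-sequence sharing described above; both selects use the same (LHS, RHS, CC), so they now produce one conditional branch and two PHIs in TailMBB rather than two separate triangles:

    int pickBoth(int A, int B, int X1, int Y1, int X2, int Y2) {
      int R1 = A < B ? X1 : Y1; // Select_GPR_Using_CC_GPR
      int R2 = A < B ? X2 : Y2; // same condition: shares the branch above
      return R1 + R2;
    }
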
IfFalseMBB->addSuccessor(TailMBB); - // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ] - BuildMI(*TailMBB, TailMBB->begin(), DL, TII.get(RISCV::PHI), - MI.getOperand(0).getReg()) - .addReg(MI.getOperand(4).getReg()) - .addMBB(HeadMBB) - .addReg(MI.getOperand(5).getReg()) - .addMBB(IfFalseMBB); + // Create PHIs for all of the select pseudo-instructions. + auto SelectMBBI = MI.getIterator(); + auto SelectEnd = std::next(LastSelectPseudo->getIterator()); + auto InsertionPoint = TailMBB->begin(); + while (SelectMBBI != SelectEnd) { + auto Next = std::next(SelectMBBI); + if (isSelectPseudo(*SelectMBBI)) { + // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ] + BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(), + TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg()) + .addReg(SelectMBBI->getOperand(4).getReg()) + .addMBB(HeadMBB) + .addReg(SelectMBBI->getOperand(5).getReg()) + .addMBB(IfFalseMBB); + SelectMBBI->eraseFromParent(); + } + SelectMBBI = Next; + } - MI.eraseFromParent(); // The pseudo instruction is gone now. + F->getProperties().reset(MachineFunctionProperties::Property::NoPHIs); return TailMBB; } +MachineBasicBlock * +RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, + MachineBasicBlock *BB) const { + switch (MI.getOpcode()) { + default: + llvm_unreachable("Unexpected instr type to insert"); + case RISCV::ReadCycleWide: + assert(!Subtarget.is64Bit() && + "ReadCycleWrite is only to be used on riscv32"); + return emitReadCycleWidePseudo(MI, BB); + case RISCV::Select_GPR_Using_CC_GPR: + case RISCV::Select_FPR32_Using_CC_GPR: + case RISCV::Select_FPR64_Using_CC_GPR: + return emitSelectPseudo(MI, BB); + case RISCV::BuildPairF64Pseudo: + return emitBuildPairF64Pseudo(MI, BB); + case RISCV::SplitF64Pseudo: + return emitSplitF64Pseudo(MI, BB); + } +} + // Calling Convention Implementation. // The expectations for frontend ABI lowering vary from target to target. // Ideally, an LLVM frontend would be able to avoid worrying about many ABI @@ -759,6 +1362,14 @@ static const MCPhysReg ArgGPRs[] = { RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14, RISCV::X15, RISCV::X16, RISCV::X17 }; +static const MCPhysReg ArgFPR32s[] = { + RISCV::F10_32, RISCV::F11_32, RISCV::F12_32, RISCV::F13_32, + RISCV::F14_32, RISCV::F15_32, RISCV::F16_32, RISCV::F17_32 +}; +static const MCPhysReg ArgFPR64s[] = { + RISCV::F10_64, RISCV::F11_64, RISCV::F12_64, RISCV::F13_64, + RISCV::F14_64, RISCV::F15_64, RISCV::F16_64, RISCV::F17_64 +}; // Pass a 2*XLEN argument that has been split into two XLEN values through // registers or the stack as necessary. @@ -799,22 +1410,59 @@ static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1, } // Implements the RISC-V calling convention. Returns true upon failure. -static bool CC_RISCV(const DataLayout &DL, unsigned ValNo, MVT ValVT, MVT LocVT, - CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, - CCState &State, bool IsFixed, bool IsRet, Type *OrigTy) { +static bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo, + MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, + bool IsRet, Type *OrigTy) { unsigned XLen = DL.getLargestLegalIntTypeSizeInBits(); assert(XLen == 32 || XLen == 64); MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64; - if (ValVT == MVT::f32) { - LocVT = MVT::i32; - LocInfo = CCValAssign::BCvt; - } // Any return value split in to more than two values can't be returned // directly. 
if (IsRet && ValNo > 1) return true; + // UseGPRForF32 if targeting one of the soft-float ABIs, if passing a + // variadic argument, or if no F32 argument registers are available. + bool UseGPRForF32 = true; + // UseGPRForF64 if targeting soft-float ABIs or an FLEN=32 ABI, if passing a + // variadic argument, or if no F64 argument registers are available. + bool UseGPRForF64 = true; + + switch (ABI) { + default: + llvm_unreachable("Unexpected ABI"); + case RISCVABI::ABI_ILP32: + case RISCVABI::ABI_LP64: + break; + case RISCVABI::ABI_ILP32F: + case RISCVABI::ABI_LP64F: + UseGPRForF32 = !IsFixed; + break; + case RISCVABI::ABI_ILP32D: + case RISCVABI::ABI_LP64D: + UseGPRForF32 = !IsFixed; + UseGPRForF64 = !IsFixed; + break; + } + + if (State.getFirstUnallocated(ArgFPR32s) == array_lengthof(ArgFPR32s)) + UseGPRForF32 = true; + if (State.getFirstUnallocated(ArgFPR64s) == array_lengthof(ArgFPR64s)) + UseGPRForF64 = true; + + // From this point on, rely on UseGPRForF32, UseGPRForF64 and similar local + // variables rather than directly checking against the target ABI. + + if (UseGPRForF32 && ValVT == MVT::f32) { + LocVT = XLenVT; + LocInfo = CCValAssign::BCvt; + } else if (UseGPRForF64 && XLen == 64 && ValVT == MVT::f64) { + LocVT = MVT::i64; + LocInfo = CCValAssign::BCvt; + } + // If this is a variadic argument, the RISC-V calling convention requires // that it is assigned an 'even' or 'aligned' register if it has 8-byte // alignment (RV32) or 16-byte alignment (RV64). An aligned register should @@ -838,8 +1486,9 @@ static bool CC_RISCV(const DataLayout &DL, unsigned ValNo, MVT ValVT, MVT LocVT, assert(PendingLocs.size() == PendingArgFlags.size() && "PendingLocs and PendingArgFlags out of sync"); - // Handle passing f64 on RV32D with a soft float ABI. - if (XLen == 32 && ValVT == MVT::f64) { + // Handle passing f64 on RV32D with a soft float ABI or when floating point + // registers are exhausted. + if (UseGPRForF64 && XLen == 32 && ValVT == MVT::f64) { assert(!ArgFlags.isSplit() && PendingLocs.empty() && "Can't lower f64 if it is split"); // Depending on available argument GPRS, f64 may be passed in a pair of @@ -888,7 +1537,13 @@ static bool CC_RISCV(const DataLayout &DL, unsigned ValNo, MVT ValVT, MVT LocVT, } // Allocate to a register if possible, or else a stack slot. - unsigned Reg = State.AllocateReg(ArgGPRs); + unsigned Reg; + if (ValVT == MVT::f32 && !UseGPRForF32) + Reg = State.AllocateReg(ArgFPR32s, ArgFPR64s); + else if (ValVT == MVT::f64 && !UseGPRForF64) + Reg = State.AllocateReg(ArgFPR64s, ArgFPR32s); + else + Reg = State.AllocateReg(ArgGPRs); unsigned StackOffset = Reg ? 0 : State.AllocateStack(XLen / 8, XLen / 8); // If we reach this point and PendingLocs is non-empty, we must be at the @@ -909,15 +1564,17 @@ static bool CC_RISCV(const DataLayout &DL, unsigned ValNo, MVT ValVT, MVT LocVT, return false; } - assert(LocVT == XLenVT && "Expected an XLenVT at this stage"); + assert((!UseGPRForF32 || !UseGPRForF64 || LocVT == XLenVT) && + "Expected an XLenVT at this stage"); if (Reg) { State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); return false; } - if (ValVT == MVT::f32) { - LocVT = MVT::f32; + // When an f32 or f64 is passed on the stack, no bit-conversion is needed. 
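
Aside (not part of the patch): hypothetical signatures showing the effect of UseGPRForF32/UseGPRForF64 under the ilp32d ABI; the register assignments follow the RISC-V psABI and assume no earlier arguments have consumed them:

    // Fixed FP arguments use fa0..fa7 until ArgFPR64s is exhausted.
    double nineDoubles(double A, double B, double C, double D, double E,
                       double F, double G, double H, double I);
    // A..H -> fa0..fa7; I falls back to the x10/x11 GPR pair or the stack.

    // Variadic FP arguments never use FP registers (UseGPRForF64 = !IsFixed).
    double sumVarargs(int N, ...);
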
+ if (ValVT == MVT::f32 || ValVT == MVT::f64) { + LocVT = ValVT; LocInfo = CCValAssign::Full; } State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo)); @@ -940,7 +1597,8 @@ void RISCVTargetLowering::analyzeInputArgs( else if (Ins[i].isOrigArg()) ArgTy = FType->getParamType(Ins[i].getOrigArgIndex()); - if (CC_RISCV(MF.getDataLayout(), i, ArgVT, ArgVT, CCValAssign::Full, + RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI(); + if (CC_RISCV(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo, /*IsRet=*/true, IsRet, ArgTy)) { LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " << EVT(ArgVT).getEVTString() << '\n'); @@ -960,7 +1618,8 @@ void RISCVTargetLowering::analyzeOutputArgs( ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr; - if (CC_RISCV(MF.getDataLayout(), i, ArgVT, ArgVT, CCValAssign::Full, + RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI(); + if (CC_RISCV(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy)) { LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " << EVT(ArgVT).getEVTString() << "\n"); @@ -979,6 +1638,10 @@ static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, case CCValAssign::Full: break; case CCValAssign::BCvt: + if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) { + Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val); + break; + } Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val); break; } @@ -993,8 +1656,24 @@ static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain, MachineRegisterInfo &RegInfo = MF.getRegInfo(); EVT LocVT = VA.getLocVT(); SDValue Val; + const TargetRegisterClass *RC; + + switch (LocVT.getSimpleVT().SimpleTy) { + default: + llvm_unreachable("Unexpected register type"); + case MVT::i32: + case MVT::i64: + RC = &RISCV::GPRRegClass; + break; + case MVT::f32: + RC = &RISCV::FPR32RegClass; + break; + case MVT::f64: + RC = &RISCV::FPR64RegClass; + break; + } - unsigned VReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass); + unsigned VReg = RegInfo.createVirtualRegister(RC); RegInfo.addLiveIn(VA.getLocReg(), VReg); Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT); @@ -1014,6 +1693,10 @@ static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, case CCValAssign::Full: break; case CCValAssign::BCvt: + if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) { + Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val); + break; + } Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val); break; } @@ -1040,6 +1723,7 @@ static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, llvm_unreachable("Unexpected CCValAssign::LocInfo"); case CCValAssign::Full: case CCValAssign::Indirect: + case CCValAssign::BCvt: ExtType = ISD::NON_EXTLOAD; break; } @@ -1227,12 +1911,12 @@ SDValue RISCVTargetLowering::LowerFormalArguments( return Chain; } -/// IsEligibleForTailCallOptimization - Check whether the call is eligible +/// isEligibleForTailCallOptimization - Check whether the call is eligible /// for tail call optimization. /// Note: This is modelled after ARM's IsEligibleForTailCallOptimization. 
-bool RISCVTargetLowering::IsEligibleForTailCallOptimization( - CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF, - const SmallVector<CCValAssign, 16> &ArgLocs) const { +bool RISCVTargetLowering::isEligibleForTailCallOptimization( + CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF, + const SmallVector<CCValAssign, 16> &ArgLocs) const { auto &Callee = CLI.Callee; auto CalleeCC = CLI.CallConv; @@ -1335,8 +2019,7 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI, // Check if it's really possible to do a tail call. if (IsTailCall) - IsTailCall = IsEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, - ArgLocs); + IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs); if (IsTailCall) ++NumTailCalls; @@ -1482,9 +2165,21 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI, // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't // split it and then direct call can be matched by PseudoCALL. if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) { - Callee = DAG.getTargetGlobalAddress(S->getGlobal(), DL, PtrVT, 0, 0); + const GlobalValue *GV = S->getGlobal(); + + unsigned OpFlags = RISCVII::MO_CALL; + if (!getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV)) + OpFlags = RISCVII::MO_PLT; + + Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags); } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { - Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, 0); + unsigned OpFlags = RISCVII::MO_CALL; + + if (!getTargetMachine().shouldAssumeDSOLocal(*MF.getFunction().getParent(), + nullptr)) + OpFlags = RISCVII::MO_PLT; + + Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags); } // The first call operand is the chain and the second is the target address. @@ -1567,8 +2262,9 @@ bool RISCVTargetLowering::CanLowerReturn( for (unsigned i = 0, e = Outs.size(); i != e; ++i) { MVT VT = Outs[i].VT; ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; - if (CC_RISCV(MF.getDataLayout(), i, VT, VT, CCValAssign::Full, ArgFlags, - CCInfo, /*IsFixed=*/true, /*IsRet=*/true, nullptr)) + RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI(); + if (CC_RISCV(MF.getDataLayout(), ABI, i, VT, VT, CCValAssign::Full, + ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true, nullptr)) return false; } return true; @@ -1679,6 +2375,24 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const { return "RISCVISD::SplitF64"; case RISCVISD::TAIL: return "RISCVISD::TAIL"; + case RISCVISD::SLLW: + return "RISCVISD::SLLW"; + case RISCVISD::SRAW: + return "RISCVISD::SRAW"; + case RISCVISD::SRLW: + return "RISCVISD::SRLW"; + case RISCVISD::DIVW: + return "RISCVISD::DIVW"; + case RISCVISD::DIVUW: + return "RISCVISD::DIVUW"; + case RISCVISD::REMUW: + return "RISCVISD::REMUW"; + case RISCVISD::FMV_W_X_RV64: + return "RISCVISD::FMV_W_X_RV64"; + case RISCVISD::FMV_X_ANYEXTW_RV64: + return "RISCVISD::FMV_X_ANYEXTW_RV64"; + case RISCVISD::READ_CYCLE_WIDE: + return "RISCVISD::READ_CYCLE_WIDE"; } return nullptr; } @@ -1701,6 +2415,44 @@ RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); } +void RISCVTargetLowering::LowerAsmOperandForConstraint( + SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops, + SelectionDAG &DAG) const { + // Currently only support length 1 constraints. 
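
Aside (not part of the patch): a usage sketch for the constraints validated below, which match the standard RISC-V machine constraints ('I' is a 12-bit signed immediate, 'J' the integer zero, 'K' a 5-bit unsigned immediate):

    long addSmallImm(long X) {
      long R;
      // 2047 satisfies isInt<12>; a larger literal would be rejected.
      asm("addi %0, %1, %2" : "=r"(R) : "r"(X), "I"(2047));
      return R;
    }
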
+ if (Constraint.length() == 1) { + switch (Constraint[0]) { + case 'I': + // Validate & create a 12-bit signed immediate operand. + if (auto *C = dyn_cast<ConstantSDNode>(Op)) { + uint64_t CVal = C->getSExtValue(); + if (isInt<12>(CVal)) + Ops.push_back( + DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT())); + } + return; + case 'J': + // Validate & create an integer zero operand. + if (auto *C = dyn_cast<ConstantSDNode>(Op)) + if (C->getZExtValue() == 0) + Ops.push_back( + DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT())); + return; + case 'K': + // Validate & create a 5-bit unsigned immediate operand. + if (auto *C = dyn_cast<ConstantSDNode>(Op)) { + uint64_t CVal = C->getZExtValue(); + if (isUInt<5>(CVal)) + Ops.push_back( + DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT())); + } + return; + default: + break; + } + } + TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); +} + Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilder<> &Builder, Instruction *Inst, AtomicOrdering Ord) const { @@ -1721,6 +2473,12 @@ Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilder<> &Builder, TargetLowering::AtomicExpansionKind RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { + // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating + // point operations can't be used in an lr/sc sequence without breaking the + // forward-progress guarantee. + if (AI->isFloatingPointOperation()) + return AtomicExpansionKind::CmpXChg; + unsigned Size = AI->getType()->getPrimitiveSizeInBits(); if (Size == 8 || Size == 16) return AtomicExpansionKind::MaskedIntrinsic; @@ -1728,37 +2486,74 @@ RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { } static Intrinsic::ID -getIntrinsicForMaskedAtomicRMWBinOp32(AtomicRMWInst::BinOp BinOp) { - switch (BinOp) { - default: - llvm_unreachable("Unexpected AtomicRMW BinOp"); - case AtomicRMWInst::Xchg: - return Intrinsic::riscv_masked_atomicrmw_xchg_i32; - case AtomicRMWInst::Add: - return Intrinsic::riscv_masked_atomicrmw_add_i32; - case AtomicRMWInst::Sub: - return Intrinsic::riscv_masked_atomicrmw_sub_i32; - case AtomicRMWInst::Nand: - return Intrinsic::riscv_masked_atomicrmw_nand_i32; - case AtomicRMWInst::Max: - return Intrinsic::riscv_masked_atomicrmw_max_i32; - case AtomicRMWInst::Min: - return Intrinsic::riscv_masked_atomicrmw_min_i32; - case AtomicRMWInst::UMax: - return Intrinsic::riscv_masked_atomicrmw_umax_i32; - case AtomicRMWInst::UMin: - return Intrinsic::riscv_masked_atomicrmw_umin_i32; +getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) { + if (XLen == 32) { + switch (BinOp) { + default: + llvm_unreachable("Unexpected AtomicRMW BinOp"); + case AtomicRMWInst::Xchg: + return Intrinsic::riscv_masked_atomicrmw_xchg_i32; + case AtomicRMWInst::Add: + return Intrinsic::riscv_masked_atomicrmw_add_i32; + case AtomicRMWInst::Sub: + return Intrinsic::riscv_masked_atomicrmw_sub_i32; + case AtomicRMWInst::Nand: + return Intrinsic::riscv_masked_atomicrmw_nand_i32; + case AtomicRMWInst::Max: + return Intrinsic::riscv_masked_atomicrmw_max_i32; + case AtomicRMWInst::Min: + return Intrinsic::riscv_masked_atomicrmw_min_i32; + case AtomicRMWInst::UMax: + return Intrinsic::riscv_masked_atomicrmw_umax_i32; + case AtomicRMWInst::UMin: + return Intrinsic::riscv_masked_atomicrmw_umin_i32; + } + } + + if (XLen == 64) { + switch (BinOp) { + default: + llvm_unreachable("Unexpected AtomicRMW BinOp"); + case AtomicRMWInst::Xchg: + return 
Intrinsic::riscv_masked_atomicrmw_xchg_i64; + case AtomicRMWInst::Add: + return Intrinsic::riscv_masked_atomicrmw_add_i64; + case AtomicRMWInst::Sub: + return Intrinsic::riscv_masked_atomicrmw_sub_i64; + case AtomicRMWInst::Nand: + return Intrinsic::riscv_masked_atomicrmw_nand_i64; + case AtomicRMWInst::Max: + return Intrinsic::riscv_masked_atomicrmw_max_i64; + case AtomicRMWInst::Min: + return Intrinsic::riscv_masked_atomicrmw_min_i64; + case AtomicRMWInst::UMax: + return Intrinsic::riscv_masked_atomicrmw_umax_i64; + case AtomicRMWInst::UMin: + return Intrinsic::riscv_masked_atomicrmw_umin_i64; + } } + + llvm_unreachable("Unexpected XLen\n"); } Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic( IRBuilder<> &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const { - Value *Ordering = Builder.getInt32(static_cast<uint32_t>(AI->getOrdering())); + unsigned XLen = Subtarget.getXLen(); + Value *Ordering = + Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering())); Type *Tys[] = {AlignedAddr->getType()}; Function *LrwOpScwLoop = Intrinsic::getDeclaration( AI->getModule(), - getIntrinsicForMaskedAtomicRMWBinOp32(AI->getOperation()), Tys); + getIntrinsicForMaskedAtomicRMWBinOp(XLen, AI->getOperation()), Tys); + + if (XLen == 64) { + Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty()); + Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty()); + ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty()); + } + + Value *Result; // Must pass the shift amount needed to sign extend the loaded value prior // to performing a signed comparison for min/max. ShiftAmt is the number of @@ -1770,13 +2565,18 @@ Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic( const DataLayout &DL = AI->getModule()->getDataLayout(); unsigned ValWidth = DL.getTypeStoreSizeInBits(AI->getValOperand()->getType()); - Value *SextShamt = Builder.CreateSub( - Builder.getInt32(Subtarget.getXLen() - ValWidth), ShiftAmt); - return Builder.CreateCall(LrwOpScwLoop, - {AlignedAddr, Incr, Mask, SextShamt, Ordering}); + Value *SextShamt = + Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt); + Result = Builder.CreateCall(LrwOpScwLoop, + {AlignedAddr, Incr, Mask, SextShamt, Ordering}); + } else { + Result = + Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering}); } - return Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering}); + if (XLen == 64) + Result = Builder.CreateTrunc(Result, Builder.getInt32Ty()); + return Result; } TargetLowering::AtomicExpansionKind @@ -1791,10 +2591,31 @@ RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR( Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic( IRBuilder<> &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const { - Value *Ordering = Builder.getInt32(static_cast<uint32_t>(Ord)); + unsigned XLen = Subtarget.getXLen(); + Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord)); + Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32; + if (XLen == 64) { + CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty()); + NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty()); + Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty()); + CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64; + } Type *Tys[] = {AlignedAddr->getType()}; - Function *MaskedCmpXchg = Intrinsic::getDeclaration( - CI->getModule(), Intrinsic::riscv_masked_cmpxchg_i32, Tys); - return 
Builder.CreateCall(MaskedCmpXchg, - {AlignedAddr, CmpVal, NewVal, Mask, Ordering}); + Function *MaskedCmpXchg = + Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys); + Value *Result = Builder.CreateCall( + MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering}); + if (XLen == 64) + Result = Builder.CreateTrunc(Result, Builder.getInt32Ty()); + return Result; +} + +unsigned RISCVTargetLowering::getExceptionPointerRegister( + const Constant *PersonalityFn) const { + return RISCV::X10; +} + +unsigned RISCVTargetLowering::getExceptionSelectorRegister( + const Constant *PersonalityFn) const { + return RISCV::X11; }
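
Aside (not part of the patch): both masked-atomic paths above apply the same RV64 adaptation, since the riscv_masked_* intrinsics operate on XLen-wide values. A sketch with a hypothetical callback standing in for the intrinsic call:

    #include <cstdint>

    // Hypothetical helper: sign-extend the i32 operands to XLen = 64
    // (CreateSExt), call the XLen-wide operation, and truncate the result
    // back to i32 (CreateTrunc).
    int32_t callMaskedOpRV64(int64_t (*MaskedOp)(int64_t, int64_t, int64_t),
                             int32_t Incr, int32_t Mask, int32_t ShiftAmt) {
      int64_t Res = MaskedOp(Incr, Mask, ShiftAmt); // implicit sign-extension
      return (int32_t)Res;
    }
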