author    | Dimitry Andric <dim@FreeBSD.org> | 2019-08-20 20:50:12 +0000
committer | Dimitry Andric <dim@FreeBSD.org> | 2019-08-20 20:50:12 +0000
commit    | e6d1592492a3a379186bfb02bd0f4eda0669c0d5 (patch)
tree      | 599ab169a01f1c86eda9adc774edaedde2f2db5b /lib/Target/RISCV/RISCVISelLowering.cpp
parent    | 1a56a5ead7a2e84bee8240f5f6b033b5f1707154 (diff)
Vendor import of stripped llvm trunk r366426 (just before the release_90
branch point):
https://llvm.org/svn/llvm-project/llvm/trunk@366426
Notes:
svn path=/vendor/llvm/dist/; revision=351278
svn path=/vendor/llvm/llvm-trunk-r366426/; revision=351279; tag=vendor/llvm/llvm-trunk-r366426
Diffstat (limited to 'lib/Target/RISCV/RISCVISelLowering.cpp')
-rw-r--r-- | lib/Target/RISCV/RISCVISelLowering.cpp | 1185
1 file changed, 1003 insertions(+), 182 deletions(-)
diff --git a/lib/Target/RISCV/RISCVISelLowering.cpp b/lib/Target/RISCV/RISCVISelLowering.cpp index 508dcbd009ed..ce7b85911ab6 100644 --- a/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/lib/Target/RISCV/RISCVISelLowering.cpp @@ -1,9 +1,8 @@ //===-- RISCVISelLowering.cpp - RISCV DAG Lowering Implementation --------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -18,6 +17,8 @@ #include "RISCVRegisterInfo.h" #include "RISCVSubtarget.h" #include "RISCVTargetMachine.h" +#include "Utils/RISCVMatInt.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -43,6 +44,24 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, const RISCVSubtarget &STI) : TargetLowering(TM), Subtarget(STI) { + if (Subtarget.isRV32E()) + report_fatal_error("Codegen not yet implemented for RV32E"); + + RISCVABI::ABI ABI = Subtarget.getTargetABI(); + assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI"); + + switch (ABI) { + default: + report_fatal_error("Don't know how to lower this ABI"); + case RISCVABI::ABI_ILP32: + case RISCVABI::ABI_ILP32F: + case RISCVABI::ABI_ILP32D: + case RISCVABI::ABI_LP64: + case RISCVABI::ABI_LP64F: + case RISCVABI::ABI_LP64D: + break; + } + MVT XLenVT = Subtarget.getXLenVT(); // Set up the register classes. @@ -81,10 +100,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand); if (Subtarget.is64Bit()) { - setTargetDAGCombine(ISD::SHL); - setTargetDAGCombine(ISD::SRL); - setTargetDAGCombine(ISD::SRA); - setTargetDAGCombine(ISD::ANY_EXTEND); + setOperationAction(ISD::SHL, MVT::i32, Custom); + setOperationAction(ISD::SRA, MVT::i32, Custom); + setOperationAction(ISD::SRL, MVT::i32, Custom); } if (!Subtarget.hasStdExtM()) { @@ -97,14 +115,20 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction(ISD::UREM, XLenVT, Expand); } + if (Subtarget.is64Bit() && Subtarget.hasStdExtM()) { + setOperationAction(ISD::SDIV, MVT::i32, Custom); + setOperationAction(ISD::UDIV, MVT::i32, Custom); + setOperationAction(ISD::UREM, MVT::i32, Custom); + } + setOperationAction(ISD::SDIVREM, XLenVT, Expand); setOperationAction(ISD::UDIVREM, XLenVT, Expand); setOperationAction(ISD::SMUL_LOHI, XLenVT, Expand); setOperationAction(ISD::UMUL_LOHI, XLenVT, Expand); - setOperationAction(ISD::SHL_PARTS, XLenVT, Expand); - setOperationAction(ISD::SRL_PARTS, XLenVT, Expand); - setOperationAction(ISD::SRA_PARTS, XLenVT, Expand); + setOperationAction(ISD::SHL_PARTS, XLenVT, Custom); + setOperationAction(ISD::SRL_PARTS, XLenVT, Custom); + setOperationAction(ISD::SRA_PARTS, XLenVT, Custom); setOperationAction(ISD::ROTL, XLenVT, Expand); setOperationAction(ISD::ROTR, XLenVT, Expand); @@ -114,9 +138,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction(ISD::CTPOP, XLenVT, Expand); ISD::CondCode FPCCToExtend[] = { - ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETO, ISD::SETUEQ, - ISD::SETUGT, ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, - ISD::SETGT, 
ISD::SETGE, ISD::SETNE}; + ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT, + ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, ISD::SETGT, + ISD::SETGE, ISD::SETNE}; ISD::NodeType FPOpToExtend[] = { ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM}; @@ -133,6 +157,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction(Op, MVT::f32, Expand); } + if (Subtarget.hasStdExtF() && Subtarget.is64Bit()) + setOperationAction(ISD::BITCAST, MVT::i32, Custom); + if (Subtarget.hasStdExtD()) { setOperationAction(ISD::FMINNUM, MVT::f64, Legal); setOperationAction(ISD::FMAXNUM, MVT::f64, Legal); @@ -151,6 +178,13 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction(ISD::BlockAddress, XLenVT, Custom); setOperationAction(ISD::ConstantPool, XLenVT, Custom); + setOperationAction(ISD::GlobalTLSAddress, XLenVT, Custom); + + // TODO: On M-mode only targets, the cycle[h] CSR may not be present. + // Unfortunately this can't be determined just from the ISA naming string. + setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, + Subtarget.is64Bit() ? Legal : Custom); + if (Subtarget.hasStdExtA()) { setMaxAtomicSizeInBitsSupported(Subtarget.getXLen()); setMinCmpXchgSizeInBits(32); @@ -276,6 +310,11 @@ bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const { return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64; } +bool RISCVTargetLowering::hasBitPreservingFPLogic(EVT VT) const { + return (VT == MVT::f32 && Subtarget.hasStdExtF()) || + (VT == MVT::f64 && Subtarget.hasStdExtD()); +} + // Changes the condition code and swaps operands if necessary, so the SetCC // operation matches one of the comparisons supported directly in the RISC-V // ISA. @@ -326,6 +365,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, return lowerBlockAddress(Op, DAG); case ISD::ConstantPool: return lowerConstantPool(Op, DAG); + case ISD::GlobalTLSAddress: + return lowerGlobalTLSAddress(Op, DAG); case ISD::SELECT: return lowerSELECT(Op, DAG); case ISD::VASTART: @@ -334,6 +375,81 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, return lowerFRAMEADDR(Op, DAG); case ISD::RETURNADDR: return lowerRETURNADDR(Op, DAG); + case ISD::SHL_PARTS: + return lowerShiftLeftParts(Op, DAG); + case ISD::SRA_PARTS: + return lowerShiftRightParts(Op, DAG, true); + case ISD::SRL_PARTS: + return lowerShiftRightParts(Op, DAG, false); + case ISD::BITCAST: { + assert(Subtarget.is64Bit() && Subtarget.hasStdExtF() && + "Unexpected custom legalisation"); + SDLoc DL(Op); + SDValue Op0 = Op.getOperand(0); + if (Op.getValueType() != MVT::f32 || Op0.getValueType() != MVT::i32) + return SDValue(); + SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0); + SDValue FPConv = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0); + return FPConv; + } + } +} + +static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty, + SelectionDAG &DAG, unsigned Flags) { + return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags); +} + +static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty, + SelectionDAG &DAG, unsigned Flags) { + return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(), + Flags); +} + +static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty, + SelectionDAG &DAG, unsigned Flags) { + return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlignment(), + N->getOffset(), Flags); +} + +template <class NodeTy> +SDValue RISCVTargetLowering::getAddr(NodeTy 
*N, SelectionDAG &DAG, + bool IsLocal) const { + SDLoc DL(N); + EVT Ty = getPointerTy(DAG.getDataLayout()); + + if (isPositionIndependent()) { + SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0); + if (IsLocal) + // Use PC-relative addressing to access the symbol. This generates the + // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym)) + // %pcrel_lo(auipc)). + return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0); + + // Use PC-relative addressing to access the GOT for this symbol, then load + // the address from the GOT. This generates the pattern (PseudoLA sym), + // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))). + return SDValue(DAG.getMachineNode(RISCV::PseudoLA, DL, Ty, Addr), 0); + } + + switch (getTargetMachine().getCodeModel()) { + default: + report_fatal_error("Unsupported code model for lowering"); + case CodeModel::Small: { + // Generate a sequence for accessing addresses within the first 2 GiB of + // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)). + SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI); + SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO); + SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0); + return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, AddrLo), 0); + } + case CodeModel::Medium: { + // Generate a sequence for accessing addresses within any 2GiB range within + // the address space. This generates the pattern (PseudoLLA sym), which + // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)). + SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0); + return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0); + } } } @@ -342,67 +458,145 @@ SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op, SDLoc DL(Op); EVT Ty = Op.getValueType(); GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op); - const GlobalValue *GV = N->getGlobal(); int64_t Offset = N->getOffset(); MVT XLenVT = Subtarget.getXLenVT(); - if (isPositionIndependent()) - report_fatal_error("Unable to lowerGlobalAddress"); + const GlobalValue *GV = N->getGlobal(); + bool IsLocal = getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV); + SDValue Addr = getAddr(N, DAG, IsLocal); + // In order to maximise the opportunity for common subexpression elimination, // emit a separate ADD node for the global address offset instead of folding // it in the global address node. Later peephole optimisations may choose to // fold it back in when profitable. 
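
Aside (not part of the patch): the CodeModel::Small sequence above relies on the standard RISC-V %hi/%lo split, where the high part absorbs a carry because ADDI sign-extends its 12-bit immediate. A minimal host-side sketch of that arithmetic, assuming nothing beyond standard C++:

    #include <cassert>
    #include <cstdint>

    // The carry-absorbing split behind (addi (lui %hi(sym)) %lo(sym)).
    static uint32_t hi20(uint32_t Addr) { return (Addr + 0x800) >> 12; }
    static int32_t lo12(uint32_t Addr) {
      int32_t Lo = Addr & 0xFFF;
      return Lo >= 0x800 ? Lo - 0x1000 : Lo; // ADDI sign-extends bits 11:0
    }

    int main() {
      for (uint32_t Addr : {0x12345678u, 0x00000800u, 0xFFFFF800u})
        // lui places hi20 << 12 in rd; addi then adds the signed low part.
        assert((hi20(Addr) << 12) + (uint32_t)lo12(Addr) == Addr);
    }
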
- SDValue GAHi = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_HI); - SDValue GALo = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_LO); - SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, GAHi), 0); - SDValue MNLo = - SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, GALo), 0); if (Offset != 0) - return DAG.getNode(ISD::ADD, DL, Ty, MNLo, + return DAG.getNode(ISD::ADD, DL, Ty, Addr, DAG.getConstant(Offset, DL, XLenVT)); - return MNLo; + return Addr; } SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op, SelectionDAG &DAG) const { - SDLoc DL(Op); - EVT Ty = Op.getValueType(); BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op); - const BlockAddress *BA = N->getBlockAddress(); - int64_t Offset = N->getOffset(); - - if (isPositionIndependent()) - report_fatal_error("Unable to lowerBlockAddress"); - SDValue BAHi = DAG.getTargetBlockAddress(BA, Ty, Offset, RISCVII::MO_HI); - SDValue BALo = DAG.getTargetBlockAddress(BA, Ty, Offset, RISCVII::MO_LO); - SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, BAHi), 0); - SDValue MNLo = - SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, BALo), 0); - return MNLo; + return getAddr(N, DAG); } SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op, SelectionDAG &DAG) const { + ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op); + + return getAddr(N, DAG); +} + +SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N, + SelectionDAG &DAG, + bool UseGOT) const { + SDLoc DL(N); + EVT Ty = getPointerTy(DAG.getDataLayout()); + const GlobalValue *GV = N->getGlobal(); + MVT XLenVT = Subtarget.getXLenVT(); + + if (UseGOT) { + // Use PC-relative addressing to access the GOT for this TLS symbol, then + // load the address from the GOT and add the thread pointer. This generates + // the pattern (PseudoLA_TLS_IE sym), which expands to + // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)). + SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0); + SDValue Load = + SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0); + + // Add the thread pointer. + SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT); + return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg); + } + + // Generate a sequence for accessing the address relative to the thread + // pointer, with the appropriate adjustment for the thread pointer offset. + // This generates the pattern + // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym)) + SDValue AddrHi = + DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI); + SDValue AddrAdd = + DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD); + SDValue AddrLo = + DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO); + + SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0); + SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT); + SDValue MNAdd = SDValue( + DAG.getMachineNode(RISCV::PseudoAddTPRel, DL, Ty, MNHi, TPReg, AddrAdd), + 0); + return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNAdd, AddrLo), 0); +} + +SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N, + SelectionDAG &DAG) const { + SDLoc DL(N); + EVT Ty = getPointerTy(DAG.getDataLayout()); + IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits()); + const GlobalValue *GV = N->getGlobal(); + + // Use a PC-relative addressing mode to access the global dynamic GOT address. + // This generates the pattern (PseudoLA_TLS_GD sym), which expands to + // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)). 
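
Aside (not part of the patch): a compact sketch of the model-to-sequence mapping that lowerGlobalTLSAddress below implements; the names here are illustrative, not LLVM API:

    // Illustrative only; mirrors the switch in lowerGlobalTLSAddress.
    enum class Model { LocalExec, InitialExec, LocalDynamic, GeneralDynamic };
    enum class Sequence { TpRelAddi, GotLoadPlusTp, TlsGetAddrCall };

    Sequence selectTLSSequence(bool PositionIndependent, Model M) {
      if (!PositionIndependent)
        M = Model::LocalExec; // non-PIC TLS always uses LocalExec
      switch (M) {
      case Model::LocalExec:      return Sequence::TpRelAddi;      // lui/add tp/addi
      case Model::InitialExec:    return Sequence::GotLoadPlusTp;  // PseudoLA_TLS_IE
      case Model::LocalDynamic:
      case Model::GeneralDynamic: return Sequence::TlsGetAddrCall; // PseudoLA_TLS_GD
      }
      return Sequence::TlsGetAddrCall; // unreachable; silences warnings
    }
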
+ SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0); + SDValue Load = + SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0); + + // Prepare argument list to generate call. + ArgListTy Args; + ArgListEntry Entry; + Entry.Node = Load; + Entry.Ty = CallTy; + Args.push_back(Entry); + + // Setup call to __tls_get_addr. + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(DL) + .setChain(DAG.getEntryNode()) + .setLibCallee(CallingConv::C, CallTy, + DAG.getExternalSymbol("__tls_get_addr", Ty), + std::move(Args)); + + return LowerCallTo(CLI).first; +} + +SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op, + SelectionDAG &DAG) const { SDLoc DL(Op); EVT Ty = Op.getValueType(); - ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op); - const Constant *CPA = N->getConstVal(); + GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op); int64_t Offset = N->getOffset(); - unsigned Alignment = N->getAlignment(); - - if (!isPositionIndependent()) { - SDValue CPAHi = - DAG.getTargetConstantPool(CPA, Ty, Alignment, Offset, RISCVII::MO_HI); - SDValue CPALo = - DAG.getTargetConstantPool(CPA, Ty, Alignment, Offset, RISCVII::MO_LO); - SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, CPAHi), 0); - SDValue MNLo = - SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, CPALo), 0); - return MNLo; - } else { - report_fatal_error("Unable to lowerConstantPool"); + MVT XLenVT = Subtarget.getXLenVT(); + + // Non-PIC TLS lowering should always use the LocalExec model. + TLSModel::Model Model = isPositionIndependent() + ? getTargetMachine().getTLSModel(N->getGlobal()) + : TLSModel::LocalExec; + + SDValue Addr; + switch (Model) { + case TLSModel::LocalExec: + Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false); + break; + case TLSModel::InitialExec: + Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true); + break; + case TLSModel::LocalDynamic: + case TLSModel::GeneralDynamic: + Addr = getDynamicTLSAddr(N, DAG); + break; } + + // In order to maximise the opportunity for common subexpression elimination, + // emit a separate ADD node for the global address offset instead of folding + // it in the global address node. Later peephole optimisations may choose to + // fold it back in when profitable. + if (Offset != 0) + return DAG.getNode(ISD::ADD, DL, Ty, Addr, + DAG.getConstant(Offset, DL, XLenVT)); + return Addr; } SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const { @@ -513,29 +707,184 @@ SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op, return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT); } -// Return true if the given node is a shift with a non-constant shift amount. 
-static bool isVariableShift(SDValue Val) { - switch (Val.getOpcode()) { +SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + SDValue Lo = Op.getOperand(0); + SDValue Hi = Op.getOperand(1); + SDValue Shamt = Op.getOperand(2); + EVT VT = Lo.getValueType(); + + // if Shamt-XLEN < 0: // Shamt < XLEN + // Lo = Lo << Shamt + // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt)) + // else: + // Lo = 0 + // Hi = Lo << (Shamt-XLEN) + + SDValue Zero = DAG.getConstant(0, DL, VT); + SDValue One = DAG.getConstant(1, DL, VT); + SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT); + SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT); + SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen); + SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt); + + SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt); + SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One); + SDValue ShiftRightLo = + DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt); + SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt); + SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo); + SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen); + + SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT); + + Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero); + Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse); + + SDValue Parts[2] = {Lo, Hi}; + return DAG.getMergeValues(Parts, DL); +} + +SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG, + bool IsSRA) const { + SDLoc DL(Op); + SDValue Lo = Op.getOperand(0); + SDValue Hi = Op.getOperand(1); + SDValue Shamt = Op.getOperand(2); + EVT VT = Lo.getValueType(); + + // SRA expansion: + // if Shamt-XLEN < 0: // Shamt < XLEN + // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt)) + // Hi = Hi >>s Shamt + // else: + // Lo = Hi >>s (Shamt-XLEN); + // Hi = Hi >>s (XLEN-1) + // + // SRL expansion: + // if Shamt-XLEN < 0: // Shamt < XLEN + // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt)) + // Hi = Hi >>u Shamt + // else: + // Lo = Hi >>u (Shamt-XLEN); + // Hi = 0; + + unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL; + + SDValue Zero = DAG.getConstant(0, DL, VT); + SDValue One = DAG.getConstant(1, DL, VT); + SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT); + SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT); + SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen); + SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt); + + SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt); + SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One); + SDValue ShiftLeftHi = + DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt); + SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi); + SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt); + SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen); + SDValue HiFalse = + IsSRA ? 
DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero; + + SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT); + + Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse); + Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse); + + SDValue Parts[2] = {Lo, Hi}; + return DAG.getMergeValues(Parts, DL); +} + +// Returns the opcode of the target-specific SDNode that implements the 32-bit +// form of the given Opcode. +static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) { + switch (Opcode) { default: - return false; + llvm_unreachable("Unexpected opcode"); case ISD::SHL: + return RISCVISD::SLLW; case ISD::SRA: + return RISCVISD::SRAW; case ISD::SRL: - return Val.getOperand(1).getOpcode() != ISD::Constant; + return RISCVISD::SRLW; + case ISD::SDIV: + return RISCVISD::DIVW; + case ISD::UDIV: + return RISCVISD::DIVUW; + case ISD::UREM: + return RISCVISD::REMUW; } } -// Returns true if the given node is an sdiv, udiv, or urem with non-constant -// operands. -static bool isVariableSDivUDivURem(SDValue Val) { - switch (Val.getOpcode()) { +// Converts the given 32-bit operation to a target-specific SelectionDAG node. +// Because i32 isn't a legal type for RV64, these operations would otherwise +// be promoted to i64, making it difficult to select the SLLW/DIVUW/.../*W +// later one because the fact the operation was originally of type i32 is +// lost. +static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG) { + SDLoc DL(N); + RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode()); + SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); + SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); + SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1); + // ReplaceNodeResults requires we maintain the same type for the return value. 
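
Aside (not part of the patch): the SHL_PARTS expansion commented in lowerShiftLeftParts above can be checked on the host. A sketch for XLEN = 32, assuming only standard C++; the (Lo >>u 1) >>u (XLEN-1 - Shamt) shape exists to keep every shift amount below 32:

    #include <cassert>
    #include <cstdint>

    // Shift a 64-bit value held as {Lo, Hi} left by 0 <= Shamt < 64
    // using only sub-32-bit shift amounts, as the expansion above does.
    static uint64_t shlParts(uint32_t Lo, uint32_t Hi, unsigned Shamt) {
      uint32_t ResLo, ResHi;
      if (Shamt < 32) { // Shamt - XLEN < 0
        ResLo = Lo << Shamt;
        // (Lo >>u 1) >>u (31 - Shamt) avoids an undefined >> 32 at Shamt == 0.
        ResHi = (Hi << Shamt) | ((Lo >> 1) >> (31 - Shamt));
      } else {
        ResLo = 0;
        ResHi = Lo << (Shamt - 32);
      }
      return ((uint64_t)ResHi << 32) | ResLo;
    }

    int main() {
      uint64_t V = 0x8000000123456789ULL;
      for (unsigned Shamt = 0; Shamt < 64; ++Shamt)
        assert(shlParts((uint32_t)V, (uint32_t)(V >> 32), Shamt) == V << Shamt);
    }
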
+ return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes); +} + +void RISCVTargetLowering::ReplaceNodeResults(SDNode *N, + SmallVectorImpl<SDValue> &Results, + SelectionDAG &DAG) const { + SDLoc DL(N); + switch (N->getOpcode()) { default: - return false; + llvm_unreachable("Don't know how to custom type legalize this operation!"); + case ISD::READCYCLECOUNTER: { + assert(!Subtarget.is64Bit() && + "READCYCLECOUNTER only has custom type legalization on riscv32"); + + SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other); + SDValue RCW = + DAG.getNode(RISCVISD::READ_CYCLE_WIDE, DL, VTs, N->getOperand(0)); + + Results.push_back(RCW); + Results.push_back(RCW.getValue(1)); + Results.push_back(RCW.getValue(2)); + break; + } + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: + assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && + "Unexpected custom legalisation"); + if (N->getOperand(1).getOpcode() == ISD::Constant) + return; + Results.push_back(customLegalizeToWOp(N, DAG)); + break; case ISD::SDIV: case ISD::UDIV: case ISD::UREM: - return Val.getOperand(0).getOpcode() != ISD::Constant && - Val.getOperand(1).getOpcode() != ISD::Constant; + assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && + Subtarget.hasStdExtM() && "Unexpected custom legalisation"); + if (N->getOperand(0).getOpcode() == ISD::Constant || + N->getOperand(1).getOpcode() == ISD::Constant) + return; + Results.push_back(customLegalizeToWOp(N, DAG)); + break; + case ISD::BITCAST: { + assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && + Subtarget.hasStdExtF() && "Unexpected custom legalisation"); + SDLoc DL(N); + SDValue Op0 = N->getOperand(0); + if (Op0.getValueType() != MVT::f32) + return; + SDValue FPConv = + DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0); + Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv)); + break; + } } } @@ -546,51 +895,225 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, switch (N->getOpcode()) { default: break; - case ISD::SHL: - case ISD::SRL: - case ISD::SRA: { - assert(Subtarget.getXLen() == 64 && "Combine should be 64-bit only"); - if (!DCI.isBeforeLegalize()) - break; - SDValue RHS = N->getOperand(1); - if (N->getValueType(0) != MVT::i32 || RHS->getOpcode() == ISD::Constant || - (RHS->getOpcode() == ISD::AssertZext && - cast<VTSDNode>(RHS->getOperand(1))->getVT().getSizeInBits() <= 5)) - break; - SDValue LHS = N->getOperand(0); - SDLoc DL(N); - SDValue NewRHS = - DAG.getNode(ISD::AssertZext, DL, RHS.getValueType(), RHS, - DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), 5))); - return DCI.CombineTo( - N, DAG.getNode(N->getOpcode(), DL, LHS.getValueType(), LHS, NewRHS)); - } - case ISD::ANY_EXTEND: { - // If any-extending an i32 variable-length shift or sdiv/udiv/urem to i64, - // then instead sign-extend in order to increase the chance of being able - // to select the sllw/srlw/sraw/divw/divuw/remuw instructions. - SDValue Src = N->getOperand(0); - if (N->getValueType(0) != MVT::i64 || Src.getValueType() != MVT::i32) - break; - if (!isVariableShift(Src) && - !(Subtarget.hasStdExtM() && isVariableSDivUDivURem(Src))) - break; - SDLoc DL(N); - return DCI.CombineTo(N, DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Src)); - } case RISCVISD::SplitF64: { + SDValue Op0 = N->getOperand(0); // If the input to SplitF64 is just BuildPairF64 then the operation is // redundant. Instead, use BuildPairF64's operands directly. 
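
Aside (not part of the patch): the *W legalisation above works because RV64's 32-bit instructions read only the low halves of their inputs and sign-extend their results. A host-side model of SLLW, which also motivates the SimplifyDemandedBits masks and the 33-sign-bit answer later in this patch:

    #include <cstdint>

    // RV64 SLLW semantics as modelled by RISCVISD::SLLW: only the low 32
    // bits of rs1 and the low 5 bits of rs2 are read, and the 32-bit
    // result is sign-extended to 64 bits.
    int64_t emulateSLLW(int64_t Rs1, int64_t Rs2) {
      uint32_t Shifted = (uint32_t)Rs1 << (Rs2 & 31);
      return (int64_t)(int32_t)Shifted; // sign-extend the 32-bit result
    }
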
+ if (Op0->getOpcode() == RISCVISD::BuildPairF64) + return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1)); + + SDLoc DL(N); + + // It's cheaper to materialise two 32-bit integers than to load a double + // from the constant pool and transfer it to integer registers through the + // stack. + if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) { + APInt V = C->getValueAPF().bitcastToAPInt(); + SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32); + SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32); + return DCI.CombineTo(N, Lo, Hi); + } + + // This is a target-specific version of a DAGCombine performed in + // DAGCombiner::visitBITCAST. It performs the equivalent of: + // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) + // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit)) + if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) || + !Op0.getNode()->hasOneUse()) + break; + SDValue NewSplitF64 = + DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), + Op0.getOperand(0)); + SDValue Lo = NewSplitF64.getValue(0); + SDValue Hi = NewSplitF64.getValue(1); + APInt SignBit = APInt::getSignMask(32); + if (Op0.getOpcode() == ISD::FNEG) { + SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi, + DAG.getConstant(SignBit, DL, MVT::i32)); + return DCI.CombineTo(N, Lo, NewHi); + } + assert(Op0.getOpcode() == ISD::FABS); + SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi, + DAG.getConstant(~SignBit, DL, MVT::i32)); + return DCI.CombineTo(N, Lo, NewHi); + } + case RISCVISD::SLLW: + case RISCVISD::SRAW: + case RISCVISD::SRLW: { + // Only the lower 32 bits of LHS and lower 5 bits of RHS are read. + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + APInt LHSMask = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 32); + APInt RHSMask = APInt::getLowBitsSet(RHS.getValueSizeInBits(), 5); + if ((SimplifyDemandedBits(N->getOperand(0), LHSMask, DCI)) || + (SimplifyDemandedBits(N->getOperand(1), RHSMask, DCI))) + return SDValue(); + break; + } + case RISCVISD::FMV_X_ANYEXTW_RV64: { + SDLoc DL(N); SDValue Op0 = N->getOperand(0); - if (Op0->getOpcode() != RISCVISD::BuildPairF64) + // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the + // conversion is unnecessary and can be replaced with an ANY_EXTEND + // of the FMV_W_X_RV64 operand. + if (Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) { + SDValue AExtOp = + DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0.getOperand(0)); + return DCI.CombineTo(N, AExtOp); + } + + // This is a target-specific version of a DAGCombine performed in + // DAGCombiner::visitBITCAST. 
It performs the equivalent of: + // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) + // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit)) + if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) || + !Op0.getNode()->hasOneUse()) break; - return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1)); + SDValue NewFMV = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, + Op0.getOperand(0)); + APInt SignBit = APInt::getSignMask(32).sext(64); + if (Op0.getOpcode() == ISD::FNEG) { + return DCI.CombineTo(N, + DAG.getNode(ISD::XOR, DL, MVT::i64, NewFMV, + DAG.getConstant(SignBit, DL, MVT::i64))); + } + assert(Op0.getOpcode() == ISD::FABS); + return DCI.CombineTo(N, + DAG.getNode(ISD::AND, DL, MVT::i64, NewFMV, + DAG.getConstant(~SignBit, DL, MVT::i64))); } } return SDValue(); } +bool RISCVTargetLowering::isDesirableToCommuteWithShift( + const SDNode *N, CombineLevel Level) const { + // The following folds are only desirable if `(OP _, c1 << c2)` can be + // materialised in fewer instructions than `(OP _, c1)`: + // + // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2) + // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2) + SDValue N0 = N->getOperand(0); + EVT Ty = N0.getValueType(); + if (Ty.isScalarInteger() && + (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) { + auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1)); + auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)); + if (C1 && C2) { + APInt C1Int = C1->getAPIntValue(); + APInt ShiftedC1Int = C1Int << C2->getAPIntValue(); + + // We can materialise `c1 << c2` into an add immediate, so it's "free", + // and the combine should happen, to potentially allow further combines + // later. + if (isLegalAddImmediate(ShiftedC1Int.getSExtValue())) + return true; + + // We can materialise `c1` in an add immediate, so it's "free", and the + // combine should be prevented. + if (isLegalAddImmediate(C1Int.getSExtValue())) + return false; + + // Neither constant will fit into an immediate, so find materialisation + // costs. + int C1Cost = RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(), + Subtarget.is64Bit()); + int ShiftedC1Cost = RISCVMatInt::getIntMatCost( + ShiftedC1Int, Ty.getSizeInBits(), Subtarget.is64Bit()); + + // Materialising `c1` is cheaper than materialising `c1 << c2`, so the + // combine should be prevented. + if (C1Cost < ShiftedC1Cost) + return false; + } + } + return true; +} + +unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode( + SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, + unsigned Depth) const { + switch (Op.getOpcode()) { + default: + break; + case RISCVISD::SLLW: + case RISCVISD::SRAW: + case RISCVISD::SRLW: + case RISCVISD::DIVW: + case RISCVISD::DIVUW: + case RISCVISD::REMUW: + // TODO: As the result is sign-extended, this is conservatively correct. A + // more precise answer could be calculated for SRAW depending on known + // bits in the shift amount. + return 33; + } + + return 1; +} + +MachineBasicBlock *emitReadCycleWidePseudo(MachineInstr &MI, + MachineBasicBlock *BB) { + assert(MI.getOpcode() == RISCV::ReadCycleWide && "Unexpected instruction"); + + // To read the 64-bit cycle CSR on a 32-bit target, we read the two halves. + // Should the count have wrapped while it was being read, we need to try + // again. + // ... 
+ // read: + // rdcycleh x3 # load high word of cycle + // rdcycle x2 # load low word of cycle + // rdcycleh x4 # load high word of cycle + // bne x3, x4, read # check if high word reads match, otherwise try again + // ... + + MachineFunction &MF = *BB->getParent(); + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + MachineFunction::iterator It = ++BB->getIterator(); + + MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVM_BB); + MF.insert(It, LoopMBB); + + MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVM_BB); + MF.insert(It, DoneMBB); + + // Transfer the remainder of BB and its successor edges to DoneMBB. + DoneMBB->splice(DoneMBB->begin(), BB, + std::next(MachineBasicBlock::iterator(MI)), BB->end()); + DoneMBB->transferSuccessorsAndUpdatePHIs(BB); + + BB->addSuccessor(LoopMBB); + + MachineRegisterInfo &RegInfo = MF.getRegInfo(); + unsigned ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass); + unsigned LoReg = MI.getOperand(0).getReg(); + unsigned HiReg = MI.getOperand(1).getReg(); + DebugLoc DL = MI.getDebugLoc(); + + const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); + BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg) + .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding) + .addReg(RISCV::X0); + BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg) + .addImm(RISCVSysReg::lookupSysRegByName("CYCLE")->Encoding) + .addReg(RISCV::X0); + BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg) + .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding) + .addReg(RISCV::X0); + + BuildMI(LoopMBB, DL, TII->get(RISCV::BNE)) + .addReg(HiReg) + .addReg(ReadAgainReg) + .addMBB(LoopMBB); + + LoopMBB->addSuccessor(LoopMBB); + LoopMBB->addSuccessor(DoneMBB); + + MI.eraseFromParent(); + + return DoneMBB; +} + static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI, MachineBasicBlock *BB) { assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction"); @@ -655,24 +1178,21 @@ static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI, return BB; } -MachineBasicBlock * -RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, - MachineBasicBlock *BB) const { +static bool isSelectPseudo(MachineInstr &MI) { switch (MI.getOpcode()) { default: - llvm_unreachable("Unexpected instr type to insert"); + return false; case RISCV::Select_GPR_Using_CC_GPR: case RISCV::Select_FPR32_Using_CC_GPR: case RISCV::Select_FPR64_Using_CC_GPR: - break; - case RISCV::BuildPairF64Pseudo: - return emitBuildPairF64Pseudo(MI, BB); - case RISCV::SplitF64Pseudo: - return emitSplitF64Pseudo(MI, BB); + return true; } +} - // To "insert" a SELECT instruction, we actually have to insert the triangle - // control-flow pattern. The incoming instruction knows the destination vreg +static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI, + MachineBasicBlock *BB) { + // To "insert" Select_* instructions, we actually have to insert the triangle + // control-flow pattern. The incoming instructions know the destination vreg // to set, the condition code register to branch on, the true/false values to // select between, and the condcode to use to select the appropriate branch. // @@ -682,6 +1202,54 @@ RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, // | IfFalseMBB // | / // TailMBB + // + // When we find a sequence of selects we attempt to optimize their emission + // by sharing the control flow. Currently we only handle cases where we have + // multiple selects with the exact same condition (same LHS, RHS and CC). 
+ // The selects may be interleaved with other instructions if the other + // instructions meet some requirements we deem safe: + // - They are debug instructions. Otherwise, + // - They do not have side-effects, do not access memory and their inputs do + // not depend on the results of the select pseudo-instructions. + // The TrueV/FalseV operands of the selects cannot depend on the result of + // previous selects in the sequence. + // These conditions could be further relaxed. See the X86 target for a + // related approach and more information. + unsigned LHS = MI.getOperand(1).getReg(); + unsigned RHS = MI.getOperand(2).getReg(); + auto CC = static_cast<ISD::CondCode>(MI.getOperand(3).getImm()); + + SmallVector<MachineInstr *, 4> SelectDebugValues; + SmallSet<unsigned, 4> SelectDests; + SelectDests.insert(MI.getOperand(0).getReg()); + + MachineInstr *LastSelectPseudo = &MI; + + for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI); + SequenceMBBI != E; ++SequenceMBBI) { + if (SequenceMBBI->isDebugInstr()) + continue; + else if (isSelectPseudo(*SequenceMBBI)) { + if (SequenceMBBI->getOperand(1).getReg() != LHS || + SequenceMBBI->getOperand(2).getReg() != RHS || + SequenceMBBI->getOperand(3).getImm() != CC || + SelectDests.count(SequenceMBBI->getOperand(4).getReg()) || + SelectDests.count(SequenceMBBI->getOperand(5).getReg())) + break; + LastSelectPseudo = &*SequenceMBBI; + SequenceMBBI->collectDebugValues(SelectDebugValues); + SelectDests.insert(SequenceMBBI->getOperand(0).getReg()); + } else { + if (SequenceMBBI->hasUnmodeledSideEffects() || + SequenceMBBI->mayLoadOrStore()) + break; + if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) { + return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg()); + })) + break; + } + } + const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo(); const BasicBlock *LLVM_BB = BB->getBasicBlock(); DebugLoc DL = MI.getDebugLoc(); @@ -694,20 +1262,23 @@ RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, F->insert(I, IfFalseMBB); F->insert(I, TailMBB); - // Move all remaining instructions to TailMBB. - TailMBB->splice(TailMBB->begin(), HeadMBB, - std::next(MachineBasicBlock::iterator(MI)), HeadMBB->end()); + + // Transfer debug instructions associated with the selects to TailMBB. + for (MachineInstr *DebugInstr : SelectDebugValues) { + TailMBB->push_back(DebugInstr->removeFromParent()); + } + + // Move all instructions after the sequence to TailMBB. + TailMBB->splice(TailMBB->end(), HeadMBB, + std::next(LastSelectPseudo->getIterator()), HeadMBB->end()); // Update machine-CFG edges by transferring all successors of the current - // block to the new block which will contain the Phi node for the select. + // block to the new block which will contain the Phi nodes for the selects. TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB); // Set the successors for HeadMBB. HeadMBB->addSuccessor(IfFalseMBB); HeadMBB->addSuccessor(TailMBB); // Insert appropriate branch. - unsigned LHS = MI.getOperand(1).getReg(); - unsigned RHS = MI.getOperand(2).getReg(); - auto CC = static_cast<ISD::CondCode>(MI.getOperand(3).getImm()); unsigned Opcode = getBranchOpcodeForIntCondCode(CC); BuildMI(HeadMBB, DL, TII.get(Opcode)) @@ -718,18 +1289,50 @@ RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, // IfFalseMBB just falls through to TailMBB. 
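
Aside (not part of the patch): hypothetical source that benefits from the select-sequence sharing described above; both selects use the same (LHS, RHS, CC), so they now produce one conditional branch and two PHIs in TailMBB rather than two separate triangles:

    int pickBoth(int A, int B, int X1, int Y1, int X2, int Y2) {
      int R1 = A < B ? X1 : Y1; // Select_GPR_Using_CC_GPR
      int R2 = A < B ? X2 : Y2; // same condition: shares the branch above
      return R1 + R2;
    }
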
IfFalseMBB->addSuccessor(TailMBB); - // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ] - BuildMI(*TailMBB, TailMBB->begin(), DL, TII.get(RISCV::PHI), - MI.getOperand(0).getReg()) - .addReg(MI.getOperand(4).getReg()) - .addMBB(HeadMBB) - .addReg(MI.getOperand(5).getReg()) - .addMBB(IfFalseMBB); + // Create PHIs for all of the select pseudo-instructions. + auto SelectMBBI = MI.getIterator(); + auto SelectEnd = std::next(LastSelectPseudo->getIterator()); + auto InsertionPoint = TailMBB->begin(); + while (SelectMBBI != SelectEnd) { + auto Next = std::next(SelectMBBI); + if (isSelectPseudo(*SelectMBBI)) { + // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ] + BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(), + TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg()) + .addReg(SelectMBBI->getOperand(4).getReg()) + .addMBB(HeadMBB) + .addReg(SelectMBBI->getOperand(5).getReg()) + .addMBB(IfFalseMBB); + SelectMBBI->eraseFromParent(); + } + SelectMBBI = Next; + } - MI.eraseFromParent(); // The pseudo instruction is gone now. + F->getProperties().reset(MachineFunctionProperties::Property::NoPHIs); return TailMBB; } +MachineBasicBlock * +RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, + MachineBasicBlock *BB) const { + switch (MI.getOpcode()) { + default: + llvm_unreachable("Unexpected instr type to insert"); + case RISCV::ReadCycleWide: + assert(!Subtarget.is64Bit() && + "ReadCycleWrite is only to be used on riscv32"); + return emitReadCycleWidePseudo(MI, BB); + case RISCV::Select_GPR_Using_CC_GPR: + case RISCV::Select_FPR32_Using_CC_GPR: + case RISCV::Select_FPR64_Using_CC_GPR: + return emitSelectPseudo(MI, BB); + case RISCV::BuildPairF64Pseudo: + return emitBuildPairF64Pseudo(MI, BB); + case RISCV::SplitF64Pseudo: + return emitSplitF64Pseudo(MI, BB); + } +} + // Calling Convention Implementation. // The expectations for frontend ABI lowering vary from target to target. // Ideally, an LLVM frontend would be able to avoid worrying about many ABI @@ -759,6 +1362,14 @@ static const MCPhysReg ArgGPRs[] = { RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14, RISCV::X15, RISCV::X16, RISCV::X17 }; +static const MCPhysReg ArgFPR32s[] = { + RISCV::F10_32, RISCV::F11_32, RISCV::F12_32, RISCV::F13_32, + RISCV::F14_32, RISCV::F15_32, RISCV::F16_32, RISCV::F17_32 +}; +static const MCPhysReg ArgFPR64s[] = { + RISCV::F10_64, RISCV::F11_64, RISCV::F12_64, RISCV::F13_64, + RISCV::F14_64, RISCV::F15_64, RISCV::F16_64, RISCV::F17_64 +}; // Pass a 2*XLEN argument that has been split into two XLEN values through // registers or the stack as necessary. @@ -799,22 +1410,59 @@ static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1, } // Implements the RISC-V calling convention. Returns true upon failure. -static bool CC_RISCV(const DataLayout &DL, unsigned ValNo, MVT ValVT, MVT LocVT, - CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, - CCState &State, bool IsFixed, bool IsRet, Type *OrigTy) { +static bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo, + MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed, + bool IsRet, Type *OrigTy) { unsigned XLen = DL.getLargestLegalIntTypeSizeInBits(); assert(XLen == 32 || XLen == 64); MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64; - if (ValVT == MVT::f32) { - LocVT = MVT::i32; - LocInfo = CCValAssign::BCvt; - } // Any return value split in to more than two values can't be returned // directly. 
if (IsRet && ValNo > 1) return true; + // UseGPRForF32 if targeting one of the soft-float ABIs, if passing a + // variadic argument, or if no F32 argument registers are available. + bool UseGPRForF32 = true; + // UseGPRForF64 if targeting soft-float ABIs or an FLEN=32 ABI, if passing a + // variadic argument, or if no F64 argument registers are available. + bool UseGPRForF64 = true; + + switch (ABI) { + default: + llvm_unreachable("Unexpected ABI"); + case RISCVABI::ABI_ILP32: + case RISCVABI::ABI_LP64: + break; + case RISCVABI::ABI_ILP32F: + case RISCVABI::ABI_LP64F: + UseGPRForF32 = !IsFixed; + break; + case RISCVABI::ABI_ILP32D: + case RISCVABI::ABI_LP64D: + UseGPRForF32 = !IsFixed; + UseGPRForF64 = !IsFixed; + break; + } + + if (State.getFirstUnallocated(ArgFPR32s) == array_lengthof(ArgFPR32s)) + UseGPRForF32 = true; + if (State.getFirstUnallocated(ArgFPR64s) == array_lengthof(ArgFPR64s)) + UseGPRForF64 = true; + + // From this point on, rely on UseGPRForF32, UseGPRForF64 and similar local + // variables rather than directly checking against the target ABI. + + if (UseGPRForF32 && ValVT == MVT::f32) { + LocVT = XLenVT; + LocInfo = CCValAssign::BCvt; + } else if (UseGPRForF64 && XLen == 64 && ValVT == MVT::f64) { + LocVT = MVT::i64; + LocInfo = CCValAssign::BCvt; + } + // If this is a variadic argument, the RISC-V calling convention requires // that it is assigned an 'even' or 'aligned' register if it has 8-byte // alignment (RV32) or 16-byte alignment (RV64). An aligned register should @@ -838,8 +1486,9 @@ static bool CC_RISCV(const DataLayout &DL, unsigned ValNo, MVT ValVT, MVT LocVT, assert(PendingLocs.size() == PendingArgFlags.size() && "PendingLocs and PendingArgFlags out of sync"); - // Handle passing f64 on RV32D with a soft float ABI. - if (XLen == 32 && ValVT == MVT::f64) { + // Handle passing f64 on RV32D with a soft float ABI or when floating point + // registers are exhausted. + if (UseGPRForF64 && XLen == 32 && ValVT == MVT::f64) { assert(!ArgFlags.isSplit() && PendingLocs.empty() && "Can't lower f64 if it is split"); // Depending on available argument GPRS, f64 may be passed in a pair of @@ -888,7 +1537,13 @@ static bool CC_RISCV(const DataLayout &DL, unsigned ValNo, MVT ValVT, MVT LocVT, } // Allocate to a register if possible, or else a stack slot. - unsigned Reg = State.AllocateReg(ArgGPRs); + unsigned Reg; + if (ValVT == MVT::f32 && !UseGPRForF32) + Reg = State.AllocateReg(ArgFPR32s, ArgFPR64s); + else if (ValVT == MVT::f64 && !UseGPRForF64) + Reg = State.AllocateReg(ArgFPR64s, ArgFPR32s); + else + Reg = State.AllocateReg(ArgGPRs); unsigned StackOffset = Reg ? 0 : State.AllocateStack(XLen / 8, XLen / 8); // If we reach this point and PendingLocs is non-empty, we must be at the @@ -909,15 +1564,17 @@ static bool CC_RISCV(const DataLayout &DL, unsigned ValNo, MVT ValVT, MVT LocVT, return false; } - assert(LocVT == XLenVT && "Expected an XLenVT at this stage"); + assert((!UseGPRForF32 || !UseGPRForF64 || LocVT == XLenVT) && + "Expected an XLenVT at this stage"); if (Reg) { State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); return false; } - if (ValVT == MVT::f32) { - LocVT = MVT::f32; + // When an f32 or f64 is passed on the stack, no bit-conversion is needed. 
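
Aside (not part of the patch): hypothetical signatures showing the effect of UseGPRForF32/UseGPRForF64 under the ilp32d ABI; the register assignments follow the RISC-V psABI and assume no earlier arguments have consumed them:

    // Fixed FP arguments use fa0..fa7 until ArgFPR64s is exhausted.
    double nineDoubles(double A, double B, double C, double D, double E,
                       double F, double G, double H, double I);
    // A..H -> fa0..fa7; I falls back to the x10/x11 GPR pair or the stack.

    // Variadic FP arguments never use FP registers (UseGPRForF64 = !IsFixed).
    double sumVarargs(int N, ...);
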
+ if (ValVT == MVT::f32 || ValVT == MVT::f64) { + LocVT = ValVT; LocInfo = CCValAssign::Full; } State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo)); @@ -940,7 +1597,8 @@ void RISCVTargetLowering::analyzeInputArgs( else if (Ins[i].isOrigArg()) ArgTy = FType->getParamType(Ins[i].getOrigArgIndex()); - if (CC_RISCV(MF.getDataLayout(), i, ArgVT, ArgVT, CCValAssign::Full, + RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI(); + if (CC_RISCV(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo, /*IsRet=*/true, IsRet, ArgTy)) { LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " << EVT(ArgVT).getEVTString() << '\n'); @@ -960,7 +1618,8 @@ void RISCVTargetLowering::analyzeOutputArgs( ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr; - if (CC_RISCV(MF.getDataLayout(), i, ArgVT, ArgVT, CCValAssign::Full, + RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI(); + if (CC_RISCV(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy)) { LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " << EVT(ArgVT).getEVTString() << "\n"); @@ -979,6 +1638,10 @@ static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, case CCValAssign::Full: break; case CCValAssign::BCvt: + if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) { + Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val); + break; + } Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val); break; } @@ -993,8 +1656,24 @@ static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain, MachineRegisterInfo &RegInfo = MF.getRegInfo(); EVT LocVT = VA.getLocVT(); SDValue Val; + const TargetRegisterClass *RC; + + switch (LocVT.getSimpleVT().SimpleTy) { + default: + llvm_unreachable("Unexpected register type"); + case MVT::i32: + case MVT::i64: + RC = &RISCV::GPRRegClass; + break; + case MVT::f32: + RC = &RISCV::FPR32RegClass; + break; + case MVT::f64: + RC = &RISCV::FPR64RegClass; + break; + } - unsigned VReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass); + unsigned VReg = RegInfo.createVirtualRegister(RC); RegInfo.addLiveIn(VA.getLocReg(), VReg); Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT); @@ -1014,6 +1693,10 @@ static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, case CCValAssign::Full: break; case CCValAssign::BCvt: + if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) { + Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val); + break; + } Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val); break; } @@ -1040,6 +1723,7 @@ static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain, llvm_unreachable("Unexpected CCValAssign::LocInfo"); case CCValAssign::Full: case CCValAssign::Indirect: + case CCValAssign::BCvt: ExtType = ISD::NON_EXTLOAD; break; } @@ -1227,12 +1911,12 @@ SDValue RISCVTargetLowering::LowerFormalArguments( return Chain; } -/// IsEligibleForTailCallOptimization - Check whether the call is eligible +/// isEligibleForTailCallOptimization - Check whether the call is eligible /// for tail call optimization. /// Note: This is modelled after ARM's IsEligibleForTailCallOptimization. 
-bool RISCVTargetLowering::IsEligibleForTailCallOptimization( - CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF, - const SmallVector<CCValAssign, 16> &ArgLocs) const { +bool RISCVTargetLowering::isEligibleForTailCallOptimization( + CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF, + const SmallVector<CCValAssign, 16> &ArgLocs) const { auto &Callee = CLI.Callee; auto CalleeCC = CLI.CallConv; @@ -1335,8 +2019,7 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI, // Check if it's really possible to do a tail call. if (IsTailCall) - IsTailCall = IsEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, - ArgLocs); + IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs); if (IsTailCall) ++NumTailCalls; @@ -1482,9 +2165,21 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI, // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't // split it and then direct call can be matched by PseudoCALL. if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) { - Callee = DAG.getTargetGlobalAddress(S->getGlobal(), DL, PtrVT, 0, 0); + const GlobalValue *GV = S->getGlobal(); + + unsigned OpFlags = RISCVII::MO_CALL; + if (!getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV)) + OpFlags = RISCVII::MO_PLT; + + Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags); } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { - Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, 0); + unsigned OpFlags = RISCVII::MO_CALL; + + if (!getTargetMachine().shouldAssumeDSOLocal(*MF.getFunction().getParent(), + nullptr)) + OpFlags = RISCVII::MO_PLT; + + Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags); } // The first call operand is the chain and the second is the target address. @@ -1567,8 +2262,9 @@ bool RISCVTargetLowering::CanLowerReturn( for (unsigned i = 0, e = Outs.size(); i != e; ++i) { MVT VT = Outs[i].VT; ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; - if (CC_RISCV(MF.getDataLayout(), i, VT, VT, CCValAssign::Full, ArgFlags, - CCInfo, /*IsFixed=*/true, /*IsRet=*/true, nullptr)) + RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI(); + if (CC_RISCV(MF.getDataLayout(), ABI, i, VT, VT, CCValAssign::Full, + ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true, nullptr)) return false; } return true; @@ -1679,6 +2375,24 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const { return "RISCVISD::SplitF64"; case RISCVISD::TAIL: return "RISCVISD::TAIL"; + case RISCVISD::SLLW: + return "RISCVISD::SLLW"; + case RISCVISD::SRAW: + return "RISCVISD::SRAW"; + case RISCVISD::SRLW: + return "RISCVISD::SRLW"; + case RISCVISD::DIVW: + return "RISCVISD::DIVW"; + case RISCVISD::DIVUW: + return "RISCVISD::DIVUW"; + case RISCVISD::REMUW: + return "RISCVISD::REMUW"; + case RISCVISD::FMV_W_X_RV64: + return "RISCVISD::FMV_W_X_RV64"; + case RISCVISD::FMV_X_ANYEXTW_RV64: + return "RISCVISD::FMV_X_ANYEXTW_RV64"; + case RISCVISD::READ_CYCLE_WIDE: + return "RISCVISD::READ_CYCLE_WIDE"; } return nullptr; } @@ -1701,6 +2415,44 @@ RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); } +void RISCVTargetLowering::LowerAsmOperandForConstraint( + SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops, + SelectionDAG &DAG) const { + // Currently only support length 1 constraints. 
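
Aside (not part of the patch): a usage sketch for the constraints validated below, which match the standard RISC-V machine constraints ('I' is a 12-bit signed immediate, 'J' the integer zero, 'K' a 5-bit unsigned immediate):

    long addSmallImm(long X) {
      long R;
      // 2047 satisfies isInt<12>; a larger literal would be rejected.
      asm("addi %0, %1, %2" : "=r"(R) : "r"(X), "I"(2047));
      return R;
    }
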
+ if (Constraint.length() == 1) { + switch (Constraint[0]) { + case 'I': + // Validate & create a 12-bit signed immediate operand. + if (auto *C = dyn_cast<ConstantSDNode>(Op)) { + uint64_t CVal = C->getSExtValue(); + if (isInt<12>(CVal)) + Ops.push_back( + DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT())); + } + return; + case 'J': + // Validate & create an integer zero operand. + if (auto *C = dyn_cast<ConstantSDNode>(Op)) + if (C->getZExtValue() == 0) + Ops.push_back( + DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT())); + return; + case 'K': + // Validate & create a 5-bit unsigned immediate operand. + if (auto *C = dyn_cast<ConstantSDNode>(Op)) { + uint64_t CVal = C->getZExtValue(); + if (isUInt<5>(CVal)) + Ops.push_back( + DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT())); + } + return; + default: + break; + } + } + TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); +} + Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilder<> &Builder, Instruction *Inst, AtomicOrdering Ord) const { @@ -1721,6 +2473,12 @@ Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilder<> &Builder, TargetLowering::AtomicExpansionKind RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { + // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating + // point operations can't be used in an lr/sc sequence without breaking the + // forward-progress guarantee. + if (AI->isFloatingPointOperation()) + return AtomicExpansionKind::CmpXChg; + unsigned Size = AI->getType()->getPrimitiveSizeInBits(); if (Size == 8 || Size == 16) return AtomicExpansionKind::MaskedIntrinsic; @@ -1728,37 +2486,74 @@ RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { } static Intrinsic::ID -getIntrinsicForMaskedAtomicRMWBinOp32(AtomicRMWInst::BinOp BinOp) { - switch (BinOp) { - default: - llvm_unreachable("Unexpected AtomicRMW BinOp"); - case AtomicRMWInst::Xchg: - return Intrinsic::riscv_masked_atomicrmw_xchg_i32; - case AtomicRMWInst::Add: - return Intrinsic::riscv_masked_atomicrmw_add_i32; - case AtomicRMWInst::Sub: - return Intrinsic::riscv_masked_atomicrmw_sub_i32; - case AtomicRMWInst::Nand: - return Intrinsic::riscv_masked_atomicrmw_nand_i32; - case AtomicRMWInst::Max: - return Intrinsic::riscv_masked_atomicrmw_max_i32; - case AtomicRMWInst::Min: - return Intrinsic::riscv_masked_atomicrmw_min_i32; - case AtomicRMWInst::UMax: - return Intrinsic::riscv_masked_atomicrmw_umax_i32; - case AtomicRMWInst::UMin: - return Intrinsic::riscv_masked_atomicrmw_umin_i32; +getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) { + if (XLen == 32) { + switch (BinOp) { + default: + llvm_unreachable("Unexpected AtomicRMW BinOp"); + case AtomicRMWInst::Xchg: + return Intrinsic::riscv_masked_atomicrmw_xchg_i32; + case AtomicRMWInst::Add: + return Intrinsic::riscv_masked_atomicrmw_add_i32; + case AtomicRMWInst::Sub: + return Intrinsic::riscv_masked_atomicrmw_sub_i32; + case AtomicRMWInst::Nand: + return Intrinsic::riscv_masked_atomicrmw_nand_i32; + case AtomicRMWInst::Max: + return Intrinsic::riscv_masked_atomicrmw_max_i32; + case AtomicRMWInst::Min: + return Intrinsic::riscv_masked_atomicrmw_min_i32; + case AtomicRMWInst::UMax: + return Intrinsic::riscv_masked_atomicrmw_umax_i32; + case AtomicRMWInst::UMin: + return Intrinsic::riscv_masked_atomicrmw_umin_i32; + } + } + + if (XLen == 64) { + switch (BinOp) { + default: + llvm_unreachable("Unexpected AtomicRMW BinOp"); + case AtomicRMWInst::Xchg: + return 
Intrinsic::riscv_masked_atomicrmw_xchg_i64; + case AtomicRMWInst::Add: + return Intrinsic::riscv_masked_atomicrmw_add_i64; + case AtomicRMWInst::Sub: + return Intrinsic::riscv_masked_atomicrmw_sub_i64; + case AtomicRMWInst::Nand: + return Intrinsic::riscv_masked_atomicrmw_nand_i64; + case AtomicRMWInst::Max: + return Intrinsic::riscv_masked_atomicrmw_max_i64; + case AtomicRMWInst::Min: + return Intrinsic::riscv_masked_atomicrmw_min_i64; + case AtomicRMWInst::UMax: + return Intrinsic::riscv_masked_atomicrmw_umax_i64; + case AtomicRMWInst::UMin: + return Intrinsic::riscv_masked_atomicrmw_umin_i64; + } } + + llvm_unreachable("Unexpected XLen\n"); } Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic( IRBuilder<> &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const { - Value *Ordering = Builder.getInt32(static_cast<uint32_t>(AI->getOrdering())); + unsigned XLen = Subtarget.getXLen(); + Value *Ordering = + Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering())); Type *Tys[] = {AlignedAddr->getType()}; Function *LrwOpScwLoop = Intrinsic::getDeclaration( AI->getModule(), - getIntrinsicForMaskedAtomicRMWBinOp32(AI->getOperation()), Tys); + getIntrinsicForMaskedAtomicRMWBinOp(XLen, AI->getOperation()), Tys); + + if (XLen == 64) { + Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty()); + Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty()); + ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty()); + } + + Value *Result; // Must pass the shift amount needed to sign extend the loaded value prior // to performing a signed comparison for min/max. ShiftAmt is the number of @@ -1770,13 +2565,18 @@ Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic( const DataLayout &DL = AI->getModule()->getDataLayout(); unsigned ValWidth = DL.getTypeStoreSizeInBits(AI->getValOperand()->getType()); - Value *SextShamt = Builder.CreateSub( - Builder.getInt32(Subtarget.getXLen() - ValWidth), ShiftAmt); - return Builder.CreateCall(LrwOpScwLoop, - {AlignedAddr, Incr, Mask, SextShamt, Ordering}); + Value *SextShamt = + Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt); + Result = Builder.CreateCall(LrwOpScwLoop, + {AlignedAddr, Incr, Mask, SextShamt, Ordering}); + } else { + Result = + Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering}); } - return Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering}); + if (XLen == 64) + Result = Builder.CreateTrunc(Result, Builder.getInt32Ty()); + return Result; } TargetLowering::AtomicExpansionKind @@ -1791,10 +2591,31 @@ RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR( Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic( IRBuilder<> &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const { - Value *Ordering = Builder.getInt32(static_cast<uint32_t>(Ord)); + unsigned XLen = Subtarget.getXLen(); + Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord)); + Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32; + if (XLen == 64) { + CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty()); + NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty()); + Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty()); + CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64; + } Type *Tys[] = {AlignedAddr->getType()}; - Function *MaskedCmpXchg = Intrinsic::getDeclaration( - CI->getModule(), Intrinsic::riscv_masked_cmpxchg_i32, Tys); - return 
Builder.CreateCall(MaskedCmpXchg, - {AlignedAddr, CmpVal, NewVal, Mask, Ordering}); + Function *MaskedCmpXchg = + Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys); + Value *Result = Builder.CreateCall( + MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering}); + if (XLen == 64) + Result = Builder.CreateTrunc(Result, Builder.getInt32Ty()); + return Result; +} + +unsigned RISCVTargetLowering::getExceptionPointerRegister( + const Constant *PersonalityFn) const { + return RISCV::X10; +} + +unsigned RISCVTargetLowering::getExceptionSelectorRegister( + const Constant *PersonalityFn) const { + return RISCV::X11; }
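
Aside (not part of the patch): both masked-atomic paths above apply the same RV64 adaptation, since the riscv_masked_* intrinsics operate on XLen-wide values. A sketch with a hypothetical callback standing in for the intrinsic call:

    #include <cstdint>

    // Hypothetical helper: sign-extend the i32 operands to XLen = 64
    // (CreateSExt), call the XLen-wide operation, and truncate the result
    // back to i32 (CreateTrunc).
    int32_t callMaskedOpRV64(int64_t (*MaskedOp)(int64_t, int64_t, int64_t),
                             int32_t Incr, int32_t Mask, int32_t ShiftAmt) {
      int64_t Res = MaskedOp(Incr, Mask, ShiftAmt); // implicit sign-extension
      return (int32_t)Res;
    }
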