Diffstat (limited to 'lib/Target/RISCV/RISCVISelLowering.cpp')
-rw-r--r-- | lib/Target/RISCV/RISCVISelLowering.cpp | 882 |
1 file changed, 804 insertions, 78 deletions
diff --git a/lib/Target/RISCV/RISCVISelLowering.cpp b/lib/Target/RISCV/RISCVISelLowering.cpp
index 7d32954936be..87796e5b1097 100644
--- a/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -14,9 +14,11 @@
 #include "RISCVISelLowering.h"
 #include "RISCV.h"
+#include "RISCVMachineFunctionInfo.h"
 #include "RISCVRegisterInfo.h"
 #include "RISCVSubtarget.h"
 #include "RISCVTargetMachine.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/CallingConvLower.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
@@ -35,6 +37,8 @@ using namespace llvm;
 
 #define DEBUG_TYPE "riscv-lower"
 
+STATISTIC(NumTailCalls, "Number of tail calls");
+
 RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
                                          const RISCVSubtarget &STI)
     : TargetLowering(TM), Subtarget(STI) {
@@ -44,6 +48,11 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
   // Set up the register classes.
   addRegisterClass(XLenVT, &RISCV::GPRRegClass);
 
+  if (Subtarget.hasStdExtF())
+    addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
+  if (Subtarget.hasStdExtD())
+    addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
+
   // Compute derived properties from the register classes.
   computeRegisterProperties(STI.getRegisterInfo());
 
@@ -63,26 +72,28 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
   setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
 
+  setOperationAction(ISD::VASTART, MVT::Other, Custom);
+  setOperationAction(ISD::VAARG, MVT::Other, Expand);
+  setOperationAction(ISD::VACOPY, MVT::Other, Expand);
+  setOperationAction(ISD::VAEND, MVT::Other, Expand);
+
   for (auto VT : {MVT::i1, MVT::i8, MVT::i16})
     setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
 
-  setOperationAction(ISD::ADDC, XLenVT, Expand);
-  setOperationAction(ISD::ADDE, XLenVT, Expand);
-  setOperationAction(ISD::SUBC, XLenVT, Expand);
-  setOperationAction(ISD::SUBE, XLenVT, Expand);
+  if (!Subtarget.hasStdExtM()) {
+    setOperationAction(ISD::MUL, XLenVT, Expand);
+    setOperationAction(ISD::MULHS, XLenVT, Expand);
+    setOperationAction(ISD::MULHU, XLenVT, Expand);
+    setOperationAction(ISD::SDIV, XLenVT, Expand);
+    setOperationAction(ISD::UDIV, XLenVT, Expand);
+    setOperationAction(ISD::SREM, XLenVT, Expand);
+    setOperationAction(ISD::UREM, XLenVT, Expand);
+  }
 
-  setOperationAction(ISD::SREM, XLenVT, Expand);
   setOperationAction(ISD::SDIVREM, XLenVT, Expand);
-  setOperationAction(ISD::SDIV, XLenVT, Expand);
-  setOperationAction(ISD::UREM, XLenVT, Expand);
   setOperationAction(ISD::UDIVREM, XLenVT, Expand);
-  setOperationAction(ISD::UDIV, XLenVT, Expand);
-
-  setOperationAction(ISD::MUL, XLenVT, Expand);
   setOperationAction(ISD::SMUL_LOHI, XLenVT, Expand);
   setOperationAction(ISD::UMUL_LOHI, XLenVT, Expand);
-  setOperationAction(ISD::MULHS, XLenVT, Expand);
-  setOperationAction(ISD::MULHU, XLenVT, Expand);
 
   setOperationAction(ISD::SHL_PARTS, XLenVT, Expand);
   setOperationAction(ISD::SRL_PARTS, XLenVT, Expand);
@@ -95,19 +106,128 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::CTLZ, XLenVT, Expand);
   setOperationAction(ISD::CTPOP, XLenVT, Expand);
 
+  ISD::CondCode FPCCToExtend[] = {
+      ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETO,   ISD::SETUEQ,
+      ISD::SETUGT, ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE,
+      ISD::SETGT,  ISD::SETGE,  ISD::SETNE};
+
+  if (Subtarget.hasStdExtF()) {
+    setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
+    setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
+    for (auto CC : FPCCToExtend)
+      setCondCodeAction(CC, MVT::f32, Expand);
+    setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
+    setOperationAction(ISD::SELECT, MVT::f32, Custom);
+    setOperationAction(ISD::BR_CC, MVT::f32, Expand);
+  }
+
+  if (Subtarget.hasStdExtD()) {
+    setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
+    setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
+    for (auto CC : FPCCToExtend)
+      setCondCodeAction(CC, MVT::f64, Expand);
+    setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
+    setOperationAction(ISD::SELECT, MVT::f64, Custom);
+    setOperationAction(ISD::BR_CC, MVT::f64, Expand);
+    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
+    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+  }
+
   setOperationAction(ISD::GlobalAddress, XLenVT, Custom);
   setOperationAction(ISD::BlockAddress, XLenVT, Custom);
+  setOperationAction(ISD::ConstantPool, XLenVT, Custom);
+
+  if (Subtarget.hasStdExtA())
+    setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
+  else
+    setMaxAtomicSizeInBitsSupported(0);
 
   setBooleanContents(ZeroOrOneBooleanContent);
 
   // Function alignments (log2).
-  setMinFunctionAlignment(3);
-  setPrefFunctionAlignment(3);
+  unsigned FunctionAlignment = Subtarget.hasStdExtC() ? 1 : 2;
+  setMinFunctionAlignment(FunctionAlignment);
+  setPrefFunctionAlignment(FunctionAlignment);
 
   // Effectively disable jump table generation.
   setMinimumJumpTableEntries(INT_MAX);
 }
 
+EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
+                                            EVT VT) const {
+  if (!VT.isVector())
+    return getPointerTy(DL);
+  return VT.changeVectorElementTypeToInteger();
+}
+
+bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL,
+                                                const AddrMode &AM, Type *Ty,
+                                                unsigned AS,
+                                                Instruction *I) const {
+  // No global is ever allowed as a base.
+  if (AM.BaseGV)
+    return false;
+
+  // Require a 12-bit signed offset.
+  if (!isInt<12>(AM.BaseOffs))
+    return false;
+
+  switch (AM.Scale) {
+  case 0: // "r+i" or just "i", depending on HasBaseReg.
+    break;
+  case 1:
+    if (!AM.HasBaseReg) // allow "r+i".
+      break;
+    return false; // disallow "r+r" or "r+r+i".
+  default:
+    return false;
+  }
+
+  return true;
+}
+
+bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
+  return isInt<12>(Imm);
+}
+
+bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const {
+  return isInt<12>(Imm);
+}
+
+// On RV32, 64-bit integers are split into their high and low parts and held
+// in two different registers, so the trunc is free since the low register can
+// just be used.
+bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
+  if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
+    return false;
+  unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
+  unsigned DestBits = DstTy->getPrimitiveSizeInBits();
+  return (SrcBits == 64 && DestBits == 32);
+}
+
+bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
+  if (Subtarget.is64Bit() || SrcVT.isVector() || DstVT.isVector() ||
+      !SrcVT.isInteger() || !DstVT.isInteger())
+    return false;
+  unsigned SrcBits = SrcVT.getSizeInBits();
+  unsigned DestBits = DstVT.getSizeInBits();
+  return (SrcBits == 64 && DestBits == 32);
+}
+
+bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
+  // Zexts are free if they can be combined with a load.
+  if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
+    EVT MemVT = LD->getMemoryVT();
+    if ((MemVT == MVT::i8 || MemVT == MVT::i16 ||
+         (Subtarget.is64Bit() && MemVT == MVT::i32)) &&
+        (LD->getExtensionType() == ISD::NON_EXTLOAD ||
+         LD->getExtensionType() == ISD::ZEXTLOAD))
+      return true;
+  }
+
+  return TargetLowering::isZExtFree(Val, VT2);
+}
+
 // Changes the condition code and swaps operands if necessary, so the SetCC
 // operation matches one of the comparisons supported directly in the RISC-V
 // ISA.
@@ -156,8 +276,16 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
     return lowerGlobalAddress(Op, DAG);
   case ISD::BlockAddress:
     return lowerBlockAddress(Op, DAG);
+  case ISD::ConstantPool:
+    return lowerConstantPool(Op, DAG);
   case ISD::SELECT:
     return lowerSELECT(Op, DAG);
+  case ISD::VASTART:
+    return lowerVASTART(Op, DAG);
+  case ISD::FRAMEADDR:
+    return LowerFRAMEADDR(Op, DAG);
+  case ISD::RETURNADDR:
+    return LowerRETURNADDR(Op, DAG);
   }
 }
 
@@ -168,17 +296,22 @@ SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
   GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
   const GlobalValue *GV = N->getGlobal();
   int64_t Offset = N->getOffset();
+  MVT XLenVT = Subtarget.getXLenVT();
 
   if (isPositionIndependent() || Subtarget.is64Bit())
     report_fatal_error("Unable to lowerGlobalAddress");
-
-  SDValue GAHi =
-      DAG.getTargetGlobalAddress(GV, DL, Ty, Offset, RISCVII::MO_HI);
-  SDValue GALo =
-      DAG.getTargetGlobalAddress(GV, DL, Ty, Offset, RISCVII::MO_LO);
+  // In order to maximise the opportunity for common subexpression elimination,
+  // emit a separate ADD node for the global address offset instead of folding
+  // it in the global address node. Later peephole optimisations may choose to
+  // fold it back in when profitable.
+  SDValue GAHi = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_HI);
+  SDValue GALo = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_LO);
   SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, GAHi), 0);
   SDValue MNLo =
       SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, GALo), 0);
+  if (Offset != 0)
+    return DAG.getNode(ISD::ADD, DL, Ty, MNLo,
+                       DAG.getConstant(Offset, DL, XLenVT));
   return MNLo;
 }
 
@@ -201,6 +334,29 @@ SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
   return MNLo;
 }
 
+SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
+                                               SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+  EVT Ty = Op.getValueType();
+  ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
+  const Constant *CPA = N->getConstVal();
+  int64_t Offset = N->getOffset();
+  unsigned Alignment = N->getAlignment();
+
+  if (!isPositionIndependent()) {
+    SDValue CPAHi =
+        DAG.getTargetConstantPool(CPA, Ty, Alignment, Offset, RISCVII::MO_HI);
+    SDValue CPALo =
+        DAG.getTargetConstantPool(CPA, Ty, Alignment, Offset, RISCVII::MO_LO);
+    SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, CPAHi), 0);
+    SDValue MNLo =
+        SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, CPALo), 0);
+    return MNLo;
+  } else {
+    report_fatal_error("Unable to lowerConstantPool");
+  }
+}
+
 SDValue RISCVTargetLowering::lowerExternalSymbol(SDValue Op,
                                                  SelectionDAG &DAG) const {
   SDLoc DL(Op);
@@ -261,14 +417,153 @@ SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
   return DAG.getNode(RISCVISD::SELECT_CC, DL, VTs, Ops);
 }
 
+SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
+  MachineFunction &MF = DAG.getMachineFunction();
+  RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
+
+  SDLoc DL(Op);
+  SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
+                                 getPointerTy(MF.getDataLayout()));
+
+  // vastart just stores the address of the VarArgsFrameIndex slot into the
+  // memory location argument.
+  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+  return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
+                      MachinePointerInfo(SV));
+}
+
+SDValue RISCVTargetLowering::LowerFRAMEADDR(SDValue Op,
+                                            SelectionDAG &DAG) const {
+  const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
+  MachineFunction &MF = DAG.getMachineFunction();
+  MachineFrameInfo &MFI = MF.getFrameInfo();
+  MFI.setFrameAddressIsTaken(true);
+  unsigned FrameReg = RI.getFrameRegister(MF);
+  int XLenInBytes = Subtarget.getXLen() / 8;
+
+  EVT VT = Op.getValueType();
+  SDLoc DL(Op);
+  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
+  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+  while (Depth--) {
+    int Offset = -(XLenInBytes * 2);
+    SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
+                              DAG.getIntPtrConstant(Offset, DL));
+    FrameAddr =
+        DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
+  }
+  return FrameAddr;
+}
+
+SDValue RISCVTargetLowering::LowerRETURNADDR(SDValue Op,
+                                             SelectionDAG &DAG) const {
+  const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
+  MachineFunction &MF = DAG.getMachineFunction();
+  MachineFrameInfo &MFI = MF.getFrameInfo();
+  MFI.setReturnAddressIsTaken(true);
+  MVT XLenVT = Subtarget.getXLenVT();
+  int XLenInBytes = Subtarget.getXLen() / 8;
+
+  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
+    return SDValue();
+
+  EVT VT = Op.getValueType();
+  SDLoc DL(Op);
+  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+  if (Depth) {
+    int Off = -XLenInBytes;
+    SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
+    SDValue Offset = DAG.getConstant(Off, DL, VT);
+    return DAG.getLoad(VT, DL, DAG.getEntryNode(),
+                       DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
+                       MachinePointerInfo());
+  }
+
+  // Return the value of the return address register, marking it an implicit
+  // live-in.
+  unsigned Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
+  return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
+}
+
+static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
+                                             MachineBasicBlock *BB) {
+  assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction");
+
+  MachineFunction &MF = *BB->getParent();
+  DebugLoc DL = MI.getDebugLoc();
+  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+  const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
+  unsigned LoReg = MI.getOperand(0).getReg();
+  unsigned HiReg = MI.getOperand(1).getReg();
+  unsigned SrcReg = MI.getOperand(2).getReg();
+  const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass;
+  int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex();
+
+  TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC,
+                          RI);
+  MachineMemOperand *MMO =
+      MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, FI),
+                              MachineMemOperand::MOLoad, 8, 8);
+  BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
+      .addFrameIndex(FI)
+      .addImm(0)
+      .addMemOperand(MMO);
+  BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
+      .addFrameIndex(FI)
+      .addImm(4)
+      .addMemOperand(MMO);
+  MI.eraseFromParent(); // The pseudo instruction is gone now.
+  return BB;
+}
+
+static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
+                                                 MachineBasicBlock *BB) {
+  assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo &&
+         "Unexpected instruction");
+
+  MachineFunction &MF = *BB->getParent();
+  DebugLoc DL = MI.getDebugLoc();
+  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+  const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
+  unsigned DstReg = MI.getOperand(0).getReg();
+  unsigned LoReg = MI.getOperand(1).getReg();
+  unsigned HiReg = MI.getOperand(2).getReg();
+  const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass;
+  int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex();
+
+  MachineMemOperand *MMO =
+      MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, FI),
+                              MachineMemOperand::MOStore, 8, 8);
+  BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
+      .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
+      .addFrameIndex(FI)
+      .addImm(0)
+      .addMemOperand(MMO);
+  BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
+      .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
+      .addFrameIndex(FI)
+      .addImm(4)
+      .addMemOperand(MMO);
+  TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI);
+  MI.eraseFromParent(); // The pseudo instruction is gone now.
+  return BB;
+}
+
 MachineBasicBlock *
 RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
                                                  MachineBasicBlock *BB) const {
-  const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
-  DebugLoc DL = MI.getDebugLoc();
-
-  assert(MI.getOpcode() == RISCV::Select_GPR_Using_CC_GPR &&
-         "Unexpected instr type to insert");
+  switch (MI.getOpcode()) {
+  default:
+    llvm_unreachable("Unexpected instr type to insert");
+  case RISCV::Select_GPR_Using_CC_GPR:
+  case RISCV::Select_FPR32_Using_CC_GPR:
+  case RISCV::Select_FPR64_Using_CC_GPR:
+    break;
+  case RISCV::BuildPairF64Pseudo:
+    return emitBuildPairF64Pseudo(MI, BB);
+  case RISCV::SplitF64Pseudo:
+    return emitSplitF64Pseudo(MI, BB);
+  }
 
   // To "insert" a SELECT instruction, we actually have to insert the triangle
   // control-flow pattern. The incoming instruction knows the destination vreg
@@ -281,7 +576,9 @@ RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
   // |  IfFalseMBB
   // | /
   //  TailMBB
+  const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
   const BasicBlock *LLVM_BB = BB->getBasicBlock();
+  DebugLoc DL = MI.getDebugLoc();
   MachineFunction::iterator I = ++BB->getIterator();
 
   MachineBasicBlock *HeadMBB = BB;
@@ -398,19 +695,36 @@ static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1,
 
 // Implements the RISC-V calling convention. Returns true upon failure.
 static bool CC_RISCV(const DataLayout &DL, unsigned ValNo, MVT ValVT, MVT LocVT,
                      CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
-                     CCState &State, bool IsFixed, bool IsRet) {
+                     CCState &State, bool IsFixed, bool IsRet, Type *OrigTy) {
   unsigned XLen = DL.getLargestLegalIntTypeSizeInBits();
   assert(XLen == 32 || XLen == 64);
   MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64;
-  assert(ValVT == XLenVT && "Unexpected ValVT");
-  assert(LocVT == XLenVT && "Unexpected LocVT");
-  assert(IsFixed && "Vararg support not yet implemented");
+  if (ValVT == MVT::f32) {
+    LocVT = MVT::i32;
+    LocInfo = CCValAssign::BCvt;
+  }
 
   // Any return value split into more than two values can't be returned
   // directly.
   if (IsRet && ValNo > 1)
     return true;
 
+  // If this is a variadic argument, the RISC-V calling convention requires
+  // that it is assigned an 'even' or 'aligned' register if it has 8-byte
+  // alignment (RV32) or 16-byte alignment (RV64). An aligned register should
+  // be used regardless of whether the original argument was split during
+  // legalisation or not. The argument will not be passed by registers if the
+  // original type is larger than 2*XLEN, so the register alignment rule does
+  // not apply.
+  unsigned TwoXLenInBytes = (2 * XLen) / 8;
+  if (!IsFixed && ArgFlags.getOrigAlign() == TwoXLenInBytes &&
+      DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes) {
+    unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
+    // Skip 'odd' register if necessary.
+    if (RegIdx != array_lengthof(ArgGPRs) && RegIdx % 2 == 1)
+      State.AllocateReg(ArgGPRs);
+  }
+
   SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
   SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
       State.getPendingArgFlags();
@@ -418,6 +732,28 @@ static bool CC_RISCV(const DataLayout &DL, unsigned ValNo, MVT ValVT, MVT LocVT,
 
   assert(PendingLocs.size() == PendingArgFlags.size() &&
          "PendingLocs and PendingArgFlags out of sync");
 
+  // Handle passing f64 on RV32D with a soft float ABI.
+  if (XLen == 32 && ValVT == MVT::f64) {
+    assert(!ArgFlags.isSplit() && PendingLocs.empty() &&
+           "Can't lower f64 if it is split");
+    // Depending on available argument GPRs, f64 may be passed in a pair of
+    // GPRs, split between a GPR and the stack, or passed completely on the
+    // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
+    // cases.
+    unsigned Reg = State.AllocateReg(ArgGPRs);
+    LocVT = MVT::i32;
+    if (!Reg) {
+      unsigned StackOffset = State.AllocateStack(8, 8);
+      State.addLoc(
+          CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
+      return false;
+    }
+    if (!State.AllocateReg(ArgGPRs))
+      State.AllocateStack(4, 4);
+    State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+    return false;
+  }
+
   // Split arguments might be passed indirectly, so keep track of the pending
   // values.
   if (ArgFlags.isSplit() || !PendingLocs.empty()) {
@@ -482,15 +818,22 @@ void RISCVTargetLowering::analyzeInputArgs(
     MachineFunction &MF, CCState &CCInfo,
     const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet) const {
   unsigned NumArgs = Ins.size();
+  FunctionType *FType = MF.getFunction().getFunctionType();
 
   for (unsigned i = 0; i != NumArgs; ++i) {
     MVT ArgVT = Ins[i].VT;
     ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
 
+    Type *ArgTy = nullptr;
+    if (IsRet)
+      ArgTy = FType->getReturnType();
+    else if (Ins[i].isOrigArg())
+      ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
+
     if (CC_RISCV(MF.getDataLayout(), i, ArgVT, ArgVT, CCValAssign::Full,
-                 ArgFlags, CCInfo, /*IsRet=*/true, IsRet)) {
-      DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
-                   << EVT(ArgVT).getEVTString() << '\n');
+                 ArgFlags, CCInfo, /*IsRet=*/true, IsRet, ArgTy)) {
+      LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
                        << EVT(ArgVT).getEVTString() << '\n');
       llvm_unreachable(nullptr);
     }
   }
@@ -498,17 +841,19 @@ void RISCVTargetLowering::analyzeInputArgs(
 
 void RISCVTargetLowering::analyzeOutputArgs(
     MachineFunction &MF, CCState &CCInfo,
-    const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet) const {
+    const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
+    CallLoweringInfo *CLI) const {
   unsigned NumArgs = Outs.size();
 
   for (unsigned i = 0; i != NumArgs; i++) {
     MVT ArgVT = Outs[i].VT;
     ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
+    Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
 
     if (CC_RISCV(MF.getDataLayout(), i, ArgVT, ArgVT, CCValAssign::Full,
-                 ArgFlags, CCInfo, Outs[i].IsFixed, IsRet)) {
-      DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
-                   << EVT(ArgVT).getEVTString() << "\n");
+                 ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy)) {
+      LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
                         << EVT(ArgVT).getEVTString() << "\n");
       llvm_unreachable(nullptr);
     }
   }
@@ -521,6 +866,7 @@ static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
   MachineFunction &MF = DAG.getMachineFunction();
   MachineRegisterInfo &RegInfo = MF.getRegInfo();
   EVT LocVT = VA.getLocVT();
+  EVT ValVT = VA.getValVT();
   SDValue Val;
 
   unsigned VReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
@@ -532,8 +878,12 @@ static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
     llvm_unreachable("Unexpected CCValAssign::LocInfo");
   case CCValAssign::Full:
   case CCValAssign::Indirect:
-    return Val;
+    break;
+  case CCValAssign::BCvt:
+    Val = DAG.getNode(ISD::BITCAST, DL, ValVT, Val);
+    break;
   }
+  return Val;
 }
 
 // The caller is responsible for loading the full value if the argument is
@@ -565,6 +915,43 @@ static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
   return Val;
 }
 
+static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
+                                       const CCValAssign &VA, const SDLoc &DL) {
+  assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
+         "Unexpected VA");
+  MachineFunction &MF = DAG.getMachineFunction();
+  MachineFrameInfo &MFI = MF.getFrameInfo();
+  MachineRegisterInfo &RegInfo = MF.getRegInfo();
+
+  if (VA.isMemLoc()) {
+    // f64 is passed on the stack.
+    int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), /*Immutable=*/true);
+    SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
+    return DAG.getLoad(MVT::f64, DL, Chain, FIN,
+                       MachinePointerInfo::getFixedStack(MF, FI));
+  }
+
+  assert(VA.isRegLoc() && "Expected register VA assignment");
+
+  unsigned LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
+  RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
+  SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
+  SDValue Hi;
+  if (VA.getLocReg() == RISCV::X17) {
+    // Second half of f64 is passed on the stack.
+    int FI = MFI.CreateFixedObject(4, 0, /*Immutable=*/true);
+    SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
+    Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
+                     MachinePointerInfo::getFixedStack(MF, FI));
+  } else {
+    // Second half of f64 is passed in another GPR.
+    unsigned HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
+    RegInfo.addLiveIn(VA.getLocReg() + 1, HiVReg);
+    Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
+  }
+  return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
+}
+
 // Transform physical registers into virtual registers.
 SDValue RISCVTargetLowering::LowerFormalArguments(
     SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
@@ -580,11 +967,26 @@ SDValue RISCVTargetLowering::LowerFormalArguments(
   }
 
   MachineFunction &MF = DAG.getMachineFunction();
-  MVT XLenVT = Subtarget.getXLenVT();
-  EVT PtrVT = getPointerTy(DAG.getDataLayout());
 
-  if (IsVarArg)
-    report_fatal_error("VarArg not supported");
+  const Function &Func = MF.getFunction();
+  if (Func.hasFnAttribute("interrupt")) {
+    if (!Func.arg_empty())
+      report_fatal_error(
+          "Functions with the interrupt attribute cannot have arguments!");
+
+    StringRef Kind =
+        MF.getFunction().getFnAttribute("interrupt").getValueAsString();
+
+    if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine"))
+      report_fatal_error(
+          "Function interrupt attribute argument not supported!");
+  }
+
+  EVT PtrVT = getPointerTy(DAG.getDataLayout());
+  MVT XLenVT = Subtarget.getXLenVT();
+  unsigned XLenInBytes = Subtarget.getXLen() / 8;
+  // Used with varargs to accumulate store chains.
+  std::vector<SDValue> OutChains;
 
   // Assign locations to all of the incoming arguments.
   SmallVector<CCValAssign, 16> ArgLocs;
@@ -595,7 +997,11 @@ SDValue RISCVTargetLowering::LowerFormalArguments(
     CCValAssign &VA = ArgLocs[i];
     assert(VA.getLocVT() == XLenVT && "Unhandled argument type");
     SDValue ArgValue;
-    if (VA.isRegLoc())
+    // Passing f64 on RV32D with a soft float ABI must be handled as a special
+    // case.
+    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64)
+      ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, DL);
+    else if (VA.isRegLoc())
       ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL);
     else
      ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
@@ -621,9 +1027,155 @@ SDValue RISCVTargetLowering::LowerFormalArguments(
     }
     InVals.push_back(ArgValue);
   }
+
+  if (IsVarArg) {
+    ArrayRef<MCPhysReg> ArgRegs = makeArrayRef(ArgGPRs);
+    unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
+    const TargetRegisterClass *RC = &RISCV::GPRRegClass;
+    MachineFrameInfo &MFI = MF.getFrameInfo();
+    MachineRegisterInfo &RegInfo = MF.getRegInfo();
+    RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
+
+    // Offset of the first variable argument from stack pointer, and size of
+    // the vararg save area. For now, the varargs save area is either zero or
+    // large enough to hold a0-a7.
+    int VaArgOffset, VarArgsSaveSize;
+
+    // If all registers are allocated, then all varargs must be passed on the
+    // stack and we don't need to save any argregs.
+    if (ArgRegs.size() == Idx) {
+      VaArgOffset = CCInfo.getNextStackOffset();
+      VarArgsSaveSize = 0;
+    } else {
+      VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
+      VaArgOffset = -VarArgsSaveSize;
+    }
+
+    // Record the frame index of the first variable argument, which is
+    // needed by VASTART.
+    int FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
+    RVFI->setVarArgsFrameIndex(FI);
+
+    // If saving an odd number of registers, then create an extra stack slot to
+    // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
+    // offsets to even-numbered registers remain 2*XLEN-aligned.
+    if (Idx % 2) {
+      FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset - (int)XLenInBytes,
+                                 true);
+      VarArgsSaveSize += XLenInBytes;
+    }
+
+    // Copy the integer registers that may have been used for passing varargs
+    // to the vararg save area.
+    for (unsigned I = Idx; I < ArgRegs.size();
+         ++I, VaArgOffset += XLenInBytes) {
+      const unsigned Reg = RegInfo.createVirtualRegister(RC);
+      RegInfo.addLiveIn(ArgRegs[I], Reg);
+      SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
+      FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
+      SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
+      SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
+                                   MachinePointerInfo::getFixedStack(MF, FI));
+      cast<StoreSDNode>(Store.getNode())
+          ->getMemOperand()
+          ->setValue((Value *)nullptr);
+      OutChains.push_back(Store);
+    }
+    RVFI->setVarArgsSaveSize(VarArgsSaveSize);
+  }
+
+  // All stores are grouped in one node to allow the matching between
+  // the size of Ins and InVals. This only happens for vararg functions.
+  if (!OutChains.empty()) {
+    OutChains.push_back(Chain);
+    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
+  }
+
   return Chain;
 }
 
+/// IsEligibleForTailCallOptimization - Check whether the call is eligible
+/// for tail call optimization.
+/// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
+bool RISCVTargetLowering::IsEligibleForTailCallOptimization(
+    CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
+    const SmallVector<CCValAssign, 16> &ArgLocs) const {
+
+  auto &Callee = CLI.Callee;
+  auto CalleeCC = CLI.CallConv;
+  auto IsVarArg = CLI.IsVarArg;
+  auto &Outs = CLI.Outs;
+  auto &Caller = MF.getFunction();
+  auto CallerCC = Caller.getCallingConv();
+
+  // Do not tail call opt functions with "disable-tail-calls" attribute.
+  if (Caller.getFnAttribute("disable-tail-calls").getValueAsString() == "true")
+    return false;
+
+  // Exception-handling functions need a special set of instructions to
+  // indicate a return to the hardware. Tail-calling another function would
+  // probably break this.
+  // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
+  // should be expanded as new function attributes are introduced.
+  if (Caller.hasFnAttribute("interrupt"))
+    return false;
+
+  // Do not tail call opt functions with varargs.
+  if (IsVarArg)
+    return false;
+
+  // Do not tail call opt if the stack is used to pass parameters.
+  if (CCInfo.getNextStackOffset() != 0)
+    return false;
+
+  // Do not tail call opt if any parameters need to be passed indirectly.
+  // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
+  // passed indirectly.
+  // So the address of the value will be passed in a register, or if not
+  // available, then the address is put on the stack. When passing indirectly,
+  // space on the stack often needs to be allocated to store the value. In
+  // this case the CCInfo.getNextStackOffset() != 0 check is not enough and we
+  // need to check if any CCValAssign ArgLocs are passed
+  // CCValAssign::Indirect.
+  for (auto &VA : ArgLocs)
+    if (VA.getLocInfo() == CCValAssign::Indirect)
+      return false;
+
+  // Do not tail call opt if either caller or callee uses struct return
+  // semantics.
+  auto IsCallerStructRet = Caller.hasStructRetAttr();
+  auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
+  if (IsCallerStructRet || IsCalleeStructRet)
+    return false;
+
+  // Externally-defined functions with weak linkage should not be
+  // tail-called. The behaviour of branch instructions in this situation (as
+  // used for tail calls) is implementation-defined, so we cannot rely on the
+  // linker replacing the tail call with a return.
+  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+    const GlobalValue *GV = G->getGlobal();
+    if (GV->hasExternalWeakLinkage())
+      return false;
+  }
+
+  // The callee has to preserve all registers the caller needs to preserve.
+  const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
+  const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
+  if (CalleeCC != CallerCC) {
+    const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
+    if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
+      return false;
+  }
+
+  // Byval parameters hand the function a pointer directly into the stack area
+  // we want to reuse during a tail call. Working around this *is* possible
+  // but less efficient and uglier in LowerCall.
+  for (auto &Arg : Outs)
+    if (Arg.Flags.isByVal())
+      return false;
+
+  return true;
+}
+
 // Lower a call to a callseq_start + CALL + callseq_end chain, and add input
 // and output parameter nodes.
 SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
@@ -635,22 +1187,29 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
   SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
   SDValue Chain = CLI.Chain;
   SDValue Callee = CLI.Callee;
-  CLI.IsTailCall = false;
+  bool &IsTailCall = CLI.IsTailCall;
   CallingConv::ID CallConv = CLI.CallConv;
   bool IsVarArg = CLI.IsVarArg;
   EVT PtrVT = getPointerTy(DAG.getDataLayout());
   MVT XLenVT = Subtarget.getXLenVT();
 
-  if (IsVarArg) {
-    report_fatal_error("LowerCall with varargs not implemented");
-  }
-
   MachineFunction &MF = DAG.getMachineFunction();
 
   // Analyze the operands of the call, assigning locations to each operand.
   SmallVector<CCValAssign, 16> ArgLocs;
   CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
-  analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false);
+  analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI);
+
+  // Check if it's really possible to do a tail call.
+  if (IsTailCall)
+    IsTailCall =
+        IsEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
+
+  if (IsTailCall)
+    ++NumTailCalls;
+  else if (CLI.CS && CLI.CS.isMustTailCall())
+    report_fatal_error("failed to perform tail call elimination on a call "
+                       "site marked musttail");
 
   // Get a count of how many bytes are to be pushed on the stack.
   unsigned NumBytes = ArgCCInfo.getNextStackOffset();
 
@@ -673,12 +1232,13 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
 
     Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Align,
                           /*IsVolatile=*/false,
                          /*AlwaysInline=*/false,
-                          /*isTailCall=*/false, MachinePointerInfo(),
+                          IsTailCall, MachinePointerInfo(),
                           MachinePointerInfo());
     ByValArgs.push_back(FIPtr);
   }
 
-  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
+  if (!IsTailCall)
+    Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
 
   // Copy argument values to their designated locations.
   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
@@ -689,11 +1249,45 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
     SDValue ArgValue = OutVals[i];
     ISD::ArgFlagsTy Flags = Outs[i].Flags;
 
+    // Handle passing f64 on RV32D with a soft float ABI as a special case.
+    bool IsF64OnRV32DSoftABI =
+        VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64;
+    if (IsF64OnRV32DSoftABI && VA.isRegLoc()) {
+      SDValue SplitF64 = DAG.getNode(
+          RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
+      SDValue Lo = SplitF64.getValue(0);
+      SDValue Hi = SplitF64.getValue(1);
+
+      unsigned RegLo = VA.getLocReg();
+      RegsToPass.push_back(std::make_pair(RegLo, Lo));
+
+      if (RegLo == RISCV::X17) {
+        // Second half of f64 is passed on the stack.
+        // Work out the address of the stack slot.
+        if (!StackPtr.getNode())
+          StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
+        // Emit the store.
+        MemOpChains.push_back(
+            DAG.getStore(Chain, DL, Hi, StackPtr, MachinePointerInfo()));
+      } else {
+        // Second half of f64 is passed in another GPR.
+        unsigned RegHigh = RegLo + 1;
+        RegsToPass.push_back(std::make_pair(RegHigh, Hi));
+      }
+      continue;
+    }
+
+    // IsF64OnRV32DSoftABI && VA.isMemLoc() is handled below in the same way
+    // as any other MemLoc.
+
     // Promote the value if needed.
     // For now, only handle fully promoted and indirect arguments.
     switch (VA.getLocInfo()) {
     case CCValAssign::Full:
       break;
+    case CCValAssign::BCvt:
+      ArgValue = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), ArgValue);
+      break;
    case CCValAssign::Indirect: {
       // Store the argument in a stack slot and pass its address.
       SDValue SpillSlot = DAG.CreateStackTemporary(Outs[i].ArgVT);
@@ -731,6 +1325,8 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
       RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
     } else {
       assert(VA.isMemLoc() && "Argument not register or memory");
+      assert(!IsTailCall && "Tail call not allowed if stack is used "
+                            "for passing parameters");
 
       // Work out the address of the stack slot.
       if (!StackPtr.getNode())
@@ -757,10 +1353,13 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
     Glue = Chain.getValue(1);
   }
 
-  if (isa<GlobalAddressSDNode>(Callee)) {
-    Callee = lowerGlobalAddress(Callee, DAG);
-  } else if (isa<ExternalSymbolSDNode>(Callee)) {
-    Callee = lowerExternalSymbol(Callee, DAG);
+  // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
+  // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
+  // split it and the direct call can then be matched by PseudoCALL.
+  if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
+    Callee = DAG.getTargetGlobalAddress(S->getGlobal(), DL, PtrVT, 0, 0);
+  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
+    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, 0);
   }
 
   // The first call operand is the chain and the second is the target address.
@@ -773,11 +1372,13 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
   for (auto &Reg : RegsToPass)
     Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
 
-  // Add a register mask operand representing the call-preserved registers.
-  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
-  const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
-  assert(Mask && "Missing call preserved mask for calling convention");
-  Ops.push_back(DAG.getRegisterMask(Mask));
+  if (!IsTailCall) {
+    // Add a register mask operand representing the call-preserved registers.
+    const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
+    const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
+    assert(Mask && "Missing call preserved mask for calling convention");
+    Ops.push_back(DAG.getRegisterMask(Mask));
+  }
 
   // Glue the call to the argument copies, if any.
   if (Glue.getNode())
@@ -785,6 +1386,12 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
 
   // Emit the call.
   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+
+  if (IsTailCall) {
+    MF.getFrameInfo().setHasTailCall();
+    return DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops);
+  }
+
   Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops);
   Glue = Chain.getValue(1);
 
@@ -802,13 +1409,32 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
 
   // Copy all of the result registers out of their specified physreg.
   for (auto &VA : RVLocs) {
-    // Copy the value out, gluing the copy to the end of the call sequence.
-    SDValue RetValue = DAG.getCopyFromReg(Chain, DL, VA.getLocReg(),
-                                          VA.getLocVT(), Glue);
+    // Copy the value out.
+    SDValue RetValue =
+        DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
+    // Glue the RetValue to the end of the call sequence.
     Chain = RetValue.getValue(1);
     Glue = RetValue.getValue(2);
 
+    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
+      assert(VA.getLocReg() == ArgGPRs[0] && "Unexpected reg assignment");
+      SDValue RetValue2 =
+          DAG.getCopyFromReg(Chain, DL, ArgGPRs[1], MVT::i32, Glue);
+      Chain = RetValue2.getValue(1);
+      Glue = RetValue2.getValue(2);
+      RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
+                             RetValue2);
+    }
+
+    switch (VA.getLocInfo()) {
+    default:
+      llvm_unreachable("Unknown loc info!");
+    case CCValAssign::Full:
+      break;
+    case CCValAssign::BCvt:
+      RetValue = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), RetValue);
+      break;
+    }
 
-    assert(VA.getLocInfo() == CCValAssign::Full && "Unknown loc info!");
     InVals.push_back(RetValue);
   }
 
@@ -824,22 +1450,34 @@ bool RISCVTargetLowering::CanLowerReturn(
     MVT VT = Outs[i].VT;
     ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
     if (CC_RISCV(MF.getDataLayout(), i, VT, VT, CCValAssign::Full, ArgFlags,
-                 CCInfo, /*IsFixed=*/true, /*IsRet=*/true))
+                 CCInfo, /*IsFixed=*/true, /*IsRet=*/true, nullptr))
      return false;
   }
   return true;
 }
 
+static SDValue packIntoRegLoc(SelectionDAG &DAG, SDValue Val,
+                              const CCValAssign &VA, const SDLoc &DL) {
+  EVT LocVT = VA.getLocVT();
+
+  switch (VA.getLocInfo()) {
+  default:
+    llvm_unreachable("Unexpected CCValAssign::LocInfo");
+  case CCValAssign::Full:
+    break;
+  case CCValAssign::BCvt:
+    Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
+    break;
+  }
+  return Val;
+}
+
 SDValue
 RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                                  bool IsVarArg,
                                  const SmallVectorImpl<ISD::OutputArg> &Outs,
                                  const SmallVectorImpl<SDValue> &OutVals,
                                  const SDLoc &DL, SelectionDAG &DAG) const {
-  if (IsVarArg) {
-    report_fatal_error("VarArg not supported");
-  }
-
   // Stores the assignment of the return value to a location.
   SmallVector<CCValAssign, 16> RVLocs;
 
@@ -847,9 +1485,10 @@ RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
   CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
                  *DAG.getContext());
 
-  analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true);
+  analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
+                    nullptr);
 
-  SDValue Flag;
+  SDValue Glue;
   SmallVector<SDValue, 4> RetOps(1, Chain);
 
   // Copy the result values into the output registers.
@@ -857,21 +1496,60 @@ RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
     SDValue Val = OutVals[i];
     CCValAssign &VA = RVLocs[i];
     assert(VA.isRegLoc() && "Can only return in registers!");
-    assert(VA.getLocInfo() == CCValAssign::Full &&
-           "Unexpected CCValAssign::LocInfo");
 
-    Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Flag);
+    if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
+      // Handle returning f64 on RV32D with a soft float ABI.
+      assert(VA.isRegLoc() && "Expected return via registers");
+      SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
+                                     DAG.getVTList(MVT::i32, MVT::i32), Val);
+      SDValue Lo = SplitF64.getValue(0);
+      SDValue Hi = SplitF64.getValue(1);
+      unsigned RegLo = VA.getLocReg();
+      unsigned RegHi = RegLo + 1;
+      Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
+      Glue = Chain.getValue(1);
+      RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
+      Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
+      Glue = Chain.getValue(1);
+      RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
+    } else {
+      // Handle a 'normal' return.
+      Val = packIntoRegLoc(DAG, Val, VA, DL);
+      Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
 
-    // Guarantee that all emitted copies are stuck together.
-    Flag = Chain.getValue(1);
-    RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
+      // Guarantee that all emitted copies are stuck together.
+      Glue = Chain.getValue(1);
+      RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
+    }
   }
 
   RetOps[0] = Chain; // Update chain.
 
-  // Add the flag if we have it.
-  if (Flag.getNode()) {
-    RetOps.push_back(Flag);
+  // Add the glue node if we have it.
+  if (Glue.getNode()) {
+    RetOps.push_back(Glue);
+  }
+
+  // Interrupt service routines use different return instructions.
+  const Function &Func = DAG.getMachineFunction().getFunction();
+  if (Func.hasFnAttribute("interrupt")) {
+    if (!Func.getReturnType()->isVoidTy())
+      report_fatal_error(
+          "Functions with the interrupt attribute must have void return type!");
+
+    MachineFunction &MF = DAG.getMachineFunction();
+    StringRef Kind =
+        MF.getFunction().getFnAttribute("interrupt").getValueAsString();
+
+    unsigned RetOpc;
+    if (Kind == "user")
+      RetOpc = RISCVISD::URET_FLAG;
+    else if (Kind == "supervisor")
+      RetOpc = RISCVISD::SRET_FLAG;
+    else
+      RetOpc = RISCVISD::MRET_FLAG;
+
+    return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
   }
 
   return DAG.getNode(RISCVISD::RET_FLAG, DL, MVT::Other, RetOps);
@@ -883,10 +1561,58 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
     break;
   case RISCVISD::RET_FLAG:
     return "RISCVISD::RET_FLAG";
+  case RISCVISD::URET_FLAG:
+    return "RISCVISD::URET_FLAG";
+  case RISCVISD::SRET_FLAG:
+    return "RISCVISD::SRET_FLAG";
+  case RISCVISD::MRET_FLAG:
+    return "RISCVISD::MRET_FLAG";
   case RISCVISD::CALL:
     return "RISCVISD::CALL";
   case RISCVISD::SELECT_CC:
     return "RISCVISD::SELECT_CC";
+  case RISCVISD::BuildPairF64:
+    return "RISCVISD::BuildPairF64";
+  case RISCVISD::SplitF64:
+    return "RISCVISD::SplitF64";
+  case RISCVISD::TAIL:
+    return "RISCVISD::TAIL";
   }
   return nullptr;
 }
+
+std::pair<unsigned, const TargetRegisterClass *>
+RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
+                                                  StringRef Constraint,
+                                                  MVT VT) const {
+  // First, see if this is a constraint that directly corresponds to a
+  // RISCV register class.
+  if (Constraint.size() == 1) {
+    switch (Constraint[0]) {
+    case 'r':
+      return std::make_pair(0U, &RISCV::GPRRegClass);
+    default:
+      break;
+    }
+  }
+
+  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
+}
+
+Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
+                                                   Instruction *Inst,
+                                                   AtomicOrdering Ord) const {
+  if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
+    return Builder.CreateFence(Ord);
+  if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
+    return Builder.CreateFence(AtomicOrdering::Release);
+  return nullptr;
+}
+
+Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
+                                                    Instruction *Inst,
+                                                    AtomicOrdering Ord) const {
+  if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
+    return Builder.CreateFence(AtomicOrdering::Acquire);
+  return nullptr;
+}
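For illustration, a minimal sketch of how the new "interrupt" attribute handling above is exercised from source code. This is an assumption rather than part of the commit: it presumes a Clang/GCC-style frontend that forwards __attribute__((interrupt(...))) to the IR "interrupt" function attribute, and the handler name is hypothetical.

    // LowerFormalArguments rejects handlers that take arguments or use a kind
    // other than "user"/"supervisor"/"machine"; LowerReturn then emits
    // RISCVISD::URET_FLAG, SRET_FLAG or MRET_FLAG instead of RET_FLAG.
    __attribute__((interrupt("machine")))
    void machine_trap_handler(void) {
      // Handler body; the return type must be void or a fatal error is raised.
    }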
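Similarly, a worked example of the variadic register-alignment rule added to CC_RISCV. The declaration and the register assignments in the comments are illustrative assumptions for RV32, derived from the allocation logic above rather than stated by the commit:

    int sum_ll(int n, ...); // hypothetical variadic callee

    void caller(long long v) {
      // n occupies a0, leaving a1 as the first free GPR. Because v is a
      // variadic argument with 2*XLEN (8-byte) size and alignment, CC_RISCV
      // allocates a dummy register to skip the odd-indexed a1, so v travels
      // in the aligned pair {a2, a3}.
      sum_ll(1, v);
    }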