author | Dimitry Andric <dim@FreeBSD.org> | 2020-08-24 17:43:23 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2020-08-24 17:43:23 +0000 |
commit | 75b4d546cd9d9f62a5a3466e6df5629262aef7b1 (patch) | |
tree | b141eb279efd6e78815df4a94ec3b5e5a8b1bf7f /contrib/llvm-project/llvm/lib/Target/AArch64 | |
parent | d065b3eb9ed6785a19dc7d96fb34a0050747c663 (diff) | |
parent | bdc6feb28f528ee3a365ca97577f7312ffa0dc65 (diff) | |
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/AArch64')
9 files changed, 200 insertions, 26 deletions
```diff
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 4789a9f02937..83653dcbb8cf 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -148,6 +148,7 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/LEB128.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetMachine.h"
@@ -399,12 +400,102 @@ static bool ShouldSignReturnAddress(MachineFunction &MF) {
   return false;
 }
 
+// Convenience function to create a DWARF expression for
+//   Expr + NumBytes + NumVGScaledBytes * AArch64::VG
+static void appendVGScaledOffsetExpr(SmallVectorImpl<char> &Expr,
+                                     int NumBytes, int NumVGScaledBytes, unsigned VG,
+                                     llvm::raw_string_ostream &Comment) {
+  uint8_t buffer[16];
+
+  if (NumBytes) {
+    Expr.push_back(dwarf::DW_OP_consts);
+    Expr.append(buffer, buffer + encodeSLEB128(NumBytes, buffer));
+    Expr.push_back((uint8_t)dwarf::DW_OP_plus);
+    Comment << (NumBytes < 0 ? " - " : " + ") << std::abs(NumBytes);
+  }
+
+  if (NumVGScaledBytes) {
+    Expr.push_back((uint8_t)dwarf::DW_OP_consts);
+    Expr.append(buffer, buffer + encodeSLEB128(NumVGScaledBytes, buffer));
+
+    Expr.push_back((uint8_t)dwarf::DW_OP_bregx);
+    Expr.append(buffer, buffer + encodeULEB128(VG, buffer));
+    Expr.push_back(0);
+
+    Expr.push_back((uint8_t)dwarf::DW_OP_mul);
+    Expr.push_back((uint8_t)dwarf::DW_OP_plus);
+
+    Comment << (NumVGScaledBytes < 0 ? " - " : " + ")
+            << std::abs(NumVGScaledBytes) << " * VG";
+  }
+}
+
+// Creates an MCCFIInstruction:
+//    { DW_CFA_def_cfa_expression, ULEB128 (sizeof expr), expr }
+MCCFIInstruction AArch64FrameLowering::createDefCFAExpressionFromSP(
+    const TargetRegisterInfo &TRI, const StackOffset &OffsetFromSP) const {
+  int64_t NumBytes, NumVGScaledBytes;
+  OffsetFromSP.getForDwarfOffset(NumBytes, NumVGScaledBytes);
+
+  std::string CommentBuffer = "sp";
+  llvm::raw_string_ostream Comment(CommentBuffer);
+
+  // Build up the expression (SP + NumBytes + NumVGScaledBytes * AArch64::VG)
+  SmallString<64> Expr;
+  Expr.push_back((uint8_t)(dwarf::DW_OP_breg0 + /*SP*/ 31));
+  Expr.push_back(0);
+  appendVGScaledOffsetExpr(Expr, NumBytes, NumVGScaledBytes,
+                           TRI.getDwarfRegNum(AArch64::VG, true), Comment);
+
+  // Wrap this into DW_CFA_def_cfa.
+  SmallString<64> DefCfaExpr;
+  DefCfaExpr.push_back(dwarf::DW_CFA_def_cfa_expression);
+  uint8_t buffer[16];
+  DefCfaExpr.append(buffer,
+                    buffer + encodeULEB128(Expr.size(), buffer));
+  DefCfaExpr.append(Expr.str());
+  return MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str(),
+                                        Comment.str());
+}
+
+MCCFIInstruction AArch64FrameLowering::createCfaOffset(
+    const TargetRegisterInfo &TRI, unsigned Reg,
+    const StackOffset &OffsetFromDefCFA) const {
+  int64_t NumBytes, NumVGScaledBytes;
+  OffsetFromDefCFA.getForDwarfOffset(NumBytes, NumVGScaledBytes);
+
+  unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
+
+  // Non-scalable offsets can use DW_CFA_offset directly.
+  if (!NumVGScaledBytes)
+    return MCCFIInstruction::createOffset(nullptr, DwarfReg, NumBytes);
+
+  std::string CommentBuffer;
+  llvm::raw_string_ostream Comment(CommentBuffer);
+  Comment << printReg(Reg, &TRI) << " @ cfa";
+
+  // Build up expression (NumBytes + NumVGScaledBytes * AArch64::VG)
+  SmallString<64> OffsetExpr;
+  appendVGScaledOffsetExpr(OffsetExpr, NumBytes, NumVGScaledBytes,
+                           TRI.getDwarfRegNum(AArch64::VG, true), Comment);
+
+  // Wrap this into DW_CFA_expression
+  SmallString<64> CfaExpr;
+  CfaExpr.push_back(dwarf::DW_CFA_expression);
+  uint8_t buffer[16];
+  CfaExpr.append(buffer, buffer + encodeULEB128(DwarfReg, buffer));
+  CfaExpr.append(buffer, buffer + encodeULEB128(OffsetExpr.size(), buffer));
+  CfaExpr.append(OffsetExpr.str());
+
+  return MCCFIInstruction::createEscape(nullptr, CfaExpr.str(), Comment.str());
+}
+
 void AArch64FrameLowering::emitCalleeSavedFrameMoves(
     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
   MachineFunction &MF = *MBB.getParent();
   MachineFrameInfo &MFI = MF.getFrameInfo();
   const TargetSubtargetInfo &STI = MF.getSubtarget();
-  const MCRegisterInfo *MRI = STI.getRegisterInfo();
+  const TargetRegisterInfo *TRI = STI.getRegisterInfo();
   const TargetInstrInfo *TII = STI.getInstrInfo();
   DebugLoc DL = MBB.findDebugLoc(MBBI);
 
@@ -415,11 +506,26 @@ void AArch64FrameLowering::emitCalleeSavedFrameMoves(
 
   for (const auto &Info : CSI) {
     unsigned Reg = Info.getReg();
-    int64_t Offset =
-        MFI.getObjectOffset(Info.getFrameIdx()) - getOffsetOfLocalArea();
-    unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
-    unsigned CFIIndex = MF.addFrameInst(
-        MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
+
+    // Not all unwinders may know about SVE registers, so assume the lowest
+    // common denominator.
+    unsigned NewReg;
+    if (static_cast<const AArch64RegisterInfo *>(TRI)->regNeedsCFI(Reg, NewReg))
+      Reg = NewReg;
+    else
+      continue;
+
+    StackOffset Offset;
+    if (MFI.getStackID(Info.getFrameIdx()) == TargetStackID::SVEVector) {
+      AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+      Offset = StackOffset(MFI.getObjectOffset(Info.getFrameIdx()), MVT::nxv1i8) -
+               StackOffset(AFI->getCalleeSavedStackSize(MFI), MVT::i8);
+    } else {
+      Offset = {MFI.getObjectOffset(Info.getFrameIdx()) -
+                    getOffsetOfLocalArea(),
+                MVT::i8};
+    }
+    unsigned CFIIndex = MF.addFrameInst(createCfaOffset(*TRI, Reg, Offset));
     BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
         .addCFIIndex(CFIIndex)
         .setMIFlags(MachineInstr::FrameSetup);
@@ -1383,9 +1489,18 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
           .addCFIIndex(CFIIndex)
           .setMIFlags(MachineInstr::FrameSetup);
     } else {
-      // Encode the stack size of the leaf function.
-      unsigned CFIIndex = MF.addFrameInst(
-          MCCFIInstruction::cfiDefCfaOffset(nullptr, MFI.getStackSize()));
+      unsigned CFIIndex;
+      if (SVEStackSize) {
+        const TargetSubtargetInfo &STI = MF.getSubtarget();
+        const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
+        StackOffset TotalSize =
+            SVEStackSize + StackOffset((int64_t)MFI.getStackSize(), MVT::i8);
+        CFIIndex = MF.addFrameInst(createDefCFAExpressionFromSP(TRI, TotalSize));
+      } else {
+        // Encode the stack size of the leaf function.
+        CFIIndex = MF.addFrameInst(
+            MCCFIInstruction::cfiDefCfaOffset(nullptr, MFI.getStackSize()));
+      }
       BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
           .addCFIIndex(CFIIndex)
           .setMIFlags(MachineInstr::FrameSetup);
@@ -2006,6 +2121,7 @@ static void computeCalleeSaveRegisterPairs(
   // available unwind codes. This flag assures that the alignment fixup is done
   // only once, as intended.
   bool FixupDone = false;
+
   for (unsigned i = 0; i < Count; ++i) {
     RegPairInfo RPI;
     RPI.Reg1 = CSI[i].getReg();
```
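To make the escape bytes concrete, here is a standalone sketch. Assumptions flagged up front: the `DW_OP_*`/`DW_CFA_*` values are hand-copied DWARF constants rather than the `llvm::dwarf` definitions, the offsets 32 and 16 are invented for illustration, and only VG's DWARF register number 46 is taken from the `AArch64RegisterInfo.td` hunk below. It reproduces the byte sequence `createDefCFAExpressionFromSP` would emit for `CFA = SP + 32 + 16 * VG`:

```cpp
// Standalone sketch, not LLVM code: build the escape for
//   DW_CFA_def_cfa_expression: sp + 32 + 16 * VG
#include <cstdint>
#include <cstdio>
#include <vector>

// Minimal LEB128 encoders (same encoding llvm/Support/LEB128.h provides).
static void encodeSLEB128(int64_t V, std::vector<uint8_t> &Out) {
  bool More;
  do {
    uint8_t Byte = V & 0x7f;
    V >>= 7;
    More = !((V == 0 && !(Byte & 0x40)) || (V == -1 && (Byte & 0x40)));
    Out.push_back(Byte | (More ? 0x80 : 0));
  } while (More);
}
static void encodeULEB128(uint64_t V, std::vector<uint8_t> &Out) {
  do {
    uint8_t Byte = V & 0x7f;
    V >>= 7;
    Out.push_back(Byte | (V ? 0x80 : 0));
  } while (V);
}

int main() {
  // DWARF 5 constants, written out by hand here.
  const uint8_t DW_OP_consts = 0x11, DW_OP_mul = 0x1e, DW_OP_plus = 0x22,
                DW_OP_breg31 = 0x8f, DW_OP_bregx = 0x92,
                DW_CFA_def_cfa_expression = 0x0f;
  const unsigned VG = 46; // DwarfRegNum<[46]> from AArch64RegisterInfo.td

  // Expression built the same way appendVGScaledOffsetExpr builds it.
  std::vector<uint8_t> Expr = {DW_OP_breg31, 0}; // SP + 0
  Expr.push_back(DW_OP_consts);
  encodeSLEB128(32, Expr);                       // NumBytes
  Expr.push_back(DW_OP_plus);
  Expr.push_back(DW_OP_consts);
  encodeSLEB128(16, Expr);                       // NumVGScaledBytes
  Expr.push_back(DW_OP_bregx);
  encodeULEB128(VG, Expr);
  Expr.push_back(0);                             // bregx offset of 0
  Expr.push_back(DW_OP_mul);
  Expr.push_back(DW_OP_plus);

  // Wrap as { DW_CFA_def_cfa_expression, ULEB128 length, expr }.
  std::vector<uint8_t> CFI = {DW_CFA_def_cfa_expression};
  encodeULEB128(Expr.size(), CFI);
  CFI.insert(CFI.end(), Expr.begin(), Expr.end());

  for (uint8_t B : CFI)
    std::printf("%02x ", B); // 0f 0c 8f 00 11 20 22 11 10 92 2e 00 1e 22
  std::printf("\n");
  return 0;
}
```

An unwinder evaluating this expression multiplies the runtime value of VG by 16 and adds it to SP + 32, which is exactly how a frame whose size depends on the SVE vector length has to be described.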
```diff
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.h
index 444740cb50ab..1ca8c3e9e2bf 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FrameLowering.h
@@ -18,6 +18,8 @@
 
 namespace llvm {
 
+class MCCFIInstruction;
+
 class AArch64FrameLowering : public TargetFrameLowering {
 public:
   explicit AArch64FrameLowering()
@@ -119,6 +121,11 @@ private:
   int64_t assignSVEStackObjectOffsets(MachineFrameInfo &MF,
                                       int &MinCSFrameIndex,
                                       int &MaxCSFrameIndex) const;
+  MCCFIInstruction
+  createDefCFAExpressionFromSP(const TargetRegisterInfo &TRI,
+                               const StackOffset &OffsetFromSP) const;
+  MCCFIInstruction createCfaOffset(const TargetRegisterInfo &MRI, unsigned DwarfReg,
+                                   const StackOffset &OffsetFromDefCFA) const;
   bool shouldCombineCSRLocalStackBumpInEpilogue(MachineBasicBlock &MBB,
                                                 unsigned StackBumpBytes) const;
 };
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 1500da2fdfc7..45bfa85bdc07 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -4107,6 +4107,7 @@ static bool canGuaranteeTCO(CallingConv::ID CC) {
 static bool mayTailCallThisCC(CallingConv::ID CC) {
   switch (CC) {
   case CallingConv::C:
+  case CallingConv::AArch64_SVE_VectorCall:
   case CallingConv::PreserveMost:
   case CallingConv::Swift:
     return true;
@@ -4126,6 +4127,15 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization(
   MachineFunction &MF = DAG.getMachineFunction();
   const Function &CallerF = MF.getFunction();
   CallingConv::ID CallerCC = CallerF.getCallingConv();
+
+  // If this function uses the C calling convention but has an SVE signature,
+  // then it preserves more registers and should assume the SVE_VectorCall CC.
+  // The check for matching callee-saved regs will determine whether it is
+  // eligible for TCO.
+  if (CallerCC == CallingConv::C &&
+      AArch64RegisterInfo::hasSVEArgsOrReturn(&MF))
+    CallerCC = CallingConv::AArch64_SVE_VectorCall;
+
   bool CCMatch = CallerCC == CalleeCC;
 
   // When using the Windows calling convention on a non-windows OS, we want
@@ -4313,6 +4323,20 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
   bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
   bool IsSibCall = false;
 
+  // Check callee args/returns for SVE registers and set calling convention
+  // accordingly.
+  if (CallConv == CallingConv::C) {
+    bool CalleeOutSVE = any_of(Outs, [](ISD::OutputArg &Out){
+      return Out.VT.isScalableVector();
+    });
+    bool CalleeInSVE = any_of(Ins, [](ISD::InputArg &In){
+      return In.VT.isScalableVector();
+    });
+
+    if (CalleeInSVE || CalleeOutSVE)
+      CallConv = CallingConv::AArch64_SVE_VectorCall;
+  }
+
   if (IsTailCall) {
     // Check if it's really possible to do a tail call.
     IsTailCall = isEligibleForTailCallOptimization(
@@ -4666,20 +4690,6 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
     Ops.push_back(DAG.getRegister(RegToPass.first,
                                   RegToPass.second.getValueType()));
 
-  // Check callee args/returns for SVE registers and set calling convention
-  // accordingly.
-  if (CallConv == CallingConv::C) {
-    bool CalleeOutSVE = any_of(Outs, [](ISD::OutputArg &Out){
-      return Out.VT.isScalableVector();
-    });
-    bool CalleeInSVE = any_of(Ins, [](ISD::InputArg &In){
-      return In.VT.isScalableVector();
-    });
-
-    if (CalleeInSVE || CalleeOutSVE)
-      CallConv = CallingConv::AArch64_SVE_VectorCall;
-  }
-
   // Add a register mask operand representing the call-preserved registers.
   const uint32_t *Mask;
   const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
```
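The effect of moving this check: a call whose arguments or return value are scalable vectors is now reclassified as `AArch64_SVE_VectorCall` *before* the tail-call eligibility check runs, so caller and callee are compared under the same callee-saved register set. A hypothetical caller/callee pair (not from the patch) using standard ACLE types from `<arm_sve.h>`, compiled with something like `-march=armv8-a+sve`:

```cpp
// Hypothetical example: the scalable-vector argument and return types give
// these functions an SVE signature, so the backend treats the call as
// AArch64_SVE_VectorCall rather than plain CallingConv::C.
#include <arm_sve.h>

svfloat32_t scale(svfloat32_t V, float F);  // callee with an SVE signature

svfloat32_t twice(svfloat32_t V) {
  // Caller and callee now agree on the larger callee-saved set (z8-z23,
  // p4-p15), so the matching-CSR check can allow this to become a tail call.
  return scale(V, 2.0f);
}
```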
```diff
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
index 83a488afc797..3e9c8c7b6df2 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -40,7 +40,30 @@ AArch64RegisterInfo::AArch64RegisterInfo(const Triple &TT)
   AArch64_MC::initLLVMToCVRegMapping(this);
 }
 
-static bool hasSVEArgsOrReturn(const MachineFunction *MF) {
+/// Return whether the register needs a CFI entry. Not all unwinders may know
+/// about SVE registers, so we assume the lowest common denominator, i.e. the
+/// callee-saves required by the base ABI. For the SVE registers z8-z15 only
+/// the lower 64 bits (d8-d15) need to be saved. The lower 64-bit subreg is
+/// returned in \p RegToUseForCFI.
+bool AArch64RegisterInfo::regNeedsCFI(unsigned Reg,
+                                      unsigned &RegToUseForCFI) const {
+  if (AArch64::PPRRegClass.contains(Reg))
+    return false;
+
+  if (AArch64::ZPRRegClass.contains(Reg)) {
+    RegToUseForCFI = getSubReg(Reg, AArch64::dsub);
+    for (int I = 0; CSR_AArch64_AAPCS_SaveList[I]; ++I) {
+      if (CSR_AArch64_AAPCS_SaveList[I] == RegToUseForCFI)
+        return true;
+    }
+    return false;
+  }
+
+  RegToUseForCFI = Reg;
+  return true;
+}
+
+bool AArch64RegisterInfo::hasSVEArgsOrReturn(const MachineFunction *MF) {
   const Function &F = MF->getFunction();
   return isa<ScalableVectorType>(F.getReturnType()) ||
          any_of(F.args(), [](const Argument &Arg) {
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64RegisterInfo.h b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64RegisterInfo.h
index 22a8ba76c611..7b20f181e76d 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64RegisterInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64RegisterInfo.h
@@ -42,6 +42,8 @@ public:
   void UpdateCustomCallPreservedMask(MachineFunction &MF,
                                      const uint32_t **Mask) const;
 
+  static bool hasSVEArgsOrReturn(const MachineFunction *MF);
+
   /// Code Generation virtual methods...
   const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
   const MCPhysReg *getDarwinCalleeSavedRegs(const MachineFunction *MF) const;
@@ -122,6 +124,7 @@ public:
                              MachineFunction &MF) const override;
 
   unsigned getLocalAddressRegister(const MachineFunction &MF) const;
+  bool regNeedsCFI(unsigned Reg, unsigned &RegToUseForCFI) const;
 };
 
 } // end namespace llvm
```
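The policy `regNeedsCFI` implements can be restated in a few lines. This sketch is an illustrative paraphrase, not LLVM code; the d8-d15 range is the base-AAPCS64 callee-saved FP registers that `CSR_AArch64_AAPCS_SaveList` is consulted for:

```cpp
// Illustrative restatement of the regNeedsCFI policy, not LLVM code.
#include <optional>
#include <string>

std::optional<std::string> cfiRegisterFor(const std::string &Reg) {
  if (Reg.empty() || Reg[0] == 'p')   // predicate registers: no unwind info
    return std::nullopt;
  if (Reg[0] == 'z') {                // z-registers: describe the d-subreg,
    int N = std::stoi(Reg.substr(1));
    if (N >= 8 && N <= 15)            // but only if it is a base-ABI callee-save
      return "d" + Reg.substr(1);
    return std::nullopt;              // otherwise skipped (the `continue` above)
  }
  return Reg;                         // all other registers: CFI entry as-is
}
```

So `cfiRegisterFor("z8")` yields `"d8"`: the save of z8 is reported as a save of its low 64 bits, which is the only part a non-SVE-aware unwinder knows how to restore and the only part the base ABI requires; z16-z23 (callee-saved only under the SVE convention) and all predicate registers produce no entry at all.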
```diff
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64RegisterInfo.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
index bd05c56009a1..54b351fda053 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
@@ -133,6 +133,9 @@ def NZCV  : AArch64Reg<0, "nzcv">;
 // First fault status register
 def FFR : AArch64Reg<0, "ffr">, DwarfRegNum<[47]>;
 
+// Purely virtual Vector Granule (VG) DWARF register
+def VG : AArch64Reg<0, "vg">, DwarfRegNum<[46]>;
+
 // GPR register classes with the intersections of GPR32/GPR32sp and
 // GPR64/GPR64sp for use by the coalescer.
 def GPR32common : RegisterClass<"AArch64", [i32], 32, (sequence "W%u", 0, 30)> {
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 3449a8bd16d2..4f29f2f18185 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -1765,7 +1765,7 @@ multiclass sve_prefetch<SDPatternOperator prefetch, ValueType PredTy, Instructio
   defm : unpred_store<         store, nxv4f16, ST1H_S_IMM, PTRUE_S>;
   defm : unpred_store<         store, nxv2f16, ST1H_D_IMM, PTRUE_D>;
   defm : unpred_store<         store, nxv4f32, ST1W_IMM,   PTRUE_S>;
-  defm : unpred_store<         store, nxv4f32, ST1W_D_IMM, PTRUE_D>;
+  defm : unpred_store<         store, nxv2f32, ST1W_D_IMM, PTRUE_D>;
   defm : unpred_store<         store, nxv2f64, ST1D_IMM,   PTRUE_D>;
 
 multiclass unpred_load<PatFrag Load, ValueType Ty, Instruction RegImmInst,
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64StackOffset.h b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64StackOffset.h
index 6fa1c744f77e..24751a81797d 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64StackOffset.h
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64StackOffset.h
@@ -123,6 +123,18 @@ public:
     }
   }
 
+  void getForDwarfOffset(int64_t &ByteSized, int64_t &VGSized) const {
+    assert(isValid() && "Invalid frame offset");
+
+    // VGSized offsets are divided by '2', because the VG register is
+    // the number of 64-bit granules as opposed to 128-bit vector chunks,
+    // which is how the 'n' in e.g. MVT::nxv1i8 is modelled.
+    // So, for a stack offset of 16 MVT::nxv1i8's, the size is n x 16 bytes.
+    // VG = n * 2 and the DWARF offset must be VG * 8 bytes.
+    ByteSized = Bytes;
+    VGSized = ScalableBytes / 2;
+  }
+
   /// Returns whether the offset is known zero.
   explicit operator bool() const { return Bytes || ScalableBytes; }
```
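The division by two can be sanity-checked with a worked example. This is a standalone sketch of the arithmetic only (the real `StackOffset` tracks `Bytes` and `ScalableBytes` as members, as in the header above):

```cpp
// Standalone check of the VG conversion in getForDwarfOffset().
#include <cassert>
#include <cstdint>

int main() {
  // A stack offset of 16 x MVT::nxv1i8: Bytes = 0, ScalableBytes = 16.
  int64_t Bytes = 0, ScalableBytes = 16;

  // What getForDwarfOffset() computes:
  int64_t ByteSized = Bytes;            // 0
  int64_t VGSized = ScalableBytes / 2;  // 8, so the DWARF offset is 8 * VG

  // Cross-check: with n 128-bit chunks per vector, VG == 2 * n, and the true
  // byte size ScalableBytes * n must equal VGSized * VG at any vector length.
  for (int64_t n = 1; n <= 16; ++n) {
    int64_t VG = 2 * n;
    assert(ScalableBytes * n == VGSized * VG);
  }
  assert(ByteSized == 0 && VGSized == 8);
  return 0;
}
```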
```diff
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp
index 74fe0cdd1ea7..0245dd1d611a 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp
@@ -160,7 +160,7 @@ bool SVEIntrinsicOpts::optimizePTest(IntrinsicInst *I) {
     I->eraseFromParent();
     if (Op1->use_empty())
       Op1->eraseFromParent();
-    if (Op2->use_empty())
+    if (Op1 != Op2 && Op2->use_empty())
       Op2->eraseFromParent();
 
     return true;
```
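The one-line guard in `optimizePTest` fixes a latent double-erase: when the ptest's two operands are the same value, the first `eraseFromParent()` already destroys it, and erasing again through the second pointer would touch freed memory. A reduced analogy (hypothetical, not the LLVM API):

```cpp
// Reduced analogy, not LLVM code: Op1 and Op2 may alias the same object, so
// the second cleanup must be skipped when the pointers compare equal.
struct Value { /* ... */ };

void eraseDeadOperands(Value *Op1, Value *Op2) {
  delete Op1;      // first erase frees the object
  if (Op1 != Op2)  // guard: Op1 == Op2 would otherwise double-delete
    delete Op2;
}
```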