author     Dimitry Andric <dim@FreeBSD.org>    2020-08-24 17:20:50 +0000
committer  Dimitry Andric <dim@FreeBSD.org>    2020-08-24 17:20:50 +0000
commit     bdc6feb28f528ee3a365ca97577f7312ffa0dc65 (patch)
tree       8fc5cf6a8835cc178d70cd0ee29af2513f5c9161 /llvm/lib
parent     10c469f2ae76868106ec8bc8fbac13e0f26552f7 (diff)
Diffstat (limited to 'llvm/lib')
18 files changed, 299 insertions, 81 deletions
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
index d81a9be26d39b..b6a9a95683603 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
@@ -241,6 +241,7 @@ void AsmPrinter::emitCFIInstruction(const MCCFIInstruction &Inst) const {
     OutStreamer->emitCFIGnuArgsSize(Inst.getOffset());
     break;
   case MCCFIInstruction::OpEscape:
+    OutStreamer->AddComment(Inst.getComment());
     OutStreamer->emitCFIEscape(Inst.getValues());
     break;
   case MCCFIInstruction::OpRestore:
diff --git a/llvm/lib/Support/X86TargetParser.cpp b/llvm/lib/Support/X86TargetParser.cpp
index 572d1203aaf21..c629f872df121 100644
--- a/llvm/lib/Support/X86TargetParser.cpp
+++ b/llvm/lib/Support/X86TargetParser.cpp
@@ -37,6 +37,10 @@ public:
     set(I);
   }
 
+  bool any() const {
+    return llvm::any_of(Bits, [](uint64_t V) { return V != 0; });
+  }
+
   constexpr FeatureBitset &set(unsigned I) {
     // GCC <6.2 crashes if this is written in a single statement.
     uint32_t NewBits = Bits[I / 32] | (uint32_t(1) << (I % 32));
@@ -89,6 +93,13 @@ public:
     Result.Bits[I] = ~Bits[I];
     return Result;
   }
+
+  constexpr bool operator!=(const FeatureBitset &RHS) const {
+    for (unsigned I = 0, E = array_lengthof(Bits); I != E; ++I)
+      if (Bits[I] != RHS.Bits[I])
+        return true;
+    return false;
+  }
 };
 
 struct ProcInfo {
@@ -552,11 +563,17 @@ void llvm::X86::getFeaturesForCPU(StringRef CPU,
 // For each feature that is (transitively) implied by this feature, set it.
 static void getImpliedEnabledFeatures(FeatureBitset &Bits,
                                       const FeatureBitset &Implies) {
+  // Fast path: Implies is often empty.
+  if (!Implies.any())
+    return;
+
+  FeatureBitset Prev;
   Bits |= Implies;
-  for (unsigned i = 0; i != CPU_FEATURE_MAX; ++i) {
-    if (Implies[i])
-      getImpliedEnabledFeatures(Bits, FeatureInfos[i].ImpliedFeatures);
-  }
+  do {
+    Prev = Bits;
+    for (unsigned i = CPU_FEATURE_MAX; i;)
+      if (Bits[--i])
+        Bits |= FeatureInfos[i].ImpliedFeatures;
+  } while (Prev != Bits);
 }
 
 /// Create bit vector of features that are implied disabled if the feature
@@ -564,12 +581,14 @@ static void getImpliedEnabledFeatures(FeatureBitset &Bits,
 static void getImpliedDisabledFeatures(FeatureBitset &Bits, unsigned Value) {
   // Check all features looking for any dependent on this feature. If we find
   // one, mark it and recursively find any feature that depend on it.
-  for (unsigned i = 0; i != CPU_FEATURE_MAX; ++i) {
-    if (FeatureInfos[i].ImpliedFeatures[Value]) {
-      Bits.set(i);
-      getImpliedDisabledFeatures(Bits, i);
-    }
-  }
+  FeatureBitset Prev;
+  Bits.set(Value);
+  do {
+    Prev = Bits;
+    for (unsigned i = 0; i != CPU_FEATURE_MAX; ++i)
+      if ((FeatureInfos[i].ImpliedFeatures & Bits).any())
+        Bits.set(i);
+  } while (Prev != Bits);
 }
 
 void llvm::X86::getImpliedFeatures(
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 4789a9f02937a..83653dcbb8cf7 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -148,6 +148,7 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/LEB128.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetMachine.h"
@@ -399,12 +400,102 @@ static bool ShouldSignReturnAddress(MachineFunction &MF) {
   return false;
 }
 
+// Convenience function to create a DWARF expression for
+//   Expr + NumBytes + NumVGScaledBytes * AArch64::VG
+static void appendVGScaledOffsetExpr(SmallVectorImpl<char> &Expr,
+                                     int NumBytes, int NumVGScaledBytes, unsigned VG,
+                                     llvm::raw_string_ostream &Comment) {
+  uint8_t buffer[16];
+
+  if (NumBytes) {
+    Expr.push_back(dwarf::DW_OP_consts);
+    Expr.append(buffer, buffer + encodeSLEB128(NumBytes, buffer));
+    Expr.push_back((uint8_t)dwarf::DW_OP_plus);
+    Comment << (NumBytes < 0 ? " - " : " + ") << std::abs(NumBytes);
+  }
+
+  if (NumVGScaledBytes) {
+    Expr.push_back((uint8_t)dwarf::DW_OP_consts);
+    Expr.append(buffer, buffer + encodeSLEB128(NumVGScaledBytes, buffer));
+
+    Expr.push_back((uint8_t)dwarf::DW_OP_bregx);
+    Expr.append(buffer, buffer + encodeULEB128(VG, buffer));
+    Expr.push_back(0);
+
+    Expr.push_back((uint8_t)dwarf::DW_OP_mul);
+    Expr.push_back((uint8_t)dwarf::DW_OP_plus);
+
+    Comment << (NumVGScaledBytes < 0 ? " - " : " + ")
+            << std::abs(NumVGScaledBytes) << " * VG";
+  }
+}
+
+// Creates an MCCFIInstruction:
+//    { DW_CFA_def_cfa_expression, ULEB128 (sizeof expr), expr }
+MCCFIInstruction AArch64FrameLowering::createDefCFAExpressionFromSP(
+    const TargetRegisterInfo &TRI, const StackOffset &OffsetFromSP) const {
+  int64_t NumBytes, NumVGScaledBytes;
+  OffsetFromSP.getForDwarfOffset(NumBytes, NumVGScaledBytes);
+
+  std::string CommentBuffer = "sp";
+  llvm::raw_string_ostream Comment(CommentBuffer);
+
+  // Build up the expression (SP + NumBytes + NumVGScaledBytes * AArch64::VG)
+  SmallString<64> Expr;
+  Expr.push_back((uint8_t)(dwarf::DW_OP_breg0 + /*SP*/ 31));
+  Expr.push_back(0);
+  appendVGScaledOffsetExpr(Expr, NumBytes, NumVGScaledBytes,
+                           TRI.getDwarfRegNum(AArch64::VG, true), Comment);
+
+  // Wrap this into DW_CFA_def_cfa.
+  SmallString<64> DefCfaExpr;
+  DefCfaExpr.push_back(dwarf::DW_CFA_def_cfa_expression);
+  uint8_t buffer[16];
+  DefCfaExpr.append(buffer,
+                    buffer + encodeULEB128(Expr.size(), buffer));
+  DefCfaExpr.append(Expr.str());
+  return MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str(),
+                                        Comment.str());
+}
+
+MCCFIInstruction AArch64FrameLowering::createCfaOffset(
+    const TargetRegisterInfo &TRI, unsigned Reg,
+    const StackOffset &OffsetFromDefCFA) const {
+  int64_t NumBytes, NumVGScaledBytes;
+  OffsetFromDefCFA.getForDwarfOffset(NumBytes, NumVGScaledBytes);
+
+  unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
+
+  // Non-scalable offsets can use DW_CFA_offset directly.
+  if (!NumVGScaledBytes)
+    return MCCFIInstruction::createOffset(nullptr, DwarfReg, NumBytes);
+
+  std::string CommentBuffer;
+  llvm::raw_string_ostream Comment(CommentBuffer);
+  Comment << printReg(Reg, &TRI) << " @ cfa";
+
+  // Build up expression (NumBytes + NumVGScaledBytes * AArch64::VG)
+  SmallString<64> OffsetExpr;
+  appendVGScaledOffsetExpr(OffsetExpr, NumBytes, NumVGScaledBytes,
+                           TRI.getDwarfRegNum(AArch64::VG, true), Comment);
+
+  // Wrap this into DW_CFA_expression
+  SmallString<64> CfaExpr;
+  CfaExpr.push_back(dwarf::DW_CFA_expression);
+  uint8_t buffer[16];
+  CfaExpr.append(buffer, buffer + encodeULEB128(DwarfReg, buffer));
+  CfaExpr.append(buffer, buffer + encodeULEB128(OffsetExpr.size(), buffer));
+  CfaExpr.append(OffsetExpr.str());
+
+  return MCCFIInstruction::createEscape(nullptr, CfaExpr.str(), Comment.str());
+}
+
 void AArch64FrameLowering::emitCalleeSavedFrameMoves(
     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
   MachineFunction &MF = *MBB.getParent();
   MachineFrameInfo &MFI = MF.getFrameInfo();
   const TargetSubtargetInfo &STI = MF.getSubtarget();
-  const MCRegisterInfo *MRI = STI.getRegisterInfo();
+  const TargetRegisterInfo *TRI = STI.getRegisterInfo();
   const TargetInstrInfo *TII = STI.getInstrInfo();
   DebugLoc DL = MBB.findDebugLoc(MBBI);
@@ -415,11 +506,26 @@ void AArch64FrameLowering::emitCalleeSavedFrameMoves(
 
   for (const auto &Info : CSI) {
     unsigned Reg = Info.getReg();
-    int64_t Offset =
-        MFI.getObjectOffset(Info.getFrameIdx()) - getOffsetOfLocalArea();
-    unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
-    unsigned CFIIndex = MF.addFrameInst(
-        MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
+
+    // Not all unwinders may know about SVE registers, so assume the lowest
+    // common demoninator.
+    unsigned NewReg;
+    if (static_cast<const AArch64RegisterInfo *>(TRI)->regNeedsCFI(Reg, NewReg))
+      Reg = NewReg;
+    else
+      continue;
+
+    StackOffset Offset;
+    if (MFI.getStackID(Info.getFrameIdx()) == TargetStackID::SVEVector) {
+      AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+      Offset = StackOffset(MFI.getObjectOffset(Info.getFrameIdx()), MVT::nxv1i8) -
+               StackOffset(AFI->getCalleeSavedStackSize(MFI), MVT::i8);
+    } else {
+      Offset = {MFI.getObjectOffset(Info.getFrameIdx()) -
+                    getOffsetOfLocalArea(),
+                MVT::i8};
+    }
+    unsigned CFIIndex = MF.addFrameInst(createCfaOffset(*TRI, Reg, Offset));
     BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
         .addCFIIndex(CFIIndex)
         .setMIFlags(MachineInstr::FrameSetup);
@@ -1383,9 +1489,18 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
         .addCFIIndex(CFIIndex)
         .setMIFlags(MachineInstr::FrameSetup);
   } else {
-    // Encode the stack size of the leaf function.
-    unsigned CFIIndex = MF.addFrameInst(
-        MCCFIInstruction::cfiDefCfaOffset(nullptr, MFI.getStackSize()));
+    unsigned CFIIndex;
+    if (SVEStackSize) {
+      const TargetSubtargetInfo &STI = MF.getSubtarget();
+      const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
+      StackOffset TotalSize =
+          SVEStackSize + StackOffset((int64_t)MFI.getStackSize(), MVT::i8);
+      CFIIndex = MF.addFrameInst(createDefCFAExpressionFromSP(TRI, TotalSize));
+    } else {
+      // Encode the stack size of the leaf function.
+      CFIIndex = MF.addFrameInst(
+          MCCFIInstruction::cfiDefCfaOffset(nullptr, MFI.getStackSize()));
+    }
     BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
         .addCFIIndex(CFIIndex)
         .setMIFlags(MachineInstr::FrameSetup);
@@ -2006,6 +2121,7 @@ static void computeCalleeSaveRegisterPairs(
   // available unwind codes. This flag assures that the alignment fixup is done
   // only once, as intened.
   bool FixupDone = false;
+
   for (unsigned i = 0; i < Count; ++i) {
     RegPairInfo RPI;
     RPI.Reg1 = CSI[i].getReg();
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
index 444740cb50ab9..1ca8c3e9e2bf6 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
@@ -18,6 +18,8 @@
 
 namespace llvm {
 
+class MCCFIInstruction;
+
 class AArch64FrameLowering : public TargetFrameLowering {
 public:
   explicit AArch64FrameLowering()
@@ -119,6 +121,11 @@ private:
   int64_t assignSVEStackObjectOffsets(MachineFrameInfo &MF,
                                       int &MinCSFrameIndex,
                                       int &MaxCSFrameIndex) const;
+  MCCFIInstruction
+  createDefCFAExpressionFromSP(const TargetRegisterInfo &TRI,
+                               const StackOffset &OffsetFromSP) const;
+  MCCFIInstruction createCfaOffset(const TargetRegisterInfo &MRI, unsigned DwarfReg,
+                                   const StackOffset &OffsetFromDefCFA) const;
   bool shouldCombineCSRLocalStackBumpInEpilogue(MachineBasicBlock &MBB,
                                                 unsigned StackBumpBytes) const;
 };
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 1500da2fdfc74..45bfa85bdc07c 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -4107,6 +4107,7 @@ static bool canGuaranteeTCO(CallingConv::ID CC) {
 static bool mayTailCallThisCC(CallingConv::ID CC) {
   switch (CC) {
   case CallingConv::C:
+  case CallingConv::AArch64_SVE_VectorCall:
   case CallingConv::PreserveMost:
   case CallingConv::Swift:
     return true;
@@ -4126,6 +4127,15 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization(
   MachineFunction &MF = DAG.getMachineFunction();
   const Function &CallerF = MF.getFunction();
   CallingConv::ID CallerCC = CallerF.getCallingConv();
+
+  // If this function uses the C calling convention but has an SVE signature,
+  // then it preserves more registers and should assume the SVE_VectorCall CC.
+  // The check for matching callee-saved regs will determine whether it is
+  // eligible for TCO.
+  if (CallerCC == CallingConv::C &&
+      AArch64RegisterInfo::hasSVEArgsOrReturn(&MF))
+    CallerCC = CallingConv::AArch64_SVE_VectorCall;
+
   bool CCMatch = CallerCC == CalleeCC;
 
   // When using the Windows calling convention on a non-windows OS, we want
@@ -4313,6 +4323,20 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
   bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
   bool IsSibCall = false;
 
+  // Check callee args/returns for SVE registers and set calling convention
+  // accordingly.
+  if (CallConv == CallingConv::C) {
+    bool CalleeOutSVE = any_of(Outs, [](ISD::OutputArg &Out){
+      return Out.VT.isScalableVector();
+    });
+    bool CalleeInSVE = any_of(Ins, [](ISD::InputArg &In){
+      return In.VT.isScalableVector();
+    });
+
+    if (CalleeInSVE || CalleeOutSVE)
+      CallConv = CallingConv::AArch64_SVE_VectorCall;
+  }
+
   if (IsTailCall) {
     // Check if it's really possible to do a tail call.
     IsTailCall = isEligibleForTailCallOptimization(
@@ -4666,20 +4690,6 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
     Ops.push_back(DAG.getRegister(RegToPass.first,
                                   RegToPass.second.getValueType()));
 
-  // Check callee args/returns for SVE registers and set calling convention
-  // accordingly.
-  if (CallConv == CallingConv::C) {
-    bool CalleeOutSVE = any_of(Outs, [](ISD::OutputArg &Out){
-      return Out.VT.isScalableVector();
-    });
-    bool CalleeInSVE = any_of(Ins, [](ISD::InputArg &In){
-      return In.VT.isScalableVector();
-    });
-
-    if (CalleeInSVE || CalleeOutSVE)
-      CallConv = CallingConv::AArch64_SVE_VectorCall;
-  }
-
   // Add a register mask operand representing the call-preserved registers.
   const uint32_t *Mask;
   const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
index 83a488afc7972..3e9c8c7b6df2c 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -40,7 +40,30 @@ AArch64RegisterInfo::AArch64RegisterInfo(const Triple &TT)
   AArch64_MC::initLLVMToCVRegMapping(this);
 }
 
-static bool hasSVEArgsOrReturn(const MachineFunction *MF) {
+/// Return whether the register needs a CFI entry. Not all unwinders may know
+/// about SVE registers, so we assume the lowest common denominator, i.e. the
+/// callee-saves required by the base ABI. For the SVE registers z8-z15 only the
+/// lower 64-bits (d8-d15) need to be saved. The lower 64-bits subreg is
+/// returned in \p RegToUseForCFI.
+bool AArch64RegisterInfo::regNeedsCFI(unsigned Reg,
+                                      unsigned &RegToUseForCFI) const {
+  if (AArch64::PPRRegClass.contains(Reg))
+    return false;
+
+  if (AArch64::ZPRRegClass.contains(Reg)) {
+    RegToUseForCFI = getSubReg(Reg, AArch64::dsub);
+    for (int I = 0; CSR_AArch64_AAPCS_SaveList[I]; ++I) {
+      if (CSR_AArch64_AAPCS_SaveList[I] == RegToUseForCFI)
+        return true;
+    }
+    return false;
+  }
+
+  RegToUseForCFI = Reg;
+  return true;
+}
+
+bool AArch64RegisterInfo::hasSVEArgsOrReturn(const MachineFunction *MF) {
   const Function &F = MF->getFunction();
   return isa<ScalableVectorType>(F.getReturnType()) ||
          any_of(F.args(), [](const Argument &Arg) {
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.h b/llvm/lib/Target/AArch64/AArch64RegisterInfo.h
index 22a8ba76c6111..7b20f181e76df 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.h
@@ -42,6 +42,8 @@ public:
   void UpdateCustomCallPreservedMask(MachineFunction &MF,
                                      const uint32_t **Mask) const;
 
+  static bool hasSVEArgsOrReturn(const MachineFunction *MF);
+
   /// Code Generation virtual methods...
   const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
   const MCPhysReg *getDarwinCalleeSavedRegs(const MachineFunction *MF) const;
@@ -122,6 +124,7 @@ public:
                                      MachineFunction &MF) const override;
 
   unsigned getLocalAddressRegister(const MachineFunction &MF) const;
+  bool regNeedsCFI(unsigned Reg, unsigned &RegToUseForCFI) const;
 };
 
 } // end namespace llvm
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
index bd05c56009a1d..54b351fda053b 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
@@ -133,6 +133,9 @@ def NZCV : AArch64Reg<0, "nzcv">;
 // First fault status register
 def FFR : AArch64Reg<0, "ffr">, DwarfRegNum<[47]>;
 
+// Purely virtual Vector Granule (VG) Dwarf register
+def VG : AArch64Reg<0, "vg">, DwarfRegNum<[46]>;
+
 // GPR register classes with the intersections of GPR32/GPR32sp and
 // GPR64/GPR64sp for use by the coalescer.
 def GPR32common : RegisterClass<"AArch64", [i32], 32, (sequence "W%u", 0, 30)> {
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 3449a8bd16d28..4f29f2f181854 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -1765,7 +1765,7 @@ multiclass sve_prefetch<SDPatternOperator prefetch, ValueType PredTy, Instructio
   defm : unpred_store< store, nxv4f16, ST1H_S_IMM, PTRUE_S>;
   defm : unpred_store< store, nxv2f16, ST1H_D_IMM, PTRUE_D>;
   defm : unpred_store< store, nxv4f32, ST1W_IMM,   PTRUE_S>;
-  defm : unpred_store< store, nxv4f32, ST1W_D_IMM, PTRUE_D>;
+  defm : unpred_store< store, nxv2f32, ST1W_D_IMM, PTRUE_D>;
   defm : unpred_store< store, nxv2f64, ST1D_IMM,   PTRUE_D>;
 
   multiclass unpred_load<PatFrag Load, ValueType Ty, Instruction RegImmInst,
diff --git a/llvm/lib/Target/AArch64/AArch64StackOffset.h b/llvm/lib/Target/AArch64/AArch64StackOffset.h
index 6fa1c744f77e2..24751a81797d3 100644
--- a/llvm/lib/Target/AArch64/AArch64StackOffset.h
+++ b/llvm/lib/Target/AArch64/AArch64StackOffset.h
@@ -123,6 +123,18 @@ public:
     }
   }
 
+  void getForDwarfOffset(int64_t &ByteSized, int64_t &VGSized) const {
+    assert(isValid() && "Invalid frame offset");
+
+    // VGSized offsets are divided by '2', because the VG register is the
+    // the number of 64bit granules as opposed to 128bit vector chunks,
+    // which is how the 'n' in e.g. MVT::nxv1i8 is modelled.
+    // So, for a stack offset of 16 MVT::nxv1i8's, the size is n x 16 bytes.
+    // VG = n * 2 and the dwarf offset must be VG * 8 bytes.
+    ByteSized = Bytes;
+    VGSized = ScalableBytes / 2;
+  }
+
   /// Returns whether the offset is known zero.
   explicit operator bool() const { return Bytes || ScalableBytes; }
 
diff --git a/llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp b/llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp
index 74fe0cdd1ea7f..0245dd1d611a8 100644
--- a/llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp
+++ b/llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp
@@ -160,7 +160,7 @@ bool SVEIntrinsicOpts::optimizePTest(IntrinsicInst *I) {
     I->eraseFromParent();
     if (Op1->use_empty())
       Op1->eraseFromParent();
-    if (Op2->use_empty())
+    if (Op1 != Op2 && Op2->use_empty())
       Op2->eraseFromParent();
 
     return true;
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index 9a4c57fedac2a..e428e7155e5e9 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -2653,22 +2653,35 @@ const unsigned *PPCInstrInfo::getLoadOpcodesForSpillArray() const {
   return LoadSpillOpcodesArray[getSpillTarget()];
 }
 
-void PPCInstrInfo::fixupIsDeadOrKill(MachineInstr &StartMI, MachineInstr &EndMI,
+void PPCInstrInfo::fixupIsDeadOrKill(MachineInstr *StartMI, MachineInstr *EndMI,
                                      unsigned RegNo) const {
   // Conservatively clear kill flag for the register if the instructions are in
   // different basic blocks and in SSA form, because the kill flag may no longer
   // be right. There is no need to bother with dead flags since defs with no
   // uses will be handled by DCE.
-  MachineRegisterInfo &MRI = StartMI.getParent()->getParent()->getRegInfo();
-  if (MRI.isSSA() && (StartMI.getParent() != EndMI.getParent())) {
+  MachineRegisterInfo &MRI = StartMI->getParent()->getParent()->getRegInfo();
+  if (MRI.isSSA() && (StartMI->getParent() != EndMI->getParent())) {
     MRI.clearKillFlags(RegNo);
     return;
   }
 
   // Instructions between [StartMI, EndMI] should be in same basic block.
-  assert((StartMI.getParent() == EndMI.getParent()) &&
+  assert((StartMI->getParent() == EndMI->getParent()) &&
          "Instructions are not in same basic block");
 
+  // If before RA, StartMI may be def through COPY, we need to adjust it to the
+  // real def. See function getForwardingDefMI.
+  if (MRI.isSSA()) {
+    bool Reads, Writes;
+    std::tie(Reads, Writes) = StartMI->readsWritesVirtualRegister(RegNo);
+    if (!Reads && !Writes) {
+      assert(Register::isVirtualRegister(RegNo) &&
+             "Must be a virtual register");
+      // Get real def and ignore copies.
+      StartMI = MRI.getVRegDef(RegNo);
+    }
+  }
+
   bool IsKillSet = false;
 
   auto clearOperandKillInfo = [=] (MachineInstr &MI, unsigned Index) {
@@ -2681,21 +2694,21 @@ void PPCInstrInfo::fixupIsDeadOrKill(MachineInstr &StartMI, MachineInstr &EndMI,
   // Set killed flag for EndMI.
   // No need to do anything if EndMI defines RegNo.
   int UseIndex =
-      EndMI.findRegisterUseOperandIdx(RegNo, false, &getRegisterInfo());
+      EndMI->findRegisterUseOperandIdx(RegNo, false, &getRegisterInfo());
   if (UseIndex != -1) {
-    EndMI.getOperand(UseIndex).setIsKill(true);
+    EndMI->getOperand(UseIndex).setIsKill(true);
     IsKillSet = true;
     // Clear killed flag for other EndMI operands related to RegNo. In some
     // upexpected cases, killed may be set multiple times for same register
     // operand in same MI.
-    for (int i = 0, e = EndMI.getNumOperands(); i != e; ++i)
+    for (int i = 0, e = EndMI->getNumOperands(); i != e; ++i)
       if (i != UseIndex)
-        clearOperandKillInfo(EndMI, i);
+        clearOperandKillInfo(*EndMI, i);
   }
 
   // Walking the inst in reverse order (EndMI -> StartMI].
-  MachineBasicBlock::reverse_iterator It = EndMI;
-  MachineBasicBlock::reverse_iterator E = EndMI.getParent()->rend();
+  MachineBasicBlock::reverse_iterator It = *EndMI;
+  MachineBasicBlock::reverse_iterator E = EndMI->getParent()->rend();
   // EndMI has been handled above, skip it here.
   It++;
   MachineOperand *MO = nullptr;
@@ -2721,13 +2734,13 @@ void PPCInstrInfo::fixupIsDeadOrKill(MachineInstr &StartMI, MachineInstr &EndMI,
       } else if ((MO = It->findRegisterDefOperand(RegNo, false, true,
                                                   &getRegisterInfo()))) {
         // No use found, set dead for its def.
-        assert(&*It == &StartMI && "No new def between StartMI and EndMI.");
+        assert(&*It == StartMI && "No new def between StartMI and EndMI.");
         MO->setIsDead(true);
        break;
       }
     }
 
-    if ((&*It) == &StartMI)
+    if ((&*It) == StartMI)
       break;
   }
   // Ensure RegMo liveness is killed after EndMI.
@@ -3858,7 +3871,7 @@ bool PPCInstrInfo::simplifyToLI(MachineInstr &MI, MachineInstr &DefMI,
     //   ForwardingOperandReg = LI imm1
     //   y = op2 imm2, ForwardingOperandReg(killed)
     if (IsForwardingOperandKilled)
-      fixupIsDeadOrKill(DefMI, MI, ForwardingOperandReg);
+      fixupIsDeadOrKill(&DefMI, &MI, ForwardingOperandReg);
 
     LLVM_DEBUG(dbgs() << "With:\n");
     LLVM_DEBUG(MI.dump());
@@ -3950,9 +3963,9 @@ bool PPCInstrInfo::transformToNewImmFormFedByAdd(
 
     // Update kill flag
     if (RegMO->isKill() || IsKilledFor(RegMO->getReg()))
-      fixupIsDeadOrKill(DefMI, MI, RegMO->getReg());
+      fixupIsDeadOrKill(&DefMI, &MI, RegMO->getReg());
     if (ForwardKilledOperandReg != ~0U)
-      fixupIsDeadOrKill(DefMI, MI, ForwardKilledOperandReg);
+      fixupIsDeadOrKill(&DefMI, &MI, ForwardKilledOperandReg);
   }
 
   LLVM_DEBUG(dbgs() << "With:\n");
@@ -4063,12 +4076,12 @@ bool PPCInstrInfo::transformToImmFormFedByAdd(
     //   x = ADD reg(killed), imm
     //   y = XOP 0, x
     if (IsFwdFeederRegKilled || RegMO->isKill())
-      fixupIsDeadOrKill(DefMI, MI, RegMO->getReg());
+      fixupIsDeadOrKill(&DefMI, &MI, RegMO->getReg());
     // Pattern 3:
     //   ForwardKilledOperandReg = ADD reg, imm
     //   y = XOP 0, ForwardKilledOperandReg(killed)
     if (ForwardKilledOperandReg != ~0U)
-      fixupIsDeadOrKill(DefMI, MI, ForwardKilledOperandReg);
+      fixupIsDeadOrKill(&DefMI, &MI, ForwardKilledOperandReg);
 
     LLVM_DEBUG(dbgs() << "With:\n");
     LLVM_DEBUG(MI.dump());
@@ -4224,7 +4237,7 @@ bool PPCInstrInfo::transformToImmFormFedByLI(MachineInstr &MI,
   //   ForwardKilledOperandReg = LI imm
   //   y = XOP reg, ForwardKilledOperandReg(killed)
   if (ForwardKilledOperandReg != ~0U)
-    fixupIsDeadOrKill(DefMI, MI, ForwardKilledOperandReg);
+    fixupIsDeadOrKill(&DefMI, &MI, ForwardKilledOperandReg);
 
   return true;
 }
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
index 43973c627fcf1..556c95fef3bdb 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
@@ -570,14 +570,16 @@ public:
   /// up. Before calling this function,
   /// 1. Ensure that \p RegNo liveness is killed after instruction \p EndMI.
   /// 2. Ensure that there is no new definition between (\p StartMI, \p EndMI)
-  ///    and possible definition for \p RegNo is \p StartMI or \p EndMI.
+  ///    and possible definition for \p RegNo is \p StartMI or \p EndMI. For
+  ///    pre-RA cases, definition may be \p StartMI through COPY, \p StartMI
+  ///    will be adjust to true definition.
   /// 3. We can do accurate fixup for the case when all instructions between
   ///    [\p StartMI, \p EndMI] are in same basic block.
   /// 4. For the case when \p StartMI and \p EndMI are not in same basic block,
   ///    we conservatively clear kill flag for all uses of \p RegNo for pre-RA
   ///    and for post-RA, we give an assertion as without reaching definition
   ///    analysis post-RA, \p StartMI and \p EndMI are hard to keep right.
-  void fixupIsDeadOrKill(MachineInstr &StartMI, MachineInstr &EndMI,
+  void fixupIsDeadOrKill(MachineInstr *StartMI, MachineInstr *EndMI,
                          unsigned RegNo) const;
   void replaceInstrWithLI(MachineInstr &MI, const LoadImmediateInfo &LII) const;
   void replaceInstrOperandWithImm(MachineInstr &MI, unsigned OpNo,
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index d39ec505127c4..7b6ea002c7b71 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -279,7 +279,7 @@ bool RISCVInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
 
   // Handle a single unconditional branch.
   if (NumTerminators == 1 && I->getDesc().isUnconditionalBranch()) {
-    TBB = I->getOperand(0).getMBB();
+    TBB = getBranchDestBlock(*I);
     return false;
   }
 
@@ -293,7 +293,7 @@ bool RISCVInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
   if (NumTerminators == 2 && std::prev(I)->getDesc().isConditionalBranch() &&
       I->getDesc().isUnconditionalBranch()) {
     parseCondBranch(*std::prev(I), TBB, Cond);
-    FBB = I->getOperand(0).getMBB();
+    FBB = getBranchDestBlock(*I);
     return false;
   }
 
@@ -384,10 +384,6 @@ unsigned RISCVInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
 
   MachineFunction *MF = MBB.getParent();
   MachineRegisterInfo &MRI = MF->getRegInfo();
-  const auto &TM = static_cast<const RISCVTargetMachine &>(MF->getTarget());
-
-  if (TM.isPositionIndependent())
-    report_fatal_error("Unable to insert indirect branch");
 
   if (!isInt<32>(BrOffset))
     report_fatal_error(
@@ -399,15 +395,13 @@ unsigned RISCVInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
   Register ScratchReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
   auto II = MBB.end();
 
-  MachineInstr &LuiMI = *BuildMI(MBB, II, DL, get(RISCV::LUI), ScratchReg)
-                             .addMBB(&DestBB, RISCVII::MO_HI);
-  BuildMI(MBB, II, DL, get(RISCV::PseudoBRIND))
-      .addReg(ScratchReg, RegState::Kill)
-      .addMBB(&DestBB, RISCVII::MO_LO);
+  MachineInstr &MI = *BuildMI(MBB, II, DL, get(RISCV::PseudoJump))
+                          .addReg(ScratchReg, RegState::Define | RegState::Dead)
+                          .addMBB(&DestBB, RISCVII::MO_CALL);
 
   RS->enterBasicBlockEnd(MBB);
   unsigned Scav = RS->scavengeRegisterBackwards(RISCV::GPRRegClass,
-                                                LuiMI.getIterator(), false, 0);
+                                                MI.getIterator(), false, 0);
   MRI.replaceRegWith(ScratchReg, Scav);
   MRI.clearVirtRegs();
   RS->setRegUsed(Scav);
@@ -431,6 +425,7 @@ RISCVInstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
 
 bool RISCVInstrInfo::isBranchOffsetInRange(unsigned BranchOp,
                                            int64_t BrOffset) const {
+  unsigned XLen = STI.getXLen();
   // Ideally we could determine the supported branch offset from the
   // RISCVII::FormMask, but this can't be used for Pseudo instructions like
   // PseudoBR.
@@ -447,6 +442,8 @@ bool RISCVInstrInfo::isBranchOffsetInRange(unsigned BranchOp,
   case RISCV::JAL:
   case RISCV::PseudoBR:
     return isIntN(21, BrOffset);
+  case RISCV::PseudoJump:
+    return isIntN(32, SignExtend64(BrOffset + 0x800, XLen));
   }
 }
 
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index b9483062ddeb1..8547f791092b0 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -1012,8 +1012,8 @@ def : Pat<(riscv_tail (iPTR tglobaladdr:$dst)),
 def : Pat<(riscv_tail (iPTR texternalsym:$dst)),
           (PseudoTAIL texternalsym:$dst)>;
 
-let isCall = 0, isBarrier = 0, isCodeGenOnly = 0, hasSideEffects = 0,
-    mayStore = 0, mayLoad = 0 in
+let isCall = 0, isBarrier = 1, isBranch = 1, isTerminator = 1,
+    isCodeGenOnly = 0, hasSideEffects = 0, mayStore = 0, mayLoad = 0 in
 def PseudoJump : Pseudo<(outs GPR:$rd), (ins pseudo_jump_symbol:$target), []> {
   let AsmString = "jump\t$target, $rd";
 }
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 86aa85e965f6d..1671917157f43 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -3208,13 +3208,23 @@ X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
     return DAG.getFrameIndex(FI, PtrVT);
   }
 
+  EVT ArgVT = Ins[i].ArgVT;
+
+  // If this is a vector that has been split into multiple parts, and the
+  // scalar size of the parts don't match the vector element size, then we can't
+  // elide the copy. The parts will have padding between them instead of being
+  // packed like a vector.
+  bool ScalarizedAndExtendedVector =
+      ArgVT.isVector() && !VA.getLocVT().isVector() &&
+      VA.getLocVT().getSizeInBits() != ArgVT.getScalarSizeInBits();
+
   // This is an argument in memory. We might be able to perform copy elision.
   // If the argument is passed directly in memory without any extension, then we
   // can perform copy elision. Large vector types, for example, may be passed
   // indirectly by pointer.
   if (Flags.isCopyElisionCandidate() &&
-      VA.getLocInfo() != CCValAssign::Indirect && !ExtendedInMem) {
-    EVT ArgVT = Ins[i].ArgVT;
+      VA.getLocInfo() != CCValAssign::Indirect && !ExtendedInMem &&
+      !ScalarizedAndExtendedVector) {
     SDValue PartAddr;
     if (Ins[i].PartOffset == 0) {
       // If this is a one-part value or the first part of a multi-part value,
diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
index d9fb820f7cb53..9524d9a362049 100644
--- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -468,19 +468,16 @@ static bool CanDoGlobalSRA(GlobalVariable *GV) {
 /// Copy over the debug info for a variable to its SRA replacements.
 static void transferSRADebugInfo(GlobalVariable *GV, GlobalVariable *NGV,
                                  uint64_t FragmentOffsetInBits,
-                                 uint64_t FragmentSizeInBits) {
+                                 uint64_t FragmentSizeInBits,
+                                 uint64_t VarSize) {
   SmallVector<DIGlobalVariableExpression *, 1> GVs;
   GV->getDebugInfo(GVs);
   for (auto *GVE : GVs) {
     DIVariable *Var = GVE->getVariable();
-    Optional<uint64_t> VarSize = Var->getSizeInBits();
-
     DIExpression *Expr = GVE->getExpression();
     // If the FragmentSize is smaller than the variable,
     // emit a fragment expression.
-    // If the variable size is unknown a fragment must be
-    // emitted to be safe.
-    if (!VarSize || FragmentSizeInBits < *VarSize) {
+    if (FragmentSizeInBits < VarSize) {
       if (auto E = DIExpression::createFragmentExpression(
               Expr, FragmentOffsetInBits, FragmentSizeInBits))
         Expr = *E;
@@ -505,6 +502,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {
   assert(GV->hasLocalLinkage());
   Constant *Init = GV->getInitializer();
   Type *Ty = Init->getType();
+  uint64_t VarSize = DL.getTypeSizeInBits(Ty);
 
   std::map<unsigned, GlobalVariable *> NewGlobals;
 
@@ -560,7 +558,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {
       // Copy over the debug info for the variable.
       uint64_t Size = DL.getTypeAllocSizeInBits(NGV->getValueType());
       uint64_t FragmentOffsetInBits = Layout.getElementOffsetInBits(ElementIdx);
-      transferSRADebugInfo(GV, NGV, FragmentOffsetInBits, Size);
+      transferSRADebugInfo(GV, NGV, FragmentOffsetInBits, Size, VarSize);
     } else {
       uint64_t EltSize = DL.getTypeAllocSize(ElTy);
       Align EltAlign = DL.getABITypeAlign(ElTy);
@@ -573,7 +571,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {
       if (NewAlign > EltAlign)
         NGV->setAlignment(NewAlign);
       transferSRADebugInfo(GV, NGV, FragmentSizeInBits * ElementIdx,
-                           FragmentSizeInBits);
+                           FragmentSizeInBits, VarSize);
     }
   }
 
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index c6233a68847dd..2f1325e80d2f7 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -216,7 +216,11 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
   if (match(&I, m_Mul(m_Value(NewOp), m_Constant(C1)))) {
     // Replace X*(2^C) with X << C, where C is either a scalar or a vector.
-    if (Constant *NewCst = getLogBase2(NewOp->getType(), C1)) {
+    // Note that we need to sanitize undef multipliers to 1,
+    // to avoid introducing poison.
+    Constant *SafeC1 = Constant::replaceUndefsWith(
+        C1, ConstantInt::get(C1->getType()->getScalarType(), 1));
+    if (Constant *NewCst = getLogBase2(NewOp->getType(), SafeC1)) {
       BinaryOperator *Shl = BinaryOperator::CreateShl(NewOp, NewCst);
 
       if (I.hasNoUnsignedWrap())
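
Editor's note on the X86TargetParser change above: the old getImpliedEnabledFeatures recursed once per implied feature, while the new code repeatedly ORs whole implied-feature sets into the working bitset until a pass makes no further change, i.e. it computes a fixed point over the implication graph. The standalone sketch below illustrates that idea with a deliberately tiny, hypothetical feature table; kNumFeatures, Implies and closeOverImpliedFeatures are illustrative names, not LLVM's classes.

#include <bitset>
#include <cstddef>
#include <iostream>

// Hypothetical miniature of the tables in X86TargetParser.cpp:
// feature 2 implies feature 1, and feature 1 implies feature 0.
constexpr std::size_t kNumFeatures = 4;
using FeatureSet = std::bitset<kNumFeatures>;
const FeatureSet Implies[kNumFeatures] = {
    FeatureSet("0000"), FeatureSet("0001"), FeatureSet("0010"),
    FeatureSet("0000")};

// Close Bits over the implication relation: keep or-ing in the implied
// set of every enabled feature until a pass changes nothing. This is the
// fixed-point loop the patch uses in place of per-feature recursion.
void closeOverImpliedFeatures(FeatureSet &Bits) {
  FeatureSet Prev;
  do {
    Prev = Bits;
    for (std::size_t I = 0; I != kNumFeatures; ++I)
      if (Bits[I])
        Bits |= Implies[I];
  } while (Bits != Prev);
}

int main() {
  FeatureSet Bits("0100");    // only feature 2 enabled
  closeOverImpliedFeatures(Bits);
  std::cout << Bits << "\n";  // prints 0111: features 0 and 1 pulled in
}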
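
On the AArch64 side, the new CFI escapes describe locations of the form reg + NumBytes + NumVGScaledBytes * VG, where VG (DWARF register 46 per the AArch64RegisterInfo.td change) holds the number of 64-bit granules in an SVE vector. As a rough, self-contained illustration of how such an expression payload is byte-encoded, the sketch below defines its own LEB128 helpers and opcode constants locally instead of using LLVM's MC/Dwarf and Support/LEB128.h APIs; it is an assumption-laden approximation of appendVGScaledOffsetExpr, not the patch's implementation.

#include <cstdint>
#include <cstdio>
#include <vector>

// A few DWARF opcode values, written out locally for illustration.
constexpr uint8_t DW_OP_consts = 0x11;
constexpr uint8_t DW_OP_mul    = 0x1e;
constexpr uint8_t DW_OP_plus   = 0x22;
constexpr uint8_t DW_OP_bregx  = 0x92;

// Minimal unsigned/signed LEB128 encoders.
static void appendULEB128(std::vector<uint8_t> &Out, uint64_t V) {
  do {
    uint8_t Byte = V & 0x7f;
    V >>= 7;
    if (V)
      Byte |= 0x80;
    Out.push_back(Byte);
  } while (V);
}

static void appendSLEB128(std::vector<uint8_t> &Out, int64_t V) {
  bool More;
  do {
    uint8_t Byte = V & 0x7f;
    V >>= 7;
    More = !((V == 0 && !(Byte & 0x40)) || (V == -1 && (Byte & 0x40)));
    if (More)
      Byte |= 0x80;
    Out.push_back(Byte);
  } while (More);
}

// Append "+ NumBytes + NumVGScaledBytes * VG" to an expression that has
// already pushed a base address, mirroring the structure of the patch's
// appendVGScaledOffsetExpr.
void appendVGScaledOffset(std::vector<uint8_t> &Expr, int64_t NumBytes,
                          int64_t NumVGScaledBytes, uint64_t VGDwarfReg) {
  if (NumBytes) {
    Expr.push_back(DW_OP_consts);   // push the constant byte offset
    appendSLEB128(Expr, NumBytes);
    Expr.push_back(DW_OP_plus);     // add it to the base
  }
  if (NumVGScaledBytes) {
    Expr.push_back(DW_OP_consts);   // push the scalable multiplier
    appendSLEB128(Expr, NumVGScaledBytes);
    Expr.push_back(DW_OP_bregx);    // push VG register contents + 0
    appendULEB128(Expr, VGDwarfReg);
    appendSLEB128(Expr, 0);
    Expr.push_back(DW_OP_mul);      // multiplier * VG
    Expr.push_back(DW_OP_plus);     // add to the base
  }
}

int main() {
  // 0x8f = DW_OP_breg31 (sp), followed by an SLEB128 offset of 0.
  std::vector<uint8_t> Expr = {0x8f, 0x00};
  appendVGScaledOffset(Expr, 16, 8, 46); // sp + 16 + 8 * VG
  for (uint8_t B : Expr)
    std::printf("%02x ", B);
  std::printf("\n");
}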