author    Dimitry Andric <dim@FreeBSD.org>  2020-08-24 17:20:50 +0000
committer Dimitry Andric <dim@FreeBSD.org>  2020-08-24 17:20:50 +0000
commit    bdc6feb28f528ee3a365ca97577f7312ffa0dc65 (patch)
tree      8fc5cf6a8835cc178d70cd0ee29af2513f5c9161 /llvm/lib
parent    10c469f2ae76868106ec8bc8fbac13e0f26552f7 (diff)
Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp          |   1
-rw-r--r--  llvm/lib/Support/X86TargetParser.cpp                     |  39
-rw-r--r--  llvm/lib/Target/AArch64/AArch64FrameLowering.cpp         | 134
-rw-r--r--  llvm/lib/Target/AArch64/AArch64FrameLowering.h           |   7
-rw-r--r--  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp          |  38
-rw-r--r--  llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp          |  25
-rw-r--r--  llvm/lib/Target/AArch64/AArch64RegisterInfo.h            |   3
-rw-r--r--  llvm/lib/Target/AArch64/AArch64RegisterInfo.td           |   3
-rw-r--r--  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td           |   2
-rw-r--r--  llvm/lib/Target/AArch64/AArch64StackOffset.h             |  12
-rw-r--r--  llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp             |   2
-rw-r--r--  llvm/lib/Target/PowerPC/PPCInstrInfo.cpp                 |  49
-rw-r--r--  llvm/lib/Target/PowerPC/PPCInstrInfo.h                   |   6
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfo.cpp                 |  21
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfo.td                  |   4
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp                  |  14
-rw-r--r--  llvm/lib/Transforms/IPO/GlobalOpt.cpp                    |  14
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp |   6
18 files changed, 299 insertions, 81 deletions
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
index d81a9be26d39b..b6a9a95683603 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
@@ -241,6 +241,7 @@ void AsmPrinter::emitCFIInstruction(const MCCFIInstruction &Inst) const {
OutStreamer->emitCFIGnuArgsSize(Inst.getOffset());
break;
case MCCFIInstruction::OpEscape:
+ OutStreamer->AddComment(Inst.getComment());
OutStreamer->emitCFIEscape(Inst.getValues());
break;
case MCCFIInstruction::OpRestore:
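
The one-line change above pairs with the AArch64 CFI helpers added further down: MCCFIInstruction escapes now carry a comment string, and MCStreamer::AddComment() prints it next to the otherwise opaque .cfi_escape byte sequence, so the encoded DWARF expression stays readable in textual assembly output.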
diff --git a/llvm/lib/Support/X86TargetParser.cpp b/llvm/lib/Support/X86TargetParser.cpp
index 572d1203aaf21..c629f872df121 100644
--- a/llvm/lib/Support/X86TargetParser.cpp
+++ b/llvm/lib/Support/X86TargetParser.cpp
@@ -37,6 +37,10 @@ public:
set(I);
}
+ bool any() const {
+ return llvm::any_of(Bits, [](uint64_t V) { return V != 0; });
+ }
+
constexpr FeatureBitset &set(unsigned I) {
// GCC <6.2 crashes if this is written in a single statement.
uint32_t NewBits = Bits[I / 32] | (uint32_t(1) << (I % 32));
@@ -89,6 +93,13 @@ public:
Result.Bits[I] = ~Bits[I];
return Result;
}
+
+ constexpr bool operator!=(const FeatureBitset &RHS) const {
+ for (unsigned I = 0, E = array_lengthof(Bits); I != E; ++I)
+ if (Bits[I] != RHS.Bits[I])
+ return true;
+ return false;
+ }
};
struct ProcInfo {
@@ -552,11 +563,17 @@ void llvm::X86::getFeaturesForCPU(StringRef CPU,
// For each feature that is (transitively) implied by this feature, set it.
static void getImpliedEnabledFeatures(FeatureBitset &Bits,
const FeatureBitset &Implies) {
+ // Fast path: Implies is often empty.
+ if (!Implies.any())
+ return;
+ FeatureBitset Prev;
Bits |= Implies;
- for (unsigned i = 0; i != CPU_FEATURE_MAX; ++i) {
- if (Implies[i])
- getImpliedEnabledFeatures(Bits, FeatureInfos[i].ImpliedFeatures);
- }
+ do {
+ Prev = Bits;
+ for (unsigned i = CPU_FEATURE_MAX; i;)
+ if (Bits[--i])
+ Bits |= FeatureInfos[i].ImpliedFeatures;
+ } while (Prev != Bits);
}
/// Create bit vector of features that are implied disabled if the feature
@@ -564,12 +581,14 @@ static void getImpliedEnabledFeatures(FeatureBitset &Bits,
static void getImpliedDisabledFeatures(FeatureBitset &Bits, unsigned Value) {
// Check all features, looking for any that depend on this feature. If we find
// one, mark it and recursively find any feature that depends on it.
- for (unsigned i = 0; i != CPU_FEATURE_MAX; ++i) {
- if (FeatureInfos[i].ImpliedFeatures[Value]) {
- Bits.set(i);
- getImpliedDisabledFeatures(Bits, i);
- }
- }
+ FeatureBitset Prev;
+ Bits.set(Value);
+ do {
+ Prev = Bits;
+ for (unsigned i = 0; i != CPU_FEATURE_MAX; ++i)
+ if ((FeatureInfos[i].ImpliedFeatures & Bits).any())
+ Bits.set(i);
+ } while (Prev != Bits);
}
void llvm::X86::getImpliedFeatures(
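
The two rewritten loops above compute the transitive closure of feature implications by iterating to a fixed point instead of recursing: each pass ORs in (or sets) whatever the currently-set bits imply, and the loop stops when a pass changes nothing. A minimal standalone sketch of the enabled-features direction, using a plain uint64_t in place of FeatureBitset and a hypothetical ImpliedMask table rather than the real FeatureInfos layout:

    #include <cstdint>

    // Hypothetical table: bit J of ImpliedMask[I] means feature I implies J.
    static const uint64_t ImpliedMask[64] = { /* filled in elsewhere */ };

    // Fixed-point iteration: every enabled feature drags in its implied
    // features until a full pass adds no new bit.
    uint64_t expandImpliedEnabled(uint64_t Bits) {
      uint64_t Prev;
      do {
        Prev = Bits;
        for (unsigned I = 0; I != 64; ++I)
          if (Bits & (uint64_t(1) << I))
            Bits |= ImpliedMask[I];
      } while (Bits != Prev);
      return Bits;
    }

The disabled direction runs the same loop the other way around: set the feature being disabled, then repeatedly mark any feature whose implication mask intersects the already-marked set.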
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 4789a9f02937a..83653dcbb8cf7 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -148,6 +148,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/LEB128.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
@@ -399,12 +400,102 @@ static bool ShouldSignReturnAddress(MachineFunction &MF) {
return false;
}
+// Convenience function to create a DWARF expression for
+// Expr + NumBytes + NumVGScaledBytes * AArch64::VG
+static void appendVGScaledOffsetExpr(SmallVectorImpl<char> &Expr,
+ int NumBytes, int NumVGScaledBytes, unsigned VG,
+ llvm::raw_string_ostream &Comment) {
+ uint8_t buffer[16];
+
+ if (NumBytes) {
+ Expr.push_back(dwarf::DW_OP_consts);
+ Expr.append(buffer, buffer + encodeSLEB128(NumBytes, buffer));
+ Expr.push_back((uint8_t)dwarf::DW_OP_plus);
+ Comment << (NumBytes < 0 ? " - " : " + ") << std::abs(NumBytes);
+ }
+
+ if (NumVGScaledBytes) {
+ Expr.push_back((uint8_t)dwarf::DW_OP_consts);
+ Expr.append(buffer, buffer + encodeSLEB128(NumVGScaledBytes, buffer));
+
+ Expr.push_back((uint8_t)dwarf::DW_OP_bregx);
+ Expr.append(buffer, buffer + encodeULEB128(VG, buffer));
+ Expr.push_back(0);
+
+ Expr.push_back((uint8_t)dwarf::DW_OP_mul);
+ Expr.push_back((uint8_t)dwarf::DW_OP_plus);
+
+ Comment << (NumVGScaledBytes < 0 ? " - " : " + ")
+ << std::abs(NumVGScaledBytes) << " * VG";
+ }
+}
+
+// Creates an MCCFIInstruction:
+// { DW_CFA_def_cfa_expression, ULEB128 (sizeof expr), expr }
+MCCFIInstruction AArch64FrameLowering::createDefCFAExpressionFromSP(
+ const TargetRegisterInfo &TRI, const StackOffset &OffsetFromSP) const {
+ int64_t NumBytes, NumVGScaledBytes;
+ OffsetFromSP.getForDwarfOffset(NumBytes, NumVGScaledBytes);
+
+ std::string CommentBuffer = "sp";
+ llvm::raw_string_ostream Comment(CommentBuffer);
+
+ // Build up the expression (SP + NumBytes + NumVGScaledBytes * AArch64::VG)
+ SmallString<64> Expr;
+ Expr.push_back((uint8_t)(dwarf::DW_OP_breg0 + /*SP*/ 31));
+ Expr.push_back(0);
+ appendVGScaledOffsetExpr(Expr, NumBytes, NumVGScaledBytes,
+ TRI.getDwarfRegNum(AArch64::VG, true), Comment);
+
+ // Wrap this into DW_CFA_def_cfa.
+ SmallString<64> DefCfaExpr;
+ DefCfaExpr.push_back(dwarf::DW_CFA_def_cfa_expression);
+ uint8_t buffer[16];
+ DefCfaExpr.append(buffer,
+ buffer + encodeULEB128(Expr.size(), buffer));
+ DefCfaExpr.append(Expr.str());
+ return MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str(),
+ Comment.str());
+}
+
+MCCFIInstruction AArch64FrameLowering::createCfaOffset(
+ const TargetRegisterInfo &TRI, unsigned Reg,
+ const StackOffset &OffsetFromDefCFA) const {
+ int64_t NumBytes, NumVGScaledBytes;
+ OffsetFromDefCFA.getForDwarfOffset(NumBytes, NumVGScaledBytes);
+
+ unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
+
+ // Non-scalable offsets can use DW_CFA_offset directly.
+ if (!NumVGScaledBytes)
+ return MCCFIInstruction::createOffset(nullptr, DwarfReg, NumBytes);
+
+ std::string CommentBuffer;
+ llvm::raw_string_ostream Comment(CommentBuffer);
+ Comment << printReg(Reg, &TRI) << " @ cfa";
+
+ // Build up expression (NumBytes + NumVGScaledBytes * AArch64::VG)
+ SmallString<64> OffsetExpr;
+ appendVGScaledOffsetExpr(OffsetExpr, NumBytes, NumVGScaledBytes,
+ TRI.getDwarfRegNum(AArch64::VG, true), Comment);
+
+ // Wrap this into DW_CFA_expression
+ SmallString<64> CfaExpr;
+ CfaExpr.push_back(dwarf::DW_CFA_expression);
+ uint8_t buffer[16];
+ CfaExpr.append(buffer, buffer + encodeULEB128(DwarfReg, buffer));
+ CfaExpr.append(buffer, buffer + encodeULEB128(OffsetExpr.size(), buffer));
+ CfaExpr.append(OffsetExpr.str());
+
+ return MCCFIInstruction::createEscape(nullptr, CfaExpr.str(), Comment.str());
+}
+
void AArch64FrameLowering::emitCalleeSavedFrameMoves(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = MF.getFrameInfo();
const TargetSubtargetInfo &STI = MF.getSubtarget();
- const MCRegisterInfo *MRI = STI.getRegisterInfo();
+ const TargetRegisterInfo *TRI = STI.getRegisterInfo();
const TargetInstrInfo *TII = STI.getInstrInfo();
DebugLoc DL = MBB.findDebugLoc(MBBI);
@@ -415,11 +506,26 @@ void AArch64FrameLowering::emitCalleeSavedFrameMoves(
for (const auto &Info : CSI) {
unsigned Reg = Info.getReg();
- int64_t Offset =
- MFI.getObjectOffset(Info.getFrameIdx()) - getOffsetOfLocalArea();
- unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
- unsigned CFIIndex = MF.addFrameInst(
- MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
+
+ // Not all unwinders may know about SVE registers, so assume the lowest
+ // common denominator.
+ unsigned NewReg;
+ if (static_cast<const AArch64RegisterInfo *>(TRI)->regNeedsCFI(Reg, NewReg))
+ Reg = NewReg;
+ else
+ continue;
+
+ StackOffset Offset;
+ if (MFI.getStackID(Info.getFrameIdx()) == TargetStackID::SVEVector) {
+ AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+ Offset = StackOffset(MFI.getObjectOffset(Info.getFrameIdx()), MVT::nxv1i8) -
+ StackOffset(AFI->getCalleeSavedStackSize(MFI), MVT::i8);
+ } else {
+ Offset = {MFI.getObjectOffset(Info.getFrameIdx()) -
+ getOffsetOfLocalArea(),
+ MVT::i8};
+ }
+ unsigned CFIIndex = MF.addFrameInst(createCfaOffset(*TRI, Reg, Offset));
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
.setMIFlags(MachineInstr::FrameSetup);
@@ -1383,9 +1489,18 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
.addCFIIndex(CFIIndex)
.setMIFlags(MachineInstr::FrameSetup);
} else {
- // Encode the stack size of the leaf function.
- unsigned CFIIndex = MF.addFrameInst(
- MCCFIInstruction::cfiDefCfaOffset(nullptr, MFI.getStackSize()));
+ unsigned CFIIndex;
+ if (SVEStackSize) {
+ const TargetSubtargetInfo &STI = MF.getSubtarget();
+ const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
+ StackOffset TotalSize =
+ SVEStackSize + StackOffset((int64_t)MFI.getStackSize(), MVT::i8);
+ CFIIndex = MF.addFrameInst(createDefCFAExpressionFromSP(TRI, TotalSize));
+ } else {
+ // Encode the stack size of the leaf function.
+ CFIIndex = MF.addFrameInst(
+ MCCFIInstruction::cfiDefCfaOffset(nullptr, MFI.getStackSize()));
+ }
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
.setMIFlags(MachineInstr::FrameSetup);
@@ -2006,6 +2121,7 @@ static void computeCalleeSaveRegisterPairs(
// available unwind codes. This flag ensures that the alignment fixup is done
// only once, as intended.
bool FixupDone = false;
+
for (unsigned i = 0; i < Count; ++i) {
RegPairInfo RPI;
RPI.Reg1 = CSI[i].getReg();
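
To make the new CFI helpers concrete: for a prologue with, say, 16 fixed stack bytes and 8 VG-scaled bytes, createDefCFAExpressionFromSP emits an escape that hand-decodes as below (an illustration, assuming VG's DWARF register number 46 from the AArch64RegisterInfo.td change further down; the attached comment would read "sp + 16 + 8 * VG"):

    DW_CFA_def_cfa_expression <length>
      DW_OP_breg31 0      ; push SP
      DW_OP_consts 16     ; push 16
      DW_OP_plus          ; SP + 16
      DW_OP_consts 8      ; push 8
      DW_OP_bregx 46 0    ; push VG
      DW_OP_mul           ; 8 * VG
      DW_OP_plus          ; CFA = SP + 16 + 8 * VG

createCfaOffset wraps the same NumBytes/VG tail in DW_CFA_expression instead, and falls back to a plain DW_CFA_offset whenever the scalable part is zero.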
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
index 444740cb50ab9..1ca8c3e9e2bf6 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
@@ -18,6 +18,8 @@
namespace llvm {
+class MCCFIInstruction;
+
class AArch64FrameLowering : public TargetFrameLowering {
public:
explicit AArch64FrameLowering()
@@ -119,6 +121,11 @@ private:
int64_t assignSVEStackObjectOffsets(MachineFrameInfo &MF,
int &MinCSFrameIndex,
int &MaxCSFrameIndex) const;
+ MCCFIInstruction
+ createDefCFAExpressionFromSP(const TargetRegisterInfo &TRI,
+ const StackOffset &OffsetFromSP) const;
+ MCCFIInstruction createCfaOffset(const TargetRegisterInfo &TRI, unsigned Reg,
+ const StackOffset &OffsetFromDefCFA) const;
bool shouldCombineCSRLocalStackBumpInEpilogue(MachineBasicBlock &MBB,
unsigned StackBumpBytes) const;
};
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 1500da2fdfc74..45bfa85bdc07c 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -4107,6 +4107,7 @@ static bool canGuaranteeTCO(CallingConv::ID CC) {
static bool mayTailCallThisCC(CallingConv::ID CC) {
switch (CC) {
case CallingConv::C:
+ case CallingConv::AArch64_SVE_VectorCall:
case CallingConv::PreserveMost:
case CallingConv::Swift:
return true;
@@ -4126,6 +4127,15 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization(
MachineFunction &MF = DAG.getMachineFunction();
const Function &CallerF = MF.getFunction();
CallingConv::ID CallerCC = CallerF.getCallingConv();
+
+ // If this function uses the C calling convention but has an SVE signature,
+ // then it preserves more registers and should assume the SVE_VectorCall CC.
+ // The check for matching callee-saved regs will determine whether it is
+ // eligible for TCO.
+ if (CallerCC == CallingConv::C &&
+ AArch64RegisterInfo::hasSVEArgsOrReturn(&MF))
+ CallerCC = CallingConv::AArch64_SVE_VectorCall;
+
bool CCMatch = CallerCC == CalleeCC;
// When using the Windows calling convention on a non-windows OS, we want
@@ -4313,6 +4323,20 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
bool IsSibCall = false;
+ // Check callee args/returns for SVE registers and set calling convention
+ // accordingly.
+ if (CallConv == CallingConv::C) {
+ bool CalleeOutSVE = any_of(Outs, [](ISD::OutputArg &Out){
+ return Out.VT.isScalableVector();
+ });
+ bool CalleeInSVE = any_of(Ins, [](ISD::InputArg &In){
+ return In.VT.isScalableVector();
+ });
+
+ if (CalleeInSVE || CalleeOutSVE)
+ CallConv = CallingConv::AArch64_SVE_VectorCall;
+ }
+
if (IsTailCall) {
// Check if it's really possible to do a tail call.
IsTailCall = isEligibleForTailCallOptimization(
@@ -4666,20 +4690,6 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
Ops.push_back(DAG.getRegister(RegToPass.first,
RegToPass.second.getValueType()));
- // Check callee args/returns for SVE registers and set calling convention
- // accordingly.
- if (CallConv == CallingConv::C) {
- bool CalleeOutSVE = any_of(Outs, [](ISD::OutputArg &Out){
- return Out.VT.isScalableVector();
- });
- bool CalleeInSVE = any_of(Ins, [](ISD::InputArg &In){
- return In.VT.isScalableVector();
- });
-
- if (CalleeInSVE || CalleeOutSVE)
- CallConv = CallingConv::AArch64_SVE_VectorCall;
- }
-
// Add a register mask operand representing the call-preserved registers.
const uint32_t *Mask;
const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
index 83a488afc7972..3e9c8c7b6df2c 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -40,7 +40,30 @@ AArch64RegisterInfo::AArch64RegisterInfo(const Triple &TT)
AArch64_MC::initLLVMToCVRegMapping(this);
}
-static bool hasSVEArgsOrReturn(const MachineFunction *MF) {
+/// Return whether the register needs a CFI entry. Not all unwinders may know
+/// about SVE registers, so we assume the lowest common denominator, i.e. the
+/// callee-saves required by the base ABI. For the SVE registers z8-z15 only the
+/// lower 64 bits (d8-d15) need to be saved. The lower 64-bit subreg is
+/// returned in \p RegToUseForCFI.
+bool AArch64RegisterInfo::regNeedsCFI(unsigned Reg,
+ unsigned &RegToUseForCFI) const {
+ if (AArch64::PPRRegClass.contains(Reg))
+ return false;
+
+ if (AArch64::ZPRRegClass.contains(Reg)) {
+ RegToUseForCFI = getSubReg(Reg, AArch64::dsub);
+ for (int I = 0; CSR_AArch64_AAPCS_SaveList[I]; ++I) {
+ if (CSR_AArch64_AAPCS_SaveList[I] == RegToUseForCFI)
+ return true;
+ }
+ return false;
+ }
+
+ RegToUseForCFI = Reg;
+ return true;
+}
+
+bool AArch64RegisterInfo::hasSVEArgsOrReturn(const MachineFunction *MF) {
const Function &F = MF->getFunction();
return isa<ScalableVectorType>(F.getReturnType()) ||
any_of(F.args(), [](const Argument &Arg) {
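
In effect, regNeedsCFI downgrades SVE saves to what a base-ABI unwinder understands: a spill of z8 is reported as a save of d8 (under the base AAPCS only the low 64 bits of z8-z15 are callee-saved, and d8-d15 are what CSR_AArch64_AAPCS_SaveList contains), a Z register outside that range gets no CFI entry at all, and predicate registers are always skipped.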
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.h b/llvm/lib/Target/AArch64/AArch64RegisterInfo.h
index 22a8ba76c6111..7b20f181e76df 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.h
@@ -42,6 +42,8 @@ public:
void UpdateCustomCallPreservedMask(MachineFunction &MF,
const uint32_t **Mask) const;
+ static bool hasSVEArgsOrReturn(const MachineFunction *MF);
+
/// Code Generation virtual methods...
const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
const MCPhysReg *getDarwinCalleeSavedRegs(const MachineFunction *MF) const;
@@ -122,6 +124,7 @@ public:
MachineFunction &MF) const override;
unsigned getLocalAddressRegister(const MachineFunction &MF) const;
+ bool regNeedsCFI(unsigned Reg, unsigned &RegToUseForCFI) const;
};
} // end namespace llvm
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
index bd05c56009a1d..54b351fda053b 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
@@ -133,6 +133,9 @@ def NZCV : AArch64Reg<0, "nzcv">;
// First fault status register
def FFR : AArch64Reg<0, "ffr">, DwarfRegNum<[47]>;
+// Purely virtual Vector Granule (VG) Dwarf register
+def VG : AArch64Reg<0, "vg">, DwarfRegNum<[46]>;
+
// GPR register classes with the intersections of GPR32/GPR32sp and
// GPR64/GPR64sp for use by the coalescer.
def GPR32common : RegisterClass<"AArch64", [i32], 32, (sequence "W%u", 0, 30)> {
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 3449a8bd16d28..4f29f2f181854 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -1765,7 +1765,7 @@ multiclass sve_prefetch<SDPatternOperator prefetch, ValueType PredTy, Instructio
defm : unpred_store< store, nxv4f16, ST1H_S_IMM, PTRUE_S>;
defm : unpred_store< store, nxv2f16, ST1H_D_IMM, PTRUE_D>;
defm : unpred_store< store, nxv4f32, ST1W_IMM, PTRUE_S>;
- defm : unpred_store< store, nxv4f32, ST1W_D_IMM, PTRUE_D>;
+ defm : unpred_store< store, nxv2f32, ST1W_D_IMM, PTRUE_D>;
defm : unpred_store< store, nxv2f64, ST1D_IMM, PTRUE_D>;
multiclass unpred_load<PatFrag Load, ValueType Ty, Instruction RegImmInst,
diff --git a/llvm/lib/Target/AArch64/AArch64StackOffset.h b/llvm/lib/Target/AArch64/AArch64StackOffset.h
index 6fa1c744f77e2..24751a81797d3 100644
--- a/llvm/lib/Target/AArch64/AArch64StackOffset.h
+++ b/llvm/lib/Target/AArch64/AArch64StackOffset.h
@@ -123,6 +123,18 @@ public:
}
}
+ void getForDwarfOffset(int64_t &ByteSized, int64_t &VGSized) const {
+ assert(isValid() && "Invalid frame offset");
+
+ // VGSized offsets are divided by '2', because the VG register holds
+ // the number of 64-bit granules, as opposed to the 128-bit vector chunks
+ // in which the 'n' in e.g. MVT::nxv1i8 is modelled.
+ // So, for a stack offset of 16 MVT::nxv1i8's, the size is n x 16 bytes.
+ // VG = n * 2 and the dwarf offset must be VG * 8 bytes.
+ ByteSized = Bytes;
+ VGSized = ScalableBytes / 2;
+ }
+
/// Returns whether the offset is known zero.
explicit operator bool() const { return Bytes || ScalableBytes; }
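
A worked instance of the division by two: on a 256-bit SVE machine, VG = 4 (four 64-bit granules) and the 'n' of MVT::nxv1i8 is 2 (two 128-bit chunks). A StackOffset of 16 scalable bytes therefore spans n * 16 = 32 real bytes, and getForDwarfOffset reports VGSized = 16 / 2 = 8, so the unwinder's 8 * VG = 32 recovers the same distance.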
diff --git a/llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp b/llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp
index 74fe0cdd1ea7f..0245dd1d611a8 100644
--- a/llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp
+++ b/llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp
@@ -160,7 +160,7 @@ bool SVEIntrinsicOpts::optimizePTest(IntrinsicInst *I) {
I->eraseFromParent();
if (Op1->use_empty())
Op1->eraseFromParent();
- if (Op2->use_empty())
+ if (Op1 != Op2 && Op2->use_empty())
Op2->eraseFromParent();
return true;
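
The added guard matters because optimizePTest can see the same Value as both operands: the first eraseFromParent() destroys it, and the previously unconditional Op2->use_empty() call would then read freed memory. Testing Op1 != Op2 first ensures the second erase is only attempted on a distinct instruction.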
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index 9a4c57fedac2a..e428e7155e5e9 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -2653,22 +2653,35 @@ const unsigned *PPCInstrInfo::getLoadOpcodesForSpillArray() const {
return LoadSpillOpcodesArray[getSpillTarget()];
}
-void PPCInstrInfo::fixupIsDeadOrKill(MachineInstr &StartMI, MachineInstr &EndMI,
+void PPCInstrInfo::fixupIsDeadOrKill(MachineInstr *StartMI, MachineInstr *EndMI,
unsigned RegNo) const {
// Conservatively clear kill flag for the register if the instructions are in
// different basic blocks and in SSA form, because the kill flag may no longer
// be right. There is no need to bother with dead flags since defs with no
// uses will be handled by DCE.
- MachineRegisterInfo &MRI = StartMI.getParent()->getParent()->getRegInfo();
- if (MRI.isSSA() && (StartMI.getParent() != EndMI.getParent())) {
+ MachineRegisterInfo &MRI = StartMI->getParent()->getParent()->getRegInfo();
+ if (MRI.isSSA() && (StartMI->getParent() != EndMI->getParent())) {
MRI.clearKillFlags(RegNo);
return;
}
// Instructions between [StartMI, EndMI] should be in same basic block.
- assert((StartMI.getParent() == EndMI.getParent()) &&
+ assert((StartMI->getParent() == EndMI->getParent()) &&
"Instructions are not in same basic block");
+ // If before RA, StartMI may be a def reached through a COPY; we need to
+ // adjust it to the real def. See function getForwardingDefMI.
+ if (MRI.isSSA()) {
+ bool Reads, Writes;
+ std::tie(Reads, Writes) = StartMI->readsWritesVirtualRegister(RegNo);
+ if (!Reads && !Writes) {
+ assert(Register::isVirtualRegister(RegNo) &&
+ "Must be a virtual register");
+ // Get real def and ignore copies.
+ StartMI = MRI.getVRegDef(RegNo);
+ }
+ }
+
bool IsKillSet = false;
auto clearOperandKillInfo = [=] (MachineInstr &MI, unsigned Index) {
@@ -2681,21 +2694,21 @@ void PPCInstrInfo::fixupIsDeadOrKill(MachineInstr &StartMI, MachineInstr &EndMI,
// Set killed flag for EndMI.
// No need to do anything if EndMI defines RegNo.
int UseIndex =
- EndMI.findRegisterUseOperandIdx(RegNo, false, &getRegisterInfo());
+ EndMI->findRegisterUseOperandIdx(RegNo, false, &getRegisterInfo());
if (UseIndex != -1) {
- EndMI.getOperand(UseIndex).setIsKill(true);
+ EndMI->getOperand(UseIndex).setIsKill(true);
IsKillSet = true;
// Clear killed flag for other EndMI operands related to RegNo. In some
// unexpected cases, killed may be set multiple times for the same register
// operand in same MI.
- for (int i = 0, e = EndMI.getNumOperands(); i != e; ++i)
+ for (int i = 0, e = EndMI->getNumOperands(); i != e; ++i)
if (i != UseIndex)
- clearOperandKillInfo(EndMI, i);
+ clearOperandKillInfo(*EndMI, i);
}
// Walking the inst in reverse order (EndMI -> StartMI].
- MachineBasicBlock::reverse_iterator It = EndMI;
- MachineBasicBlock::reverse_iterator E = EndMI.getParent()->rend();
+ MachineBasicBlock::reverse_iterator It = *EndMI;
+ MachineBasicBlock::reverse_iterator E = EndMI->getParent()->rend();
// EndMI has been handled above, skip it here.
It++;
MachineOperand *MO = nullptr;
@@ -2721,13 +2734,13 @@ void PPCInstrInfo::fixupIsDeadOrKill(MachineInstr &StartMI, MachineInstr &EndMI,
} else if ((MO = It->findRegisterDefOperand(RegNo, false, true,
&getRegisterInfo()))) {
// No use found, set dead for its def.
- assert(&*It == &StartMI && "No new def between StartMI and EndMI.");
+ assert(&*It == StartMI && "No new def between StartMI and EndMI.");
MO->setIsDead(true);
break;
}
}
- if ((&*It) == &StartMI)
+ if ((&*It) == StartMI)
break;
}
// Ensure RegNo liveness is killed after EndMI.
@@ -3858,7 +3871,7 @@ bool PPCInstrInfo::simplifyToLI(MachineInstr &MI, MachineInstr &DefMI,
// ForwardingOperandReg = LI imm1
// y = op2 imm2, ForwardingOperandReg(killed)
if (IsForwardingOperandKilled)
- fixupIsDeadOrKill(DefMI, MI, ForwardingOperandReg);
+ fixupIsDeadOrKill(&DefMI, &MI, ForwardingOperandReg);
LLVM_DEBUG(dbgs() << "With:\n");
LLVM_DEBUG(MI.dump());
@@ -3950,9 +3963,9 @@ bool PPCInstrInfo::transformToNewImmFormFedByAdd(
// Update kill flag
if (RegMO->isKill() || IsKilledFor(RegMO->getReg()))
- fixupIsDeadOrKill(DefMI, MI, RegMO->getReg());
+ fixupIsDeadOrKill(&DefMI, &MI, RegMO->getReg());
if (ForwardKilledOperandReg != ~0U)
- fixupIsDeadOrKill(DefMI, MI, ForwardKilledOperandReg);
+ fixupIsDeadOrKill(&DefMI, &MI, ForwardKilledOperandReg);
}
LLVM_DEBUG(dbgs() << "With:\n");
@@ -4063,12 +4076,12 @@ bool PPCInstrInfo::transformToImmFormFedByAdd(
// x = ADD reg(killed), imm
// y = XOP 0, x
if (IsFwdFeederRegKilled || RegMO->isKill())
- fixupIsDeadOrKill(DefMI, MI, RegMO->getReg());
+ fixupIsDeadOrKill(&DefMI, &MI, RegMO->getReg());
// Pattern 3:
// ForwardKilledOperandReg = ADD reg, imm
// y = XOP 0, ForwardKilledOperandReg(killed)
if (ForwardKilledOperandReg != ~0U)
- fixupIsDeadOrKill(DefMI, MI, ForwardKilledOperandReg);
+ fixupIsDeadOrKill(&DefMI, &MI, ForwardKilledOperandReg);
LLVM_DEBUG(dbgs() << "With:\n");
LLVM_DEBUG(MI.dump());
@@ -4224,7 +4237,7 @@ bool PPCInstrInfo::transformToImmFormFedByLI(MachineInstr &MI,
// ForwardKilledOperandReg = LI imm
// y = XOP reg, ForwardKilledOperandReg(killed)
if (ForwardKilledOperandReg != ~0U)
- fixupIsDeadOrKill(DefMI, MI, ForwardKilledOperandReg);
+ fixupIsDeadOrKill(&DefMI, &MI, ForwardKilledOperandReg);
return true;
}
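
The switch from references to pointers is what enables the new pre-RA adjustment above: when the StartMI passed by the caller neither reads nor writes RegNo (the value having been forwarded through a COPY, per getForwardingDefMI), fixupIsDeadOrKill retargets StartMI to MRI.getVRegDef(RegNo), so the backward walk that sets kill and dead flags terminates at the real definition rather than asserting.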
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
index 43973c627fcf1..556c95fef3bdb 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
@@ -570,14 +570,16 @@ public:
/// up. Before calling this function,
/// 1. Ensure that \p RegNo liveness is killed after instruction \p EndMI.
/// 2. Ensure that there is no new definition between (\p StartMI, \p EndMI)
- /// and possible definition for \p RegNo is \p StartMI or \p EndMI.
+ /// and possible definition for \p RegNo is \p StartMI or \p EndMI. For
+ /// pre-RA cases, the definition may reach \p StartMI through a COPY, and
+ /// \p StartMI will be adjusted to the true definition.
/// 3. We can do accurate fixup for the case when all instructions between
/// [\p StartMI, \p EndMI] are in same basic block.
/// 4. For the case when \p StartMI and \p EndMI are not in same basic block,
/// we conservatively clear kill flag for all uses of \p RegNo for pre-RA
/// and for post-RA, we give an assertion as without reaching definition
/// analysis post-RA, \p StartMI and \p EndMI are hard to keep right.
- void fixupIsDeadOrKill(MachineInstr &StartMI, MachineInstr &EndMI,
+ void fixupIsDeadOrKill(MachineInstr *StartMI, MachineInstr *EndMI,
unsigned RegNo) const;
void replaceInstrWithLI(MachineInstr &MI, const LoadImmediateInfo &LII) const;
void replaceInstrOperandWithImm(MachineInstr &MI, unsigned OpNo,
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index d39ec505127c4..7b6ea002c7b71 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -279,7 +279,7 @@ bool RISCVInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
// Handle a single unconditional branch.
if (NumTerminators == 1 && I->getDesc().isUnconditionalBranch()) {
- TBB = I->getOperand(0).getMBB();
+ TBB = getBranchDestBlock(*I);
return false;
}
@@ -293,7 +293,7 @@ bool RISCVInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
if (NumTerminators == 2 && std::prev(I)->getDesc().isConditionalBranch() &&
I->getDesc().isUnconditionalBranch()) {
parseCondBranch(*std::prev(I), TBB, Cond);
- FBB = I->getOperand(0).getMBB();
+ FBB = getBranchDestBlock(*I);
return false;
}
@@ -384,10 +384,6 @@ unsigned RISCVInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
MachineFunction *MF = MBB.getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
- const auto &TM = static_cast<const RISCVTargetMachine &>(MF->getTarget());
-
- if (TM.isPositionIndependent())
- report_fatal_error("Unable to insert indirect branch");
if (!isInt<32>(BrOffset))
report_fatal_error(
@@ -399,15 +395,13 @@ unsigned RISCVInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
Register ScratchReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
auto II = MBB.end();
- MachineInstr &LuiMI = *BuildMI(MBB, II, DL, get(RISCV::LUI), ScratchReg)
- .addMBB(&DestBB, RISCVII::MO_HI);
- BuildMI(MBB, II, DL, get(RISCV::PseudoBRIND))
- .addReg(ScratchReg, RegState::Kill)
- .addMBB(&DestBB, RISCVII::MO_LO);
+ MachineInstr &MI = *BuildMI(MBB, II, DL, get(RISCV::PseudoJump))
+ .addReg(ScratchReg, RegState::Define | RegState::Dead)
+ .addMBB(&DestBB, RISCVII::MO_CALL);
RS->enterBasicBlockEnd(MBB);
unsigned Scav = RS->scavengeRegisterBackwards(RISCV::GPRRegClass,
- LuiMI.getIterator(), false, 0);
+ MI.getIterator(), false, 0);
MRI.replaceRegWith(ScratchReg, Scav);
MRI.clearVirtRegs();
RS->setRegUsed(Scav);
@@ -431,6 +425,7 @@ RISCVInstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
bool RISCVInstrInfo::isBranchOffsetInRange(unsigned BranchOp,
int64_t BrOffset) const {
+ unsigned XLen = STI.getXLen();
// Ideally we could determine the supported branch offset from the
// RISCVII::FormMask, but this can't be used for Pseudo instructions like
// PseudoBR.
@@ -447,6 +442,8 @@ bool RISCVInstrInfo::isBranchOffsetInRange(unsigned BranchOp,
case RISCV::JAL:
case RISCV::PseudoBR:
return isIntN(21, BrOffset);
+ case RISCV::PseudoJump:
+ return isIntN(32, SignExtend64(BrOffset + 0x800, XLen));
}
}
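
The +0x800 in the new range check reflects how PseudoJump expands into an auipc/jalr pair: auipc supplies bits [31:12] and jalr adds a sign-extended 12-bit low part, so the high immediate must be rounded up whenever the low part comes out negative. A sketch of the split (my illustration, not code from the tree):

    #include <cstdint>

    // Split a PC-relative offset for auipc (Hi20) + jalr (Lo12).
    // Identity: Offset == (Hi20 << 12) + Lo12 with Lo12 sign-extended,
    // hence the range test isIntN(32, BrOffset + 0x800): Hi20 must fit
    // in auipc's 20-bit immediate.
    void splitAuipcJalr(int64_t Offset, int64_t &Hi20, int64_t &Lo12) {
      Hi20 = (Offset + 0x800) >> 12;             // auipc immediate
      Lo12 = ((Offset & 0xfff) ^ 0x800) - 0x800; // jalr immediate, signed
    }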
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index b9483062ddeb1..8547f791092b0 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -1012,8 +1012,8 @@ def : Pat<(riscv_tail (iPTR tglobaladdr:$dst)),
def : Pat<(riscv_tail (iPTR texternalsym:$dst)),
(PseudoTAIL texternalsym:$dst)>;
-let isCall = 0, isBarrier = 0, isCodeGenOnly = 0, hasSideEffects = 0,
- mayStore = 0, mayLoad = 0 in
+let isCall = 0, isBarrier = 1, isBranch = 1, isTerminator = 1,
+ isCodeGenOnly = 0, hasSideEffects = 0, mayStore = 0, mayLoad = 0 in
def PseudoJump : Pseudo<(outs GPR:$rd), (ins pseudo_jump_symbol:$target), []> {
let AsmString = "jump\t$target, $rd";
}
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 86aa85e965f6d..1671917157f43 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -3208,13 +3208,23 @@ X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
return DAG.getFrameIndex(FI, PtrVT);
}
+ EVT ArgVT = Ins[i].ArgVT;
+
+ // If this is a vector that has been split into multiple parts, and the
+ // scalar size of the parts don't match the vector element size, then we can't
+ // elide the copy. The parts will have padding between them instead of being
+ // packed like a vector.
+ bool ScalarizedAndExtendedVector =
+ ArgVT.isVector() && !VA.getLocVT().isVector() &&
+ VA.getLocVT().getSizeInBits() != ArgVT.getScalarSizeInBits();
+
// This is an argument in memory. We might be able to perform copy elision.
// If the argument is passed directly in memory without any extension, then we
// can perform copy elision. Large vector types, for example, may be passed
// indirectly by pointer.
if (Flags.isCopyElisionCandidate() &&
- VA.getLocInfo() != CCValAssign::Indirect && !ExtendedInMem) {
- EVT ArgVT = Ins[i].ArgVT;
+ VA.getLocInfo() != CCValAssign::Indirect && !ExtendedInMem &&
+ !ScalarizedAndExtendedVector) {
SDValue PartAddr;
if (Ins[i].PartOffset == 0) {
// If this is a one-part value or the first part of a multi-part value,
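
As a hypothetical shape the new predicate rejects: a vector argument whose 32-bit elements are promoted to 64-bit scalar parts has ArgVT.getScalarSizeInBits() == 32 but VA.getLocVT().getSizeInBits() == 64, so consecutive lanes land one widened slot apart with padding between them; reloading the original packed vector straight from the first part's address would read that padding, which is why copy elision is now refused for such arguments.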
diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
index d9fb820f7cb53..9524d9a362049 100644
--- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -468,19 +468,16 @@ static bool CanDoGlobalSRA(GlobalVariable *GV) {
/// Copy over the debug info for a variable to its SRA replacements.
static void transferSRADebugInfo(GlobalVariable *GV, GlobalVariable *NGV,
uint64_t FragmentOffsetInBits,
- uint64_t FragmentSizeInBits) {
+ uint64_t FragmentSizeInBits,
+ uint64_t VarSize) {
SmallVector<DIGlobalVariableExpression *, 1> GVs;
GV->getDebugInfo(GVs);
for (auto *GVE : GVs) {
DIVariable *Var = GVE->getVariable();
- Optional<uint64_t> VarSize = Var->getSizeInBits();
-
DIExpression *Expr = GVE->getExpression();
// If the FragmentSize is smaller than the variable,
// emit a fragment expression.
- // If the variable size is unknown a fragment must be
- // emitted to be safe.
- if (!VarSize || FragmentSizeInBits < *VarSize) {
+ if (FragmentSizeInBits < VarSize) {
if (auto E = DIExpression::createFragmentExpression(
Expr, FragmentOffsetInBits, FragmentSizeInBits))
Expr = *E;
@@ -505,6 +502,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {
assert(GV->hasLocalLinkage());
Constant *Init = GV->getInitializer();
Type *Ty = Init->getType();
+ uint64_t VarSize = DL.getTypeSizeInBits(Ty);
std::map<unsigned, GlobalVariable *> NewGlobals;
@@ -560,7 +558,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {
// Copy over the debug info for the variable.
uint64_t Size = DL.getTypeAllocSizeInBits(NGV->getValueType());
uint64_t FragmentOffsetInBits = Layout.getElementOffsetInBits(ElementIdx);
- transferSRADebugInfo(GV, NGV, FragmentOffsetInBits, Size);
+ transferSRADebugInfo(GV, NGV, FragmentOffsetInBits, Size, VarSize);
} else {
uint64_t EltSize = DL.getTypeAllocSize(ElTy);
Align EltAlign = DL.getABITypeAlign(ElTy);
@@ -573,7 +571,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {
if (NewAlign > EltAlign)
NGV->setAlignment(NewAlign);
transferSRADebugInfo(GV, NGV, FragmentSizeInBits * ElementIdx,
- FragmentSizeInBits);
+ FragmentSizeInBits, VarSize);
}
}
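
The fragment decision above is now driven by the DataLayout size of the aggregate being split rather than by the debug variable's own, possibly absent, size. For example, when SRA splits a 64-bit two-element struct, each 32-bit replacement global receives a fragment expression covering its half, while a replacement as large as the whole variable keeps the plain expression.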
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index c6233a68847dd..2f1325e80d2f7 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -216,7 +216,11 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
if (match(&I, m_Mul(m_Value(NewOp), m_Constant(C1)))) {
// Replace X*(2^C) with X << C, where C is either a scalar or a vector.
- if (Constant *NewCst = getLogBase2(NewOp->getType(), C1)) {
+ // Note that we need to sanitize undef multipliers to 1,
+ // to avoid introducing poison.
+ Constant *SafeC1 = Constant::replaceUndefsWith(
+ C1, ConstantInt::get(C1->getType()->getScalarType(), 1));
+ if (Constant *NewCst = getLogBase2(NewOp->getType(), SafeC1)) {
BinaryOperator *Shl = BinaryOperator::CreateShl(NewOp, NewCst);
if (I.hasNoUnsignedWrap())
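
A worked case of the sanitization (my example, not from the patch): for mul <2 x i8> %x, <i8 8, i8 undef>, replaceUndefsWith turns the multiplier into <i8 8, i8 1>, getLogBase2 then yields <i8 3, i8 0>, and the transform emits shl %x, <i8 3, i8 0>. Without the fix, the undef lane would have become a shift by undef, which is poison and therefore strictly worse than the original multiply by undef.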