author    Dimitry Andric <dim@FreeBSD.org>  2024-01-11 18:29:01 +0000
committer Dimitry Andric <dim@FreeBSD.org>  2024-04-19 21:24:43 +0000
commit    52e4ee64c25fe0837e9cf783a63e8c214b3180cf (patch)
tree      ef0a98fe51363441060377330e2e714855102bf6 /contrib/llvm-project/llvm/lib/Target
parent    ed89c59104c13195cbbad881f64c6a71f687c1e4 (diff)
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target')
-rw-r--r-- contrib/llvm-project/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp | 5
-rw-r--r-- contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp | 4
-rw-r--r-- contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 3
-rw-r--r-- contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 3
-rw-r--r-- contrib/llvm-project/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp | 64
-rw-r--r-- contrib/llvm-project/llvm/lib/Target/AArch64/AArch64LoopIdiomTransform.cpp | 2
-rw-r--r-- contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td | 18
-rw-r--r-- contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp | 98
-rw-r--r-- contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp | 3
-rw-r--r-- contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp | 8
-rw-r--r-- contrib/llvm-project/llvm/lib/Target/AArch64/SMEInstrFormats.td | 2
-rw-r--r-- contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCombine.td | 2
-rw-r--r-- contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 12
-rw-r--r-- contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp | 52
-rw-r--r-- contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp | 46
-rw-r--r-- contrib/llvm-project/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 43
-rw-r--r-- contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 17
-rw-r--r-- contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNSubtarget.h | 2
-rw-r--r-- contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp | 1
-rw-r--r-- contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 8
-rw-r--r-- contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp | 5
-rw-r--r-- contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 33
-rw-r--r-- contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.td | 33
-rw-r--r-- contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP1Instructions.td | 55
-rw-r--r-- contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP2Instructions.td | 14
-rw-r--r-- contrib/llvm-project/llvm/lib/Target/AMDGPU/VOPInstructions.td | 13
-rw-r--r-- contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.cpp | 9
-rw-r--r-- contrib/llvm-project/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp | 13
-rw-r--r-- contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp | 6
-rw-r--r-- contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td | 3
-rw-r--r-- contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td | 13
-rw-r--r-- contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 2
-rw-r--r-- contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.td | 2
-rw-r--r-- contrib/llvm-project/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp | 77
-rw-r--r-- contrib/llvm-project/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp | 6
-rw-r--r-- contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp | 40
-rw-r--r-- contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp | 2
-rw-r--r-- contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp | 7
-rw-r--r-- contrib/llvm-project/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp | 12
-rw-r--r-- contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFeatures.td | 1
-rw-r--r-- contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 46
-rw-r--r-- contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.h | 6
-rw-r--r-- contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp | 37
-rw-r--r-- contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 17
-rw-r--r-- contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoA.td | 140
-rw-r--r-- contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoD.td | 16
-rw-r--r-- contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td | 33
-rw-r--r-- contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td | 38
-rw-r--r-- contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZa.td | 98
-rw-r--r-- contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.td | 45
-rw-r--r-- contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp | 2
-rw-r--r-- contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp | 7
-rw-r--r-- contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp | 4
-rw-r--r-- contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h | 16
-rw-r--r-- contrib/llvm-project/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp | 1
-rw-r--r-- contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp | 53
-rw-r--r-- contrib/llvm-project/llvm/lib/Target/X86/X86InstrArithmetic.td | 110
-rw-r--r-- contrib/llvm-project/llvm/lib/Target/X86/X86InstrCompiler.td | 380
-rw-r--r-- contrib/llvm-project/llvm/lib/Target/X86/X86InstrMisc.td | 12
-rw-r--r-- contrib/llvm-project/llvm/lib/Target/X86/X86PfmCounters.td | 18
60 files changed, 1171 insertions, 647 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
index 90e1ce9ddf66..7d2ff146a340 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -256,6 +256,11 @@ void AArch64AsmPrinter::emitStartOfAsmFile(Module &M) {
if (BTE->getZExtValue())
Flags |= ELF::GNU_PROPERTY_AARCH64_FEATURE_1_BTI;
+ if (const auto *GCS = mdconst::extract_or_null<ConstantInt>(
+ M.getModuleFlag("guarded-control-stack")))
+ if (GCS->getZExtValue())
+ Flags |= ELF::GNU_PROPERTY_AARCH64_FEATURE_1_GCS;
+
if (const auto *Sign = mdconst::extract_or_null<ConstantInt>(
M.getModuleFlag("sign-return-address")))
if (Sign->getZExtValue())
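Note: the new GNU_PROPERTY_AARCH64_FEATURE_1_GCS bit is emitted only when the frontend has tagged the module. A minimal producer-side sketch (hypothetical, not part of this patch; behavior choice illustrative) using the standard module-flag API:

    // Mark a module so the AsmPrinter sets GNU_PROPERTY_AARCH64_FEATURE_1_GCS;
    // "guarded-control-stack" is the flag name queried above.
    M.addModuleFlag(llvm::Module::Min, "guarded-control-stack", 1);
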
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index edc8cc7d4d1e..ea5679b4d5e3 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -6834,10 +6834,10 @@ static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root) {
return getPackedVectorTypeFromPredicateType(
Ctx, Root->getOperand(6)->getValueType(0), /*NumVec=*/4);
case Intrinsic::aarch64_sve_ld1udq:
- case Intrinsic::aarch64_sve_st1udq:
+ case Intrinsic::aarch64_sve_st1dq:
return EVT(MVT::nxv1i64);
case Intrinsic::aarch64_sve_ld1uwq:
- case Intrinsic::aarch64_sve_st1uwq:
+ case Intrinsic::aarch64_sve_st1wq:
return EVT(MVT::nxv1i32);
}
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 47e665176e8b..e2d07a096496 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -4513,8 +4513,7 @@ static SDValue skipExtensionForVectorMULL(SDValue N, SelectionDAG &DAG) {
SDLoc dl(N);
SmallVector<SDValue, 8> Ops;
for (unsigned i = 0; i != NumElts; ++i) {
- ConstantSDNode *C = cast<ConstantSDNode>(N.getOperand(i));
- const APInt &CInt = C->getAPIntValue();
+ const APInt &CInt = N.getConstantOperandAPInt(i);
// Element types smaller than 32 bits are not legal, so use i32 elements.
// The values are implicitly truncated so sext vs. zext doesn't matter.
Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32));
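Note: SDValue::getConstantOperandAPInt folds the cast-and-fetch idiom into one accessor. A sketch of the equivalence (assuming operand i of N is a ConstantSDNode):

    // Old spelling: explicit cast, then read the value.
    const APInt &A = cast<ConstantSDNode>(N.getOperand(i))->getAPIntValue();
    // New spelling: same APInt reference, one call.
    const APInt &B = N.getConstantOperandAPInt(i);
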
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 1cfbf4737a6f..42b7a6418032 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -4214,6 +4214,9 @@ static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
switch (FirstOpc) {
default:
return false;
+ case AArch64::LDRQui:
+ case AArch64::LDURQi:
+ return SecondOpc == AArch64::LDRQui || SecondOpc == AArch64::LDURQi;
case AArch64::LDRWui:
case AArch64::LDURWi:
return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi;
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index b435b3ce03e7..e90b8a8ca7ac 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -1326,10 +1326,14 @@ static int alignTo(int Num, int PowOf2) {
static bool mayAlias(MachineInstr &MIa,
SmallVectorImpl<MachineInstr *> &MemInsns,
AliasAnalysis *AA) {
- for (MachineInstr *MIb : MemInsns)
- if (MIa.mayAlias(AA, *MIb, /*UseTBAA*/ false))
+ for (MachineInstr *MIb : MemInsns) {
+ if (MIa.mayAlias(AA, *MIb, /*UseTBAA*/ false)) {
+ LLVM_DEBUG(dbgs() << "Aliasing with: "; MIb->dump());
return true;
+ }
+ }
+ LLVM_DEBUG(dbgs() << "No aliases found\n");
return false;
}
@@ -1757,9 +1761,11 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
// Remember any instructions that read/write memory between FirstMI and MI.
SmallVector<MachineInstr *, 4> MemInsns;
+ LLVM_DEBUG(dbgs() << "Find match for: "; FirstMI.dump());
for (unsigned Count = 0; MBBI != E && Count < Limit;
MBBI = next_nodbg(MBBI, E)) {
MachineInstr &MI = *MBBI;
+ LLVM_DEBUG(dbgs() << "Analysing 2nd insn: "; MI.dump());
UsedInBetween.accumulate(MI);
@@ -1859,6 +1865,8 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
UsedRegUnits, TRI);
MemInsns.push_back(&MI);
+ LLVM_DEBUG(dbgs() << "Offset doesn't fit in immediate, "
+ << "keep looking.\n");
continue;
}
// If the alignment requirements of the paired (scaled) instruction
@@ -1868,6 +1876,9 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
UsedRegUnits, TRI);
MemInsns.push_back(&MI);
+ LLVM_DEBUG(dbgs()
+ << "Offset doesn't fit due to alignment requirements, "
+ << "keep looking.\n");
continue;
}
}
@@ -1884,14 +1895,22 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
const bool SameLoadReg = MayLoad && TRI->isSuperOrSubRegisterEq(
Reg, getLdStRegOp(MI).getReg());
- // If the Rt of the second instruction was not modified or used between
- // the two instructions and none of the instructions between the second
- // and first alias with the second, we can combine the second into the
- // first.
- if (ModifiedRegUnits.available(getLdStRegOp(MI).getReg()) &&
- !(MI.mayLoad() && !SameLoadReg &&
- !UsedRegUnits.available(getLdStRegOp(MI).getReg())) &&
- !mayAlias(MI, MemInsns, AA)) {
+ // If the Rt of the second instruction (destination register of the
+ // load) was not modified or used between the two instructions and none
+ // of the instructions between the second and first alias with the
+ // second, we can combine the second into the first.
+ bool RtNotModified =
+ ModifiedRegUnits.available(getLdStRegOp(MI).getReg());
+ bool RtNotUsed = !(MI.mayLoad() && !SameLoadReg &&
+ !UsedRegUnits.available(getLdStRegOp(MI).getReg()));
+
+ LLVM_DEBUG(dbgs() << "Checking, can combine 2nd into 1st insn:\n"
+ << "Reg '" << getLdStRegOp(MI) << "' not modified: "
+ << (RtNotModified ? "true" : "false") << "\n"
+ << "Reg '" << getLdStRegOp(MI) << "' not used: "
+ << (RtNotUsed ? "true" : "false") << "\n");
+
+ if (RtNotModified && RtNotUsed && !mayAlias(MI, MemInsns, AA)) {
// For pairs loading into the same reg, try to find a renaming
// opportunity to allow the renaming of Reg between FirstMI and MI
// and combine MI into FirstMI; otherwise bail and keep looking.
@@ -1904,6 +1923,8 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
UsedRegUnits, TRI);
MemInsns.push_back(&MI);
+ LLVM_DEBUG(dbgs() << "Can't find reg for renaming, "
+ << "keep looking.\n");
continue;
}
Flags.setRenameReg(*RenameReg);
@@ -1919,10 +1940,15 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
// between the two instructions and none of the instructions between the
// first and the second alias with the first, we can combine the first
// into the second.
- if (!(MayLoad &&
- !UsedRegUnits.available(getLdStRegOp(FirstMI).getReg())) &&
- !mayAlias(FirstMI, MemInsns, AA)) {
+ RtNotModified = !(
+ MayLoad && !UsedRegUnits.available(getLdStRegOp(FirstMI).getReg()));
+ LLVM_DEBUG(dbgs() << "Checking, can combine 1st into 2nd insn:\n"
+ << "Reg '" << getLdStRegOp(FirstMI)
+ << "' not modified: "
+ << (RtNotModified ? "true" : "false") << "\n");
+
+ if (RtNotModified && !mayAlias(FirstMI, MemInsns, AA)) {
if (ModifiedRegUnits.available(getLdStRegOp(FirstMI).getReg())) {
Flags.setMergeForward(true);
Flags.clearRenameReg();
@@ -1938,8 +1964,8 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
MBBIWithRenameReg = MBBI;
}
}
- // Unable to combine these instructions due to interference in between.
- // Keep looking.
+ LLVM_DEBUG(dbgs() << "Unable to combine these instructions due to "
+ << "interference in between, keep looking.\n");
}
}
@@ -1948,16 +1974,20 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
// If the instruction wasn't a matching load or store. Stop searching if we
// encounter a call instruction that might modify memory.
- if (MI.isCall())
+ if (MI.isCall()) {
+ LLVM_DEBUG(dbgs() << "Found a call, stop looking.\n");
return E;
+ }
// Update modified / uses register units.
LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
// Otherwise, if the base register is modified, we have no match, so
// return early.
- if (!ModifiedRegUnits.available(BaseReg))
+ if (!ModifiedRegUnits.available(BaseReg)) {
+ LLVM_DEBUG(dbgs() << "Base reg is modified, stop looking.\n");
return E;
+ }
// Update list of instructions that read/write memory.
if (MI.mayLoadOrStore())
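Note: the LLVM_DEBUG statements added in this file follow the usual llvm/Support/Debug.h pattern: they compile to nothing in release builds and are gated at run time by the file's DEBUG_TYPE (assumed here to be "aarch64-ldst-opt"). Sketch:

    // Printed only in asserts builds, e.g. under -debug-only=aarch64-ldst-opt.
    LLVM_DEBUG(dbgs() << "Find match for: "; FirstMI.dump());
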
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64LoopIdiomTransform.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64LoopIdiomTransform.cpp
index 6fcd9c290e9c..6c6cd120b035 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64LoopIdiomTransform.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64LoopIdiomTransform.cpp
@@ -53,7 +53,7 @@ using namespace PatternMatch;
#define DEBUG_TYPE "aarch64-loop-idiom-transform"
static cl::opt<bool>
- DisableAll("disable-aarch64-lit-all", cl::Hidden, cl::init(true),
+ DisableAll("disable-aarch64-lit-all", cl::Hidden, cl::init(false),
cl::desc("Disable AArch64 Loop Idiom Transform Pass."));
static cl::opt<bool> DisableByteCmp(
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index ee10a7d1c706..4782ad076c60 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -1397,17 +1397,17 @@ let Predicates = [HasSVEorSME] in {
(RegImmInst Z_q:$Zt, PPR3bAny:$Pg, GPR64sp:$base, (i64 0))>;
}
- // ld1quw/st1quw
+ // ld1quw/st1qw
defm : sve_ld1q_pat<nxv4i32, nxv1i1, int_aarch64_sve_ld1uwq, LD1W_Q, LD1W_Q_IMM, am_sve_regreg_lsl2>;
defm : sve_ld1q_pat<nxv4f32, nxv1i1, int_aarch64_sve_ld1uwq, LD1W_Q, LD1W_Q_IMM, am_sve_regreg_lsl2>;
- defm : sve_st1q_pat<nxv4i32, nxv1i1, int_aarch64_sve_st1uwq, ST1W_Q, ST1W_Q_IMM, am_sve_regreg_lsl2>;
- defm : sve_st1q_pat<nxv4f32, nxv1i1, int_aarch64_sve_st1uwq, ST1W_Q, ST1W_Q_IMM, am_sve_regreg_lsl2>;
+ defm : sve_st1q_pat<nxv4i32, nxv1i1, int_aarch64_sve_st1wq, ST1W_Q, ST1W_Q_IMM, am_sve_regreg_lsl2>;
+ defm : sve_st1q_pat<nxv4f32, nxv1i1, int_aarch64_sve_st1wq, ST1W_Q, ST1W_Q_IMM, am_sve_regreg_lsl2>;
- // ld1qud/st1qud
+ // ld1qud/st1qd
defm : sve_ld1q_pat<nxv2i64, nxv1i1, int_aarch64_sve_ld1udq, LD1D_Q, LD1D_Q_IMM, am_sve_regreg_lsl3>;
defm : sve_ld1q_pat<nxv2f64, nxv1i1, int_aarch64_sve_ld1udq, LD1D_Q, LD1D_Q_IMM, am_sve_regreg_lsl3>;
- defm : sve_st1q_pat<nxv2i64, nxv1i1, int_aarch64_sve_st1udq, ST1D_Q, ST1D_Q_IMM, am_sve_regreg_lsl3>;
- defm : sve_st1q_pat<nxv2f64, nxv1i1, int_aarch64_sve_st1udq, ST1D_Q, ST1D_Q_IMM, am_sve_regreg_lsl3>;
+ defm : sve_st1q_pat<nxv2i64, nxv1i1, int_aarch64_sve_st1dq, ST1D_Q, ST1D_Q_IMM, am_sve_regreg_lsl3>;
+ defm : sve_st1q_pat<nxv2f64, nxv1i1, int_aarch64_sve_st1dq, ST1D_Q, ST1D_Q_IMM, am_sve_regreg_lsl3>;
} // End HasSVEorSME
@@ -4006,7 +4006,9 @@ defm WHILEHS_CXX : sve2p1_int_while_rr_pn<"whilehs", 0b100>;
defm WHILEHI_CXX : sve2p1_int_while_rr_pn<"whilehi", 0b101>;
defm WHILELO_CXX : sve2p1_int_while_rr_pn<"whilelo", 0b110>;
defm WHILELS_CXX : sve2p1_int_while_rr_pn<"whilels", 0b111>;
+} // End HasSVE2p1_or_HasSME2
+let Predicates = [HasSVEorSME] in {
// Aliases for existing SVE instructions for which predicate-as-counter are
// accepted as an operand to the instruction
@@ -4025,7 +4027,7 @@ def : InstAlias<"mov $Pd, $Pn",
def : InstAlias<"pfalse\t$Pd", (PFALSE PNRasPPR8:$Pd), 0>;
-} // End HasSVE2p1_or_HasSME2
+}
//===----------------------------------------------------------------------===//
// Non-widening BFloat16 to BFloat16 instructions
@@ -4095,7 +4097,7 @@ defm FMAXQV : sve2p1_fp_reduction_q<0b110, "fmaxqv", int_aarch64_sve_fmaxqv>;
defm FMINQV : sve2p1_fp_reduction_q<0b111, "fminqv", int_aarch64_sve_fminqv>;
defm DUPQ_ZZI : sve2p1_dupq<"dupq">;
-defm EXTQ_ZZI : sve2p1_extq<"extq", int_aarch64_sve_extq_lane>;
+defm EXTQ_ZZI : sve2p1_extq<"extq", int_aarch64_sve_extq>;
defm PMOV_PZI : sve2p1_vector_to_pred<"pmov", int_aarch64_sve_pmov_to_pred_lane, int_aarch64_sve_pmov_to_pred_lane_zero>;
defm PMOV_ZIP : sve2p1_pred_to_vector<"pmov", int_aarch64_sve_pmov_to_vector_lane_merging, int_aarch64_sve_pmov_to_vector_lane_zeroing>;
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index b5b8b6829178..13b5e578391d 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -1406,9 +1406,23 @@ static std::optional<Instruction *> instCombineSVEAllActive(IntrinsicInst &II,
return &II;
}
+// Simplify operations where predicate has all inactive lanes or try to replace
+// with _u form when all lanes are active
+static std::optional<Instruction *>
+instCombineSVEAllOrNoActive(InstCombiner &IC, IntrinsicInst &II,
+ Intrinsic::ID IID) {
+ if (match(II.getOperand(0), m_ZeroInt())) {
+ // llvm_ir, pred(0), op1, op2 - Spec says to return op1 when all lanes are
+ // inactive for sv[func]_m
+ return IC.replaceInstUsesWith(II, II.getOperand(1));
+ }
+ return instCombineSVEAllActive(II, IID);
+}
+
static std::optional<Instruction *> instCombineSVEVectorAdd(InstCombiner &IC,
IntrinsicInst &II) {
- if (auto II_U = instCombineSVEAllActive(II, Intrinsic::aarch64_sve_add_u))
+ if (auto II_U =
+ instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_add_u))
return II_U;
if (auto MLA = instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_mul,
Intrinsic::aarch64_sve_mla>(
@@ -1423,7 +1437,8 @@ static std::optional<Instruction *> instCombineSVEVectorAdd(InstCombiner &IC,
static std::optional<Instruction *>
instCombineSVEVectorFAdd(InstCombiner &IC, IntrinsicInst &II) {
- if (auto II_U = instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fadd_u))
+ if (auto II_U =
+ instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fadd_u))
return II_U;
if (auto FMLA =
instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_fmul,
@@ -1465,7 +1480,8 @@ instCombineSVEVectorFAddU(InstCombiner &IC, IntrinsicInst &II) {
static std::optional<Instruction *>
instCombineSVEVectorFSub(InstCombiner &IC, IntrinsicInst &II) {
- if (auto II_U = instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fsub_u))
+ if (auto II_U =
+ instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fsub_u))
return II_U;
if (auto FMLS =
instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_fmul,
@@ -1507,7 +1523,8 @@ instCombineSVEVectorFSubU(InstCombiner &IC, IntrinsicInst &II) {
static std::optional<Instruction *> instCombineSVEVectorSub(InstCombiner &IC,
IntrinsicInst &II) {
- if (auto II_U = instCombineSVEAllActive(II, Intrinsic::aarch64_sve_sub_u))
+ if (auto II_U =
+ instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_sub_u))
return II_U;
if (auto MLS = instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_mul,
Intrinsic::aarch64_sve_mls>(
@@ -1523,11 +1540,6 @@ static std::optional<Instruction *> instCombineSVEVectorMul(InstCombiner &IC,
auto *OpMultiplicand = II.getOperand(1);
auto *OpMultiplier = II.getOperand(2);
- // Canonicalise a non _u intrinsic only.
- if (II.getIntrinsicID() != IID)
- if (auto II_U = instCombineSVEAllActive(II, IID))
- return II_U;
-
// Return true if a given instruction is a unit splat value, false otherwise.
auto IsUnitSplat = [](auto *I) {
auto *SplatValue = getSplatValue(I);
@@ -1891,34 +1903,38 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
case Intrinsic::aarch64_sve_ptest_last:
return instCombineSVEPTest(IC, II);
case Intrinsic::aarch64_sve_fabd:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fabd_u);
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fabd_u);
case Intrinsic::aarch64_sve_fadd:
return instCombineSVEVectorFAdd(IC, II);
case Intrinsic::aarch64_sve_fadd_u:
return instCombineSVEVectorFAddU(IC, II);
case Intrinsic::aarch64_sve_fdiv:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fdiv_u);
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fdiv_u);
case Intrinsic::aarch64_sve_fmax:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fmax_u);
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmax_u);
case Intrinsic::aarch64_sve_fmaxnm:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fmaxnm_u);
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmaxnm_u);
case Intrinsic::aarch64_sve_fmin:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fmin_u);
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmin_u);
case Intrinsic::aarch64_sve_fminnm:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fminnm_u);
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fminnm_u);
case Intrinsic::aarch64_sve_fmla:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fmla_u);
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmla_u);
case Intrinsic::aarch64_sve_fmls:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fmls_u);
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmls_u);
case Intrinsic::aarch64_sve_fmul:
+ if (auto II_U =
+ instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmul_u))
+ return II_U;
+ return instCombineSVEVectorMul(IC, II, Intrinsic::aarch64_sve_fmul_u);
case Intrinsic::aarch64_sve_fmul_u:
return instCombineSVEVectorMul(IC, II, Intrinsic::aarch64_sve_fmul_u);
case Intrinsic::aarch64_sve_fmulx:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fmulx_u);
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmulx_u);
case Intrinsic::aarch64_sve_fnmla:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fnmla_u);
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fnmla_u);
case Intrinsic::aarch64_sve_fnmls:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fnmls_u);
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fnmls_u);
case Intrinsic::aarch64_sve_fsub:
return instCombineSVEVectorFSub(IC, II);
case Intrinsic::aarch64_sve_fsub_u:
@@ -1930,20 +1946,24 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
Intrinsic::aarch64_sve_mla_u>(
IC, II, true);
case Intrinsic::aarch64_sve_mla:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_mla_u);
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_mla_u);
case Intrinsic::aarch64_sve_mls:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_mls_u);
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_mls_u);
case Intrinsic::aarch64_sve_mul:
+ if (auto II_U =
+ instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_mul_u))
+ return II_U;
+ return instCombineSVEVectorMul(IC, II, Intrinsic::aarch64_sve_mul_u);
case Intrinsic::aarch64_sve_mul_u:
return instCombineSVEVectorMul(IC, II, Intrinsic::aarch64_sve_mul_u);
case Intrinsic::aarch64_sve_sabd:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_sabd_u);
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_sabd_u);
case Intrinsic::aarch64_sve_smax:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_smax_u);
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_smax_u);
case Intrinsic::aarch64_sve_smin:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_smin_u);
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_smin_u);
case Intrinsic::aarch64_sve_smulh:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_smulh_u);
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_smulh_u);
case Intrinsic::aarch64_sve_sub:
return instCombineSVEVectorSub(IC, II);
case Intrinsic::aarch64_sve_sub_u:
@@ -1951,31 +1971,31 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
Intrinsic::aarch64_sve_mls_u>(
IC, II, true);
case Intrinsic::aarch64_sve_uabd:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_uabd_u);
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_uabd_u);
case Intrinsic::aarch64_sve_umax:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_umax_u);
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_umax_u);
case Intrinsic::aarch64_sve_umin:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_umin_u);
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_umin_u);
case Intrinsic::aarch64_sve_umulh:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_umulh_u);
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_umulh_u);
case Intrinsic::aarch64_sve_asr:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_asr_u);
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_asr_u);
case Intrinsic::aarch64_sve_lsl:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_lsl_u);
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_lsl_u);
case Intrinsic::aarch64_sve_lsr:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_lsr_u);
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_lsr_u);
case Intrinsic::aarch64_sve_and:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_and_u);
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_and_u);
case Intrinsic::aarch64_sve_bic:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_bic_u);
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_bic_u);
case Intrinsic::aarch64_sve_eor:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_eor_u);
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_eor_u);
case Intrinsic::aarch64_sve_orr:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_orr_u);
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_orr_u);
case Intrinsic::aarch64_sve_sqsub:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_sqsub_u);
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_sqsub_u);
case Intrinsic::aarch64_sve_uqsub:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_uqsub_u);
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_uqsub_u);
case Intrinsic::aarch64_sve_tbl:
return instCombineSVETBL(IC, II);
case Intrinsic::aarch64_sve_uunpkhi:
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index b657a0954d78..302116447efc 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -1166,7 +1166,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
getActionDefinitionsBuilder(G_FMAD).lower();
// Access to floating-point environment.
- getActionDefinitionsBuilder({G_GET_FPMODE, G_SET_FPMODE, G_RESET_FPMODE})
+ getActionDefinitionsBuilder({G_GET_FPENV, G_SET_FPENV, G_RESET_FPENV,
+ G_GET_FPMODE, G_SET_FPMODE, G_RESET_FPMODE})
.libcall();
getActionDefinitionsBuilder(G_IS_FPCLASS).lower();
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
index 496ab18e9b19..6e074b6a63c4 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
@@ -120,7 +120,8 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx,
assert((!Target.getSymA() ||
Target.getSymA()->getKind() == MCSymbolRefExpr::VK_None ||
- Target.getSymA()->getKind() == MCSymbolRefExpr::VK_PLT) &&
+ Target.getSymA()->getKind() == MCSymbolRefExpr::VK_PLT ||
+ Target.getSymA()->getKind() == MCSymbolRefExpr::VK_GOTPCREL) &&
"Should only be expression-level modifiers here");
assert((!Target.getSymB() ||
@@ -206,7 +207,10 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx,
case FK_Data_2:
return R_CLS(ABS16);
case FK_Data_4:
- return R_CLS(ABS32);
+ return (!IsILP32 &&
+ Target.getAccessVariant() == MCSymbolRefExpr::VK_GOTPCREL)
+ ? ELF::R_AARCH64_GOTPCREL32
+ : R_CLS(ABS32);
case FK_Data_8:
if (IsILP32) {
Ctx.reportError(Fixup.getLoc(),
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/SMEInstrFormats.td b/contrib/llvm-project/llvm/lib/Target/AArch64/SMEInstrFormats.td
index 70f3c2c99f0f..44d9a8ac7cb6 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/SMEInstrFormats.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/SMEInstrFormats.td
@@ -1268,7 +1268,7 @@ multiclass sve2_int_perm_revd<string asm, SDPatternOperator op> {
}
class sve2_clamp<string asm, bits<2> sz, bit U, ZPRRegOp zpr_ty>
- : I<(outs zpr_ty:$Zd), (ins zpr_ty:$Zn, zpr_ty:$Zm, zpr_ty:$_Zd),
+ : I<(outs zpr_ty:$Zd), (ins zpr_ty:$_Zd, zpr_ty:$Zn, zpr_ty:$Zm),
asm, "\t$Zd, $Zn, $Zm", "", []>,
Sched<[]> {
bits<5> Zm;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCombine.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
index 0c77fe725958..b9411e205212 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
@@ -111,7 +111,7 @@ def smulu64 : GICombineRule<
[{ return matchCombine_s_mul_u64(*${smul}, ${matchinfo}); }]),
(apply [{ applyCombine_s_mul_u64(*${smul}, ${matchinfo}); }])>;
-def sign_exension_in_reg_matchdata : GIDefMatchData<"MachineInstr *">;
+def sign_exension_in_reg_matchdata : GIDefMatchData<"std::pair<MachineInstr *, unsigned>">;
def sign_extension_in_reg : GICombineRule<
(defs root:$sign_inreg, sign_exension_in_reg_matchdata:$matchinfo),
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 719ae2e8750c..41462d7a133e 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -1579,13 +1579,9 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
bool AMDGPUDAGToDAGISel::SelectBUFSOffset(SDValue ByteOffsetNode,
SDValue &SOffset) const {
- if (Subtarget->hasRestrictedSOffset()) {
- if (auto SOffsetConst = dyn_cast<ConstantSDNode>(ByteOffsetNode)) {
- if (SOffsetConst->isZero()) {
- SOffset = CurDAG->getRegister(AMDGPU::SGPR_NULL, MVT::i32);
- return true;
- }
- }
+ if (Subtarget->hasRestrictedSOffset() && isNullConstant(ByteOffsetNode)) {
+ SOffset = CurDAG->getRegister(AMDGPU::SGPR_NULL, MVT::i32);
+ return true;
}
SOffset = ByteOffsetNode;
@@ -2483,7 +2479,7 @@ void AMDGPUDAGToDAGISel::SelectDSAppendConsume(SDNode *N, unsigned IntrID) {
SDValue PtrBase = Ptr.getOperand(0);
SDValue PtrOffset = Ptr.getOperand(1);
- const APInt &OffsetVal = cast<ConstantSDNode>(PtrOffset)->getAPIntValue();
+ const APInt &OffsetVal = PtrOffset->getAsAPIntVal();
if (isDSOffsetLegal(PtrBase, OffsetVal.getZExtValue())) {
N = glueCopyToM0(N, PtrBase);
Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i32);
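Note: both SOffset rewrites (here and in SIISelLowering.cpp below) lean on the isNullConstant helper from SelectionDAGNodes.h. Sketch of the equivalence:

    // Old: nested dyn_cast plus isZero check.
    if (auto *C = dyn_cast<ConstantSDNode>(V))
      if (C->isZero()) { /* use the null register */ }
    // New: one predicate, true iff V is a constant zero.
    if (isNullConstant(V)) { /* use the null register */ }
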
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
index d2a02143e4e7..5762f1906a16 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
@@ -1026,6 +1026,51 @@ public:
return N;
}
+ /// Strip "amdgpu-no-lds-kernel-id" from any functions where we may have
+ /// introduced its use. If AMDGPUAttributor ran prior to the pass, we inferred
+ /// the lack of llvm.amdgcn.lds.kernel.id calls.
+ void removeNoLdsKernelIdFromReachable(CallGraph &CG, Function *KernelRoot) {
+ KernelRoot->removeFnAttr("amdgpu-no-lds-kernel-id");
+
+ SmallVector<Function *> Tmp({CG[KernelRoot]->getFunction()});
+ if (!Tmp.back())
+ return;
+
+ SmallPtrSet<Function *, 8> Visited;
+ bool SeenUnknownCall = false;
+
+ do {
+ Function *F = Tmp.pop_back_val();
+
+ for (auto &N : *CG[F]) {
+ if (!N.second)
+ continue;
+
+ Function *Callee = N.second->getFunction();
+ if (!Callee) {
+ if (!SeenUnknownCall) {
+ SeenUnknownCall = true;
+
+ // If we see any indirect calls, assume nothing about potential
+ // targets.
+ // TODO: This could be refined to possible LDS global users.
+ for (auto &N : *CG.getExternalCallingNode()) {
+ Function *PotentialCallee = N.second->getFunction();
+ if (!isKernelLDS(PotentialCallee))
+ PotentialCallee->removeFnAttr("amdgpu-no-lds-kernel-id");
+ }
+
+ continue;
+ }
+ }
+
+ Callee->removeFnAttr("amdgpu-no-lds-kernel-id");
+ if (Visited.insert(Callee).second)
+ Tmp.push_back(Callee);
+ }
+ } while (!Tmp.empty());
+ }
+
DenseMap<Function *, GlobalVariable *> lowerDynamicLDSVariables(
Module &M, LDSUsesInfoTy &LDSUsesInfo,
DenseSet<Function *> const &KernelsThatIndirectlyAllocateDynamicLDS,
@@ -1175,6 +1220,13 @@ public:
M, TableLookupVariablesOrdered, OrderedKernels, KernelToReplacement);
replaceUsesInInstructionsWithTableLookup(M, TableLookupVariablesOrdered,
LookupTable);
+
+ // Strip amdgpu-no-lds-kernel-id from all functions reachable from the
+ // kernel. We may have inferred this wasn't used prior to the pass.
+ //
+ // TODO: We could filter out subgraphs that do not access LDS globals.
+ for (Function *F : KernelsThatAllocateTableLDS)
+ removeNoLdsKernelIdFromReachable(CG, F);
}
DenseMap<Function *, GlobalVariable *> KernelToCreatedDynamicLDS =
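Note: removeNoLdsKernelIdFromReachable above is a standard visited-set worklist walk over the CallGraph; stripped to its skeleton (a sketch with abridged names, not the patch verbatim):

    SmallVector<Function *> Worklist({Root});
    SmallPtrSet<Function *, 8> Visited;
    do {
      Function *F = Worklist.pop_back_val();
      for (auto &N : *CG[F]) {                // outgoing call edges of F
        Function *Callee = N.second ? N.second->getFunction() : nullptr;
        if (!Callee)
          continue;                           // indirect callee: skipped in this
                                              // sketch; the patch handles it
        Callee->removeFnAttr("amdgpu-no-lds-kernel-id");
        if (Visited.insert(Callee).second)    // enqueue each callee once
          Worklist.push_back(Callee);
      }
    } while (!Worklist.empty());
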
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
index 21bfab52c6c4..bb1d6cb72e80 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
@@ -99,10 +99,10 @@ public:
// Combine unsigned buffer load and signed extension instructions to generate
// signed buffer laod instructions.
- bool matchCombineSignExtendInReg(MachineInstr &MI,
- MachineInstr *&MatchInfo) const;
- void applyCombineSignExtendInReg(MachineInstr &MI,
- MachineInstr *&MatchInfo) const;
+ bool matchCombineSignExtendInReg(
+ MachineInstr &MI, std::pair<MachineInstr *, unsigned> &MatchInfo) const;
+ void applyCombineSignExtendInReg(
+ MachineInstr &MI, std::pair<MachineInstr *, unsigned> &MatchInfo) const;
// Find the s_mul_u64 instructions where the higher bits are either
// zero-extended or sign-extended.
@@ -395,34 +395,36 @@ bool AMDGPUPostLegalizerCombinerImpl::matchRemoveFcanonicalize(
// Identify buffer_load_{u8, u16}.
bool AMDGPUPostLegalizerCombinerImpl::matchCombineSignExtendInReg(
- MachineInstr &MI, MachineInstr *&SubwordBufferLoad) const {
- Register Op0Reg = MI.getOperand(1).getReg();
- SubwordBufferLoad = MRI.getVRegDef(Op0Reg);
-
- if (!MRI.hasOneNonDBGUse(Op0Reg))
+ MachineInstr &MI, std::pair<MachineInstr *, unsigned> &MatchData) const {
+ Register LoadReg = MI.getOperand(1).getReg();
+ if (!MRI.hasOneNonDBGUse(LoadReg))
return false;
// Check if the first operand of the sign extension is a subword buffer load
// instruction.
- return SubwordBufferLoad->getOpcode() == AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE ||
- SubwordBufferLoad->getOpcode() == AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT;
+ MachineInstr *LoadMI = MRI.getVRegDef(LoadReg);
+ int64_t Width = MI.getOperand(2).getImm();
+ switch (LoadMI->getOpcode()) {
+ case AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE:
+ MatchData = {LoadMI, AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE};
+ return Width == 8;
+ case AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT:
+ MatchData = {LoadMI, AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT};
+ return Width == 16;
+ }
+ return false;
}
// Combine buffer_load_{u8, u16} and the sign extension instruction to generate
// buffer_load_{i8, i16}.
void AMDGPUPostLegalizerCombinerImpl::applyCombineSignExtendInReg(
- MachineInstr &MI, MachineInstr *&SubwordBufferLoad) const {
- // Modify the opcode and the destination of buffer_load_{u8, u16}:
- // Replace the opcode.
- unsigned Opc =
- SubwordBufferLoad->getOpcode() == AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE
- ? AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE
- : AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT;
- SubwordBufferLoad->setDesc(TII.get(Opc));
- // Update the destination register of SubwordBufferLoad with the destination
- // register of the sign extension.
+ MachineInstr &MI, std::pair<MachineInstr *, unsigned> &MatchData) const {
+ auto [LoadMI, NewOpcode] = MatchData;
+ LoadMI->setDesc(TII.get(NewOpcode));
+ // Update the destination register of the load with the destination register
+ // of the sign extension.
Register SignExtendInsnDst = MI.getOperand(0).getReg();
- SubwordBufferLoad->getOperand(0).setReg(SignExtendInsnDst);
+ LoadMI->getOperand(0).setReg(SignExtendInsnDst);
// Remove the sign extension.
MI.eraseFromParent();
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index b7f043860115..ba79affe683d 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -1342,10 +1342,8 @@ private:
unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
bool ParseRegRange(unsigned& Num, unsigned& Width);
- unsigned getRegularReg(RegisterKind RegKind,
- unsigned RegNum,
- unsigned RegWidth,
- SMLoc Loc);
+ unsigned getRegularReg(RegisterKind RegKind, unsigned RegNum, unsigned SubReg,
+ unsigned RegWidth, SMLoc Loc);
bool isRegister();
bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
@@ -2616,6 +2614,8 @@ AMDGPUAsmParser::isRegister(const AsmToken &Token,
StringRef RegName = Reg->Name;
StringRef RegSuffix = Str.substr(RegName.size());
if (!RegSuffix.empty()) {
+ RegSuffix.consume_back(".l");
+ RegSuffix.consume_back(".h");
unsigned Num;
// A single register with an index: rXX
if (getRegNum(RegSuffix, Num))
@@ -2636,12 +2636,9 @@ AMDGPUAsmParser::isRegister()
return isRegister(getToken(), peekToken());
}
-unsigned
-AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
- unsigned RegNum,
- unsigned RegWidth,
- SMLoc Loc) {
-
+unsigned AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, unsigned RegNum,
+ unsigned SubReg, unsigned RegWidth,
+ SMLoc Loc) {
assert(isRegularReg(RegKind));
unsigned AlignSize = 1;
@@ -2670,7 +2667,17 @@ AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
return AMDGPU::NoRegister;
}
- return RC.getRegister(RegIdx);
+ unsigned Reg = RC.getRegister(RegIdx);
+
+ if (SubReg) {
+ Reg = TRI->getSubReg(Reg, SubReg);
+
+ // Currently all regular registers have their .l and .h subregisters, so
+ // we should never need to generate an error here.
+ assert(Reg && "Invalid subregister!");
+ }
+
+ return Reg;
}
bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) {
@@ -2748,7 +2755,17 @@ unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
RegKind = RI->Kind;
StringRef RegSuffix = RegName.substr(RI->Name.size());
+ unsigned SubReg = NoSubRegister;
if (!RegSuffix.empty()) {
+ // We don't know the opcode till we are done parsing, so we don't know if
+ // registers should be 16 or 32 bit. It is therefore mandatory to put .l or
+ // .h to correctly specify 16 bit registers. We also can't determine class
+ // VGPR_16_Lo128 or VGPR_16, so always parse them as VGPR_16.
+ if (RegSuffix.consume_back(".l"))
+ SubReg = AMDGPU::lo16;
+ else if (RegSuffix.consume_back(".h"))
+ SubReg = AMDGPU::hi16;
+
// Single 32-bit register: vXX.
if (!getRegNum(RegSuffix, RegNum)) {
Error(Loc, "invalid register index");
@@ -2761,7 +2778,7 @@ unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
return AMDGPU::NoRegister;
}
- return getRegularReg(RegKind, RegNum, RegWidth, Loc);
+ return getRegularReg(RegKind, RegNum, SubReg, RegWidth, Loc);
}
unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
@@ -2813,7 +2830,7 @@ unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
}
if (isRegularReg(RegKind))
- Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);
+ Reg = getRegularReg(RegKind, RegNum, NoSubRegister, RegWidth, ListLoc);
return Reg;
}
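Note: the .l/.h suffixes map to the lo16/hi16 subregister indices, and composing one with a 32-bit VGPR is a single register-info query. Sketch (hypothetical register, assuming the parser's MCRegisterInfo pointer TRI):

    // v1.l (the low 16-bit half of v1) as a concrete register.
    MCRegister Lo = TRI->getSubReg(AMDGPU::VGPR1, AMDGPU::lo16);
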
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index a7d8ff0242b8..bcd93e30d6c2 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -1450,20 +1450,27 @@ bool GCNHazardRecognizer::fixLdsDirectVMEMHazard(MachineInstr *MI) {
return false;
return I.readsRegister(VDSTReg, &TRI) || I.modifiesRegister(VDSTReg, &TRI);
};
- auto IsExpiredFn = [](const MachineInstr &I, int) {
+ bool LdsdirCanWait = ST.hasLdsWaitVMSRC();
+ auto IsExpiredFn = [this, LdsdirCanWait](const MachineInstr &I, int) {
return SIInstrInfo::isVALU(I) || SIInstrInfo::isEXP(I) ||
(I.getOpcode() == AMDGPU::S_WAITCNT && !I.getOperand(0).getImm()) ||
(I.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
- AMDGPU::DepCtr::decodeFieldVmVsrc(I.getOperand(0).getImm()) == 0);
+ AMDGPU::DepCtr::decodeFieldVmVsrc(I.getOperand(0).getImm()) == 0) ||
+ (LdsdirCanWait && SIInstrInfo::isLDSDIR(I) &&
+ !TII.getNamedOperand(I, AMDGPU::OpName::waitvsrc)->getImm());
};
if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
std::numeric_limits<int>::max())
return false;
- BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
- TII.get(AMDGPU::S_WAITCNT_DEPCTR))
- .addImm(AMDGPU::DepCtr::encodeFieldVmVsrc(0));
+ if (LdsdirCanWait) {
+ TII.getNamedOperand(*MI, AMDGPU::OpName::waitvsrc)->setImm(0);
+ } else {
+ BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+ TII.get(AMDGPU::S_WAITCNT_DEPCTR))
+ .addImm(AMDGPU::DepCtr::encodeFieldVmVsrc(0));
+ }
return true;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index f6f37f5170a4..85d062a9a6f5 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -1128,6 +1128,8 @@ public:
bool hasLdsDirect() const { return getGeneration() >= GFX11; }
+ bool hasLdsWaitVMSRC() const { return getGeneration() >= GFX12; }
+
bool hasVALUPartialForwardingHazard() const {
return getGeneration() >= GFX11;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp
index d539d75fdff0..201cc8d01e2d 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp
@@ -31,7 +31,6 @@ AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(const Triple &TT,
InlineAsmEnd = ";#ASMEND";
//===--- Data Emission Directives -------------------------------------===//
- SunStyleELFSectionSwitchSyntax = true;
UsesELFSectionDirectiveForBSS = true;
//===--- Global Variable Emission Directives --------------------------===//
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 6ddc7e864fb2..5a9222e91588 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -8181,12 +8181,8 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
// SGPR_NULL to avoid generating an extra s_mov with zero.
static SDValue selectSOffset(SDValue SOffset, SelectionDAG &DAG,
const GCNSubtarget *Subtarget) {
- if (Subtarget->hasRestrictedSOffset())
- if (auto SOffsetConst = dyn_cast<ConstantSDNode>(SOffset)) {
- if (SOffsetConst->isZero()) {
- return DAG.getRegister(AMDGPU::SGPR_NULL, MVT::i32);
- }
- }
+ if (Subtarget->hasRestrictedSOffset() && isNullConstant(SOffset))
+ return DAG.getRegister(AMDGPU::SGPR_NULL, MVT::i32);
return SOffset;
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index 1cb1d32707f2..1f480c248154 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -292,7 +292,7 @@ public:
VgprVmemTypes[GprNo] = 0;
}
- void setNonKernelFunctionInitialState() {
+ void setStateOnFunctionEntryOrReturn() {
setScoreUB(VS_CNT, getWaitCountMax(VS_CNT));
PendingEvents |= WaitEventMaskForInst[VS_CNT];
}
@@ -1487,6 +1487,7 @@ void SIInsertWaitcnts::updateEventWaitcntAfter(MachineInstr &Inst,
if (callWaitsOnFunctionReturn(Inst)) {
// Act as a wait on everything
ScoreBrackets->applyWaitcnt(AMDGPU::Waitcnt::allZeroExceptVsCnt());
+ ScoreBrackets->setStateOnFunctionEntryOrReturn();
} else {
// May need to way wait for anything.
ScoreBrackets->applyWaitcnt(AMDGPU::Waitcnt());
@@ -1879,7 +1880,7 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
auto NonKernelInitialState =
std::make_unique<WaitcntBrackets>(ST, Limits, Encoding);
- NonKernelInitialState->setNonKernelFunctionInitialState();
+ NonKernelInitialState->setStateOnFunctionEntryOrReturn();
BlockInfos[&EntryBB].Incoming = std::move(NonKernelInitialState);
Modified = true;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index fee900b3efb2..e50f5f28e030 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -5276,10 +5276,15 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const {
case AMDGPU::S_FLOOR_F32: return AMDGPU::V_FLOOR_F32_e64;
case AMDGPU::S_TRUNC_F32: return AMDGPU::V_TRUNC_F32_e64;
case AMDGPU::S_RNDNE_F32: return AMDGPU::V_RNDNE_F32_e64;
- case AMDGPU::S_CEIL_F16: return AMDGPU::V_CEIL_F16_t16_e64;
- case AMDGPU::S_FLOOR_F16: return AMDGPU::V_FLOOR_F16_t16_e64;
- case AMDGPU::S_TRUNC_F16: return AMDGPU::V_TRUNC_F16_t16_e64;
- case AMDGPU::S_RNDNE_F16: return AMDGPU::V_RNDNE_F16_t16_e64;
+ case AMDGPU::S_CEIL_F16:
+ return ST.useRealTrue16Insts() ? AMDGPU::V_CEIL_F16_t16_e64
+ : AMDGPU::V_CEIL_F16_fake16_e64;
+ case AMDGPU::S_FLOOR_F16:
+ return AMDGPU::V_FLOOR_F16_fake16_e64;
+ case AMDGPU::S_TRUNC_F16:
+ return AMDGPU::V_TRUNC_F16_fake16_e64;
+ case AMDGPU::S_RNDNE_F16:
+ return AMDGPU::V_RNDNE_F16_fake16_e64;
case AMDGPU::S_ADD_F32: return AMDGPU::V_ADD_F32_e64;
case AMDGPU::S_SUB_F32: return AMDGPU::V_SUB_F32_e64;
case AMDGPU::S_MIN_F32: return AMDGPU::V_MIN_F32_e64;
@@ -5328,15 +5333,15 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const {
case AMDGPU::S_CMP_NEQ_F16: return AMDGPU::V_CMP_NEQ_F16_t16_e64;
case AMDGPU::S_CMP_NLT_F16: return AMDGPU::V_CMP_NLT_F16_t16_e64;
case AMDGPU::V_S_EXP_F32_e64: return AMDGPU::V_EXP_F32_e64;
- case AMDGPU::V_S_EXP_F16_e64: return AMDGPU::V_EXP_F16_t16_e64;
+ case AMDGPU::V_S_EXP_F16_e64: return AMDGPU::V_EXP_F16_fake16_e64;
case AMDGPU::V_S_LOG_F32_e64: return AMDGPU::V_LOG_F32_e64;
- case AMDGPU::V_S_LOG_F16_e64: return AMDGPU::V_LOG_F16_t16_e64;
+ case AMDGPU::V_S_LOG_F16_e64: return AMDGPU::V_LOG_F16_fake16_e64;
case AMDGPU::V_S_RCP_F32_e64: return AMDGPU::V_RCP_F32_e64;
- case AMDGPU::V_S_RCP_F16_e64: return AMDGPU::V_RCP_F16_t16_e64;
+ case AMDGPU::V_S_RCP_F16_e64: return AMDGPU::V_RCP_F16_fake16_e64;
case AMDGPU::V_S_RSQ_F32_e64: return AMDGPU::V_RSQ_F32_e64;
- case AMDGPU::V_S_RSQ_F16_e64: return AMDGPU::V_RSQ_F16_t16_e64;
+ case AMDGPU::V_S_RSQ_F16_e64: return AMDGPU::V_RSQ_F16_fake16_e64;
case AMDGPU::V_S_SQRT_F32_e64: return AMDGPU::V_SQRT_F32_e64;
- case AMDGPU::V_S_SQRT_F16_e64: return AMDGPU::V_SQRT_F16_t16_e64;
+ case AMDGPU::V_S_SQRT_F16_e64: return AMDGPU::V_SQRT_F16_fake16_e64;
}
llvm_unreachable(
"Unexpected scalar opcode without corresponding vector one!");
@@ -7266,8 +7271,14 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
if (AMDGPU::getNamedOperandIdx(NewOpcode,
AMDGPU::OpName::src0_modifiers) >= 0)
NewInstr.addImm(0);
- if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src0) >= 0)
- NewInstr->addOperand(Inst.getOperand(1));
+ if (AMDGPU::hasNamedOperand(NewOpcode, AMDGPU::OpName::src0)) {
+ MachineOperand Src = Inst.getOperand(1);
+ if (AMDGPU::isTrue16Inst(NewOpcode) && ST.useRealTrue16Insts() &&
+ Src.isReg() && RI.isVGPR(MRI, Src.getReg()))
+ NewInstr.addReg(Src.getReg(), 0, AMDGPU::lo16);
+ else
+ NewInstr->addOperand(Src);
+ }
if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) {
// We are converting these to a BFE, so we need to add the missing
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index f07b8fa0ea4c..04c92155f5aa 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -1773,28 +1773,27 @@ class getIns64 <RegisterOperand Src0RC, RegisterOperand Src1RC,
class getInsVOP3Base<RegisterOperand Src0RC, RegisterOperand Src1RC,
RegisterOperand Src2RC, int NumSrcArgs,
bit HasClamp, bit HasModifiers, bit HasSrc2Mods, bit HasOMod,
- Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOpSel,
- bit IsVOP3P> {
+ Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOpSel> {
// getInst64 handles clamp and omod. implicit mutex between vop3p and omod
dag base = getIns64 <Src0RC, Src1RC, Src2RC, NumSrcArgs,
HasClamp, HasModifiers, HasSrc2Mods, HasOMod,
Src0Mod, Src1Mod, Src2Mod>.ret;
dag opsel = (ins op_sel0:$op_sel);
- dag vop3pOpsel = (ins op_sel_hi0:$op_sel_hi);
- dag vop3pFields = !con(!if(HasOpSel, vop3pOpsel, (ins)), (ins neg_lo0:$neg_lo, neg_hi0:$neg_hi));
-
- dag ret = !con(base,
- !if(HasOpSel, opsel,(ins)),
- !if(IsVOP3P, vop3pFields,(ins)));
+ dag ret = !con(base, !if(HasOpSel, opsel, (ins)));
}
class getInsVOP3P <RegisterOperand Src0RC, RegisterOperand Src1RC,
RegisterOperand Src2RC, int NumSrcArgs, bit HasClamp, bit HasOpSel,
Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> {
- dag ret = getInsVOP3Base<Src0RC, Src1RC, Src2RC, NumSrcArgs,
+ dag base = getInsVOP3Base<Src0RC, Src1RC, Src2RC, NumSrcArgs,
HasClamp, 1/*HasModifiers*/, 1/*HasSrc2Mods*/,
- 0/*HasOMod*/, Src0Mod, Src1Mod, Src2Mod,
- HasOpSel, 1/*IsVOP3P*/>.ret;
+ 0/*HasOMod*/, Src0Mod, Src1Mod, Src2Mod, HasOpSel>.ret;
+
+ dag vop3pOpsel = (ins op_sel_hi0:$op_sel_hi);
+ dag vop3p_neg = (ins neg_lo0:$neg_lo, neg_hi0:$neg_hi);
+
+ dag vop3pFields = !con(!if(HasOpSel, vop3pOpsel, (ins)), vop3p_neg);
+ dag ret = !con(base, vop3pFields);
}
class getInsVOP3OpSel <RegisterOperand Src0RC, RegisterOperand Src1RC,
@@ -1804,7 +1803,7 @@ class getInsVOP3OpSel <RegisterOperand Src0RC, RegisterOperand Src1RC,
dag ret = getInsVOP3Base<Src0RC, Src1RC,
Src2RC, NumSrcArgs,
HasClamp, 1/*HasModifiers*/, 1/*HasSrc2Mods*/, HasOMod,
- Src0Mod, Src1Mod, Src2Mod, 1/*HasOpSel*/, 0>.ret;
+ Src0Mod, Src1Mod, Src2Mod, /*HasOpSel=*/1>.ret;
}
class getInsDPPBase <RegisterOperand OldRC, RegisterClass Src0RC, RegisterClass Src1RC,
@@ -2390,9 +2389,15 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableClamp = 0> {
field dag InsDPP8 = getInsDPP8<DstRCDPP, Src0DPP, Src1DPP, Src2DPP,
NumSrcArgs, HasModifiers,
Src0ModDPP, Src1ModDPP, Src2ModDPP>.ret;
- field dag InsVOP3Base = getInsVOP3Base<Src0VOP3DPP, Src1VOP3DPP,
+ defvar InsVOP3DPPBase = getInsVOP3Base<Src0VOP3DPP, Src1VOP3DPP,
Src2VOP3DPP, NumSrcArgs, HasClamp, HasModifiers, HasSrc2Mods, HasOMod,
- Src0ModVOP3DPP, Src1ModVOP3DPP, Src2ModVOP3DPP, HasOpSel, IsVOP3P>.ret;
+ Src0ModVOP3DPP, Src1ModVOP3DPP, Src2ModVOP3DPP, HasOpSel>.ret;
+ defvar InsVOP3PDPPBase = getInsVOP3P<Src0VOP3DPP, Src1VOP3DPP,
+ Src2VOP3DPP, NumSrcArgs, HasClamp, HasOpSel,
+ Src0ModVOP3DPP, Src1ModVOP3DPP, Src2ModVOP3DPP>.ret;
+
+ field dag InsVOP3Base = !if(IsVOP3P, InsVOP3PDPPBase, InsVOP3DPPBase);
+
field dag InsVOP3DPP = getInsVOP3DPP<InsVOP3Base, DstRCVOP3DPP, NumSrcArgs>.ret;
field dag InsVOP3DPP16 = getInsVOP3DPP16<InsVOP3Base, DstRCVOP3DPP, NumSrcArgs>.ret;
field dag InsVOP3DPP8 = getInsVOP3DPP8<InsVOP3Base, DstRCVOP3DPP, NumSrcArgs>.ret;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index 27a7c29cb1ac..99960c94e598 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -74,6 +74,7 @@ class VOP1_Real <VOP1_Pseudo ps, int EncodingFamily, string real_name = ps.Mnemo
// copy relevant pseudo op flags
let SubtargetPredicate = ps.SubtargetPredicate;
+ let OtherPredicates = ps.OtherPredicates;
let AsmMatchConverter = ps.AsmMatchConverter;
let AsmVariantName = ps.AsmVariantName;
let Constraints = ps.Constraints;
@@ -157,8 +158,11 @@ multiclass VOP1Inst_t16<string opName,
let OtherPredicates = [NotHasTrue16BitInsts, Has16BitInsts] in {
defm NAME : VOP1Inst<opName, P, node>;
}
- let OtherPredicates = [HasTrue16BitInsts] in {
- defm _t16 : VOP1Inst<opName#"_t16", VOPProfile_Fake16<P>, node>;
+ let OtherPredicates = [UseRealTrue16Insts] in {
+ defm _t16 : VOP1Inst<opName#"_t16", VOPProfile_True16<P>, node>;
+ }
+ let OtherPredicates = [UseFakeTrue16Insts] in {
+ defm _fake16 : VOP1Inst<opName#"_fake16", VOPProfile_Fake16<P>, node>;
}
}
@@ -679,6 +683,7 @@ class VOP1_DPP<bits<8> op, VOP1_DPP_Pseudo ps, VOPProfile p = ps.Pfl, bit isDPP1
let SchedRW = ps.SchedRW;
let Uses = ps.Uses;
let TRANS = ps.TRANS;
+ let OtherPredicates = ps.OtherPredicates;
bits<8> vdst;
let Inst{8-0} = 0xfa;
@@ -707,6 +712,7 @@ class VOP1_DPP8<bits<8> op, VOP1_Pseudo ps, VOPProfile p = ps.Pfl> :
let Defs = ps.Defs;
let SchedRW = ps.SchedRW;
let Uses = ps.Uses;
+ let OtherPredicates = ps.OtherPredicates;
bits<8> vdst;
let Inst{8-0} = fi;
@@ -742,7 +748,9 @@ multiclass VOP1_Real_e32<GFXGen Gen, bits<9> op, string opName = NAME> {
multiclass VOP1_Real_e32_with_name<GFXGen Gen, bits<9> op, string opName,
string asmName> {
defvar ps = !cast<VOP1_Pseudo>(opName#"_e32");
- let AsmString = asmName # ps.AsmOperands in {
+ let AsmString = asmName # ps.AsmOperands,
+ DecoderNamespace = Gen.DecoderNamespace #
+ !if(ps.Pfl.IsRealTrue16, "", "_FAKE16") in {
defm NAME : VOP1_Real_e32<Gen, op, opName>;
}
}
@@ -761,7 +769,9 @@ multiclass VOP1_Real_dpp<GFXGen Gen, bits<9> op, string opName = NAME> {
multiclass VOP1_Real_dpp_with_name<GFXGen Gen, bits<9> op, string opName,
string asmName> {
defvar ps = !cast<VOP1_Pseudo>(opName#"_e32");
- let AsmString = asmName # ps.Pfl.AsmDPP16 in {
+ let AsmString = asmName # ps.Pfl.AsmDPP16,
+ DecoderNamespace = "DPP" # Gen.DecoderNamespace #
+ !if(ps.Pfl.IsRealTrue16, "", "_FAKE16") in {
defm NAME : VOP1_Real_dpp<Gen, op, opName>;
}
}
@@ -774,7 +784,9 @@ multiclass VOP1_Real_dpp8<GFXGen Gen, bits<9> op, string opName = NAME> {
multiclass VOP1_Real_dpp8_with_name<GFXGen Gen, bits<9> op, string opName,
string asmName> {
defvar ps = !cast<VOP1_Pseudo>(opName#"_e32");
- let AsmString = asmName # ps.Pfl.AsmDPP8 in {
+ let AsmString = asmName # ps.Pfl.AsmDPP8,
+ DecoderNamespace = "DPP8" # Gen.DecoderNamespace #
+ !if(ps.Pfl.IsRealTrue16, "", "_FAKE16") in {
defm NAME : VOP1_Real_dpp8<Gen, op, opName>;
}
}
@@ -854,29 +866,30 @@ defm V_CLS_I32 : VOP1_Real_FULL_with_name_gfx11_gfx12<0x03b,
"V_FFBH_I32", "v_cls_i32">;
defm V_PERMLANE64_B32 : VOP1Only_Real_gfx11_gfx12<0x067>;
defm V_MOV_B16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x01c, "v_mov_b16">;
-defm V_NOT_B16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x069, "v_not_b16">;
-defm V_CVT_I32_I16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x06a, "v_cvt_i32_i16">;
-defm V_CVT_U32_U16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x06b, "v_cvt_u32_u16">;
+defm V_NOT_B16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x069, "v_not_b16">;
+defm V_CVT_I32_I16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x06a, "v_cvt_i32_i16">;
+defm V_CVT_U32_U16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x06b, "v_cvt_u32_u16">;
defm V_CVT_F16_U16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x050, "v_cvt_f16_u16">;
defm V_CVT_F16_I16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x051, "v_cvt_f16_i16">;
defm V_CVT_U16_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x052, "v_cvt_u16_f16">;
defm V_CVT_I16_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x053, "v_cvt_i16_f16">;
-defm V_RCP_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x054, "v_rcp_f16">;
-defm V_SQRT_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x055, "v_sqrt_f16">;
-defm V_RSQ_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x056, "v_rsq_f16">;
-defm V_LOG_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x057, "v_log_f16">;
-defm V_EXP_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x058, "v_exp_f16">;
-defm V_FREXP_MANT_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x059, "v_frexp_mant_f16">;
+defm V_RCP_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x054, "v_rcp_f16">;
+defm V_SQRT_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x055, "v_sqrt_f16">;
+defm V_RSQ_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x056, "v_rsq_f16">;
+defm V_LOG_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x057, "v_log_f16">;
+defm V_EXP_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x058, "v_exp_f16">;
+defm V_FREXP_MANT_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x059, "v_frexp_mant_f16">;
defm V_FREXP_EXP_I16_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05a, "v_frexp_exp_i16_f16">;
-defm V_FLOOR_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05b, "v_floor_f16">;
+defm V_FLOOR_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05b, "v_floor_f16">;
defm V_CEIL_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05c, "v_ceil_f16">;
-defm V_TRUNC_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05d, "v_trunc_f16">;
-defm V_RNDNE_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05e, "v_rndne_f16">;
-defm V_FRACT_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05f, "v_fract_f16">;
-defm V_SIN_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x060, "v_sin_f16">;
-defm V_COS_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x061, "v_cos_f16">;
-defm V_SAT_PK_U8_I16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x062, "v_sat_pk_u8_i16">;
+defm V_CEIL_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05c, "v_ceil_f16">;
+defm V_TRUNC_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05d, "v_trunc_f16">;
+defm V_RNDNE_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05e, "v_rndne_f16">;
+defm V_FRACT_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05f, "v_fract_f16">;
+defm V_SIN_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x060, "v_sin_f16">;
+defm V_COS_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x061, "v_cos_f16">;
+defm V_SAT_PK_U8_I16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x062, "v_sat_pk_u8_i16">;
defm V_CVT_NORM_I16_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x063, "v_cvt_norm_i16_f16">;
defm V_CVT_NORM_U16_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x064, "v_cvt_norm_u16_f16">;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index ecee61daa1c8..48d4e259bc1c 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -111,8 +111,8 @@ class VOP2_Real <VOP2_Pseudo ps, int EncodingFamily, string real_name = ps.Mnemo
class VOP2_Real_Gen <VOP2_Pseudo ps, GFXGen Gen, string real_name = ps.Mnemonic> :
VOP2_Real <ps, Gen.Subtarget, real_name> {
- let AssemblerPredicate = !if(ps.Pfl.IsRealTrue16, UseRealTrue16Insts,
- Gen.AssemblerPredicate);
+ let AssemblerPredicate = Gen.AssemblerPredicate;
+ let OtherPredicates = !if(ps.Pfl.IsRealTrue16, [UseRealTrue16Insts], []);
let DecoderNamespace = Gen.DecoderNamespace#
!if(ps.Pfl.IsRealTrue16, "", "_FAKE16");
}
@@ -437,7 +437,7 @@ class VOP_MAC <ValueType vt0, ValueType vt1=vt0> : VOPProfile <[vt0, vt1, vt1, v
let InsDPP16 = !con(InsDPP, (ins FI:$fi));
let InsVOP3Base = getInsVOP3Base<Src0VOP3DPP, Src1VOP3DPP, RegisterOperand<VGPR_32>, 3,
0, HasModifiers, HasModifiers, HasOMod,
- Src0ModVOP3DPP, Src1ModVOP3DPP, Src2Mod, HasOpSel, 0/*IsVOP3P*/>.ret;
+ Src0ModVOP3DPP, Src1ModVOP3DPP, Src2Mod, HasOpSel>.ret;
// We need a dummy src2 tied to dst to track the use of that register for s_delay_alu
let InsVOPDX = (ins Src0RC32:$src0X, Src1RC32:$vsrc1X, VGPRSrc_32:$src2X);
let InsVOPDXDeferred =
@@ -1275,8 +1275,8 @@ class VOP2_DPP16<bits<6> op, VOP2_DPP_Pseudo ps, int subtarget,
class VOP2_DPP16_Gen<bits<6> op, VOP2_DPP_Pseudo ps, GFXGen Gen,
string opName = ps.OpName, VOPProfile p = ps.Pfl> :
VOP2_DPP16<op, ps, Gen.Subtarget, opName, p> {
- let AssemblerPredicate = !if(ps.Pfl.IsRealTrue16, UseRealTrue16Insts,
- Gen.AssemblerPredicate);
+ let AssemblerPredicate = Gen.AssemblerPredicate;
+ let OtherPredicates = !if(ps.Pfl.IsRealTrue16, [UseRealTrue16Insts], []);
let DecoderNamespace = "DPP"#Gen.DecoderNamespace#
!if(ps.Pfl.IsRealTrue16, "", "_FAKE16");
}
@@ -1304,8 +1304,8 @@ class VOP2_DPP8<bits<6> op, VOP2_Pseudo ps,
class VOP2_DPP8_Gen<bits<6> op, VOP2_Pseudo ps, GFXGen Gen,
VOPProfile p = ps.Pfl> :
VOP2_DPP8<op, ps, p> {
- let AssemblerPredicate = !if(ps.Pfl.IsRealTrue16, UseRealTrue16Insts,
- Gen.AssemblerPredicate);
+ let AssemblerPredicate = Gen.AssemblerPredicate;
+ let OtherPredicates = !if(ps.Pfl.IsRealTrue16, [UseRealTrue16Insts], []);
let DecoderNamespace = "DPP8"#Gen.DecoderNamespace#
!if(ps.Pfl.IsRealTrue16, "", "_FAKE16");
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOPInstructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOPInstructions.td
index fd4626d902ac..c4b9e7063093 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOPInstructions.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOPInstructions.td
@@ -208,8 +208,8 @@ class VOP3_Real <VOP_Pseudo ps, int EncodingFamily, string asm_name = ps.Mnemoni
class VOP3_Real_Gen <VOP_Pseudo ps, GFXGen Gen, string asm_name = ps.Mnemonic> :
VOP3_Real <ps, Gen.Subtarget, asm_name> {
- let AssemblerPredicate = !if(ps.Pfl.IsRealTrue16, UseRealTrue16Insts,
- Gen.AssemblerPredicate);
+ let AssemblerPredicate = Gen.AssemblerPredicate;
+ let OtherPredicates = !if(ps.Pfl.IsRealTrue16, [UseRealTrue16Insts], []);
let DecoderNamespace = Gen.DecoderNamespace#
!if(ps.Pfl.IsRealTrue16, "", "_FAKE16");
}
@@ -1340,8 +1340,8 @@ class VOP3_DPP16<bits<10> op, VOP_DPP_Pseudo ps, int subtarget,
class VOP3_DPP16_Gen<bits<10> op, VOP_DPP_Pseudo ps, GFXGen Gen,
string opName = ps.OpName> :
VOP3_DPP16 <op, ps, Gen.Subtarget, opName> {
- let AssemblerPredicate = !if(ps.Pfl.IsRealTrue16, UseRealTrue16Insts,
- Gen.AssemblerPredicate);
+ let AssemblerPredicate = Gen.AssemblerPredicate;
+ let OtherPredicates = !if(ps.Pfl.IsRealTrue16, [UseRealTrue16Insts], []);
let DecoderNamespace = "DPP"#Gen.DecoderNamespace#
!if(ps.Pfl.IsRealTrue16, "", "_FAKE16");
}
@@ -1470,9 +1470,8 @@ multiclass VOP3_Real_dpp8_with_name<GFXGen Gen, bits<10> op, string opName,
let AsmString = asmName # ps.Pfl.AsmVOP3DPP8,
DecoderNamespace = "DPP8"#Gen.DecoderNamespace#
!if(ps.Pfl.IsRealTrue16, "", "_FAKE16"),
- AssemblerPredicate = !if(ps.Pfl.IsRealTrue16, UseRealTrue16Insts,
- Gen.AssemblerPredicate) in {
-
+ OtherPredicates = !if(ps.Pfl.IsRealTrue16, [UseRealTrue16Insts],
+ [TruePredicate]) in {
defm NAME : VOP3_Real_dpp8_Base<Gen, op, opName>;
}
}
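
The VOP1/VOP2/VOP3 changes above all follow one scheme: the true16-vs-fake16 distinction moves from AssemblerPredicate into OtherPredicates (so it composes with the generation predicate instead of replacing it), and fake-16 encodings are segregated into decoder namespaces carrying a _FAKE16 suffix. A toy C++ model of the namespace selection, mirroring the !if(ps.Pfl.IsRealTrue16, "", "_FAKE16") logic (function name is illustrative):

#include <string>

// Real-true16 encodings decode from the plain per-generation table; fake-16
// ones get a "_FAKE16" table of their own, so the two operand layouts never
// share a decoder table.
static std::string decoderNamespace(const std::string &Prefix,
                                    const std::string &Gen,
                                    bool IsRealTrue16) {
  return Prefix + Gen + (IsRealTrue16 ? "" : "_FAKE16");
}

// decoderNamespace("DPP8", "GFX12", false) yields "DPP8GFX12_FAKE16".
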
diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 568085bd0ab3..f8a281032c77 100644
--- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -9577,8 +9577,7 @@ static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) {
SmallVector<SDValue, 8> Ops;
SDLoc dl(N);
for (unsigned i = 0; i != NumElts; ++i) {
- ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i));
- const APInt &CInt = C->getAPIntValue();
+ const APInt &CInt = N->getConstantOperandAPInt(i);
// Element types smaller than 32 bits are not legal, so use i32 elements.
// The values are implicitly truncated so sext vs. zext doesn't matter.
Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32));
@@ -18080,8 +18079,7 @@ SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &D
SDValue Op0 = CMOV->getOperand(0);
SDValue Op1 = CMOV->getOperand(1);
- auto CCNode = cast<ConstantSDNode>(CMOV->getOperand(2));
- auto CC = CCNode->getAPIntValue().getLimitedValue();
+ auto CC = CMOV->getConstantOperandAPInt(2).getLimitedValue();
SDValue CmpZ = CMOV->getOperand(4);
// The compare must be against zero.
@@ -20109,8 +20107,7 @@ void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
// The operand to BFI is already a mask suitable for removing the bits it
// sets.
- ConstantSDNode *CI = cast<ConstantSDNode>(Op.getOperand(2));
- const APInt &Mask = CI->getAPIntValue();
+ const APInt &Mask = Op.getConstantOperandAPInt(2);
Known.Zero &= Mask;
Known.One &= Mask;
return;
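
The three ARM hunks are one mechanical cleanup: read a constant operand's APInt via SDNode::getConstantOperandAPInt instead of casting the operand to ConstantSDNode first. A minimal sketch of the two equivalent forms (hypothetical helper functions; only the accessors come from the patch):

#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

// Pre-patch form: explicit cast, then fetch the APInt.
static const APInt &constOpOld(SDNode *N, unsigned I) {
  ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(I));
  return C->getAPIntValue();
}

// Post-patch form: one accessor; it asserts the operand really is a
// ConstantSDNode, matching the cast<> behavior above.
static const APInt &constOpNew(SDNode *N, unsigned I) {
  return N->getConstantOperandAPInt(I);
}
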
diff --git a/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp
index e68904863cfc..fc066f001316 100644
--- a/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp
@@ -1149,15 +1149,10 @@ SDValue MSP430TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
// but they are different from CMP.
// FIXME: since we're doing post-processing, use a pseudo instruction here,
// so lowering & isel don't diverge.
- bool andCC = false;
- if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
- if (RHSC->isZero() && LHS.hasOneUse() &&
- (LHS.getOpcode() == ISD::AND ||
- (LHS.getOpcode() == ISD::TRUNCATE &&
- LHS.getOperand(0).getOpcode() == ISD::AND))) {
- andCC = true;
- }
- }
+ bool andCC = isNullConstant(RHS) && LHS.hasOneUse() &&
+ (LHS.getOpcode() == ISD::AND ||
+ (LHS.getOpcode() == ISD::TRUNCATE &&
+ LHS.getOperand(0).getOpcode() == ISD::AND));
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
SDValue TargetCC;
SDValue Flag = EmitCMP(LHS, RHS, TargetCC, CC, dl, DAG);
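
The MSP430 hunk folds the dyn_cast-and-test into llvm::isNullConstant, which is true exactly when an SDValue is a ConstantSDNode of value zero, so the nested ifs collapse into one boolean initializer. A short sketch of the equivalence (hypothetical wrapper names):

#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;

static bool isZeroOld(SDValue V) {
  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(V))
    return C->isZero();
  return false; // non-constants never matched before either
}

static bool isZeroNew(SDValue V) { return isNullConstant(V); }
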
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index c65090d915ef..34c5569b8076 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -2019,9 +2019,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
DL, RetTy, Args, Outs, retAlignment,
HasVAArgs
? std::optional<std::pair<unsigned, const APInt &>>(std::make_pair(
- CLI.NumFixedArgs,
- cast<ConstantSDNode>(VADeclareParam->getOperand(1))
- ->getAPIntValue()))
+ CLI.NumFixedArgs, VADeclareParam->getConstantOperandAPInt(1)))
: std::nullopt,
*CB, UniqueCallSite);
const char *ProtoStr = nvTM->getStrPool().save(Proto).data();
@@ -2297,7 +2295,7 @@ SDValue NVPTXTargetLowering::LowerBUILD_VECTOR(SDValue Op,
if (VT == MVT::v2f16 || VT == MVT::v2bf16)
Value = cast<ConstantFPSDNode>(Operand)->getValueAPF().bitcastToAPInt();
else if (VT == MVT::v2i16 || VT == MVT::v4i8)
- Value = cast<ConstantSDNode>(Operand)->getAPIntValue();
+ Value = Operand->getAsAPIntVal();
else
llvm_unreachable("Unsupported type");
// i8 values are carried around as i16, so we need to zero out upper bits,
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index 13665985f52e..e1cced327544 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -164,6 +164,9 @@ def True : Predicate<"true">;
class hasPTX<int version>: Predicate<"Subtarget->getPTXVersion() >= " # version>;
class hasSM<int version>: Predicate<"Subtarget->getSmVersion() >= " # version>;
+// Explicit records for arch-accelerated SM versions
+def hasSM90a : Predicate<"Subtarget->getFullSmVersion() == 901">;
+
// non-sync shfl instructions are not available on sm_70+ in PTX6.4+
def hasSHFL : Predicate<"!(Subtarget->getSmVersion() >= 70"
"&& Subtarget->getPTXVersion() >= 64)">;
diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
index 85eae44f349a..6b062a7f3912 100644
--- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -6727,3 +6727,16 @@ def is_explicit_cluster: NVPTXInst<(outs Int1Regs:$d), (ins),
"mov.pred\t$d, %is_explicit_cluster;",
[(set Int1Regs:$d, (int_nvvm_is_explicit_cluster))]>,
Requires<[hasSM<90>, hasPTX<78>]>;
+
+// setmaxnreg inc/dec intrinsics
+let isConvergent = true in {
+multiclass SET_MAXNREG<string Action, Intrinsic Intr> {
+ def : NVPTXInst<(outs), (ins i32imm:$reg_count),
+ "setmaxnreg." # Action # ".sync.aligned.u32 $reg_count;",
+ [(Intr timm:$reg_count)]>,
+ Requires<[hasSM90a, hasPTX<80>]>;
+}
+
+defm INT_SET_MAXNREG_INC : SET_MAXNREG<"inc", int_nvvm_setmaxnreg_inc_sync_aligned_u32>;
+defm INT_SET_MAXNREG_DEC : SET_MAXNREG<"dec", int_nvvm_setmaxnreg_dec_sync_aligned_u32>;
+} // isConvergent
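
These records publish the llvm.nvvm.setmaxnreg.{inc,dec}.sync.aligned.u32 intrinsics as the PTX setmaxnreg instruction, gated on sm_90a plus PTX 8.0. A hedged sketch of emitting such a call from C++ (the Intrinsic:: enum name is derived from the int_ record above; the register count 232 is illustrative, and since the operand is a timm it must be a compile-time constant):

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/IR/Module.h"
using namespace llvm;

// Emits: call void @llvm.nvvm.setmaxnreg.inc.sync.aligned.u32(i32 232)
static void emitSetMaxNRegInc(Module &M, IRBuilder<> &B) {
  Function *F = Intrinsic::getDeclaration(
      &M, Intrinsic::nvvm_setmaxnreg_inc_sync_aligned_u32);
  B.CreateCall(F, {B.getInt32(232)});
}
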
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 235df1880b37..4e164fda1d8d 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -16241,7 +16241,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
// Since we are doing this pre-legalize, the RHS can be a constant of
// arbitrary bitwidth which may cause issues when trying to get the value
// from the underlying APInt.
- auto RHSAPInt = cast<ConstantSDNode>(RHS)->getAPIntValue();
+ auto RHSAPInt = RHS->getAsAPIntVal();
if (!RHSAPInt.isIntN(64))
break;
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index b1601739fd45..bf756e39bd5d 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -1909,7 +1909,7 @@ def STWAT : X_RD5_RS5_IM5<31, 710, (outs), (ins gprc:$RST, gprc:$RA, u5imm:$RB),
"stwat $RST, $RA, $RB", IIC_LdStStore>,
Requires<[IsISA3_0]>;
-let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in
+let isTrap = 1, hasCtrlDep = 1 in
def TRAP : XForm_24<31, 4, (outs), (ins), "trap", IIC_LdStLoad, [(trap)]>;
def TWI : DForm_base<3, (outs), (ins u5imm:$RST, gprc:$RA, s16imm:$D, variable_ops),
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
index d616aaeddf41..7d42481db57f 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
@@ -199,6 +199,8 @@ class RISCVAsmParser : public MCTargetAsmParser {
ParseStatus parseInsnDirectiveOpcode(OperandVector &Operands);
ParseStatus parseInsnCDirectiveOpcode(OperandVector &Operands);
ParseStatus parseGPRAsFPR(OperandVector &Operands);
+ template <bool IsRV64Inst> ParseStatus parseGPRPair(OperandVector &Operands);
+ ParseStatus parseGPRPair(OperandVector &Operands, bool IsRV64Inst);
ParseStatus parseFRMArg(OperandVector &Operands);
ParseStatus parseFenceArg(OperandVector &Operands);
ParseStatus parseReglist(OperandVector &Operands);
@@ -466,6 +468,12 @@ public:
bool isGPRAsFPR() const { return isGPR() && Reg.IsGPRAsFPR; }
+ bool isGPRPair() const {
+ return Kind == KindTy::Register &&
+ RISCVMCRegisterClasses[RISCV::GPRPairRegClassID].contains(
+ Reg.RegNum);
+ }
+
static bool evaluateConstantImm(const MCExpr *Expr, int64_t &Imm,
RISCVMCExpr::VariantKind &VK) {
if (auto *RE = dyn_cast<RISCVMCExpr>(Expr)) {
@@ -1295,11 +1303,15 @@ unsigned RISCVAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
const MCInstrDesc &MCID = MII.get(Inst.getOpcode());
for (unsigned I = 0; I < MCID.NumOperands; ++I) {
- if (MCID.operands()[I].RegClass == RISCV::GPRPF64RegClassID) {
+ if (MCID.operands()[I].RegClass == RISCV::GPRPairRegClassID) {
const auto &Op = Inst.getOperand(I);
assert(Op.isReg());
MCRegister Reg = Op.getReg();
+ if (RISCVMCRegisterClasses[RISCV::GPRPairRegClassID].contains(Reg))
+ continue;
+
+ // FIXME: We should form a paired register during parsing/matching.
if (((Reg.id() - RISCV::X0) & 1) != 0)
return Match_RequiresEvenGPRs;
}
@@ -2222,6 +2234,48 @@ ParseStatus RISCVAsmParser::parseGPRAsFPR(OperandVector &Operands) {
return ParseStatus::Success;
}
+template <bool IsRV64>
+ParseStatus RISCVAsmParser::parseGPRPair(OperandVector &Operands) {
+ return parseGPRPair(Operands, IsRV64);
+}
+
+ParseStatus RISCVAsmParser::parseGPRPair(OperandVector &Operands,
+ bool IsRV64Inst) {
+ // If this is not an RV64 GPRPair instruction, don't parse as a GPRPair on
+ // RV64 as it will prevent matching the RV64 version of the same instruction
+ // that doesn't use a GPRPair.
+ // If this is an RV64 GPRPair instruction, there is no RV32 version so we can
+ // still parse as a pair.
+ if (!IsRV64Inst && isRV64())
+ return ParseStatus::NoMatch;
+
+ if (getLexer().isNot(AsmToken::Identifier))
+ return ParseStatus::NoMatch;
+
+ StringRef Name = getLexer().getTok().getIdentifier();
+ MCRegister RegNo = matchRegisterNameHelper(isRVE(), Name);
+
+ if (!RegNo)
+ return ParseStatus::NoMatch;
+
+ if (!RISCVMCRegisterClasses[RISCV::GPRRegClassID].contains(RegNo))
+ return ParseStatus::NoMatch;
+
+ if ((RegNo - RISCV::X0) & 1)
+ return TokError("register must be even");
+
+ SMLoc S = getLoc();
+ SMLoc E = SMLoc::getFromPointer(S.getPointer() + Name.size());
+ getLexer().Lex();
+
+ const MCRegisterInfo *RI = getContext().getRegisterInfo();
+ unsigned Pair = RI->getMatchingSuperReg(
+ RegNo, RISCV::sub_gpr_even,
+ &RISCVMCRegisterClasses[RISCV::GPRPairRegClassID]);
+ Operands.push_back(RISCVOperand::createReg(Pair, S, E));
+ return ParseStatus::Success;
+}
+
ParseStatus RISCVAsmParser::parseFRMArg(OperandVector &Operands) {
if (getLexer().isNot(AsmToken::Identifier))
return TokError(
@@ -3335,27 +3389,6 @@ bool RISCVAsmParser::validateInstruction(MCInst &Inst,
return Error(Loc, "Operand must be constant 4.");
}
- bool IsAMOCAS_D = Opcode == RISCV::AMOCAS_D || Opcode == RISCV::AMOCAS_D_AQ ||
- Opcode == RISCV::AMOCAS_D_RL ||
- Opcode == RISCV::AMOCAS_D_AQ_RL;
- bool IsAMOCAS_Q = Opcode == RISCV::AMOCAS_Q || Opcode == RISCV::AMOCAS_Q_AQ ||
- Opcode == RISCV::AMOCAS_Q_RL ||
- Opcode == RISCV::AMOCAS_Q_AQ_RL;
- if ((!isRV64() && IsAMOCAS_D) || IsAMOCAS_Q) {
- unsigned Rd = Inst.getOperand(0).getReg();
- unsigned Rs2 = Inst.getOperand(2).getReg();
- assert(Rd >= RISCV::X0 && Rd <= RISCV::X31);
- if ((Rd - RISCV::X0) % 2 != 0) {
- SMLoc Loc = Operands[1]->getStartLoc();
- return Error(Loc, "The destination register must be even.");
- }
- assert(Rs2 >= RISCV::X0 && Rs2 <= RISCV::X31);
- if ((Rs2 - RISCV::X0) % 2 != 0) {
- SMLoc Loc = Operands[2]->getStartLoc();
- return Error(Loc, "The source register must be even.");
- }
- }
-
const MCInstrDesc &MCID = MII.get(Opcode);
if (!(MCID.TSFlags & RISCVII::ConstraintMask))
return false;
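
parseGPRPair only accepts even-numbered GPRs because a pair occupies (Xn, Xn+1) and is named through its even half (sub_gpr_even); checkTargetMatchPredicate re-applies the same rule for operands that were matched as plain GPRs. The whole constraint is a parity test on the register index, as in this self-contained model (register numbering illustrative):

#include <cassert>

// Mirrors ((RegNo - RISCV::X0) & 1) in the parser: a pair base must be even.
static bool isValidPairBase(unsigned XIndex) { return (XIndex & 1) == 0; }

int main() {
  assert(isValidPairBase(10));  // x10/x11 can form a pair
  assert(!isValidPairBase(11)); // x11 cannot name one
}
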
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
index ed80da14c795..4dd039159e29 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
@@ -171,7 +171,7 @@ static DecodeStatus DecodeGPRCRegisterClass(MCInst &Inst, uint32_t RegNo,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeGPRPF64RegisterClass(MCInst &Inst, uint32_t RegNo,
+static DecodeStatus DecodeGPRPairRegisterClass(MCInst &Inst, uint32_t RegNo,
uint64_t Address,
const MCDisassembler *Decoder) {
if (RegNo >= 32 || RegNo & 1)
@@ -546,6 +546,10 @@ DecodeStatus RISCVDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
!STI.hasFeature(RISCV::Feature64Bit),
DecoderTableRV32Zdinx32,
"RV32Zdinx table (Double in Integer and rv32)");
+ TRY_TO_DECODE(STI.hasFeature(RISCV::FeatureStdExtZacas) &&
+ !STI.hasFeature(RISCV::Feature64Bit),
+ DecoderTableRV32Zacas32,
+ "RV32Zacas table (Compare-And-Swap and rv32)");
TRY_TO_DECODE_FEATURE(RISCV::FeatureStdExtZfinx, DecoderTableRVZfinx32,
"RVZfinx table (Float in Integer)");
TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXVentanaCondOps,
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
index ab8070772fe5..ae02e86baf6e 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
@@ -47,10 +47,50 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST)
const LLT s32 = LLT::scalar(32);
const LLT s64 = LLT::scalar(64);
+ const LLT nxv1s8 = LLT::scalable_vector(1, s8);
+ const LLT nxv2s8 = LLT::scalable_vector(2, s8);
+ const LLT nxv4s8 = LLT::scalable_vector(4, s8);
+ const LLT nxv8s8 = LLT::scalable_vector(8, s8);
+ const LLT nxv16s8 = LLT::scalable_vector(16, s8);
+ const LLT nxv32s8 = LLT::scalable_vector(32, s8);
+ const LLT nxv64s8 = LLT::scalable_vector(64, s8);
+
+ const LLT nxv1s16 = LLT::scalable_vector(1, s16);
+ const LLT nxv2s16 = LLT::scalable_vector(2, s16);
+ const LLT nxv4s16 = LLT::scalable_vector(4, s16);
+ const LLT nxv8s16 = LLT::scalable_vector(8, s16);
+ const LLT nxv16s16 = LLT::scalable_vector(16, s16);
+ const LLT nxv32s16 = LLT::scalable_vector(32, s16);
+
+ const LLT nxv1s32 = LLT::scalable_vector(1, s32);
+ const LLT nxv2s32 = LLT::scalable_vector(2, s32);
+ const LLT nxv4s32 = LLT::scalable_vector(4, s32);
+ const LLT nxv8s32 = LLT::scalable_vector(8, s32);
+ const LLT nxv16s32 = LLT::scalable_vector(16, s32);
+
+ const LLT nxv1s64 = LLT::scalable_vector(1, s64);
+ const LLT nxv2s64 = LLT::scalable_vector(2, s64);
+ const LLT nxv4s64 = LLT::scalable_vector(4, s64);
+ const LLT nxv8s64 = LLT::scalable_vector(8, s64);
+
using namespace TargetOpcode;
+ auto AllVecTys = {nxv1s8, nxv2s8, nxv4s8, nxv8s8, nxv16s8, nxv32s8,
+ nxv64s8, nxv1s16, nxv2s16, nxv4s16, nxv8s16, nxv16s16,
+ nxv32s16, nxv1s32, nxv2s32, nxv4s32, nxv8s32, nxv16s32,
+ nxv1s64, nxv2s64, nxv4s64, nxv8s64};
+
getActionDefinitionsBuilder({G_ADD, G_SUB, G_AND, G_OR, G_XOR})
.legalFor({s32, sXLen})
+ .legalIf(all(
+ typeInSet(0, AllVecTys),
+ LegalityPredicate([=, &ST](const LegalityQuery &Query) {
+ return ST.hasVInstructions() &&
+ (Query.Types[0].getScalarSizeInBits() != 64 ||
+ ST.hasVInstructionsI64()) &&
+ (Query.Types[0].getElementCount().getKnownMinValue() != 1 ||
+ ST.getELen() == 64);
+ })))
.widenScalarToNextPow2(0)
.clampScalar(0, s32, sXLen);
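
The legality lambda admits a scalable vector type only when the subtarget can actually hold it: vector instructions must be present, 64-bit elements additionally need hasVInstructionsI64, and a minimum element count of 1 (the nxv1 types, which map to fractional LMULs) needs ELEN = 64. A standalone restatement of that predicate (struct and names are illustrative):

// ScalarBits is the element width, MinElts the minimum element count of the
// scalable type, matching Query.Types[0] in the lambda above.
struct VecFeatures {
  bool HasVInstructions;
  bool HasVInstructionsI64;
  unsigned ELen; // 32 or 64
};

static bool isLegalRVVBinOpType(unsigned ScalarBits, unsigned MinElts,
                                const VecFeatures &F) {
  return F.HasVInstructions &&
         (ScalarBits != 64 || F.HasVInstructionsI64) &&
         (MinElts != 1 || F.ELen == 64);
}
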
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp
index 0799267eaf7c..76e5b3ed4025 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp
@@ -106,6 +106,8 @@ unsigned RISCVELFObjectWriter::getRelocType(MCContext &Ctx,
if (Expr->getKind() == MCExpr::Target &&
cast<RISCVMCExpr>(Expr)->getKind() == RISCVMCExpr::VK_RISCV_32_PCREL)
return ELF::R_RISCV_32_PCREL;
+ if (Target.getSymA()->getKind() == MCSymbolRefExpr::VK_GOTPCREL)
+ return ELF::R_RISCV_GOT32_PCREL;
return ELF::R_RISCV_32;
case FK_Data_8:
return ELF::R_RISCV_64;
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp
index 9db5148208b3..961b8f0afe22 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp
@@ -37,6 +37,13 @@ RISCVTargetELFStreamer::RISCVTargetELFStreamer(MCStreamer &S,
auto &MAB = static_cast<RISCVAsmBackend &>(MCA.getBackend());
setTargetABI(RISCVABI::computeTargetABI(STI.getTargetTriple(), Features,
MAB.getTargetOptions().getABIName()));
+ // `j label` in `.option norelax; j label; .option relax; ...; label:` needs a
+ // relocation to ensure the jump target is correct after linking. This is due
+ // to a limitation of shouldForceRelocation: it has to make the decision up
+ // front, without knowing about a possible future .option relax. When
+ // RISCVAsmParser is used, its ParseInstruction may call setForceRelocs too.
+ if (STI.hasFeature(RISCV::FeatureRelax))
+ static_cast<RISCVAsmBackend &>(MAB).setForceRelocs();
}
RISCVELFStreamer &RISCVTargetELFStreamer::getStreamer() {
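
Put differently: with linker relaxation enabled, a branch resolved at assembly time under .option norelax can still be invalidated when later code is relaxed, and the backend must commit before it has seen the whole file, so it pessimistically keeps every such relocation. A toy model of that decision (not the real MC interface; the actual hook is the backend's shouldForceRelocation):

// With FeatureRelax enabled the only safe per-fixup answer is "keep the
// relocation", because a later .option relax can move the target.
struct BackendModel {
  bool ForceRelocs = false;
  void onStreamerInit(bool HasRelaxFeature) {
    if (HasRelaxFeature)
      ForceRelocs = true; // mirrors setForceRelocs() in the patch
  }
  bool keepRelocation(bool FixupResolved) const {
    return !FixupResolved || ForceRelocs;
  }
};
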
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
index 103a2e2da7b9..ed2b1ceb7d6f 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
@@ -308,8 +308,10 @@ bool RISCVExpandPseudo::expandRV32ZdinxStore(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI) {
DebugLoc DL = MBBI->getDebugLoc();
const TargetRegisterInfo *TRI = STI->getRegisterInfo();
- Register Lo = TRI->getSubReg(MBBI->getOperand(0).getReg(), RISCV::sub_32);
- Register Hi = TRI->getSubReg(MBBI->getOperand(0).getReg(), RISCV::sub_32_hi);
+ Register Lo =
+ TRI->getSubReg(MBBI->getOperand(0).getReg(), RISCV::sub_gpr_even);
+ Register Hi =
+ TRI->getSubReg(MBBI->getOperand(0).getReg(), RISCV::sub_gpr_odd);
BuildMI(MBB, MBBI, DL, TII->get(RISCV::SW))
.addReg(Lo, getKillRegState(MBBI->getOperand(0).isKill()))
.addReg(MBBI->getOperand(1).getReg())
@@ -342,8 +344,10 @@ bool RISCVExpandPseudo::expandRV32ZdinxLoad(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI) {
DebugLoc DL = MBBI->getDebugLoc();
const TargetRegisterInfo *TRI = STI->getRegisterInfo();
- Register Lo = TRI->getSubReg(MBBI->getOperand(0).getReg(), RISCV::sub_32);
- Register Hi = TRI->getSubReg(MBBI->getOperand(0).getReg(), RISCV::sub_32_hi);
+ Register Lo =
+ TRI->getSubReg(MBBI->getOperand(0).getReg(), RISCV::sub_gpr_even);
+ Register Hi =
+ TRI->getSubReg(MBBI->getOperand(0).getReg(), RISCV::sub_gpr_odd);
// If the register of operand 1 is equal to the Lo register, then swap the
// order of loading the Lo and Hi statements.
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFeatures.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFeatures.td
index bb7a3291085d..279509575bb5 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFeatures.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFeatures.td
@@ -736,6 +736,7 @@ def FeatureStdExtZacas
def HasStdExtZacas : Predicate<"Subtarget->hasStdExtZacas()">,
AssemblerPredicate<(all_of FeatureStdExtZacas),
"'Zacas' (Atomic Compare-And-Swap Instructions)">;
+def NoStdExtZacas : Predicate<"!Subtarget->hasStdExtZacas()">;
//===----------------------------------------------------------------------===//
// Vendor extensions
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 0a1a466af591..cb9ffabc4123 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -138,7 +138,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
if (Subtarget.is64Bit())
addRegisterClass(MVT::f64, &RISCV::GPRRegClass);
else
- addRegisterClass(MVT::f64, &RISCV::GPRPF64RegClass);
+ addRegisterClass(MVT::f64, &RISCV::GPRPairRegClass);
}
static const MVT::SimpleValueType BoolVecVTs[] = {
@@ -814,8 +814,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}, VT,
Custom);
setOperationAction({ISD::LRINT, ISD::LLRINT}, VT, Custom);
- setOperationAction({ISD::AVGFLOORU, ISD::SADDSAT, ISD::UADDSAT,
- ISD::SSUBSAT, ISD::USUBSAT},
+ setOperationAction({ISD::AVGFLOORU, ISD::AVGCEILU, ISD::SADDSAT,
+ ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT},
VT, Legal);
// Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
@@ -1185,8 +1185,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV())
setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Custom);
- setOperationAction({ISD::AVGFLOORU, ISD::SADDSAT, ISD::UADDSAT,
- ISD::SSUBSAT, ISD::USUBSAT},
+ setOperationAction({ISD::AVGFLOORU, ISD::AVGCEILU, ISD::SADDSAT,
+ ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT},
VT, Custom);
setOperationAction(ISD::VSELECT, VT, Custom);
@@ -5466,6 +5466,7 @@ static unsigned getRISCVVLOp(SDValue Op) {
OP_CASE(SSUBSAT)
OP_CASE(USUBSAT)
OP_CASE(AVGFLOORU)
+ OP_CASE(AVGCEILU)
OP_CASE(FADD)
OP_CASE(FSUB)
OP_CASE(FMUL)
@@ -5570,7 +5571,7 @@ static bool hasMergeOp(unsigned Opcode) {
Opcode <= RISCVISD::LAST_RISCV_STRICTFP_OPCODE &&
"not a RISC-V target specific op");
static_assert(RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP ==
- 125 &&
+ 126 &&
RISCVISD::LAST_RISCV_STRICTFP_OPCODE -
ISD::FIRST_TARGET_STRICTFP_OPCODE ==
21 &&
@@ -5596,7 +5597,7 @@ static bool hasMaskOp(unsigned Opcode) {
Opcode <= RISCVISD::LAST_RISCV_STRICTFP_OPCODE &&
"not a RISC-V target specific op");
static_assert(RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP ==
- 125 &&
+ 126 &&
RISCVISD::LAST_RISCV_STRICTFP_OPCODE -
ISD::FIRST_TARGET_STRICTFP_OPCODE ==
21 &&
@@ -6461,6 +6462,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
return SplitVectorOp(Op, DAG);
[[fallthrough]];
case ISD::AVGFLOORU:
+ case ISD::AVGCEILU:
case ISD::SADDSAT:
case ISD::UADDSAT:
case ISD::SSUBSAT:
@@ -7023,8 +7025,7 @@ foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG,
if (!NewConstOp)
return SDValue();
- const APInt &NewConstAPInt =
- cast<ConstantSDNode>(NewConstOp)->getAPIntValue();
+ const APInt &NewConstAPInt = NewConstOp->getAsAPIntVal();
if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes())
return SDValue();
@@ -7154,8 +7155,8 @@ SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
// is SETGE/SETLE to avoid an XORI.
if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
CCVal == ISD::SETLT) {
- const APInt &TrueVal = cast<ConstantSDNode>(TrueV)->getAPIntValue();
- const APInt &FalseVal = cast<ConstantSDNode>(FalseV)->getAPIntValue();
+ const APInt &TrueVal = TrueV->getAsAPIntVal();
+ const APInt &FalseVal = FalseV->getAsAPIntVal();
if (TrueVal - 1 == FalseVal)
return DAG.getNode(ISD::ADD, DL, VT, CondV, FalseV);
if (TrueVal + 1 == FalseVal)
@@ -16345,7 +16346,7 @@ static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
Register SrcReg = MI.getOperand(2).getReg();
const TargetRegisterClass *SrcRC = MI.getOpcode() == RISCV::SplitF64Pseudo_INX
- ? &RISCV::GPRPF64RegClass
+ ? &RISCV::GPRPairRegClass
: &RISCV::FPR64RegClass;
int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
@@ -16384,7 +16385,7 @@ static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
Register HiReg = MI.getOperand(2).getReg();
const TargetRegisterClass *DstRC =
- MI.getOpcode() == RISCV::BuildPairF64Pseudo_INX ? &RISCV::GPRPF64RegClass
+ MI.getOpcode() == RISCV::BuildPairF64Pseudo_INX ? &RISCV::GPRPairRegClass
: &RISCV::FPR64RegClass;
int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
@@ -18596,6 +18597,7 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(UREM_VL)
NODE_NAME_CASE(XOR_VL)
NODE_NAME_CASE(AVGFLOORU_VL)
+ NODE_NAME_CASE(AVGCEILU_VL)
NODE_NAME_CASE(SADDSAT_VL)
NODE_NAME_CASE(UADDSAT_VL)
NODE_NAME_CASE(SSUBSAT_VL)
@@ -18752,7 +18754,7 @@ RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
if (VT == MVT::f32 && Subtarget.hasStdExtZfinx())
return std::make_pair(0U, &RISCV::GPRF32RegClass);
if (VT == MVT::f64 && Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
- return std::make_pair(0U, &RISCV::GPRPF64RegClass);
+ return std::make_pair(0U, &RISCV::GPRPairRegClass);
return std::make_pair(0U, &RISCV::GPRNoX0RegClass);
case 'f':
if (Subtarget.hasStdExtZfhmin() && VT == MVT::f16)
@@ -18934,7 +18936,7 @@ RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
// Subtarget into account.
if (Res.second == &RISCV::GPRF16RegClass ||
Res.second == &RISCV::GPRF32RegClass ||
- Res.second == &RISCV::GPRPF64RegClass)
+ Res.second == &RISCV::GPRPairRegClass)
return std::make_pair(Res.first, &RISCV::GPRRegClass);
return Res;
@@ -19362,6 +19364,11 @@ bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
return false;
}
+ISD::NodeType RISCVTargetLowering::getExtendForAtomicCmpSwapArg() const {
+ // Zacas will use amocas.w which does not require extension.
+ return Subtarget.hasStdExtZacas() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND;
+}
+
Register RISCVTargetLowering::getExceptionPointerRegister(
const Constant *PersonalityFn) const {
return RISCV::X10;
@@ -20017,8 +20024,13 @@ unsigned RISCVTargetLowering::getCustomCtpopCost(EVT VT,
}
bool RISCVTargetLowering::fallBackToDAGISel(const Instruction &Inst) const {
- // At the moment, the only scalable instruction GISel knows how to lower is
- // ret with scalable argument.
+
+ // GISel support is in progress or complete for G_ADD, G_SUB, G_AND, G_OR, and
+ // G_XOR.
+ unsigned Op = Inst.getOpcode();
+ if (Op == Instruction::Add || Op == Instruction::Sub ||
+ Op == Instruction::And || Op == Instruction::Or || Op == Instruction::Xor)
+ return false;
if (Inst.getType()->isScalableTy())
return true;
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.h b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 5d51fe168b04..c65953e37b17 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -255,6 +255,8 @@ enum NodeType : unsigned {
// Averaging adds of unsigned integers.
AVGFLOORU_VL,
+ // Rounding averaging adds of unsigned integers.
+ AVGCEILU_VL,
MULHS_VL,
MULHU_VL,
@@ -631,9 +633,7 @@ public:
return ISD::SIGN_EXTEND;
}
- ISD::NodeType getExtendForAtomicCmpSwapArg() const override {
- return ISD::SIGN_EXTEND;
- }
+ ISD::NodeType getExtendForAtomicCmpSwapArg() const override;
bool shouldTransformSignedTruncationCheck(EVT XVT,
unsigned KeptBits) const override;
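
AVGCEILU_VL is the rounding (ceiling) counterpart of AVGFLOORU_VL: floor((a+b)/2) versus ceil((a+b)/2) on unsigned elements, computed without widening. The standard overflow-free identities, as a checkable scalar model:

#include <cassert>
#include <cstdint>

//   avgflooru(a, b) = (a & b) + ((a ^ b) >> 1) == floor((a + b) / 2)
//   avgceilu (a, b) = (a | b) - ((a ^ b) >> 1) == ceil ((a + b) / 2)
static uint8_t avgflooru(uint8_t A, uint8_t B) { return (A & B) + ((A ^ B) >> 1); }
static uint8_t avgceilu(uint8_t A, uint8_t B) { return (A | B) - ((A ^ B) >> 1); }

int main() {
  assert(avgflooru(255, 254) == 254); // no 8-bit overflow despite a + b > 255
  assert(avgceilu(255, 254) == 255);
}
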
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index e591aa935c0b..6c9e529e4bfb 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -1464,20 +1464,6 @@ static void doUnion(DemandedFields &A, DemandedFields B) {
A.MaskPolicy |= B.MaskPolicy;
}
-static bool isNonZeroAVL(const MachineOperand &MO,
- const MachineRegisterInfo &MRI) {
- if (MO.isReg()) {
- if (MO.getReg() == RISCV::X0)
- return true;
- if (MachineInstr *MI = MRI.getVRegDef(MO.getReg());
- MI && isNonZeroLoadImmediate(*MI))
- return true;
- return false;
- }
- assert(MO.isImm());
- return 0 != MO.getImm();
-}
-
// Return true if we can mutate PrevMI to match MI without changing any of the
// fields which would be observed.
static bool canMutatePriorConfig(const MachineInstr &PrevMI,
@@ -1491,21 +1477,26 @@ static bool canMutatePriorConfig(const MachineInstr &PrevMI,
if (Used.VLAny)
return false;
- // We don't bother to handle the equally zero case here as it's largely
- // uninteresting.
if (Used.VLZeroness) {
if (isVLPreservingConfig(PrevMI))
return false;
- if (!isNonZeroAVL(MI.getOperand(1), MRI) ||
- !isNonZeroAVL(PrevMI.getOperand(1), MRI))
+ if (!getInfoForVSETVLI(PrevMI).hasEquallyZeroAVL(getInfoForVSETVLI(MI),
+ MRI))
return false;
}
- // TODO: Track whether the register is defined between
- // PrevMI and MI.
- if (MI.getOperand(1).isReg() &&
- RISCV::X0 != MI.getOperand(1).getReg())
- return false;
+ auto &AVL = MI.getOperand(1);
+ auto &PrevAVL = PrevMI.getOperand(1);
+ assert(MRI.isSSA());
+
+ // If the AVL is a register, we need to make sure MI's AVL dominates PrevMI.
+ // For now just check that PrevMI uses the same virtual register.
+ if (AVL.isReg() && AVL.getReg() != RISCV::X0) {
+ if (AVL.getReg().isPhysical())
+ return false;
+ if (!PrevAVL.isReg() || PrevAVL.getReg() != AVL.getReg())
+ return false;
+ }
}
if (!PrevMI.getOperand(2).isImm() || !MI.getOperand(2).isImm())
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 351f48c1708e..9813c7a70dfc 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -414,15 +414,16 @@ void RISCVInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
return;
}
- if (RISCV::GPRPF64RegClass.contains(DstReg, SrcReg)) {
- // Emit an ADDI for both parts of GPRPF64.
+ if (RISCV::GPRPairRegClass.contains(DstReg, SrcReg)) {
+ // Emit an ADDI for both parts of GPRPair.
BuildMI(MBB, MBBI, DL, get(RISCV::ADDI),
- TRI->getSubReg(DstReg, RISCV::sub_32))
- .addReg(TRI->getSubReg(SrcReg, RISCV::sub_32), getKillRegState(KillSrc))
+ TRI->getSubReg(DstReg, RISCV::sub_gpr_even))
+ .addReg(TRI->getSubReg(SrcReg, RISCV::sub_gpr_even),
+ getKillRegState(KillSrc))
.addImm(0);
BuildMI(MBB, MBBI, DL, get(RISCV::ADDI),
- TRI->getSubReg(DstReg, RISCV::sub_32_hi))
- .addReg(TRI->getSubReg(SrcReg, RISCV::sub_32_hi),
+ TRI->getSubReg(DstReg, RISCV::sub_gpr_odd))
+ .addReg(TRI->getSubReg(SrcReg, RISCV::sub_gpr_odd),
getKillRegState(KillSrc))
.addImm(0);
return;
@@ -607,7 +608,7 @@ void RISCVInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
Opcode = TRI->getRegSizeInBits(RISCV::GPRRegClass) == 32 ?
RISCV::SW : RISCV::SD;
IsScalableVector = false;
- } else if (RISCV::GPRPF64RegClass.hasSubClassEq(RC)) {
+ } else if (RISCV::GPRPairRegClass.hasSubClassEq(RC)) {
Opcode = RISCV::PseudoRV32ZdinxSD;
IsScalableVector = false;
} else if (RISCV::FPR16RegClass.hasSubClassEq(RC)) {
@@ -690,7 +691,7 @@ void RISCVInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
Opcode = TRI->getRegSizeInBits(RISCV::GPRRegClass) == 32 ?
RISCV::LW : RISCV::LD;
IsScalableVector = false;
- } else if (RISCV::GPRPF64RegClass.hasSubClassEq(RC)) {
+ } else if (RISCV::GPRPairRegClass.hasSubClassEq(RC)) {
Opcode = RISCV::PseudoRV32ZdinxLD;
IsScalableVector = false;
} else if (RISCV::FPR16RegClass.hasSubClassEq(RC)) {
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoA.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
index 4d0567e41abc..44552c00c62e 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
@@ -157,7 +157,16 @@ defm : AMOPat<"atomic_load_min_32", "AMOMIN_W">;
defm : AMOPat<"atomic_load_umax_32", "AMOMAXU_W">;
defm : AMOPat<"atomic_load_umin_32", "AMOMINU_W">;
-let Predicates = [HasStdExtA] in {
+defm : AMOPat<"atomic_swap_64", "AMOSWAP_D", i64, [IsRV64]>;
+defm : AMOPat<"atomic_load_add_64", "AMOADD_D", i64, [IsRV64]>;
+defm : AMOPat<"atomic_load_and_64", "AMOAND_D", i64, [IsRV64]>;
+defm : AMOPat<"atomic_load_or_64", "AMOOR_D", i64, [IsRV64]>;
+defm : AMOPat<"atomic_load_xor_64", "AMOXOR_D", i64, [IsRV64]>;
+defm : AMOPat<"atomic_load_max_64", "AMOMAX_D", i64, [IsRV64]>;
+defm : AMOPat<"atomic_load_min_64", "AMOMIN_D", i64, [IsRV64]>;
+defm : AMOPat<"atomic_load_umax_64", "AMOMAXU_D", i64, [IsRV64]>;
+defm : AMOPat<"atomic_load_umin_64", "AMOMINU_D", i64, [IsRV64]>;
+
/// Pseudo AMOs
@@ -169,21 +178,6 @@ class PseudoAMO : Pseudo<(outs GPR:$res, GPR:$scratch),
let hasSideEffects = 0;
}
-let Size = 20 in
-def PseudoAtomicLoadNand32 : PseudoAMO;
-// Ordering constants must be kept in sync with the AtomicOrdering enum in
-// AtomicOrdering.h.
-def : Pat<(XLenVT (atomic_load_nand_32_monotonic GPR:$addr, GPR:$incr)),
- (PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 2)>;
-def : Pat<(XLenVT (atomic_load_nand_32_acquire GPR:$addr, GPR:$incr)),
- (PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 4)>;
-def : Pat<(XLenVT (atomic_load_nand_32_release GPR:$addr, GPR:$incr)),
- (PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 5)>;
-def : Pat<(XLenVT (atomic_load_nand_32_acq_rel GPR:$addr, GPR:$incr)),
- (PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 6)>;
-def : Pat<(XLenVT (atomic_load_nand_32_seq_cst GPR:$addr, GPR:$incr)),
- (PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 7)>;
-
class PseudoMaskedAMO
: Pseudo<(outs GPR:$res, GPR:$scratch),
(ins GPR:$addr, GPR:$incr, GPR:$mask, ixlenimm:$ordering), []> {
@@ -224,6 +218,23 @@ class PseudoMaskedAMOMinMaxPat<Intrinsic intrin, Pseudo AMOInst>
(AMOInst GPR:$addr, GPR:$incr, GPR:$mask, GPR:$shiftamt,
timm:$ordering)>;
+let Predicates = [HasStdExtA] in {
+
+let Size = 20 in
+def PseudoAtomicLoadNand32 : PseudoAMO;
+// Ordering constants must be kept in sync with the AtomicOrdering enum in
+// AtomicOrdering.h.
+def : Pat<(XLenVT (atomic_load_nand_32_monotonic GPR:$addr, GPR:$incr)),
+ (PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 2)>;
+def : Pat<(XLenVT (atomic_load_nand_32_acquire GPR:$addr, GPR:$incr)),
+ (PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 4)>;
+def : Pat<(XLenVT (atomic_load_nand_32_release GPR:$addr, GPR:$incr)),
+ (PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 5)>;
+def : Pat<(XLenVT (atomic_load_nand_32_acq_rel GPR:$addr, GPR:$incr)),
+ (PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 6)>;
+def : Pat<(XLenVT (atomic_load_nand_32_seq_cst GPR:$addr, GPR:$incr)),
+ (PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 7)>;
+
let Size = 28 in
def PseudoMaskedAtomicSwap32 : PseudoMaskedAMO;
def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_xchg_i32,
@@ -256,6 +267,43 @@ let Size = 36 in
def PseudoMaskedAtomicLoadUMin32 : PseudoMaskedAMOUMinUMax;
def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_umin_i32,
PseudoMaskedAtomicLoadUMin32>;
+} // Predicates = [HasStdExtA]
+
+let Predicates = [HasStdExtA, IsRV64] in {
+
+let Size = 20 in
+def PseudoAtomicLoadNand64 : PseudoAMO;
+// Ordering constants must be kept in sync with the AtomicOrdering enum in
+// AtomicOrdering.h.
+def : Pat<(i64 (atomic_load_nand_64_monotonic GPR:$addr, GPR:$incr)),
+ (PseudoAtomicLoadNand64 GPR:$addr, GPR:$incr, 2)>;
+def : Pat<(i64 (atomic_load_nand_64_acquire GPR:$addr, GPR:$incr)),
+ (PseudoAtomicLoadNand64 GPR:$addr, GPR:$incr, 4)>;
+def : Pat<(i64 (atomic_load_nand_64_release GPR:$addr, GPR:$incr)),
+ (PseudoAtomicLoadNand64 GPR:$addr, GPR:$incr, 5)>;
+def : Pat<(i64 (atomic_load_nand_64_acq_rel GPR:$addr, GPR:$incr)),
+ (PseudoAtomicLoadNand64 GPR:$addr, GPR:$incr, 6)>;
+def : Pat<(i64 (atomic_load_nand_64_seq_cst GPR:$addr, GPR:$incr)),
+ (PseudoAtomicLoadNand64 GPR:$addr, GPR:$incr, 7)>;
+
+def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_xchg_i64,
+ PseudoMaskedAtomicSwap32>;
+def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_add_i64,
+ PseudoMaskedAtomicLoadAdd32>;
+def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_sub_i64,
+ PseudoMaskedAtomicLoadSub32>;
+def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_nand_i64,
+ PseudoMaskedAtomicLoadNand32>;
+def : PseudoMaskedAMOMinMaxPat<int_riscv_masked_atomicrmw_max_i64,
+ PseudoMaskedAtomicLoadMax32>;
+def : PseudoMaskedAMOMinMaxPat<int_riscv_masked_atomicrmw_min_i64,
+ PseudoMaskedAtomicLoadMin32>;
+def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_umax_i64,
+ PseudoMaskedAtomicLoadUMax32>;
+def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_umin_i64,
+ PseudoMaskedAtomicLoadUMin32>;
+} // Predicates = [HasStdExtA, IsRV64]
+
/// Compare and exchange
@@ -285,9 +333,17 @@ multiclass PseudoCmpXchgPat<string Op, Pseudo CmpXchgInst,
(CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 7)>;
}
+let Predicates = [HasStdExtA, NoStdExtZacas] in {
def PseudoCmpXchg32 : PseudoCmpXchg;
defm : PseudoCmpXchgPat<"atomic_cmp_swap_32", PseudoCmpXchg32>;
+}
+
+let Predicates = [HasStdExtA, NoStdExtZacas, IsRV64] in {
+def PseudoCmpXchg64 : PseudoCmpXchg;
+defm : PseudoCmpXchgPat<"atomic_cmp_swap_64", PseudoCmpXchg64, i64>;
+}
+let Predicates = [HasStdExtA] in {
def PseudoMaskedCmpXchg32
: Pseudo<(outs GPR:$res, GPR:$scratch),
(ins GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask,
@@ -303,60 +359,9 @@ def : Pat<(int_riscv_masked_cmpxchg_i32
GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering),
(PseudoMaskedCmpXchg32
GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering)>;
-
} // Predicates = [HasStdExtA]
-defm : AMOPat<"atomic_swap_64", "AMOSWAP_D", i64, [IsRV64]>;
-defm : AMOPat<"atomic_load_add_64", "AMOADD_D", i64, [IsRV64]>;
-defm : AMOPat<"atomic_load_and_64", "AMOAND_D", i64, [IsRV64]>;
-defm : AMOPat<"atomic_load_or_64", "AMOOR_D", i64, [IsRV64]>;
-defm : AMOPat<"atomic_load_xor_64", "AMOXOR_D", i64, [IsRV64]>;
-defm : AMOPat<"atomic_load_max_64", "AMOMAX_D", i64, [IsRV64]>;
-defm : AMOPat<"atomic_load_min_64", "AMOMIN_D", i64, [IsRV64]>;
-defm : AMOPat<"atomic_load_umax_64", "AMOMAXU_D", i64, [IsRV64]>;
-defm : AMOPat<"atomic_load_umin_64", "AMOMINU_D", i64, [IsRV64]>;
-
let Predicates = [HasStdExtA, IsRV64] in {
-
-/// 64-bit pseudo AMOs
-
-let Size = 20 in
-def PseudoAtomicLoadNand64 : PseudoAMO;
-// Ordering constants must be kept in sync with the AtomicOrdering enum in
-// AtomicOrdering.h.
-def : Pat<(i64 (atomic_load_nand_64_monotonic GPR:$addr, GPR:$incr)),
- (PseudoAtomicLoadNand64 GPR:$addr, GPR:$incr, 2)>;
-def : Pat<(i64 (atomic_load_nand_64_acquire GPR:$addr, GPR:$incr)),
- (PseudoAtomicLoadNand64 GPR:$addr, GPR:$incr, 4)>;
-def : Pat<(i64 (atomic_load_nand_64_release GPR:$addr, GPR:$incr)),
- (PseudoAtomicLoadNand64 GPR:$addr, GPR:$incr, 5)>;
-def : Pat<(i64 (atomic_load_nand_64_acq_rel GPR:$addr, GPR:$incr)),
- (PseudoAtomicLoadNand64 GPR:$addr, GPR:$incr, 6)>;
-def : Pat<(i64 (atomic_load_nand_64_seq_cst GPR:$addr, GPR:$incr)),
- (PseudoAtomicLoadNand64 GPR:$addr, GPR:$incr, 7)>;
-
-def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_xchg_i64,
- PseudoMaskedAtomicSwap32>;
-def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_add_i64,
- PseudoMaskedAtomicLoadAdd32>;
-def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_sub_i64,
- PseudoMaskedAtomicLoadSub32>;
-def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_nand_i64,
- PseudoMaskedAtomicLoadNand32>;
-def : PseudoMaskedAMOMinMaxPat<int_riscv_masked_atomicrmw_max_i64,
- PseudoMaskedAtomicLoadMax32>;
-def : PseudoMaskedAMOMinMaxPat<int_riscv_masked_atomicrmw_min_i64,
- PseudoMaskedAtomicLoadMin32>;
-def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_umax_i64,
- PseudoMaskedAtomicLoadUMax32>;
-def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_umin_i64,
- PseudoMaskedAtomicLoadUMin32>;
-
-/// 64-bit compare and exchange
-
-def PseudoCmpXchg64 : PseudoCmpXchg;
-defm : PseudoCmpXchgPat<"atomic_cmp_swap_64", PseudoCmpXchg64, i64>;
-
def : Pat<(int_riscv_masked_cmpxchg_i64
GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering),
(PseudoMaskedCmpXchg32
@@ -408,6 +413,7 @@ defm : AMOPat2<"atomic_load_min_32", "AMOMIN_W", i32>;
defm : AMOPat2<"atomic_load_umax_32", "AMOMAXU_W", i32>;
defm : AMOPat2<"atomic_load_umin_32", "AMOMINU_W", i32>;
+let Predicates = [HasStdExtA, IsRV64] in
defm : PseudoCmpXchgPat<"atomic_cmp_swap_32", PseudoCmpXchg32, i32>;
let Predicates = [HasAtomicLdSt] in {
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoD.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
index 418421b2a556..fec43d814098 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
@@ -33,8 +33,8 @@ def AddrRegImmINX : ComplexPattern<iPTR, 2, "SelectAddrRegImmINX">;
// Zdinx
-def GPRPF64AsFPR : AsmOperandClass {
- let Name = "GPRPF64AsFPR";
+def GPRPairAsFPR : AsmOperandClass {
+ let Name = "GPRPairAsFPR";
let ParserMethod = "parseGPRAsFPR";
let PredicateMethod = "isGPRAsFPR";
let RenderMethod = "addRegOperands";
@@ -52,8 +52,8 @@ def FPR64INX : RegisterOperand<GPR> {
let DecoderMethod = "DecodeGPRRegisterClass";
}
-def FPR64IN32X : RegisterOperand<GPRPF64> {
- let ParserMatchClass = GPRPF64AsFPR;
+def FPR64IN32X : RegisterOperand<GPRPair> {
+ let ParserMatchClass = GPRPairAsFPR;
}
def DExt : ExtInfo<"", "", [HasStdExtD], f64, FPR64, FPR32, FPR64, ?>;
@@ -515,15 +515,15 @@ def PseudoFROUND_D_IN32X : PseudoFROUND<FPR64IN32X, f64>;
/// Loads
let isCall = 0, mayLoad = 1, mayStore = 0, Size = 8, isCodeGenOnly = 1 in
-def PseudoRV32ZdinxLD : Pseudo<(outs GPRPF64:$dst), (ins GPR:$rs1, simm12:$imm12), []>;
+def PseudoRV32ZdinxLD : Pseudo<(outs GPRPair:$dst), (ins GPR:$rs1, simm12:$imm12), []>;
def : Pat<(f64 (load (AddrRegImmINX (XLenVT GPR:$rs1), simm12:$imm12))),
(PseudoRV32ZdinxLD GPR:$rs1, simm12:$imm12)>;
/// Stores
let isCall = 0, mayLoad = 0, mayStore = 1, Size = 8, isCodeGenOnly = 1 in
-def PseudoRV32ZdinxSD : Pseudo<(outs), (ins GPRPF64:$rs2, GPRNoX0:$rs1, simm12:$imm12), []>;
-def : Pat<(store (f64 GPRPF64:$rs2), (AddrRegImmINX (XLenVT GPR:$rs1), simm12:$imm12)),
- (PseudoRV32ZdinxSD GPRPF64:$rs2, GPR:$rs1, simm12:$imm12)>;
+def PseudoRV32ZdinxSD : Pseudo<(outs), (ins GPRPair:$rs2, GPRNoX0:$rs1, simm12:$imm12), []>;
+def : Pat<(store (f64 GPRPair:$rs2), (AddrRegImmINX (XLenVT GPR:$rs1), simm12:$imm12)),
+ (PseudoRV32ZdinxSD GPRPair:$rs2, GPR:$rs1, simm12:$imm12)>;
/// Pseudo-instructions needed for the soft-float ABI with RV32D
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
index 4f87c36506e5..8ebd8b89c119 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
@@ -877,6 +877,23 @@ multiclass VPatMultiplyAddSDNode_VV_VX<SDNode op, string instruction_name> {
}
}
+multiclass VPatAVGADD_VV_VX_RM<SDNode vop, int vxrm> {
+ foreach vti = AllIntegerVectors in {
+ let Predicates = GetVTypePredicates<vti>.Predicates in {
+ def : Pat<(vop (vti.Vector vti.RegClass:$rs1),
+ (vti.Vector vti.RegClass:$rs2)),
+ (!cast<Instruction>("PseudoVAADDU_VV_"#vti.LMul.MX)
+ (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, vti.RegClass:$rs2,
+ vxrm, vti.AVL, vti.Log2SEW, TA_MA)>;
+ def : Pat<(vop (vti.Vector vti.RegClass:$rs1),
+ (vti.Vector (SplatPat (XLenVT GPR:$rs2)))),
+ (!cast<Instruction>("PseudoVAADDU_VX_"#vti.LMul.MX)
+ (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, GPR:$rs2,
+ vxrm, vti.AVL, vti.Log2SEW, TA_MA)>;
+ }
+ }
+}
+
//===----------------------------------------------------------------------===//
// Patterns.
//===----------------------------------------------------------------------===//
@@ -1132,20 +1149,8 @@ defm : VPatBinarySDNode_VV_VX<ssubsat, "PseudoVSSUB">;
defm : VPatBinarySDNode_VV_VX<usubsat, "PseudoVSSUBU">;
// 12.2. Vector Single-Width Averaging Add and Subtract
-foreach vti = AllIntegerVectors in {
- let Predicates = GetVTypePredicates<vti>.Predicates in {
- def : Pat<(avgflooru (vti.Vector vti.RegClass:$rs1),
- (vti.Vector vti.RegClass:$rs2)),
- (!cast<Instruction>("PseudoVAADDU_VV_"#vti.LMul.MX)
- (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, vti.RegClass:$rs2,
- 0b10, vti.AVL, vti.Log2SEW, TA_MA)>;
- def : Pat<(avgflooru (vti.Vector vti.RegClass:$rs1),
- (vti.Vector (SplatPat (XLenVT GPR:$rs2)))),
- (!cast<Instruction>("PseudoVAADDU_VX_"#vti.LMul.MX)
- (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, GPR:$rs2,
- 0b10, vti.AVL, vti.Log2SEW, TA_MA)>;
- }
-}
+defm : VPatAVGADD_VV_VX_RM<avgflooru, 0b10>;
+defm : VPatAVGADD_VV_VX_RM<avgceilu, 0b00>;
// 15. Vector Mask Instructions
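
Both new multiclasses select the same PseudoVAADDU pseudo and differ only in the static rounding mode: vxrm = 0b10 (rdn, truncate) produces the floor average, vxrm = 0b00 (rnu, round-to-nearest-up) the ceiling average. The RVV round-off rule for the 1-bit shift that vaaddu performs, as a scalar model:

#include <cassert>
#include <cstdint>

// vaaddu computes roundoff_unsigned(a + b, 1). For a 1-bit shift, rnu
// (vxrm = 0b00) adds the shifted-out bit back, rdn (vxrm = 0b10) drops it.
// The sum is modeled in a wider type; hardware keeps the carry internally.
static uint32_t vaaddu(uint32_t A, uint32_t B, unsigned Vxrm) {
  uint64_t Sum = (uint64_t)A + B;
  uint64_t Round = (Vxrm == 0b00) ? (Sum & 1) : 0;
  return (uint32_t)((Sum >> 1) + Round);
}

int main() {
  assert(vaaddu(1, 2, 0b10) == 1); // rdn -> floor(3/2): avgflooru
  assert(vaaddu(1, 2, 0b00) == 2); // rnu -> ceil(3/2):  avgceilu
}
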
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
index d60ff4b5fab0..1deb9a709463 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -112,6 +112,7 @@ def riscv_cttz_vl : SDNode<"RISCVISD::CTTZ_VL", SDT_RISCVIntUnOp_VL>
def riscv_ctpop_vl : SDNode<"RISCVISD::CTPOP_VL", SDT_RISCVIntUnOp_VL>;
def riscv_avgflooru_vl : SDNode<"RISCVISD::AVGFLOORU_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>;
+def riscv_avgceilu_vl : SDNode<"RISCVISD::AVGCEILU_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>;
def riscv_saddsat_vl : SDNode<"RISCVISD::SADDSAT_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>;
def riscv_uaddsat_vl : SDNode<"RISCVISD::UADDSAT_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>;
def riscv_ssubsat_vl : SDNode<"RISCVISD::SSUBSAT_VL", SDT_RISCVIntBinOp_VL>;
@@ -2031,6 +2032,25 @@ multiclass VPatSlide1VL_VF<SDNode vop, string instruction_name> {
}
}
+multiclass VPatAVGADDVL_VV_VX_RM<SDNode vop, int vxrm> {
+ foreach vti = AllIntegerVectors in {
+ let Predicates = GetVTypePredicates<vti>.Predicates in {
+ def : Pat<(vop (vti.Vector vti.RegClass:$rs1),
+ (vti.Vector vti.RegClass:$rs2),
+ vti.RegClass:$merge, (vti.Mask V0), VLOpFrag),
+ (!cast<Instruction>("PseudoVAADDU_VV_"#vti.LMul.MX#"_MASK")
+ vti.RegClass:$merge, vti.RegClass:$rs1, vti.RegClass:$rs2,
+ (vti.Mask V0), vxrm, GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
+ def : Pat<(vop (vti.Vector vti.RegClass:$rs1),
+ (vti.Vector (SplatPat (XLenVT GPR:$rs2))),
+ vti.RegClass:$merge, (vti.Mask V0), VLOpFrag),
+ (!cast<Instruction>("PseudoVAADDU_VX_"#vti.LMul.MX#"_MASK")
+ vti.RegClass:$merge, vti.RegClass:$rs1, GPR:$rs2,
+ (vti.Mask V0), vxrm, GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
+ }
+ }
+}
+
//===----------------------------------------------------------------------===//
// Patterns.
//===----------------------------------------------------------------------===//
@@ -2308,22 +2328,8 @@ defm : VPatBinaryVL_VV_VX<riscv_ssubsat_vl, "PseudoVSSUB">;
defm : VPatBinaryVL_VV_VX<riscv_usubsat_vl, "PseudoVSSUBU">;
// 12.2. Vector Single-Width Averaging Add and Subtract
-foreach vti = AllIntegerVectors in {
- let Predicates = GetVTypePredicates<vti>.Predicates in {
- def : Pat<(riscv_avgflooru_vl (vti.Vector vti.RegClass:$rs1),
- (vti.Vector vti.RegClass:$rs2),
- vti.RegClass:$merge, (vti.Mask V0), VLOpFrag),
- (!cast<Instruction>("PseudoVAADDU_VV_"#vti.LMul.MX#"_MASK")
- vti.RegClass:$merge, vti.RegClass:$rs1, vti.RegClass:$rs2,
- (vti.Mask V0), 0b10, GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
- def : Pat<(riscv_avgflooru_vl (vti.Vector vti.RegClass:$rs1),
- (vti.Vector (SplatPat (XLenVT GPR:$rs2))),
- vti.RegClass:$merge, (vti.Mask V0), VLOpFrag),
- (!cast<Instruction>("PseudoVAADDU_VX_"#vti.LMul.MX#"_MASK")
- vti.RegClass:$merge, vti.RegClass:$rs1, GPR:$rs2,
- (vti.Mask V0), 0b10, GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
- }
-}
+defm : VPatAVGADDVL_VV_VX_RM<riscv_avgflooru_vl, 0b10>;
+defm : VPatAVGADDVL_VV_VX_RM<riscv_avgceilu_vl, 0b00>;
// 12.5. Vector Narrowing Fixed-Point Clip Instructions
class VPatTruncSatClipMaxMinBase<string inst,
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZa.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZa.td
index a09f5715b24f..ffcdd0010749 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZa.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZa.td
@@ -17,15 +17,107 @@
// Zacas (Atomic Compare-and-Swap)
//===----------------------------------------------------------------------===//
+def GPRPairRV32Operand : AsmOperandClass {
+ let Name = "GPRPairRV32";
+ let ParserMethod = "parseGPRPair<false>";
+ let PredicateMethod = "isGPRPair";
+ let RenderMethod = "addRegOperands";
+}
+
+def GPRPairRV64Operand : AsmOperandClass {
+ let Name = "GPRPairRV64";
+ let ParserMethod = "parseGPRPair<true>";
+ let PredicateMethod = "isGPRPair";
+ let RenderMethod = "addRegOperands";
+}
+
+def GPRPairRV32 : RegisterOperand<GPRPair> {
+ let ParserMatchClass = GPRPairRV32Operand;
+}
+
+def GPRPairRV64 : RegisterOperand<GPRPair> {
+ let ParserMatchClass = GPRPairRV64Operand;
+}
+
+let hasSideEffects = 0, mayLoad = 1, mayStore = 1, Constraints = "$rd = $rd_wb" in
+class AMO_cas<bits<5> funct5, bit aq, bit rl, bits<3> funct3, string opcodestr,
+ DAGOperand RC>
+ : RVInstRAtomic<funct5, aq, rl, funct3, OPC_AMO,
+ (outs RC:$rd_wb), (ins RC:$rd, GPRMemZeroOffset:$rs1, RC:$rs2),
+ opcodestr, "$rd, $rs2, $rs1">;
+
+multiclass AMO_cas_aq_rl<bits<5> funct5, bits<3> funct3, string opcodestr,
+ DAGOperand RC> {
+ def "" : AMO_cas<funct5, 0, 0, funct3, opcodestr, RC>;
+ def _AQ : AMO_cas<funct5, 1, 0, funct3, opcodestr # ".aq", RC>;
+ def _RL : AMO_cas<funct5, 0, 1, funct3, opcodestr # ".rl", RC>;
+ def _AQ_RL : AMO_cas<funct5, 1, 1, funct3, opcodestr # ".aqrl", RC>;
+}
+
let Predicates = [HasStdExtZacas] in {
-defm AMOCAS_W : AMO_rr_aq_rl<0b00101, 0b010, "amocas.w">;
-defm AMOCAS_D : AMO_rr_aq_rl<0b00101, 0b011, "amocas.d">;
+defm AMOCAS_W : AMO_cas_aq_rl<0b00101, 0b010, "amocas.w", GPR>;
} // Predicates = [HasStdExtZacas]
+let Predicates = [HasStdExtZacas, IsRV32], DecoderNamespace = "RV32Zacas" in {
+defm AMOCAS_D_RV32 : AMO_cas_aq_rl<0b00101, 0b011, "amocas.d", GPRPairRV32>;
+} // Predicates = [HasStdExtZacas, IsRV32]
+
let Predicates = [HasStdExtZacas, IsRV64] in {
-defm AMOCAS_Q : AMO_rr_aq_rl<0b00101, 0b100, "amocas.q">;
+defm AMOCAS_D_RV64 : AMO_cas_aq_rl<0b00101, 0b011, "amocas.d", GPR>;
+defm AMOCAS_Q : AMO_cas_aq_rl<0b00101, 0b100, "amocas.q", GPRPairRV64>;
} // Predicates = [HasStdExtZacas, IsRV64]
+multiclass AMOCASPat<string AtomicOp, string BaseInst, ValueType vt = XLenVT,
+ list<Predicate> ExtraPreds = []> {
+ let Predicates = !listconcat([HasStdExtZacas, NotHasStdExtZtso], ExtraPreds) in {
+ def : Pat<(!cast<PatFrag>(AtomicOp#"_monotonic") (vt GPR:$addr),
+ (vt GPR:$cmp),
+ (vt GPR:$new)),
+ (!cast<RVInst>(BaseInst) GPR:$cmp, GPR:$addr, GPR:$new)>;
+ def : Pat<(!cast<PatFrag>(AtomicOp#"_acquire") (vt GPR:$addr),
+ (vt GPR:$cmp),
+ (vt GPR:$new)),
+ (!cast<RVInst>(BaseInst#"_AQ") GPR:$cmp, GPR:$addr, GPR:$new)>;
+ def : Pat<(!cast<PatFrag>(AtomicOp#"_release") (vt GPR:$addr),
+ (vt GPR:$cmp),
+ (vt GPR:$new)),
+ (!cast<RVInst>(BaseInst#"_RL") GPR:$cmp, GPR:$addr, GPR:$new)>;
+ def : Pat<(!cast<PatFrag>(AtomicOp#"_acq_rel") (vt GPR:$addr),
+ (vt GPR:$cmp),
+ (vt GPR:$new)),
+ (!cast<RVInst>(BaseInst#"_AQ_RL") GPR:$cmp, GPR:$addr, GPR:$new)>;
+ def : Pat<(!cast<PatFrag>(AtomicOp#"_seq_cst") (vt GPR:$addr),
+ (vt GPR:$cmp),
+ (vt GPR:$new)),
+ (!cast<RVInst>(BaseInst#"_AQ_RL") GPR:$cmp, GPR:$addr, GPR:$new)>;
+ } // Predicates = !listconcat([HasStdExtZacas, NotHasStdExtZtso], ExtraPreds)
+ let Predicates = !listconcat([HasStdExtZacas, HasStdExtZtso], ExtraPreds) in {
+ def : Pat<(!cast<PatFrag>(AtomicOp#"_monotonic") (vt GPR:$addr),
+ (vt GPR:$cmp),
+ (vt GPR:$new)),
+ (!cast<RVInst>(BaseInst) GPR:$cmp, GPR:$addr, GPR:$new)>;
+ def : Pat<(!cast<PatFrag>(AtomicOp#"_acquire") (vt GPR:$addr),
+ (vt GPR:$cmp),
+ (vt GPR:$new)),
+ (!cast<RVInst>(BaseInst) GPR:$cmp, GPR:$addr, GPR:$new)>;
+ def : Pat<(!cast<PatFrag>(AtomicOp#"_release") (vt GPR:$addr),
+ (vt GPR:$cmp),
+ (vt GPR:$new)),
+ (!cast<RVInst>(BaseInst) GPR:$cmp, GPR:$addr, GPR:$new)>;
+ def : Pat<(!cast<PatFrag>(AtomicOp#"_acq_rel") (vt GPR:$addr),
+ (vt GPR:$cmp),
+ (vt GPR:$new)),
+ (!cast<RVInst>(BaseInst) GPR:$cmp, GPR:$addr, GPR:$new)>;
+ def : Pat<(!cast<PatFrag>(AtomicOp#"_seq_cst") (vt GPR:$addr),
+ (vt GPR:$cmp),
+ (vt GPR:$new)),
+ (!cast<RVInst>(BaseInst) GPR:$cmp, GPR:$addr, GPR:$new)>;
+ } // Predicates = !listconcat([HasStdExtZacas, HasStdExtZtso], ExtraPreds)
+}
+
+defm : AMOCASPat<"atomic_cmp_swap_32", "AMOCAS_W">;
+defm : AMOCASPat<"atomic_cmp_swap_64", "AMOCAS_D_RV64", i64, [IsRV64]>;
+
//===----------------------------------------------------------------------===//
// Zawrs (Wait-on-Reservation-Set)
//===----------------------------------------------------------------------===//
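
The AMOCASPat mapping follows the usual RVWMO lowering: without Ztso the stronger orderings pick the .aq/.rl/.aqrl variants (seq_cst conservatively uses .aqrl), while with Ztso every ordering selects the plain instruction, since TSO hardware already provides the ordering. At the source level amocas.w is a one-shot 32-bit compare-and-swap; a portable C++ analogue of what it implements (an analogue only, not the lowering):

#include <atomic>
#include <cstdint>

// 'expected' plays the role of rd: it carries the compare value and is
// overwritten with the old memory value, matching the "$rd = $rd_wb"
// constraint above. 'desired' plays the role of rs2.
bool cas32(std::atomic<uint32_t> &mem, uint32_t &expected, uint32_t desired) {
  return mem.compare_exchange_strong(expected, desired,
                                     std::memory_order_seq_cst);
}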
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
index a59d058382fe..5a4d8c4cfece 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
@@ -63,7 +63,10 @@ def sub_vrm1_5 : ComposedSubRegIndex<sub_vrm2_2, sub_vrm1_1>;
def sub_vrm1_6 : ComposedSubRegIndex<sub_vrm2_3, sub_vrm1_0>;
def sub_vrm1_7 : ComposedSubRegIndex<sub_vrm2_3, sub_vrm1_1>;
-def sub_32_hi : SubRegIndex<32, 32>;
+// GPR sizes change with HwMode.
+// FIXME: Support HwMode in SubRegIndex?
+def sub_gpr_even : SubRegIndex<-1>;
+def sub_gpr_odd : SubRegIndex<-1, -1>;
} // Namespace = "RISCV"
// Integer registers
@@ -118,6 +121,8 @@ def XLenVT : ValueTypeByHwMode<[RV32, RV64],
// Allow f64 in GPR for ZDINX on RV64.
def XLenFVT : ValueTypeByHwMode<[RV64],
[f64]>;
+def XLenPairFVT : ValueTypeByHwMode<[RV32],
+ [f64]>;
def XLenRI : RegInfoByHwMode<
[RV32, RV64],
[RegInfo<32,32,32>, RegInfo<64,64,64>]>;
@@ -546,33 +551,37 @@ def DUMMY_REG_PAIR_WITH_X0 : RISCVReg<0, "0">;
def GPRAll : GPRRegisterClass<(add GPR, DUMMY_REG_PAIR_WITH_X0)>;
let RegAltNameIndices = [ABIRegAltName] in {
- def X0_PD : RISCVRegWithSubRegs<0, X0.AsmName,
- [X0, DUMMY_REG_PAIR_WITH_X0],
- X0.AltNames> {
- let SubRegIndices = [sub_32, sub_32_hi];
+ def X0_Pair : RISCVRegWithSubRegs<0, X0.AsmName,
+ [X0, DUMMY_REG_PAIR_WITH_X0],
+ X0.AltNames> {
+ let SubRegIndices = [sub_gpr_even, sub_gpr_odd];
let CoveredBySubRegs = 1;
}
foreach I = 1-15 in {
defvar Index = !shl(I, 1);
+ defvar IndexP1 = !add(Index, 1);
defvar Reg = !cast<Register>("X"#Index);
- defvar RegP1 = !cast<Register>("X"#!add(Index,1));
- def X#Index#_PD : RISCVRegWithSubRegs<Index, Reg.AsmName,
- [Reg, RegP1],
- Reg.AltNames> {
- let SubRegIndices = [sub_32, sub_32_hi];
+ defvar RegP1 = !cast<Register>("X"#IndexP1);
+ def "X" # Index #"_X" # IndexP1 : RISCVRegWithSubRegs<Index,
+ Reg.AsmName,
+ [Reg, RegP1],
+ Reg.AltNames> {
+ let SubRegIndices = [sub_gpr_even, sub_gpr_odd];
let CoveredBySubRegs = 1;
}
}
}
-let RegInfos = RegInfoByHwMode<[RV64], [RegInfo<64, 64, 64>]> in
-def GPRPF64 : RegisterClass<"RISCV", [f64], 64, (add
- X10_PD, X12_PD, X14_PD, X16_PD,
- X6_PD,
- X28_PD, X30_PD,
- X8_PD,
- X18_PD, X20_PD, X22_PD, X24_PD, X26_PD,
- X0_PD, X2_PD, X4_PD
+let RegInfos = RegInfoByHwMode<[RV32, RV64],
+ [RegInfo<64, 64, 64>, RegInfo<128, 128, 128>]>,
+ DecoderMethod = "DecodeGPRPairRegisterClass" in
+def GPRPair : RegisterClass<"RISCV", [XLenPairFVT], 64, (add
+ X10_X11, X12_X13, X14_X15, X16_X17,
+ X6_X7,
+ X28_X29, X30_X31,
+ X8_X9,
+ X18_X19, X20_X21, X22_X23, X24_X25, X26_X27,
+ X0_Pair, X2_X3, X4_X5
)>;
// The register class is added for inline assembly for vector mask types.
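
The pair registers are now named after both members (X10_X11, ...) and always start at an even index; XLenPairFVT gives the class f64 on RV32 so a double can live in a GPR pair (Zdinx), while on RV64 the same class is 128 bits wide for amocas.q. A sketch of the split, assuming the little-endian convention that sub_gpr_even carries the low word (an assumption for illustration, not stated by the patch):

#include <cstdint>
#include <cstring>

// Illustrative split of an f64 across an RV32 GPR pair: even subregister
// assumed to hold the low 32 bits, odd the high 32 bits.
struct PairRV32 { uint32_t even, odd; };

PairRV32 splitF64(double d) {
  uint64_t bits;
  std::memcpy(&bits, &d, sizeof bits);
  return {uint32_t(bits), uint32_t(bits >> 32)};
}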
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
index 320f91c76057..815eca1240d8 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
@@ -1649,7 +1649,7 @@ void SystemZDAGToDAGISel::Select(SDNode *Node) {
}
}
if (Node->getValueType(0) == MVT::i128) {
- const APInt &Val = cast<ConstantSDNode>(Node)->getAPIntValue();
+ const APInt &Val = Node->getAsAPIntVal();
SystemZVectorConstantInfo VCI(Val);
if (VCI.isVectorConstantLegal(*Subtarget)) {
loadVectorConstant(VCI, Node);
diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index 2450c6801a66..7d387c7b9f2f 100644
--- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -340,6 +340,13 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
setLibcallName(RTLIB::SHL_I128, nullptr);
setLibcallName(RTLIB::SRA_I128, nullptr);
+  // Also expand 256-bit shifts if i128 is a legal type.
+ if (isTypeLegal(MVT::i128)) {
+ setOperationAction(ISD::SRL_PARTS, MVT::i128, Expand);
+ setOperationAction(ISD::SHL_PARTS, MVT::i128, Expand);
+ setOperationAction(ISD::SRA_PARTS, MVT::i128, Expand);
+ }
+
// Handle bitcast from fp128 to i128.
if (!isTypeLegal(MVT::i128))
setOperationAction(ISD::BITCAST, MVT::i128, Custom);
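
Marking the *_PARTS nodes Expand makes legalization synthesize the 256-bit shift from two i128 halves via the standard parts decomposition: for a left shift by s with half width W, hi' = (hi << s) | (lo >> (W - s)) and lo' = lo << s when 0 < s < W, else hi' = lo << (s - W) and lo' = 0. The same shape at W = 64, as a runnable model (the i128 case is identical in structure):

#include <cassert>
#include <cstdint>

// Model of SHL_PARTS: shift a 128-bit value held as two 64-bit halves.
// Valid for shift amounts s in [0, 128).
void shl_parts(uint64_t &hi, uint64_t &lo, unsigned s) {
  const unsigned W = 64;
  if (s == 0) return;
  if (s < W) {
    hi = (hi << s) | (lo >> (W - s));
    lo <<= s;
  } else {  // the low half shifts entirely into the high half
    hi = lo << (s - W);
    lo = 0;
  }
}

int main() {
  uint64_t hi = 0, lo = 1;
  shl_parts(hi, lo, 64);
  assert(hi == 1 && lo == 0);
}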
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
index 15dc44a04395..7f0140a5e8c6 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
@@ -839,9 +839,9 @@ bool WebAssemblyFastISel::selectCall(const Instruction *I) {
unsigned Reg;
- if (Attrs.hasParamAttr(I, Attribute::SExt))
+ if (Call->paramHasAttr(I, Attribute::SExt))
Reg = getRegForSignedValue(V);
- else if (Attrs.hasParamAttr(I, Attribute::ZExt))
+ else if (Call->paramHasAttr(I, Attribute::ZExt))
Reg = getRegForUnsignedValue(V);
else
Reg = getRegForValue(V);
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
index 304b998e1f26..e006dd877360 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
+++ b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
@@ -148,21 +148,25 @@ classifyFirstOpcodeInMacroFusion(unsigned Opcode) {
case X86::AND16ri8:
case X86::AND16rm:
case X86::AND16rr:
+ case X86::AND16rr_REV:
case X86::AND32i32:
case X86::AND32ri:
case X86::AND32ri8:
case X86::AND32rm:
case X86::AND32rr:
+ case X86::AND32rr_REV:
case X86::AND64i32:
case X86::AND64ri32:
case X86::AND64ri8:
case X86::AND64rm:
case X86::AND64rr:
+ case X86::AND64rr_REV:
case X86::AND8i8:
case X86::AND8ri:
case X86::AND8ri8:
case X86::AND8rm:
case X86::AND8rr:
+ case X86::AND8rr_REV:
return FirstMacroFusionInstKind::And;
// CMP
case X86::CMP16i16:
@@ -171,24 +175,28 @@ classifyFirstOpcodeInMacroFusion(unsigned Opcode) {
case X86::CMP16ri8:
case X86::CMP16rm:
case X86::CMP16rr:
+ case X86::CMP16rr_REV:
case X86::CMP32i32:
case X86::CMP32mr:
case X86::CMP32ri:
case X86::CMP32ri8:
case X86::CMP32rm:
case X86::CMP32rr:
+ case X86::CMP32rr_REV:
case X86::CMP64i32:
case X86::CMP64mr:
case X86::CMP64ri32:
case X86::CMP64ri8:
case X86::CMP64rm:
case X86::CMP64rr:
+ case X86::CMP64rr_REV:
case X86::CMP8i8:
case X86::CMP8mr:
case X86::CMP8ri:
case X86::CMP8ri8:
case X86::CMP8rm:
case X86::CMP8rr:
+ case X86::CMP8rr_REV:
return FirstMacroFusionInstKind::Cmp;
// ADD
case X86::ADD16i16:
@@ -196,42 +204,50 @@ classifyFirstOpcodeInMacroFusion(unsigned Opcode) {
case X86::ADD16ri8:
case X86::ADD16rm:
case X86::ADD16rr:
+ case X86::ADD16rr_REV:
case X86::ADD32i32:
case X86::ADD32ri:
case X86::ADD32ri8:
case X86::ADD32rm:
case X86::ADD32rr:
+ case X86::ADD32rr_REV:
case X86::ADD64i32:
case X86::ADD64ri32:
case X86::ADD64ri8:
case X86::ADD64rm:
case X86::ADD64rr:
+ case X86::ADD64rr_REV:
case X86::ADD8i8:
case X86::ADD8ri:
case X86::ADD8ri8:
case X86::ADD8rm:
case X86::ADD8rr:
+ case X86::ADD8rr_REV:
// SUB
case X86::SUB16i16:
case X86::SUB16ri:
case X86::SUB16ri8:
case X86::SUB16rm:
case X86::SUB16rr:
+ case X86::SUB16rr_REV:
case X86::SUB32i32:
case X86::SUB32ri:
case X86::SUB32ri8:
case X86::SUB32rm:
case X86::SUB32rr:
+ case X86::SUB32rr_REV:
case X86::SUB64i32:
case X86::SUB64ri32:
case X86::SUB64ri8:
case X86::SUB64rm:
case X86::SUB64rr:
+ case X86::SUB64rr_REV:
case X86::SUB8i8:
case X86::SUB8ri:
case X86::SUB8ri8:
case X86::SUB8rm:
case X86::SUB8rr:
+ case X86::SUB8rr_REV:
return FirstMacroFusionInstKind::AddSub;
// INC
case X86::INC16r:
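
The rr_REV opcodes are the alternative ModRM encodings of the same register-register operations (x86 lets reg-reg forms be encoded with either operand order), so they must land in the same fusion bucket as their plain rr twins. For example, both byte sequences below encode add ecx, eax; encodings are per the Intel SDM and shown only to illustrate why the _REV twin exists:

#include <cstdint>

// Two encodings of `add ecx, eax`; the second operand order is what the
// *_REV opcodes model. Both must map to FirstMacroFusionInstKind::AddSub.
const uint8_t AddRR[]     = {0x01, 0xC1}; // 01 /r: ADD r/m32, r32
const uint8_t AddRR_REV[] = {0x03, 0xC8}; // 03 /r: ADD r32, r/m32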
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp
index aad839b83ee1..b13bf361ab79 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp
@@ -173,6 +173,7 @@ static FlagArithMnemonic getMnemonicFromOpcode(unsigned Opcode) {
#define LLVM_EXPAND_ADC_SBB_INSTR(MNEMONIC) \
LLVM_EXPAND_INSTR_SIZES(MNEMONIC, rr) \
+ LLVM_EXPAND_INSTR_SIZES(MNEMONIC, rr_REV) \
LLVM_EXPAND_INSTR_SIZES(MNEMONIC, rm) \
LLVM_EXPAND_INSTR_SIZES(MNEMONIC, mr) \
case X86::MNEMONIC##8ri: \
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp
index 5a28240ea9e2..700ab797b2f6 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -2444,6 +2444,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
ISD::SRL,
ISD::OR,
ISD::AND,
+ ISD::BITREVERSE,
ISD::ADD,
ISD::FADD,
ISD::FSUB,
@@ -4821,8 +4822,8 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
APInt UndefSrcElts(NumSrcElts, 0);
SmallVector<APInt, 64> SrcEltBits;
- auto *CN = cast<ConstantSDNode>(Op.getOperand(0).getOperand(0));
- SrcEltBits.push_back(CN->getAPIntValue().zextOrTrunc(SrcEltSizeInBits));
+ const APInt &C = Op.getOperand(0).getConstantOperandAPInt(0);
+ SrcEltBits.push_back(C.zextOrTrunc(SrcEltSizeInBits));
SrcEltBits.append(NumSrcElts - 1, APInt(SrcEltSizeInBits, 0));
return CastBitData(UndefSrcElts, SrcEltBits);
}
@@ -17223,6 +17224,7 @@ static SDValue lower1BitShuffle(const SDLoc &DL, ArrayRef<int> Mask,
"Cannot lower 512-bit vectors w/o basic ISA!");
int NumElts = Mask.size();
+ int NumV2Elements = count_if(Mask, [NumElts](int M) { return M >= NumElts; });
// Try to recognize shuffles that are just padding a subvector with zeros.
int SubvecElts = 0;
@@ -17288,17 +17290,18 @@ static SDValue lower1BitShuffle(const SDLoc &DL, ArrayRef<int> Mask,
Offset += NumElts; // Increment for next iteration.
}
- // If we're broadcasting a SETCC result, try to broadcast the ops instead.
+  // If we're performing a unary shuffle on a SETCC result, try to shuffle the
+ // ops instead.
// TODO: What other unary shuffles would benefit from this?
- if (isBroadcastShuffleMask(Mask) && V1.getOpcode() == ISD::SETCC &&
- V1->hasOneUse()) {
+ if (NumV2Elements == 0 && V1.getOpcode() == ISD::SETCC && V1->hasOneUse()) {
SDValue Op0 = V1.getOperand(0);
SDValue Op1 = V1.getOperand(1);
ISD::CondCode CC = cast<CondCodeSDNode>(V1.getOperand(2))->get();
EVT OpVT = Op0.getValueType();
- return DAG.getSetCC(
- DL, VT, DAG.getVectorShuffle(OpVT, DL, Op0, DAG.getUNDEF(OpVT), Mask),
- DAG.getVectorShuffle(OpVT, DL, Op1, DAG.getUNDEF(OpVT), Mask), CC);
+ if (OpVT.getScalarSizeInBits() >= 32 || isBroadcastShuffleMask(Mask))
+ return DAG.getSetCC(
+ DL, VT, DAG.getVectorShuffle(OpVT, DL, Op0, DAG.getUNDEF(OpVT), Mask),
+ DAG.getVectorShuffle(OpVT, DL, Op1, DAG.getUNDEF(OpVT), Mask), CC);
}
MVT ExtVT;
@@ -22551,7 +22554,7 @@ static SDValue EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,
// FIXME: Do this for non-constant compares for constant on LHS?
if (CmpVT == MVT::i64 && isa<ConstantSDNode>(Op1) && !isX86CCSigned(X86CC) &&
Op0.hasOneUse() && // Hacky way to not break CSE opportunities with sub.
- cast<ConstantSDNode>(Op1)->getAPIntValue().getActiveBits() <= 32 &&
+ Op1->getAsAPIntVal().getActiveBits() <= 32 &&
DAG.MaskedValueIsZero(Op0, APInt::getHighBitsSet(64, 32))) {
CmpVT = MVT::i32;
Op0 = DAG.getNode(ISD::TRUNCATE, dl, CmpVT, Op0);
@@ -47029,8 +47032,8 @@ static SDValue combineShiftRightArithmetic(SDNode *N, SelectionDAG &DAG,
SDValue N00 = N0.getOperand(0);
SDValue N01 = N0.getOperand(1);
- APInt ShlConst = (cast<ConstantSDNode>(N01))->getAPIntValue();
- APInt SarConst = (cast<ConstantSDNode>(N1))->getAPIntValue();
+ APInt ShlConst = N01->getAsAPIntVal();
+ APInt SarConst = N1->getAsAPIntVal();
EVT CVT = N1.getValueType();
if (SarConst.isNegative())
@@ -51835,6 +51838,33 @@ static SDValue combineXor(SDNode *N, SelectionDAG &DAG,
return combineFneg(N, DAG, DCI, Subtarget);
}
+static SDValue combineBITREVERSE(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget &Subtarget) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // Convert a (iX bitreverse(bitcast(vXi1 X))) -> (iX bitcast(shuffle(X)))
+ if (VT.isInteger() && N0.getOpcode() == ISD::BITCAST && N0.hasOneUse()) {
+ SDValue Src = N0.getOperand(0);
+ EVT SrcVT = Src.getValueType();
+ if (SrcVT.isVector() && SrcVT.getScalarType() == MVT::i1 &&
+ (DCI.isBeforeLegalize() ||
+ DAG.getTargetLoweringInfo().isTypeLegal(SrcVT)) &&
+ Subtarget.hasSSSE3()) {
+ unsigned NumElts = SrcVT.getVectorNumElements();
+ SmallVector<int, 32> ReverseMask(NumElts);
+ for (unsigned I = 0; I != NumElts; ++I)
+ ReverseMask[I] = (NumElts - 1) - I;
+ SDValue Rev =
+ DAG.getVectorShuffle(SrcVT, SDLoc(N), Src, Src, ReverseMask);
+ return DAG.getBitcast(VT, Rev);
+ }
+ }
+
+ return SDValue();
+}
+
static SDValue combineBEXTR(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
@@ -56124,6 +56154,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::AND: return combineAnd(N, DAG, DCI, Subtarget);
case ISD::OR: return combineOr(N, DAG, DCI, Subtarget);
case ISD::XOR: return combineXor(N, DAG, DCI, Subtarget);
+ case ISD::BITREVERSE: return combineBITREVERSE(N, DAG, DCI, Subtarget);
case X86ISD::BEXTR:
case X86ISD::BEXTRI: return combineBEXTR(N, DAG, DCI, Subtarget);
case ISD::LOAD: return combineLoad(N, DAG, DCI, Subtarget);
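
combineBITREVERSE relies on the layout of a vXi1-to-iX bitcast: mask element i lands in bit i, so reversing the bits of the integer equals reversing the mask elements before the bitcast, and with SSSE3 that element reversal is a cheap shuffle. A scalar check of the identity at X = 8:

#include <array>
#include <cassert>
#include <cstdint>

// Pack a v8i1 "mask" into a uint8_t: element i -> bit i.
uint8_t pack(const std::array<bool, 8> &m) {
  uint8_t r = 0;
  for (int i = 0; i < 8; ++i) r |= uint8_t(m[i]) << i;
  return r;
}

uint8_t bitreverse8(uint8_t x) {
  uint8_t r = 0;
  for (int i = 0; i < 8; ++i) r |= ((x >> i) & 1) << (7 - i);
  return r;
}

int main() {
  std::array<bool, 8> m{1, 0, 1, 1, 0, 0, 1, 0};
  std::array<bool, 8> rev;
  for (int i = 0; i < 8; ++i) rev[i] = m[7 - i];  // the ReverseMask shuffle
  assert(bitreverse8(pack(m)) == pack(rev));      // bitreverse == shuffle
}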
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrArithmetic.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrArithmetic.td
index 5cfa95e085e3..76b0fe5f5cad 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrArithmetic.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrArithmetic.td
@@ -1107,43 +1107,85 @@ def : Pat<(store (X86adc_flag GR64:$src, (loadi64 addr:$dst), EFLAGS),
// Patterns for basic arithmetic ops with relocImm for the immediate field.
multiclass ArithBinOp_RF_relocImm_Pats<SDNode OpNodeFlag, SDNode OpNode> {
- def : Pat<(OpNodeFlag GR8:$src1, relocImm8_su:$src2),
- (!cast<Instruction>(NAME#"8ri") GR8:$src1, relocImm8_su:$src2)>;
- def : Pat<(OpNodeFlag GR16:$src1, relocImm16_su:$src2),
- (!cast<Instruction>(NAME#"16ri") GR16:$src1, relocImm16_su:$src2)>;
- def : Pat<(OpNodeFlag GR32:$src1, relocImm32_su:$src2),
- (!cast<Instruction>(NAME#"32ri") GR32:$src1, relocImm32_su:$src2)>;
- def : Pat<(OpNodeFlag GR64:$src1, i64relocImmSExt32_su:$src2),
- (!cast<Instruction>(NAME#"64ri32") GR64:$src1, i64relocImmSExt32_su:$src2)>;
-
- def : Pat<(store (OpNode (load addr:$dst), relocImm8_su:$src), addr:$dst),
- (!cast<Instruction>(NAME#"8mi") addr:$dst, relocImm8_su:$src)>;
- def : Pat<(store (OpNode (load addr:$dst), relocImm16_su:$src), addr:$dst),
- (!cast<Instruction>(NAME#"16mi") addr:$dst, relocImm16_su:$src)>;
- def : Pat<(store (OpNode (load addr:$dst), relocImm32_su:$src), addr:$dst),
- (!cast<Instruction>(NAME#"32mi") addr:$dst, relocImm32_su:$src)>;
- def : Pat<(store (OpNode (load addr:$dst), i64relocImmSExt32_su:$src), addr:$dst),
- (!cast<Instruction>(NAME#"64mi32") addr:$dst, i64relocImmSExt32_su:$src)>;
+ let Predicates = [NoNDD] in {
+ def : Pat<(OpNodeFlag GR8:$src1, relocImm8_su:$src2),
+ (!cast<Instruction>(NAME#"8ri") GR8:$src1, relocImm8_su:$src2)>;
+ def : Pat<(OpNodeFlag GR16:$src1, relocImm16_su:$src2),
+ (!cast<Instruction>(NAME#"16ri") GR16:$src1, relocImm16_su:$src2)>;
+ def : Pat<(OpNodeFlag GR32:$src1, relocImm32_su:$src2),
+ (!cast<Instruction>(NAME#"32ri") GR32:$src1, relocImm32_su:$src2)>;
+ def : Pat<(OpNodeFlag GR64:$src1, i64relocImmSExt32_su:$src2),
+ (!cast<Instruction>(NAME#"64ri32") GR64:$src1, i64relocImmSExt32_su:$src2)>;
+
+ def : Pat<(store (OpNode (load addr:$dst), relocImm8_su:$src), addr:$dst),
+ (!cast<Instruction>(NAME#"8mi") addr:$dst, relocImm8_su:$src)>;
+ def : Pat<(store (OpNode (load addr:$dst), relocImm16_su:$src), addr:$dst),
+ (!cast<Instruction>(NAME#"16mi") addr:$dst, relocImm16_su:$src)>;
+ def : Pat<(store (OpNode (load addr:$dst), relocImm32_su:$src), addr:$dst),
+ (!cast<Instruction>(NAME#"32mi") addr:$dst, relocImm32_su:$src)>;
+ def : Pat<(store (OpNode (load addr:$dst), i64relocImmSExt32_su:$src), addr:$dst),
+ (!cast<Instruction>(NAME#"64mi32") addr:$dst, i64relocImmSExt32_su:$src)>;
+ }
+ let Predicates = [HasNDD] in {
+ def : Pat<(OpNodeFlag GR8:$src1, relocImm8_su:$src2),
+ (!cast<Instruction>(NAME#"8ri_ND") GR8:$src1, relocImm8_su:$src2)>;
+ def : Pat<(OpNodeFlag GR16:$src1, relocImm16_su:$src2),
+ (!cast<Instruction>(NAME#"16ri_ND") GR16:$src1, relocImm16_su:$src2)>;
+ def : Pat<(OpNodeFlag GR32:$src1, relocImm32_su:$src2),
+ (!cast<Instruction>(NAME#"32ri_ND") GR32:$src1, relocImm32_su:$src2)>;
+ def : Pat<(OpNodeFlag GR64:$src1, i64relocImmSExt32_su:$src2),
+ (!cast<Instruction>(NAME#"64ri32_ND") GR64:$src1, i64relocImmSExt32_su:$src2)>;
+
+ def : Pat<(OpNode (load addr:$dst), relocImm8_su:$src),
+ (!cast<Instruction>(NAME#"8mi_ND") addr:$dst, relocImm8_su:$src)>;
+ def : Pat<(OpNode (load addr:$dst), relocImm16_su:$src),
+ (!cast<Instruction>(NAME#"16mi_ND") addr:$dst, relocImm16_su:$src)>;
+ def : Pat<(OpNode (load addr:$dst), relocImm32_su:$src),
+ (!cast<Instruction>(NAME#"32mi_ND") addr:$dst, relocImm32_su:$src)>;
+ def : Pat<(OpNode (load addr:$dst), i64relocImmSExt32_su:$src),
+ (!cast<Instruction>(NAME#"64mi32_ND") addr:$dst, i64relocImmSExt32_su:$src)>;
+ }
}
multiclass ArithBinOp_RFF_relocImm_Pats<SDNode OpNodeFlag> {
- def : Pat<(OpNodeFlag GR8:$src1, relocImm8_su:$src2, EFLAGS),
- (!cast<Instruction>(NAME#"8ri") GR8:$src1, relocImm8_su:$src2)>;
- def : Pat<(OpNodeFlag GR16:$src1, relocImm16_su:$src2, EFLAGS),
- (!cast<Instruction>(NAME#"16ri") GR16:$src1, relocImm16_su:$src2)>;
- def : Pat<(OpNodeFlag GR32:$src1, relocImm32_su:$src2, EFLAGS),
- (!cast<Instruction>(NAME#"32ri") GR32:$src1, relocImm32_su:$src2)>;
- def : Pat<(OpNodeFlag GR64:$src1, i64relocImmSExt32_su:$src2, EFLAGS),
- (!cast<Instruction>(NAME#"64ri32") GR64:$src1, i64relocImmSExt32_su:$src2)>;
-
- def : Pat<(store (OpNodeFlag (load addr:$dst), relocImm8_su:$src, EFLAGS), addr:$dst),
- (!cast<Instruction>(NAME#"8mi") addr:$dst, relocImm8_su:$src)>;
- def : Pat<(store (OpNodeFlag (load addr:$dst), relocImm16_su:$src, EFLAGS), addr:$dst),
- (!cast<Instruction>(NAME#"16mi") addr:$dst, relocImm16_su:$src)>;
- def : Pat<(store (OpNodeFlag (load addr:$dst), relocImm32_su:$src, EFLAGS), addr:$dst),
- (!cast<Instruction>(NAME#"32mi") addr:$dst, relocImm32_su:$src)>;
- def : Pat<(store (OpNodeFlag (load addr:$dst), i64relocImmSExt32_su:$src, EFLAGS), addr:$dst),
- (!cast<Instruction>(NAME#"64mi32") addr:$dst, i64relocImmSExt32_su:$src)>;
+ let Predicates = [NoNDD] in {
+ def : Pat<(OpNodeFlag GR8:$src1, relocImm8_su:$src2, EFLAGS),
+ (!cast<Instruction>(NAME#"8ri") GR8:$src1, relocImm8_su:$src2)>;
+ def : Pat<(OpNodeFlag GR16:$src1, relocImm16_su:$src2, EFLAGS),
+ (!cast<Instruction>(NAME#"16ri") GR16:$src1, relocImm16_su:$src2)>;
+ def : Pat<(OpNodeFlag GR32:$src1, relocImm32_su:$src2, EFLAGS),
+ (!cast<Instruction>(NAME#"32ri") GR32:$src1, relocImm32_su:$src2)>;
+ def : Pat<(OpNodeFlag GR64:$src1, i64relocImmSExt32_su:$src2, EFLAGS),
+ (!cast<Instruction>(NAME#"64ri32") GR64:$src1, i64relocImmSExt32_su:$src2)>;
+
+ def : Pat<(store (OpNodeFlag (load addr:$dst), relocImm8_su:$src, EFLAGS), addr:$dst),
+ (!cast<Instruction>(NAME#"8mi") addr:$dst, relocImm8_su:$src)>;
+ def : Pat<(store (OpNodeFlag (load addr:$dst), relocImm16_su:$src, EFLAGS), addr:$dst),
+ (!cast<Instruction>(NAME#"16mi") addr:$dst, relocImm16_su:$src)>;
+ def : Pat<(store (OpNodeFlag (load addr:$dst), relocImm32_su:$src, EFLAGS), addr:$dst),
+ (!cast<Instruction>(NAME#"32mi") addr:$dst, relocImm32_su:$src)>;
+ def : Pat<(store (OpNodeFlag (load addr:$dst), i64relocImmSExt32_su:$src, EFLAGS), addr:$dst),
+ (!cast<Instruction>(NAME#"64mi32") addr:$dst, i64relocImmSExt32_su:$src)>;
+ }
+ let Predicates = [HasNDD] in {
+ def : Pat<(OpNodeFlag GR8:$src1, relocImm8_su:$src2, EFLAGS),
+ (!cast<Instruction>(NAME#"8ri_ND") GR8:$src1, relocImm8_su:$src2)>;
+ def : Pat<(OpNodeFlag GR16:$src1, relocImm16_su:$src2, EFLAGS),
+ (!cast<Instruction>(NAME#"16ri_ND") GR16:$src1, relocImm16_su:$src2)>;
+ def : Pat<(OpNodeFlag GR32:$src1, relocImm32_su:$src2, EFLAGS),
+ (!cast<Instruction>(NAME#"32ri_ND") GR32:$src1, relocImm32_su:$src2)>;
+ def : Pat<(OpNodeFlag GR64:$src1, i64relocImmSExt32_su:$src2, EFLAGS),
+ (!cast<Instruction>(NAME#"64ri32_ND") GR64:$src1, i64relocImmSExt32_su:$src2)>;
+
+ def : Pat<(OpNodeFlag (load addr:$dst), relocImm8_su:$src, EFLAGS),
+ (!cast<Instruction>(NAME#"8mi_ND") addr:$dst, relocImm8_su:$src)>;
+ def : Pat<(OpNodeFlag (load addr:$dst), relocImm16_su:$src, EFLAGS),
+ (!cast<Instruction>(NAME#"16mi_ND") addr:$dst, relocImm16_su:$src)>;
+ def : Pat<(OpNodeFlag (load addr:$dst), relocImm32_su:$src, EFLAGS),
+ (!cast<Instruction>(NAME#"32mi_ND") addr:$dst, relocImm32_su:$src)>;
+ def : Pat<(OpNodeFlag (load addr:$dst), i64relocImmSExt32_su:$src, EFLAGS),
+ (!cast<Instruction>(NAME#"64mi32_ND") addr:$dst, i64relocImmSExt32_su:$src)>;
+ }
}
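
Note the shape change in the HasNDD memory patterns above: the legacy mi forms are read-modify-write, so the DAG pattern wraps the operation in a store back to the same address, whereas the _ND forms write their result to a new destination register and the store disappears. A C-level picture of the difference, illustrative only:

#include <cstdint>

// Legacy mi form: read-modify-write (matches the `store` wrapper).
void and_mi(uint32_t *p, uint32_t imm) { *p = *p & imm; }

// NDD mi_ND form: result lands in a fresh destination, no store emitted.
uint32_t and_mi_nd(const uint32_t *p, uint32_t imm) { return *p & imm; }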
multiclass ArithBinOp_F_relocImm_Pats<SDNode OpNodeFlag> {
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrCompiler.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrCompiler.td
index c77c77ee4a3e..422391a6e02a 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -1550,13 +1550,24 @@ def : Pat<(X86add_flag_nocf GR64:$src1, 0x0000000080000000),
// AddedComplexity is needed to give priority over i64immSExt8 and i64immSExt32.
let AddedComplexity = 1 in {
-def : Pat<(and GR64:$src, i64immZExt32:$imm),
- (SUBREG_TO_REG
- (i64 0),
- (AND32ri
- (EXTRACT_SUBREG GR64:$src, sub_32bit),
- (i32 (GetLo32XForm imm:$imm))),
- sub_32bit)>;
+ let Predicates = [NoNDD] in {
+ def : Pat<(and GR64:$src, i64immZExt32:$imm),
+ (SUBREG_TO_REG
+ (i64 0),
+ (AND32ri
+ (EXTRACT_SUBREG GR64:$src, sub_32bit),
+ (i32 (GetLo32XForm imm:$imm))),
+ sub_32bit)>;
+ }
+ let Predicates = [HasNDD] in {
+ def : Pat<(and GR64:$src, i64immZExt32:$imm),
+ (SUBREG_TO_REG
+ (i64 0),
+ (AND32ri_ND
+ (EXTRACT_SUBREG GR64:$src, sub_32bit),
+ (i32 (GetLo32XForm imm:$imm))),
+ sub_32bit)>;
+ }
} // AddedComplexity = 1
@@ -1762,10 +1773,18 @@ def : Pat<(X86xor_flag (i8 (trunc GR32:$src)),
// where the least significant bit is not 0. However, the probability of this
// happening is considered low enough that this is officially not a
// "real problem".
-def : Pat<(shl GR8 :$src1, (i8 1)), (ADD8rr GR8 :$src1, GR8 :$src1)>;
-def : Pat<(shl GR16:$src1, (i8 1)), (ADD16rr GR16:$src1, GR16:$src1)>;
-def : Pat<(shl GR32:$src1, (i8 1)), (ADD32rr GR32:$src1, GR32:$src1)>;
-def : Pat<(shl GR64:$src1, (i8 1)), (ADD64rr GR64:$src1, GR64:$src1)>;
+let Predicates = [NoNDD] in {
+ def : Pat<(shl GR8 :$src1, (i8 1)), (ADD8rr GR8 :$src1, GR8 :$src1)>;
+ def : Pat<(shl GR16:$src1, (i8 1)), (ADD16rr GR16:$src1, GR16:$src1)>;
+ def : Pat<(shl GR32:$src1, (i8 1)), (ADD32rr GR32:$src1, GR32:$src1)>;
+ def : Pat<(shl GR64:$src1, (i8 1)), (ADD64rr GR64:$src1, GR64:$src1)>;
+}
+let Predicates = [HasNDD] in {
+ def : Pat<(shl GR8 :$src1, (i8 1)), (ADD8rr_ND GR8 :$src1, GR8 :$src1)>;
+ def : Pat<(shl GR16:$src1, (i8 1)), (ADD16rr_ND GR16:$src1, GR16:$src1)>;
+ def : Pat<(shl GR32:$src1, (i8 1)), (ADD32rr_ND GR32:$src1, GR32:$src1)>;
+ def : Pat<(shl GR64:$src1, (i8 1)), (ADD64rr_ND GR64:$src1, GR64:$src1)>;
+}
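
These shl-by-1 patterns rest on the identity x << 1 == x + x, which lets selection reuse the ADDrr forms (and, under NDD, their _ND twins) for a left shift of one. A trivial standalone check:

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t x : {0u, 1u, 0x80000000u, 0xDEADBEEFu})
    assert((x << 1) == x + x);  // why shl-by-1 can select to an add
}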
// Shift amount is implicitly masked.
multiclass MaskedShiftAmountPats<SDNode frag, string name> {
@@ -1937,75 +1956,179 @@ defm : one_bit_patterns<GR64, i64, BTR64rr, BTS64rr, BTC64rr, shiftMask64>;
// EFLAGS-defining Patterns
//===----------------------------------------------------------------------===//
-// add reg, reg
-def : Pat<(add GR8 :$src1, GR8 :$src2), (ADD8rr GR8 :$src1, GR8 :$src2)>;
-def : Pat<(add GR16:$src1, GR16:$src2), (ADD16rr GR16:$src1, GR16:$src2)>;
-def : Pat<(add GR32:$src1, GR32:$src2), (ADD32rr GR32:$src1, GR32:$src2)>;
-def : Pat<(add GR64:$src1, GR64:$src2), (ADD64rr GR64:$src1, GR64:$src2)>;
-
-// add reg, mem
-def : Pat<(add GR8:$src1, (loadi8 addr:$src2)),
- (ADD8rm GR8:$src1, addr:$src2)>;
-def : Pat<(add GR16:$src1, (loadi16 addr:$src2)),
- (ADD16rm GR16:$src1, addr:$src2)>;
-def : Pat<(add GR32:$src1, (loadi32 addr:$src2)),
- (ADD32rm GR32:$src1, addr:$src2)>;
-def : Pat<(add GR64:$src1, (loadi64 addr:$src2)),
- (ADD64rm GR64:$src1, addr:$src2)>;
-
-// add reg, imm
-def : Pat<(add GR8 :$src1, imm:$src2), (ADD8ri GR8:$src1 , imm:$src2)>;
-def : Pat<(add GR16:$src1, imm:$src2), (ADD16ri GR16:$src1, imm:$src2)>;
-def : Pat<(add GR32:$src1, imm:$src2), (ADD32ri GR32:$src1, imm:$src2)>;
-def : Pat<(add GR64:$src1, i64immSExt32:$src2), (ADD64ri32 GR64:$src1, i64immSExt32:$src2)>;
-
-// sub reg, reg
-def : Pat<(sub GR8 :$src1, GR8 :$src2), (SUB8rr GR8 :$src1, GR8 :$src2)>;
-def : Pat<(sub GR16:$src1, GR16:$src2), (SUB16rr GR16:$src1, GR16:$src2)>;
-def : Pat<(sub GR32:$src1, GR32:$src2), (SUB32rr GR32:$src1, GR32:$src2)>;
-def : Pat<(sub GR64:$src1, GR64:$src2), (SUB64rr GR64:$src1, GR64:$src2)>;
-
-// sub reg, mem
-def : Pat<(sub GR8:$src1, (loadi8 addr:$src2)),
- (SUB8rm GR8:$src1, addr:$src2)>;
-def : Pat<(sub GR16:$src1, (loadi16 addr:$src2)),
- (SUB16rm GR16:$src1, addr:$src2)>;
-def : Pat<(sub GR32:$src1, (loadi32 addr:$src2)),
- (SUB32rm GR32:$src1, addr:$src2)>;
-def : Pat<(sub GR64:$src1, (loadi64 addr:$src2)),
- (SUB64rm GR64:$src1, addr:$src2)>;
-
-// sub reg, imm
-def : Pat<(sub GR8:$src1, imm:$src2),
- (SUB8ri GR8:$src1, imm:$src2)>;
-def : Pat<(sub GR16:$src1, imm:$src2),
- (SUB16ri GR16:$src1, imm:$src2)>;
-def : Pat<(sub GR32:$src1, imm:$src2),
- (SUB32ri GR32:$src1, imm:$src2)>;
-def : Pat<(sub GR64:$src1, i64immSExt32:$src2),
- (SUB64ri32 GR64:$src1, i64immSExt32:$src2)>;
-
-// sub 0, reg
-def : Pat<(X86sub_flag 0, GR8 :$src), (NEG8r GR8 :$src)>;
-def : Pat<(X86sub_flag 0, GR16:$src), (NEG16r GR16:$src)>;
-def : Pat<(X86sub_flag 0, GR32:$src), (NEG32r GR32:$src)>;
-def : Pat<(X86sub_flag 0, GR64:$src), (NEG64r GR64:$src)>;
-
-// mul reg, reg
-def : Pat<(mul GR16:$src1, GR16:$src2),
- (IMUL16rr GR16:$src1, GR16:$src2)>;
-def : Pat<(mul GR32:$src1, GR32:$src2),
- (IMUL32rr GR32:$src1, GR32:$src2)>;
-def : Pat<(mul GR64:$src1, GR64:$src2),
- (IMUL64rr GR64:$src1, GR64:$src2)>;
-
-// mul reg, mem
-def : Pat<(mul GR16:$src1, (loadi16 addr:$src2)),
- (IMUL16rm GR16:$src1, addr:$src2)>;
-def : Pat<(mul GR32:$src1, (loadi32 addr:$src2)),
- (IMUL32rm GR32:$src1, addr:$src2)>;
-def : Pat<(mul GR64:$src1, (loadi64 addr:$src2)),
- (IMUL64rm GR64:$src1, addr:$src2)>;
+multiclass EFLAGSDefiningPats<string suffix, Predicate p> {
+ let Predicates = [p] in {
+ // add reg, reg
+ def : Pat<(add GR8 :$src1, GR8 :$src2), (!cast<Instruction>(ADD8rr#suffix) GR8 :$src1, GR8 :$src2)>;
+ def : Pat<(add GR16:$src1, GR16:$src2), (!cast<Instruction>(ADD16rr#suffix) GR16:$src1, GR16:$src2)>;
+ def : Pat<(add GR32:$src1, GR32:$src2), (!cast<Instruction>(ADD32rr#suffix) GR32:$src1, GR32:$src2)>;
+ def : Pat<(add GR64:$src1, GR64:$src2), (!cast<Instruction>(ADD64rr#suffix) GR64:$src1, GR64:$src2)>;
+
+ // add reg, mem
+ def : Pat<(add GR8:$src1, (loadi8 addr:$src2)),
+ (!cast<Instruction>(ADD8rm#suffix) GR8:$src1, addr:$src2)>;
+ def : Pat<(add GR16:$src1, (loadi16 addr:$src2)),
+ (!cast<Instruction>(ADD16rm#suffix) GR16:$src1, addr:$src2)>;
+ def : Pat<(add GR32:$src1, (loadi32 addr:$src2)),
+ (!cast<Instruction>(ADD32rm#suffix) GR32:$src1, addr:$src2)>;
+ def : Pat<(add GR64:$src1, (loadi64 addr:$src2)),
+ (!cast<Instruction>(ADD64rm#suffix) GR64:$src1, addr:$src2)>;
+
+ // add reg, imm
+ def : Pat<(add GR8 :$src1, imm:$src2), (!cast<Instruction>(ADD8ri#suffix) GR8:$src1 , imm:$src2)>;
+ def : Pat<(add GR16:$src1, imm:$src2), (!cast<Instruction>(ADD16ri#suffix) GR16:$src1, imm:$src2)>;
+ def : Pat<(add GR32:$src1, imm:$src2), (!cast<Instruction>(ADD32ri#suffix) GR32:$src1, imm:$src2)>;
+ def : Pat<(add GR64:$src1, i64immSExt32:$src2), (!cast<Instruction>(ADD64ri32#suffix) GR64:$src1, i64immSExt32:$src2)>;
+
+ // sub reg, reg
+ def : Pat<(sub GR8 :$src1, GR8 :$src2), (!cast<Instruction>(SUB8rr#suffix) GR8 :$src1, GR8 :$src2)>;
+ def : Pat<(sub GR16:$src1, GR16:$src2), (!cast<Instruction>(SUB16rr#suffix) GR16:$src1, GR16:$src2)>;
+ def : Pat<(sub GR32:$src1, GR32:$src2), (!cast<Instruction>(SUB32rr#suffix) GR32:$src1, GR32:$src2)>;
+ def : Pat<(sub GR64:$src1, GR64:$src2), (!cast<Instruction>(SUB64rr#suffix) GR64:$src1, GR64:$src2)>;
+
+ // sub reg, mem
+ def : Pat<(sub GR8:$src1, (loadi8 addr:$src2)),
+ (!cast<Instruction>(SUB8rm#suffix) GR8:$src1, addr:$src2)>;
+ def : Pat<(sub GR16:$src1, (loadi16 addr:$src2)),
+ (!cast<Instruction>(SUB16rm#suffix) GR16:$src1, addr:$src2)>;
+ def : Pat<(sub GR32:$src1, (loadi32 addr:$src2)),
+ (!cast<Instruction>(SUB32rm#suffix) GR32:$src1, addr:$src2)>;
+ def : Pat<(sub GR64:$src1, (loadi64 addr:$src2)),
+ (!cast<Instruction>(SUB64rm#suffix) GR64:$src1, addr:$src2)>;
+
+ // sub reg, imm
+ def : Pat<(sub GR8:$src1, imm:$src2),
+ (!cast<Instruction>(SUB8ri#suffix) GR8:$src1, imm:$src2)>;
+ def : Pat<(sub GR16:$src1, imm:$src2),
+ (!cast<Instruction>(SUB16ri#suffix) GR16:$src1, imm:$src2)>;
+ def : Pat<(sub GR32:$src1, imm:$src2),
+ (!cast<Instruction>(SUB32ri#suffix) GR32:$src1, imm:$src2)>;
+ def : Pat<(sub GR64:$src1, i64immSExt32:$src2),
+ (!cast<Instruction>(SUB64ri32#suffix) GR64:$src1, i64immSExt32:$src2)>;
+
+ // sub 0, reg
+ def : Pat<(X86sub_flag 0, GR8 :$src), (!cast<Instruction>(NEG8r#suffix) GR8 :$src)>;
+ def : Pat<(X86sub_flag 0, GR16:$src), (!cast<Instruction>(NEG16r#suffix) GR16:$src)>;
+ def : Pat<(X86sub_flag 0, GR32:$src), (!cast<Instruction>(NEG32r#suffix) GR32:$src)>;
+ def : Pat<(X86sub_flag 0, GR64:$src), (!cast<Instruction>(NEG64r#suffix) GR64:$src)>;
+
+ // mul reg, reg
+ def : Pat<(mul GR16:$src1, GR16:$src2),
+ (!cast<Instruction>(IMUL16rr#suffix) GR16:$src1, GR16:$src2)>;
+ def : Pat<(mul GR32:$src1, GR32:$src2),
+ (!cast<Instruction>(IMUL32rr#suffix) GR32:$src1, GR32:$src2)>;
+ def : Pat<(mul GR64:$src1, GR64:$src2),
+ (!cast<Instruction>(IMUL64rr#suffix) GR64:$src1, GR64:$src2)>;
+
+ // mul reg, mem
+ def : Pat<(mul GR16:$src1, (loadi16 addr:$src2)),
+ (!cast<Instruction>(IMUL16rm#suffix) GR16:$src1, addr:$src2)>;
+ def : Pat<(mul GR32:$src1, (loadi32 addr:$src2)),
+ (!cast<Instruction>(IMUL32rm#suffix) GR32:$src1, addr:$src2)>;
+ def : Pat<(mul GR64:$src1, (loadi64 addr:$src2)),
+ (!cast<Instruction>(IMUL64rm#suffix) GR64:$src1, addr:$src2)>;
+
+ // or reg/reg.
+ def : Pat<(or GR8 :$src1, GR8 :$src2), (!cast<Instruction>(OR8rr#suffix) GR8 :$src1, GR8 :$src2)>;
+ def : Pat<(or GR16:$src1, GR16:$src2), (!cast<Instruction>(OR16rr#suffix) GR16:$src1, GR16:$src2)>;
+ def : Pat<(or GR32:$src1, GR32:$src2), (!cast<Instruction>(OR32rr#suffix) GR32:$src1, GR32:$src2)>;
+ def : Pat<(or GR64:$src1, GR64:$src2), (!cast<Instruction>(OR64rr#suffix) GR64:$src1, GR64:$src2)>;
+
+ // or reg/mem
+ def : Pat<(or GR8:$src1, (loadi8 addr:$src2)),
+ (!cast<Instruction>(OR8rm#suffix) GR8:$src1, addr:$src2)>;
+ def : Pat<(or GR16:$src1, (loadi16 addr:$src2)),
+ (!cast<Instruction>(OR16rm#suffix) GR16:$src1, addr:$src2)>;
+ def : Pat<(or GR32:$src1, (loadi32 addr:$src2)),
+ (!cast<Instruction>(OR32rm#suffix) GR32:$src1, addr:$src2)>;
+ def : Pat<(or GR64:$src1, (loadi64 addr:$src2)),
+ (!cast<Instruction>(OR64rm#suffix) GR64:$src1, addr:$src2)>;
+
+ // or reg/imm
+ def : Pat<(or GR8:$src1 , imm:$src2), (!cast<Instruction>(OR8ri#suffix) GR8 :$src1, imm:$src2)>;
+ def : Pat<(or GR16:$src1, imm:$src2), (!cast<Instruction>(OR16ri#suffix) GR16:$src1, imm:$src2)>;
+ def : Pat<(or GR32:$src1, imm:$src2), (!cast<Instruction>(OR32ri#suffix) GR32:$src1, imm:$src2)>;
+ def : Pat<(or GR64:$src1, i64immSExt32:$src2),
+ (!cast<Instruction>(OR64ri32#suffix) GR64:$src1, i64immSExt32:$src2)>;
+
+ // xor reg/reg
+ def : Pat<(xor GR8 :$src1, GR8 :$src2), (!cast<Instruction>(XOR8rr#suffix) GR8 :$src1, GR8 :$src2)>;
+ def : Pat<(xor GR16:$src1, GR16:$src2), (!cast<Instruction>(XOR16rr#suffix) GR16:$src1, GR16:$src2)>;
+ def : Pat<(xor GR32:$src1, GR32:$src2), (!cast<Instruction>(XOR32rr#suffix) GR32:$src1, GR32:$src2)>;
+ def : Pat<(xor GR64:$src1, GR64:$src2), (!cast<Instruction>(XOR64rr#suffix) GR64:$src1, GR64:$src2)>;
+
+ // xor reg/mem
+ def : Pat<(xor GR8:$src1, (loadi8 addr:$src2)),
+ (!cast<Instruction>(XOR8rm#suffix) GR8:$src1, addr:$src2)>;
+ def : Pat<(xor GR16:$src1, (loadi16 addr:$src2)),
+ (!cast<Instruction>(XOR16rm#suffix) GR16:$src1, addr:$src2)>;
+ def : Pat<(xor GR32:$src1, (loadi32 addr:$src2)),
+ (!cast<Instruction>(XOR32rm#suffix) GR32:$src1, addr:$src2)>;
+ def : Pat<(xor GR64:$src1, (loadi64 addr:$src2)),
+ (!cast<Instruction>(XOR64rm#suffix) GR64:$src1, addr:$src2)>;
+
+ // xor reg/imm
+ def : Pat<(xor GR8:$src1, imm:$src2),
+ (!cast<Instruction>(XOR8ri#suffix) GR8:$src1, imm:$src2)>;
+ def : Pat<(xor GR16:$src1, imm:$src2),
+ (!cast<Instruction>(XOR16ri#suffix) GR16:$src1, imm:$src2)>;
+ def : Pat<(xor GR32:$src1, imm:$src2),
+ (!cast<Instruction>(XOR32ri#suffix) GR32:$src1, imm:$src2)>;
+ def : Pat<(xor GR64:$src1, i64immSExt32:$src2),
+ (!cast<Instruction>(XOR64ri32#suffix) GR64:$src1, i64immSExt32:$src2)>;
+
+ // and reg/reg
+ def : Pat<(and GR8 :$src1, GR8 :$src2), (!cast<Instruction>(AND8rr#suffix) GR8 :$src1, GR8 :$src2)>;
+ def : Pat<(and GR16:$src1, GR16:$src2), (!cast<Instruction>(AND16rr#suffix) GR16:$src1, GR16:$src2)>;
+ def : Pat<(and GR32:$src1, GR32:$src2), (!cast<Instruction>(AND32rr#suffix) GR32:$src1, GR32:$src2)>;
+ def : Pat<(and GR64:$src1, GR64:$src2), (!cast<Instruction>(AND64rr#suffix) GR64:$src1, GR64:$src2)>;
+
+ // and reg/mem
+ def : Pat<(and GR8:$src1, (loadi8 addr:$src2)),
+ (!cast<Instruction>(AND8rm#suffix) GR8:$src1, addr:$src2)>;
+ def : Pat<(and GR16:$src1, (loadi16 addr:$src2)),
+ (!cast<Instruction>(AND16rm#suffix) GR16:$src1, addr:$src2)>;
+ def : Pat<(and GR32:$src1, (loadi32 addr:$src2)),
+ (!cast<Instruction>(AND32rm#suffix) GR32:$src1, addr:$src2)>;
+ def : Pat<(and GR64:$src1, (loadi64 addr:$src2)),
+ (!cast<Instruction>(AND64rm#suffix) GR64:$src1, addr:$src2)>;
+
+ // and reg/imm
+ def : Pat<(and GR8:$src1, imm:$src2),
+ (!cast<Instruction>(AND8ri#suffix) GR8:$src1, imm:$src2)>;
+ def : Pat<(and GR16:$src1, imm:$src2),
+ (!cast<Instruction>(AND16ri#suffix) GR16:$src1, imm:$src2)>;
+ def : Pat<(and GR32:$src1, imm:$src2),
+ (!cast<Instruction>(AND32ri#suffix) GR32:$src1, imm:$src2)>;
+ def : Pat<(and GR64:$src1, i64immSExt32:$src2),
+ (!cast<Instruction>(AND64ri32#suffix) GR64:$src1, i64immSExt32:$src2)>;
+ }
+
+ // Increment/Decrement reg.
+ // Do not make INC/DEC if it is slow
+ let Predicates = [UseIncDec, p] in {
+ def : Pat<(add GR8:$src, 1), (!cast<Instruction>(INC8r#suffix) GR8:$src)>;
+ def : Pat<(add GR16:$src, 1), (!cast<Instruction>(INC16r#suffix) GR16:$src)>;
+ def : Pat<(add GR32:$src, 1), (!cast<Instruction>(INC32r#suffix) GR32:$src)>;
+ def : Pat<(add GR64:$src, 1), (!cast<Instruction>(INC64r#suffix) GR64:$src)>;
+ def : Pat<(add GR8:$src, -1), (!cast<Instruction>(DEC8r#suffix) GR8:$src)>;
+ def : Pat<(add GR16:$src, -1), (!cast<Instruction>(DEC16r#suffix) GR16:$src)>;
+ def : Pat<(add GR32:$src, -1), (!cast<Instruction>(DEC32r#suffix) GR32:$src)>;
+ def : Pat<(add GR64:$src, -1), (!cast<Instruction>(DEC64r#suffix) GR64:$src)>;
+
+ def : Pat<(X86add_flag_nocf GR8:$src, -1), (!cast<Instruction>(DEC8r#suffix) GR8:$src)>;
+ def : Pat<(X86add_flag_nocf GR16:$src, -1), (!cast<Instruction>(DEC16r#suffix) GR16:$src)>;
+ def : Pat<(X86add_flag_nocf GR32:$src, -1), (!cast<Instruction>(DEC32r#suffix) GR32:$src)>;
+ def : Pat<(X86add_flag_nocf GR64:$src, -1), (!cast<Instruction>(DEC64r#suffix) GR64:$src)>;
+ def : Pat<(X86sub_flag_nocf GR8:$src, -1), (!cast<Instruction>(INC8r#suffix) GR8:$src)>;
+ def : Pat<(X86sub_flag_nocf GR16:$src, -1), (!cast<Instruction>(INC16r#suffix) GR16:$src)>;
+ def : Pat<(X86sub_flag_nocf GR32:$src, -1), (!cast<Instruction>(INC32r#suffix) GR32:$src)>;
+ def : Pat<(X86sub_flag_nocf GR64:$src, -1), (!cast<Instruction>(INC64r#suffix) GR64:$src)>;
+ }
+}
+
+defm : EFLAGSDefiningPats<"", NoNDD>;
+defm : EFLAGSDefiningPats<"_ND", HasNDD>;
// mul reg, imm
def : Pat<(mul GR16:$src1, imm:$src2),
@@ -2023,103 +2146,6 @@ def : Pat<(mul (loadi32 addr:$src1), imm:$src2),
def : Pat<(mul (loadi64 addr:$src1), i64immSExt32:$src2),
(IMUL64rmi32 addr:$src1, i64immSExt32:$src2)>;
-// Increment/Decrement reg.
-// Do not make INC/DEC if it is slow
-let Predicates = [UseIncDec] in {
- def : Pat<(add GR8:$src, 1), (INC8r GR8:$src)>;
- def : Pat<(add GR16:$src, 1), (INC16r GR16:$src)>;
- def : Pat<(add GR32:$src, 1), (INC32r GR32:$src)>;
- def : Pat<(add GR64:$src, 1), (INC64r GR64:$src)>;
- def : Pat<(add GR8:$src, -1), (DEC8r GR8:$src)>;
- def : Pat<(add GR16:$src, -1), (DEC16r GR16:$src)>;
- def : Pat<(add GR32:$src, -1), (DEC32r GR32:$src)>;
- def : Pat<(add GR64:$src, -1), (DEC64r GR64:$src)>;
-
- def : Pat<(X86add_flag_nocf GR8:$src, -1), (DEC8r GR8:$src)>;
- def : Pat<(X86add_flag_nocf GR16:$src, -1), (DEC16r GR16:$src)>;
- def : Pat<(X86add_flag_nocf GR32:$src, -1), (DEC32r GR32:$src)>;
- def : Pat<(X86add_flag_nocf GR64:$src, -1), (DEC64r GR64:$src)>;
- def : Pat<(X86sub_flag_nocf GR8:$src, -1), (INC8r GR8:$src)>;
- def : Pat<(X86sub_flag_nocf GR16:$src, -1), (INC16r GR16:$src)>;
- def : Pat<(X86sub_flag_nocf GR32:$src, -1), (INC32r GR32:$src)>;
- def : Pat<(X86sub_flag_nocf GR64:$src, -1), (INC64r GR64:$src)>;
-}
-
-// or reg/reg.
-def : Pat<(or GR8 :$src1, GR8 :$src2), (OR8rr GR8 :$src1, GR8 :$src2)>;
-def : Pat<(or GR16:$src1, GR16:$src2), (OR16rr GR16:$src1, GR16:$src2)>;
-def : Pat<(or GR32:$src1, GR32:$src2), (OR32rr GR32:$src1, GR32:$src2)>;
-def : Pat<(or GR64:$src1, GR64:$src2), (OR64rr GR64:$src1, GR64:$src2)>;
-
-// or reg/mem
-def : Pat<(or GR8:$src1, (loadi8 addr:$src2)),
- (OR8rm GR8:$src1, addr:$src2)>;
-def : Pat<(or GR16:$src1, (loadi16 addr:$src2)),
- (OR16rm GR16:$src1, addr:$src2)>;
-def : Pat<(or GR32:$src1, (loadi32 addr:$src2)),
- (OR32rm GR32:$src1, addr:$src2)>;
-def : Pat<(or GR64:$src1, (loadi64 addr:$src2)),
- (OR64rm GR64:$src1, addr:$src2)>;
-
-// or reg/imm
-def : Pat<(or GR8:$src1 , imm:$src2), (OR8ri GR8 :$src1, imm:$src2)>;
-def : Pat<(or GR16:$src1, imm:$src2), (OR16ri GR16:$src1, imm:$src2)>;
-def : Pat<(or GR32:$src1, imm:$src2), (OR32ri GR32:$src1, imm:$src2)>;
-def : Pat<(or GR64:$src1, i64immSExt32:$src2),
- (OR64ri32 GR64:$src1, i64immSExt32:$src2)>;
-
-// xor reg/reg
-def : Pat<(xor GR8 :$src1, GR8 :$src2), (XOR8rr GR8 :$src1, GR8 :$src2)>;
-def : Pat<(xor GR16:$src1, GR16:$src2), (XOR16rr GR16:$src1, GR16:$src2)>;
-def : Pat<(xor GR32:$src1, GR32:$src2), (XOR32rr GR32:$src1, GR32:$src2)>;
-def : Pat<(xor GR64:$src1, GR64:$src2), (XOR64rr GR64:$src1, GR64:$src2)>;
-
-// xor reg/mem
-def : Pat<(xor GR8:$src1, (loadi8 addr:$src2)),
- (XOR8rm GR8:$src1, addr:$src2)>;
-def : Pat<(xor GR16:$src1, (loadi16 addr:$src2)),
- (XOR16rm GR16:$src1, addr:$src2)>;
-def : Pat<(xor GR32:$src1, (loadi32 addr:$src2)),
- (XOR32rm GR32:$src1, addr:$src2)>;
-def : Pat<(xor GR64:$src1, (loadi64 addr:$src2)),
- (XOR64rm GR64:$src1, addr:$src2)>;
-
-// xor reg/imm
-def : Pat<(xor GR8:$src1, imm:$src2),
- (XOR8ri GR8:$src1, imm:$src2)>;
-def : Pat<(xor GR16:$src1, imm:$src2),
- (XOR16ri GR16:$src1, imm:$src2)>;
-def : Pat<(xor GR32:$src1, imm:$src2),
- (XOR32ri GR32:$src1, imm:$src2)>;
-def : Pat<(xor GR64:$src1, i64immSExt32:$src2),
- (XOR64ri32 GR64:$src1, i64immSExt32:$src2)>;
-
-// and reg/reg
-def : Pat<(and GR8 :$src1, GR8 :$src2), (AND8rr GR8 :$src1, GR8 :$src2)>;
-def : Pat<(and GR16:$src1, GR16:$src2), (AND16rr GR16:$src1, GR16:$src2)>;
-def : Pat<(and GR32:$src1, GR32:$src2), (AND32rr GR32:$src1, GR32:$src2)>;
-def : Pat<(and GR64:$src1, GR64:$src2), (AND64rr GR64:$src1, GR64:$src2)>;
-
-// and reg/mem
-def : Pat<(and GR8:$src1, (loadi8 addr:$src2)),
- (AND8rm GR8:$src1, addr:$src2)>;
-def : Pat<(and GR16:$src1, (loadi16 addr:$src2)),
- (AND16rm GR16:$src1, addr:$src2)>;
-def : Pat<(and GR32:$src1, (loadi32 addr:$src2)),
- (AND32rm GR32:$src1, addr:$src2)>;
-def : Pat<(and GR64:$src1, (loadi64 addr:$src2)),
- (AND64rm GR64:$src1, addr:$src2)>;
-
-// and reg/imm
-def : Pat<(and GR8:$src1, imm:$src2),
- (AND8ri GR8:$src1, imm:$src2)>;
-def : Pat<(and GR16:$src1, imm:$src2),
- (AND16ri GR16:$src1, imm:$src2)>;
-def : Pat<(and GR32:$src1, imm:$src2),
- (AND32ri GR32:$src1, imm:$src2)>;
-def : Pat<(and GR64:$src1, i64immSExt32:$src2),
- (AND64ri32 GR64:$src1, i64immSExt32:$src2)>;
-
// Bit scan instruction patterns to match explicit zero-undef behavior.
def : Pat<(cttz_zero_undef GR16:$src), (BSF16rr GR16:$src)>;
def : Pat<(cttz_zero_undef GR32:$src), (BSF32rr GR32:$src)>;
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrMisc.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrMisc.td
index 97c625a64cfc..753cf62392a1 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrMisc.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrMisc.td
@@ -1523,28 +1523,28 @@ def MOVDIR64B64_EVEX : I<0xF8, MRMSrcMem, (outs), (ins GR64:$dst, i512mem_GR64:$
// ENQCMD/S - Enqueue 64-byte command as user with 64-byte write atomicity
//
let SchedRW = [WriteStore], Defs = [EFLAGS] in {
- def ENQCMD16 : I<0xF8, MRMSrcMem, (outs), (ins GR16:$dst, i512mem:$src),
+ def ENQCMD16 : I<0xF8, MRMSrcMem, (outs), (ins GR16:$dst, i512mem_GR16:$src),
"enqcmd\t{$src, $dst|$dst, $src}",
[(set EFLAGS, (X86enqcmd GR16:$dst, addr:$src))]>,
T8, XD, AdSize16, Requires<[HasENQCMD, Not64BitMode]>;
- def ENQCMD32 : I<0xF8, MRMSrcMem, (outs), (ins GR32:$dst, i512mem:$src),
+ def ENQCMD32 : I<0xF8, MRMSrcMem, (outs), (ins GR32:$dst, i512mem_GR32:$src),
"enqcmd\t{$src, $dst|$dst, $src}",
[(set EFLAGS, (X86enqcmd GR32:$dst, addr:$src))]>,
T8, XD, AdSize32, Requires<[HasENQCMD]>;
- def ENQCMD64 : I<0xF8, MRMSrcMem, (outs), (ins GR64:$dst, i512mem:$src),
+ def ENQCMD64 : I<0xF8, MRMSrcMem, (outs), (ins GR64:$dst, i512mem_GR64:$src),
"enqcmd\t{$src, $dst|$dst, $src}",
[(set EFLAGS, (X86enqcmd GR64:$dst, addr:$src))]>,
T8, XD, AdSize64, Requires<[HasENQCMD, In64BitMode]>;
- def ENQCMDS16 : I<0xF8, MRMSrcMem, (outs), (ins GR16:$dst, i512mem:$src),
+ def ENQCMDS16 : I<0xF8, MRMSrcMem, (outs), (ins GR16:$dst, i512mem_GR16:$src),
"enqcmds\t{$src, $dst|$dst, $src}",
[(set EFLAGS, (X86enqcmds GR16:$dst, addr:$src))]>,
T8, XS, AdSize16, Requires<[HasENQCMD, Not64BitMode]>;
- def ENQCMDS32 : I<0xF8, MRMSrcMem, (outs), (ins GR32:$dst, i512mem:$src),
+ def ENQCMDS32 : I<0xF8, MRMSrcMem, (outs), (ins GR32:$dst, i512mem_GR32:$src),
"enqcmds\t{$src, $dst|$dst, $src}",
[(set EFLAGS, (X86enqcmds GR32:$dst, addr:$src))]>,
T8, XS, AdSize32, Requires<[HasENQCMD]>;
- def ENQCMDS64 : I<0xF8, MRMSrcMem, (outs), (ins GR64:$dst, i512mem:$src),
+ def ENQCMDS64 : I<0xF8, MRMSrcMem, (outs), (ins GR64:$dst, i512mem_GR64:$src),
"enqcmds\t{$src, $dst|$dst, $src}",
[(set EFLAGS, (X86enqcmds GR64:$dst, addr:$src))]>,
T8, XS, AdSize64, Requires<[HasENQCMD, In64BitMode]>;
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86PfmCounters.td b/contrib/llvm-project/llvm/lib/Target/X86/X86PfmCounters.td
index 49ef6efc6aec..48d689549709 100644
--- a/contrib/llvm-project/llvm/lib/Target/X86/X86PfmCounters.td
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86PfmCounters.td
@@ -18,6 +18,10 @@ def DefaultPfmCounters : ProcPfmCounters {}
def : PfmCountersDefaultBinding<DefaultPfmCounters>;
// Intel X86 Counters.
+defvar DefaultIntelPfmValidationCounters = [
+ PfmValidationCounter<InstructionRetired, "INSTRUCTIONS_RETIRED">
+];
+
def PentiumPfmCounters : ProcPfmCounters {
let CycleCounter = PfmCounter<"cpu_clk_unhalted">;
let UopsCounter = PfmCounter<"uops_retired">;
@@ -100,6 +104,7 @@ def SandyBridgePfmCounters : ProcPfmCounters {
PfmIssueCounter<"SBPort4", "uops_dispatched_port:port_4">,
PfmIssueCounter<"SBPort5", "uops_dispatched_port:port_5">
];
+ let ValidationCounters = DefaultIntelPfmValidationCounters;
}
def : PfmCountersBinding<"sandybridge", SandyBridgePfmCounters>;
def : PfmCountersBinding<"ivybridge", SandyBridgePfmCounters>;
@@ -117,6 +122,7 @@ def HaswellPfmCounters : ProcPfmCounters {
PfmIssueCounter<"HWPort6", "uops_executed_port:port_6">,
PfmIssueCounter<"HWPort7", "uops_executed_port:port_7">
];
+ let ValidationCounters = DefaultIntelPfmValidationCounters;
}
def : PfmCountersBinding<"haswell", HaswellPfmCounters>;
@@ -133,6 +139,7 @@ def BroadwellPfmCounters : ProcPfmCounters {
PfmIssueCounter<"BWPort6", "uops_executed_port:port_6">,
PfmIssueCounter<"BWPort7", "uops_executed_port:port_7">
];
+ let ValidationCounters = DefaultIntelPfmValidationCounters;
}
def : PfmCountersBinding<"broadwell", BroadwellPfmCounters>;
@@ -149,6 +156,7 @@ def SkylakeClientPfmCounters : ProcPfmCounters {
PfmIssueCounter<"SKLPort6", "uops_dispatched_port:port_6">,
PfmIssueCounter<"SKLPort7", "uops_dispatched_port:port_7">
];
+ let ValidationCounters = DefaultIntelPfmValidationCounters;
}
def : PfmCountersBinding<"skylake", SkylakeClientPfmCounters>;
@@ -165,6 +173,7 @@ def SkylakeServerPfmCounters : ProcPfmCounters {
PfmIssueCounter<"SKXPort6", "uops_dispatched_port:port_6">,
PfmIssueCounter<"SKXPort7", "uops_dispatched_port:port_7">
];
+ let ValidationCounters = DefaultIntelPfmValidationCounters;
}
def : PfmCountersBinding<"skylake-avx512", SkylakeServerPfmCounters>;
def : PfmCountersBinding<"cascadelake", SkylakeServerPfmCounters>;
@@ -182,6 +191,7 @@ def IceLakePfmCounters : ProcPfmCounters {
PfmIssueCounter<"ICXPort6", "uops_dispatched_port:port_6">,
PfmIssueCounter<"ICXPort78", "uops_dispatched_port:port_7_8">
];
+ let ValidationCounters = DefaultIntelPfmValidationCounters;
}
def : PfmCountersBinding<"icelake-client", IceLakePfmCounters>;
def : PfmCountersBinding<"icelake-server", IceLakePfmCounters>;
@@ -189,6 +199,10 @@ def : PfmCountersBinding<"rocketlake", IceLakePfmCounters>;
def : PfmCountersBinding<"tigerlake", IceLakePfmCounters>;
// AMD X86 Counters.
+defvar DefaultAMDPfmValidationCounters = [
+ PfmValidationCounter<InstructionRetired, "RETIRED_INSTRUCTIONS">
+];
+
// Set basic counters for AMD cpus that we know libpfm4 supports.
def DefaultAMDPfmCounters : ProcPfmCounters {
let CycleCounter = PfmCounter<"cpu_clk_unhalted">;
@@ -265,6 +279,7 @@ def ZnVer1PfmCounters : ProcPfmCounters {
PfmIssueCounter<"ZnAGU", "ls_dispatch:ld_st_dispatch + ls_dispatch:ld_dispatch + ls_dispatch:store_dispatch">,
PfmIssueCounter<"ZnDivider", "div_op_count">
];
+ let ValidationCounters = DefaultAMDPfmValidationCounters;
}
def : PfmCountersBinding<"znver1", ZnVer1PfmCounters>;
@@ -275,6 +290,7 @@ def ZnVer2PfmCounters : ProcPfmCounters {
PfmIssueCounter<"Zn2AGU", "ls_dispatch:ld_st_dispatch + ls_dispatch:ld_dispatch + ls_dispatch:store_dispatch">,
PfmIssueCounter<"Zn2Divider", "div_op_count">
];
+ let ValidationCounters = DefaultAMDPfmValidationCounters;
}
def : PfmCountersBinding<"znver2", ZnVer2PfmCounters>;
@@ -288,6 +304,7 @@ def ZnVer3PfmCounters : ProcPfmCounters {
PfmIssueCounter<"Zn3Store", "ls_dispatch:store_dispatch">,
PfmIssueCounter<"Zn3Divider", "div_op_count">
];
+ let ValidationCounters = DefaultAMDPfmValidationCounters;
}
def : PfmCountersBinding<"znver3", ZnVer3PfmCounters>;
@@ -302,5 +319,6 @@ def ZnVer4PfmCounters : ProcPfmCounters {
PfmIssueCounter<"Zn4Divider", "div_op_count">,
PfmIssueCounter<"Zn4AGU", "ls_dispatch:ld_st_dispatch + ls_dispatch:ld_dispatch + ls_dispatch:store_dispatch">
];
+ let ValidationCounters = DefaultAMDPfmValidationCounters;
}
def : PfmCountersBinding<"znver4", ZnVer4PfmCounters>;