author    Dimitry Andric <dim@FreeBSD.org>  2017-01-04 22:11:11 +0000
committer Dimitry Andric <dim@FreeBSD.org>  2017-01-04 22:11:11 +0000
commit    c82ad72f63369bc462e59458f09960d66daa58a9 (patch)
tree      58bc455a8d052220f9ae11e65d6f06d671a7a4c4 /lib/Target
parent    b915e9e0fc85ba6f398b3fab0db6a81a8913af94 (diff)
Diffstat (limited to 'lib/Target')
-rw-r--r--  lib/Target/AArch64/AArch64.td  6
-rw-r--r--  lib/Target/AArch64/AArch64AsmPrinter.cpp  56
-rw-r--r--  lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp  3
-rw-r--r--  lib/Target/AMDGPU/AMDGPUISelLowering.cpp  1
-rw-r--r--  lib/Target/AMDGPU/AMDGPUISelLowering.h  1
-rw-r--r--  lib/Target/AMDGPU/AMDGPUInstrInfo.td  4
-rw-r--r--  lib/Target/AMDGPU/SIISelLowering.cpp  9
-rw-r--r--  lib/Target/AMDGPU/SIInsertWaits.cpp  5
-rw-r--r--  lib/Target/AMDGPU/SOPInstructions.td  5
-rw-r--r--  lib/Target/ARM/ARMAsmPrinter.cpp  3
-rw-r--r--  lib/Target/ARM/ARMAsmPrinter.h  3
-rw-r--r--  lib/Target/ARM/ARMMCInstLower.cpp  38
-rw-r--r--  lib/Target/Hexagon/BitTracker.cpp  70
-rw-r--r--  lib/Target/Hexagon/BitTracker.h  53
-rw-r--r--  lib/Target/Hexagon/HexagonBitTracker.cpp  52
-rw-r--r--  lib/Target/Hexagon/HexagonBitTracker.h  22
-rw-r--r--  lib/Target/Hexagon/HexagonInstrInfo.cpp  181
-rw-r--r--  lib/Target/Hexagon/HexagonInstrInfo.h  19
-rw-r--r--  lib/Target/Hexagon/HexagonMachineFunctionInfo.h  27
-rw-r--r--  lib/Target/Hexagon/HexagonTargetObjectFile.cpp  39
-rw-r--r--  lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp  45
-rw-r--r--  lib/Target/Hexagon/RDFCopy.h  19
-rw-r--r--  lib/Target/Hexagon/RDFGraph.cpp  60
-rw-r--r--  lib/Target/Hexagon/RDFGraph.h  99
-rw-r--r--  lib/Target/Mips/MipsSEISelDAGToDAG.cpp  6
-rw-r--r--  lib/Target/Mips/MipsSEISelDAGToDAG.h  2
-rw-r--r--  lib/Target/PowerPC/PPCISelLowering.cpp  79
-rw-r--r--  lib/Target/X86/X86AsmPrinter.cpp  2
-rw-r--r--  lib/Target/X86/X86FrameLowering.cpp  22
-rw-r--r--  lib/Target/X86/X86ISelLowering.cpp  232
-rw-r--r--  lib/Target/X86/X86InstrAVX512.td  43
-rw-r--r--  lib/Target/X86/X86InstrSSE.td  53
-rwxr-xr-x  lib/Target/X86/X86InstrTablesInfo.h  90
-rw-r--r--  lib/Target/X86/X86IntrinsicsInfo.h  26
-rw-r--r--  lib/Target/X86/X86MCInstLower.cpp  50
-rw-r--r--  lib/Target/X86/X86TargetTransformInfo.cpp  220
36 files changed, 749 insertions(+), 896 deletions(-)
diff --git a/lib/Target/AArch64/AArch64.td b/lib/Target/AArch64/AArch64.td
index c40391d5ad9d..740766b151bb 100644
--- a/lib/Target/AArch64/AArch64.td
+++ b/lib/Target/AArch64/AArch64.td
@@ -264,9 +264,13 @@ def ProcFalkor : SubtargetFeature<"falkor", "ARMProcFamily", "Falkor",
"Qualcomm Falkor processors", [
FeatureCRC,
FeatureCrypto,
+ FeatureCustomCheapAsMoveHandling,
FeatureFPARMv8,
FeatureNEON,
- FeaturePerfMon
+ FeaturePerfMon,
+ FeaturePostRAScheduler,
+ FeaturePredictableSelectIsExpensive,
+ FeatureZCZeroing
]>;
def ProcVulcan : SubtargetFeature<"vulcan", "ARMProcFamily", "Vulcan",
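
The Falkor entry now opts into zero-cycle zeroing, post-RA scheduling, the custom cheap-as-move handling, and the expensive-select heuristic. Each SubtargetFeature declared in the .td file becomes a bit in the subtarget's feature mask with a named predicate over it; a minimal standalone model of that pattern (bit positions and names invented here for illustration, not the tblgen output):

    #include <cassert>
    #include <cstdint>

    // Toy model of tblgen'erated subtarget features: one bit per feature,
    // read through a named predicate, as codegen does with AArch64Subtarget.
    enum : uint64_t {
      FeaturePostRAScheduler = 1ULL << 0,
      FeatureZCZeroing       = 1ULL << 1,
    };

    struct SubtargetModel {
      uint64_t FeatureBits;
      bool enablePostRAScheduler() const { return FeatureBits & FeaturePostRAScheduler; }
      bool hasZeroCycleZeroing() const   { return FeatureBits & FeatureZCZeroing; }
    };

    int main() {
      SubtargetModel Falkor{FeaturePostRAScheduler | FeatureZCZeroing};
      assert(Falkor.hasZeroCycleZeroing() && Falkor.enablePostRAScheduler());
    }
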
diff --git a/lib/Target/AArch64/AArch64AsmPrinter.cpp b/lib/Target/AArch64/AArch64AsmPrinter.cpp
index b2d96a32fd3a..efc221893782 100644
--- a/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -76,7 +76,6 @@ public:
void LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr &MI);
void LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI);
- void EmitXRayTable();
void EmitSled(const MachineInstr &MI, SledKind Kind);
/// \brief tblgen'erated driver function for lowering simple MI->MC
@@ -95,7 +94,7 @@ public:
AArch64FI = F.getInfo<AArch64FunctionInfo>();
STI = static_cast<const AArch64Subtarget*>(&F.getSubtarget());
bool Result = AsmPrinter::runOnMachineFunction(F);
- EmitXRayTable();
+ emitXRayTable();
return Result;
}
@@ -150,59 +149,6 @@ void AArch64AsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI)
EmitSled(MI, SledKind::TAIL_CALL);
}
-void AArch64AsmPrinter::EmitXRayTable()
-{
- //TODO: merge the logic for ELF XRay sleds at a higher level, so to avoid
- // code duplication as it is now for x86_64, ARM32 and AArch64.
- if (Sleds.empty())
- return;
-
- auto PrevSection = OutStreamer->getCurrentSectionOnly();
- auto Fn = MF->getFunction();
- MCSection *Section;
-
- if (STI->isTargetELF()) {
- if (Fn->hasComdat())
- Section = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS,
- ELF::SHF_ALLOC | ELF::SHF_GROUP, 0,
- Fn->getComdat()->getName());
- else
- Section = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS,
- ELF::SHF_ALLOC);
- } else if (STI->isTargetMachO()) {
- Section = OutContext.getMachOSection("__DATA", "xray_instr_map", 0,
- SectionKind::getReadOnlyWithRel());
- } else {
- llvm_unreachable("Unsupported target");
- }
-
- // Before we switch over, we force a reference to a label inside the
- // xray_instr_map section. Since EmitXRayTable() is always called just
- // before the function's end, we assume that this is happening after the
- // last return instruction.
- //
- // We then align the reference to 16 byte boundaries, which we determined
- // experimentally to be beneficial to avoid causing decoder stalls.
- MCSymbol *Tmp = OutContext.createTempSymbol("xray_synthetic_", true);
- OutStreamer->EmitCodeAlignment(16);
- OutStreamer->EmitSymbolValue(Tmp, 8, false);
- OutStreamer->SwitchSection(Section);
- OutStreamer->EmitLabel(Tmp);
- for (const auto &Sled : Sleds) {
- OutStreamer->EmitSymbolValue(Sled.Sled, 8);
- OutStreamer->EmitSymbolValue(CurrentFnSym, 8);
- auto Kind = static_cast<uint8_t>(Sled.Kind);
- OutStreamer->EmitBytes(
- StringRef(reinterpret_cast<const char *>(&Kind), 1));
- OutStreamer->EmitBytes(
- StringRef(reinterpret_cast<const char *>(&Sled.AlwaysInstrument), 1));
- OutStreamer->EmitZeros(14);
- }
- OutStreamer->SwitchSection(PrevSection);
-
- Sleds.clear();
-}
-
void AArch64AsmPrinter::EmitSled(const MachineInstr &MI, SledKind Kind)
{
static const int8_t NoopsInSledCount = 7;
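
Both per-target XRay emitters deleted in this commit (here and in ARMMCInstLower.cpp below) are replaced by the shared AsmPrinter::emitXRayTable(). For reference, each xray_instr_map record the removed AArch64 code wrote is 32 bytes: the sled label, the function symbol, a kind byte, an always-instrument byte, and zero padding. A standalone sketch of that layout (the struct name and field names are illustrative, taken from the emission sequence above):

    #include <cstdint>

    // Sketch of the record the removed emitter wrote: two 8-byte symbol
    // values, two flag bytes, and EmitZeros(14) padding out to 32 bytes.
    struct XRaySledEntry {
      uint64_t SledAddress;      // label placed at the patchable sled
      uint64_t FunctionAddress;  // CurrentFnSym
      uint8_t  Kind;             // SledKind: entry / exit / tail call
      uint8_t  AlwaysInstrument;
      uint8_t  Padding[14];
    };
    // Holds on typical 64-bit ABIs: 8 + 8 + 1 + 1 + 14 with 8-byte alignment.
    static_assert(sizeof(XRaySledEntry) == 32, "matches the emitted record size");
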
diff --git a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index dcb05601e5f4..8a76c42b5898 100644
--- a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -1470,6 +1470,9 @@ bool AArch64LoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) {
bool IsUnscaled = TII->isUnscaledLdSt(MI);
int Offset = getLdStOffsetOp(MI).getImm();
int OffsetStride = IsUnscaled ? getMemScale(MI) : 1;
+ // Allow one more for offset.
+ if (Offset > 0)
+ Offset -= OffsetStride;
if (!inBoundsForPair(IsUnscaled, Offset, OffsetStride))
return false;
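
The new guard shrinks a positive offset by one stride before the range check: once two accesses are paired, the pair is addressed from the lower of the two offsets, so the candidate must leave room for a partner one stride away. A rough standalone model, assuming the paired forms encode a signed 7-bit scaled immediate (the helper name mirrors, but is not, the in-tree inBoundsForPair):

    #include <cassert>

    // Assumed encoding: paired loads/stores take a scaled signed 7-bit
    // offset, i.e. [-64, 63] in units of the access size.
    static bool inBoundsForPairModel(int ScaledOffset) {
      return ScaledOffset >= -64 && ScaledOffset <= 63;
    }

    int main() {
      int Offset = 63, Stride = 1;  // candidate at the top of the range
      if (Offset > 0)
        Offset -= Stride;           // reserve room for the partner access
      // After the adjustment, both possible pair bases are encodable.
      assert(inBoundsForPairModel(Offset) && inBoundsForPairModel(Offset + Stride));
    }
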
diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index a87204d46eae..0b0a0e7d083e 100644
--- a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -3048,6 +3048,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(KILL)
case AMDGPUISD::FIRST_MEM_OPCODE_NUMBER: break;
NODE_NAME_CASE(SENDMSG)
+ NODE_NAME_CASE(SENDMSGHALT)
NODE_NAME_CASE(INTERP_MOV)
NODE_NAME_CASE(INTERP_P1)
NODE_NAME_CASE(INTERP_P2)
diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.h b/lib/Target/AMDGPU/AMDGPUISelLowering.h
index 5cc5efb331e3..745c9923de2e 100644
--- a/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -313,6 +313,7 @@ enum NodeType : unsigned {
/// Pointer to the start of the shader's constant data.
CONST_DATA_PTR,
SENDMSG,
+ SENDMSGHALT,
INTERP_MOV,
INTERP_P1,
INTERP_P2,
diff --git a/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/lib/Target/AMDGPU/AMDGPUInstrInfo.td
index e7b40016e272..f079c8d0c70c 100644
--- a/lib/Target/AMDGPU/AMDGPUInstrInfo.td
+++ b/lib/Target/AMDGPU/AMDGPUInstrInfo.td
@@ -266,6 +266,10 @@ def AMDGPUsendmsg : SDNode<"AMDGPUISD::SENDMSG",
SDTypeProfile<0, 1, [SDTCisInt<0>]>,
[SDNPHasChain, SDNPInGlue]>;
+def AMDGPUsendmsghalt : SDNode<"AMDGPUISD::SENDMSGHALT",
+ SDTypeProfile<0, 1, [SDTCisInt<0>]>,
+ [SDNPHasChain, SDNPInGlue]>;
+
def AMDGPUinterp_mov : SDNode<"AMDGPUISD::INTERP_MOV",
SDTypeProfile<1, 3, [SDTCisFP<0>]>,
[SDNPInGlue]>;
diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp
index fa53831cbe16..c78e97dfd46f 100644
--- a/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -2706,12 +2706,19 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
switch (IntrinsicID) {
- case AMDGPUIntrinsic::SI_sendmsg: {
+ case AMDGPUIntrinsic::SI_sendmsg:
+ case Intrinsic::amdgcn_s_sendmsg: {
Chain = copyToM0(DAG, Chain, DL, Op.getOperand(3));
SDValue Glue = Chain.getValue(1);
return DAG.getNode(AMDGPUISD::SENDMSG, DL, MVT::Other, Chain,
Op.getOperand(2), Glue);
}
+ case Intrinsic::amdgcn_s_sendmsghalt: {
+ Chain = copyToM0(DAG, Chain, DL, Op.getOperand(3));
+ SDValue Glue = Chain.getValue(1);
+ return DAG.getNode(AMDGPUISD::SENDMSGHALT, DL, MVT::Other, Chain,
+ Op.getOperand(2), Glue);
+ }
case AMDGPUIntrinsic::SI_tbuffer_store: {
SDValue Ops[] = {
Chain,
diff --git a/lib/Target/AMDGPU/SIInsertWaits.cpp b/lib/Target/AMDGPU/SIInsertWaits.cpp
index 202a1e9ed8ac..fceabd7a8fdd 100644
--- a/lib/Target/AMDGPU/SIInsertWaits.cpp
+++ b/lib/Target/AMDGPU/SIInsertWaits.cpp
@@ -504,7 +504,7 @@ void SIInsertWaits::handleSendMsg(MachineBasicBlock &MBB,
return;
// There must be "S_NOP 0" between an instruction writing M0 and S_SENDMSG.
- if (LastInstWritesM0 && I->getOpcode() == AMDGPU::S_SENDMSG) {
+ if (LastInstWritesM0 && (I->getOpcode() == AMDGPU::S_SENDMSG || I->getOpcode() == AMDGPU::S_SENDMSGHALT)) {
BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_NOP)).addImm(0);
LastInstWritesM0 = false;
return;
@@ -619,7 +619,8 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
// signalling other hardware blocks
if ((I->getOpcode() == AMDGPU::S_BARRIER &&
ST->needWaitcntBeforeBarrier()) ||
- I->getOpcode() == AMDGPU::S_SENDMSG)
+ I->getOpcode() == AMDGPU::S_SENDMSG ||
+ I->getOpcode() == AMDGPU::S_SENDMSGHALT)
Required = LastIssued;
else
Required = handleOperands(*I);
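
s_sendmsghalt now shares both hazards already handled for s_sendmsg: the s_nop after an instruction writing M0, and the full wait before signalling other hardware blocks. The opcode pair is tested in two places in this pass; a small helper would keep them in sync. A sketch of that refactoring (hypothetical, not part of this patch; the values stand in for the generated opcode enum and happen to match the SOPP encodings in SOPInstructions.td):

    // Hypothetical helper for SIInsertWaits; in-tree code would compare
    // against AMDGPU::S_SENDMSG / AMDGPU::S_SENDMSGHALT directly.
    namespace AMDGPUModel {
    enum { S_SENDMSG = 0x10, S_SENDMSGHALT = 0x11 };
    } // namespace AMDGPUModel

    static bool isSendMsgInst(unsigned Opcode) {
      return Opcode == AMDGPUModel::S_SENDMSG ||
             Opcode == AMDGPUModel::S_SENDMSGHALT;
    }
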
diff --git a/lib/Target/AMDGPU/SOPInstructions.td b/lib/Target/AMDGPU/SOPInstructions.td
index 0aeb1297d3a7..73cd5774128e 100644
--- a/lib/Target/AMDGPU/SOPInstructions.td
+++ b/lib/Target/AMDGPU/SOPInstructions.td
@@ -828,9 +828,12 @@ let Uses = [EXEC, M0] in {
def S_SENDMSG : SOPP <0x00000010, (ins SendMsgImm:$simm16), "s_sendmsg $simm16",
[(AMDGPUsendmsg (i32 imm:$simm16))]
>;
+
+def S_SENDMSGHALT : SOPP <0x00000011, (ins SendMsgImm:$simm16), "s_sendmsghalt $simm16",
+ [(AMDGPUsendmsghalt (i32 imm:$simm16))]
+>;
} // End Uses = [EXEC, M0]
-def S_SENDMSGHALT : SOPP <0x00000011, (ins SendMsgImm:$simm16), "s_sendmsghalt $simm16">;
def S_TRAP : SOPP <0x00000012, (ins i16imm:$simm16), "s_trap $simm16">;
def S_ICACHE_INV : SOPP <0x00000013, (ins), "s_icache_inv"> {
let simm16 = 0;
diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp
index f20768ab77a5..8ec9cb02813c 100644
--- a/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -164,9 +164,6 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
// Emit the rest of the function body.
EmitFunctionBody();
- // Emit the XRay table for this function.
- EmitXRayTable();
-
// If we need V4T thumb mode Register Indirect Jump pads, emit them.
// These are created per function, rather than per TU, since it's
// relatively easy to exceed the thumb branch range within a TU.
diff --git a/lib/Target/ARM/ARMAsmPrinter.h b/lib/Target/ARM/ARMAsmPrinter.h
index ce0b04d56d9e..93fed10eb2d0 100644
--- a/lib/Target/ARM/ARMAsmPrinter.h
+++ b/lib/Target/ARM/ARMAsmPrinter.h
@@ -113,9 +113,6 @@ public:
void LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI);
void LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr &MI);
void LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI);
- // Helper function that emits the XRay sleds we've collected for a particular
- // function.
- void EmitXRayTable();
private:
void EmitSled(const MachineInstr &MI, SledKind Kind);
diff --git a/lib/Target/ARM/ARMMCInstLower.cpp b/lib/Target/ARM/ARMMCInstLower.cpp
index 293a527b09e8..07044b9697b6 100644
--- a/lib/Target/ARM/ARMMCInstLower.cpp
+++ b/lib/Target/ARM/ARMMCInstLower.cpp
@@ -22,9 +22,6 @@
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCSymbolELF.h"
-#include "llvm/MC/MCSectionELF.h"
-#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/MCStreamer.h"
using namespace llvm;
@@ -226,38 +223,3 @@ void ARMAsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI)
{
EmitSled(MI, SledKind::TAIL_CALL);
}
-
-void ARMAsmPrinter::EmitXRayTable()
-{
- if (Sleds.empty())
- return;
-
- MCSection *Section = nullptr;
- if (Subtarget->isTargetELF()) {
- Section = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS,
- ELF::SHF_ALLOC | ELF::SHF_GROUP |
- ELF::SHF_MERGE,
- 0, CurrentFnSym->getName());
- } else if (Subtarget->isTargetMachO()) {
- Section = OutContext.getMachOSection("__DATA", "xray_instr_map", 0,
- SectionKind::getReadOnlyWithRel());
- } else {
- llvm_unreachable("Unsupported target");
- }
-
- auto PrevSection = OutStreamer->getCurrentSectionOnly();
- OutStreamer->SwitchSection(Section);
- for (const auto &Sled : Sleds) {
- OutStreamer->EmitSymbolValue(Sled.Sled, 4);
- OutStreamer->EmitSymbolValue(CurrentFnSym, 4);
- auto Kind = static_cast<uint8_t>(Sled.Kind);
- OutStreamer->EmitBytes(
- StringRef(reinterpret_cast<const char *>(&Kind), 1));
- OutStreamer->EmitBytes(
- StringRef(reinterpret_cast<const char *>(&Sled.AlwaysInstrument), 1));
- OutStreamer->EmitZeros(6);
- }
- OutStreamer->SwitchSection(PrevSection);
-
- Sleds.clear();
-}
diff --git a/lib/Target/Hexagon/BitTracker.cpp b/lib/Target/Hexagon/BitTracker.cpp
index c0591c332dea..963fb99ce09b 100644
--- a/lib/Target/Hexagon/BitTracker.cpp
+++ b/lib/Target/Hexagon/BitTracker.cpp
@@ -53,28 +53,36 @@
//
// The code below is intended to be fully target-independent.
+#include "BitTracker.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/BitVector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetRegisterInfo.h"
-
-#include "BitTracker.h"
+#include <cassert>
+#include <cstdint>
+#include <iterator>
using namespace llvm;
typedef BitTracker BT;
namespace {
+
// Local trickery to pretty print a register (without the whole "%vreg"
// business).
struct printv {
printv(unsigned r) : R(r) {}
+
unsigned R;
};
+
raw_ostream &operator<< (raw_ostream &OS, const printv &PV) {
if (PV.R)
OS << 'v' << TargetRegisterInfo::virtReg2Index(PV.R);
@@ -82,9 +90,11 @@ namespace {
OS << 's';
return OS;
}
-}
+
+} // end anonymous namespace
namespace llvm {
+
raw_ostream &operator<<(raw_ostream &OS, const BT::BitValue &BV) {
switch (BV.Type) {
case BT::BitValue::Top:
@@ -167,14 +177,14 @@ namespace llvm {
return OS;
}
-}
+
+} // end namespace llvm
void BitTracker::print_cells(raw_ostream &OS) const {
for (CellMapType::iterator I = Map.begin(), E = Map.end(); I != E; ++I)
dbgs() << PrintReg(I->first, &ME.TRI) << " -> " << I->second << "\n";
}
-
BitTracker::BitTracker(const MachineEvaluator &E, MachineFunction &F)
: Trace(false), ME(E), MF(F), MRI(F.getRegInfo()), Map(*new CellMapType) {}
@@ -182,7 +192,6 @@ BitTracker::~BitTracker() {
delete &Map;
}
-
// If we were allowed to update a cell for a part of a register, the meet
// operation would need to be parametrized by the register number and the
// exact part of the register, so that the computed BitRefs correspond to
@@ -201,7 +210,6 @@ bool BT::RegisterCell::meet(const RegisterCell &RC, unsigned SelfR) {
return Changed;
}
-
// Insert the entire cell RC into the current cell at position given by M.
BT::RegisterCell &BT::RegisterCell::insert(const BT::RegisterCell &RC,
const BitMask &M) {
@@ -224,7 +232,6 @@ BT::RegisterCell &BT::RegisterCell::insert(const BT::RegisterCell &RC,
return *this;
}
-
BT::RegisterCell BT::RegisterCell::extract(const BitMask &M) const {
uint16_t B = M.first(), E = M.last(), W = width();
assert(B < W && E < W);
@@ -243,7 +250,6 @@ BT::RegisterCell BT::RegisterCell::extract(const BitMask &M) const {
return RC;
}
-
BT::RegisterCell &BT::RegisterCell::rol(uint16_t Sh) {
// Rotate left (i.e. towards increasing bit indices).
// Swap the two parts: [0..W-Sh-1] [W-Sh..W-1]
@@ -265,7 +271,6 @@ BT::RegisterCell &BT::RegisterCell::rol(uint16_t Sh) {
return *this;
}
-
BT::RegisterCell &BT::RegisterCell::fill(uint16_t B, uint16_t E,
const BitValue &V) {
assert(B <= E);
@@ -274,7 +279,6 @@ BT::RegisterCell &BT::RegisterCell::fill(uint16_t B, uint16_t E,
return *this;
}
-
BT::RegisterCell &BT::RegisterCell::cat(const RegisterCell &RC) {
// Append the cell given as the argument to the "this" cell.
// Bit 0 of RC becomes bit W of the result, where W is this->width().
@@ -285,7 +289,6 @@ BT::RegisterCell &BT::RegisterCell::cat(const RegisterCell &RC) {
return *this;
}
-
uint16_t BT::RegisterCell::ct(bool B) const {
uint16_t W = width();
uint16_t C = 0;
@@ -295,7 +298,6 @@ uint16_t BT::RegisterCell::ct(bool B) const {
return C;
}
-
uint16_t BT::RegisterCell::cl(bool B) const {
uint16_t W = width();
uint16_t C = 0;
@@ -305,7 +307,6 @@ uint16_t BT::RegisterCell::cl(bool B) const {
return C;
}
-
bool BT::RegisterCell::operator== (const RegisterCell &RC) const {
uint16_t W = Bits.size();
if (RC.Bits.size() != W)
@@ -316,7 +317,6 @@ bool BT::RegisterCell::operator== (const RegisterCell &RC) const {
return true;
}
-
uint16_t BT::MachineEvaluator::getRegBitWidth(const RegisterRef &RR) const {
// The general problem is with finding a register class that corresponds
// to a given reference reg:sub. There can be several such classes, and
@@ -342,7 +342,6 @@ uint16_t BT::MachineEvaluator::getRegBitWidth(const RegisterRef &RR) const {
return BW;
}
-
BT::RegisterCell BT::MachineEvaluator::getCell(const RegisterRef &RR,
const CellMapType &M) const {
uint16_t BW = getRegBitWidth(RR);
@@ -370,7 +369,6 @@ BT::RegisterCell BT::MachineEvaluator::getCell(const RegisterRef &RR,
return RegisterCell::top(BW);
}
-
void BT::MachineEvaluator::putCell(const RegisterRef &RR, RegisterCell RC,
CellMapType &M) const {
// While updating the cell map can be done in a meaningful way for
@@ -388,7 +386,6 @@ void BT::MachineEvaluator::putCell(const RegisterRef &RR, RegisterCell RC,
M[RR.Reg] = RC;
}
-
// Check if the cell represents a compile-time integer value.
bool BT::MachineEvaluator::isInt(const RegisterCell &A) const {
uint16_t W = A.width();
@@ -398,7 +395,6 @@ bool BT::MachineEvaluator::isInt(const RegisterCell &A) const {
return true;
}
-
// Convert a cell to the integer value. The result must fit in uint64_t.
uint64_t BT::MachineEvaluator::toInt(const RegisterCell &A) const {
assert(isInt(A));
@@ -411,7 +407,6 @@ uint64_t BT::MachineEvaluator::toInt(const RegisterCell &A) const {
return Val;
}
-
// Evaluator helper functions. These implement some common operation on
// register cells that can be used to implement target-specific instructions
// in a target-specific evaluator.
@@ -426,7 +421,6 @@ BT::RegisterCell BT::MachineEvaluator::eIMM(int64_t V, uint16_t W) const {
return Res;
}
-
BT::RegisterCell BT::MachineEvaluator::eIMM(const ConstantInt *CI) const {
const APInt &A = CI->getValue();
uint16_t BW = A.getBitWidth();
@@ -437,7 +431,6 @@ BT::RegisterCell BT::MachineEvaluator::eIMM(const ConstantInt *CI) const {
return Res;
}
-
BT::RegisterCell BT::MachineEvaluator::eADD(const RegisterCell &A1,
const RegisterCell &A2) const {
uint16_t W = A1.width();
@@ -471,7 +464,6 @@ BT::RegisterCell BT::MachineEvaluator::eADD(const RegisterCell &A1,
return Res;
}
-
BT::RegisterCell BT::MachineEvaluator::eSUB(const RegisterCell &A1,
const RegisterCell &A2) const {
uint16_t W = A1.width();
@@ -505,29 +497,26 @@ BT::RegisterCell BT::MachineEvaluator::eSUB(const RegisterCell &A1,
return Res;
}
-
BT::RegisterCell BT::MachineEvaluator::eMLS(const RegisterCell &A1,
const RegisterCell &A2) const {
uint16_t W = A1.width() + A2.width();
- uint16_t Z = A1.ct(0) + A2.ct(0);
+ uint16_t Z = A1.ct(false) + A2.ct(false);
RegisterCell Res(W);
Res.fill(0, Z, BitValue::Zero);
Res.fill(Z, W, BitValue::self());
return Res;
}
-
BT::RegisterCell BT::MachineEvaluator::eMLU(const RegisterCell &A1,
const RegisterCell &A2) const {
uint16_t W = A1.width() + A2.width();
- uint16_t Z = A1.ct(0) + A2.ct(0);
+ uint16_t Z = A1.ct(false) + A2.ct(false);
RegisterCell Res(W);
Res.fill(0, Z, BitValue::Zero);
Res.fill(Z, W, BitValue::self());
return Res;
}
-
BT::RegisterCell BT::MachineEvaluator::eASL(const RegisterCell &A1,
uint16_t Sh) const {
assert(Sh <= A1.width());
@@ -537,7 +526,6 @@ BT::RegisterCell BT::MachineEvaluator::eASL(const RegisterCell &A1,
return Res;
}
-
BT::RegisterCell BT::MachineEvaluator::eLSR(const RegisterCell &A1,
uint16_t Sh) const {
uint16_t W = A1.width();
@@ -548,7 +536,6 @@ BT::RegisterCell BT::MachineEvaluator::eLSR(const RegisterCell &A1,
return Res;
}
-
BT::RegisterCell BT::MachineEvaluator::eASR(const RegisterCell &A1,
uint16_t Sh) const {
uint16_t W = A1.width();
@@ -560,7 +547,6 @@ BT::RegisterCell BT::MachineEvaluator::eASR(const RegisterCell &A1,
return Res;
}
-
BT::RegisterCell BT::MachineEvaluator::eAND(const RegisterCell &A1,
const RegisterCell &A2) const {
uint16_t W = A1.width();
@@ -583,7 +569,6 @@ BT::RegisterCell BT::MachineEvaluator::eAND(const RegisterCell &A1,
return Res;
}
-
BT::RegisterCell BT::MachineEvaluator::eORL(const RegisterCell &A1,
const RegisterCell &A2) const {
uint16_t W = A1.width();
@@ -606,7 +591,6 @@ BT::RegisterCell BT::MachineEvaluator::eORL(const RegisterCell &A1,
return Res;
}
-
BT::RegisterCell BT::MachineEvaluator::eXOR(const RegisterCell &A1,
const RegisterCell &A2) const {
uint16_t W = A1.width();
@@ -627,7 +611,6 @@ BT::RegisterCell BT::MachineEvaluator::eXOR(const RegisterCell &A1,
return Res;
}
-
BT::RegisterCell BT::MachineEvaluator::eNOT(const RegisterCell &A1) const {
uint16_t W = A1.width();
RegisterCell Res(W);
@@ -643,7 +626,6 @@ BT::RegisterCell BT::MachineEvaluator::eNOT(const RegisterCell &A1) const {
return Res;
}
-
BT::RegisterCell BT::MachineEvaluator::eSET(const RegisterCell &A1,
uint16_t BitN) const {
assert(BitN < A1.width());
@@ -652,7 +634,6 @@ BT::RegisterCell BT::MachineEvaluator::eSET(const RegisterCell &A1,
return Res;
}
-
BT::RegisterCell BT::MachineEvaluator::eCLR(const RegisterCell &A1,
uint16_t BitN) const {
assert(BitN < A1.width());
@@ -661,7 +642,6 @@ BT::RegisterCell BT::MachineEvaluator::eCLR(const RegisterCell &A1,
return Res;
}
-
BT::RegisterCell BT::MachineEvaluator::eCLB(const RegisterCell &A1, bool B,
uint16_t W) const {
uint16_t C = A1.cl(B), AW = A1.width();
@@ -672,7 +652,6 @@ BT::RegisterCell BT::MachineEvaluator::eCLB(const RegisterCell &A1, bool B,
return RegisterCell::self(0, W);
}
-
BT::RegisterCell BT::MachineEvaluator::eCTB(const RegisterCell &A1, bool B,
uint16_t W) const {
uint16_t C = A1.ct(B), AW = A1.width();
@@ -683,7 +662,6 @@ BT::RegisterCell BT::MachineEvaluator::eCTB(const RegisterCell &A1, bool B,
return RegisterCell::self(0, W);
}
-
BT::RegisterCell BT::MachineEvaluator::eSXT(const RegisterCell &A1,
uint16_t FromN) const {
uint16_t W = A1.width();
@@ -695,7 +673,6 @@ BT::RegisterCell BT::MachineEvaluator::eSXT(const RegisterCell &A1,
return Res;
}
-
BT::RegisterCell BT::MachineEvaluator::eZXT(const RegisterCell &A1,
uint16_t FromN) const {
uint16_t W = A1.width();
@@ -705,7 +682,6 @@ BT::RegisterCell BT::MachineEvaluator::eZXT(const RegisterCell &A1,
return Res;
}
-
BT::RegisterCell BT::MachineEvaluator::eXTR(const RegisterCell &A1,
uint16_t B, uint16_t E) const {
uint16_t W = A1.width();
@@ -718,7 +694,6 @@ BT::RegisterCell BT::MachineEvaluator::eXTR(const RegisterCell &A1,
return Res;
}
-
BT::RegisterCell BT::MachineEvaluator::eINS(const RegisterCell &A1,
const RegisterCell &A2, uint16_t AtN) const {
uint16_t W1 = A1.width(), W2 = A2.width();
@@ -731,7 +706,6 @@ BT::RegisterCell BT::MachineEvaluator::eINS(const RegisterCell &A1,
return Res;
}
-
BT::BitMask BT::MachineEvaluator::mask(unsigned Reg, unsigned Sub) const {
assert(Sub == 0 && "Generic BitTracker::mask called for Sub != 0");
uint16_t W = getRegBitWidth(Reg);
@@ -785,7 +759,6 @@ bool BT::MachineEvaluator::evaluate(const MachineInstr &MI,
return true;
}
-
// Main W-Z implementation.
void BT::visitPHI(const MachineInstr &PI) {
@@ -977,7 +950,6 @@ void BT::visitBranchesFrom(const MachineInstr &BI) {
}
}
-
void BT::visitUsesOf(unsigned Reg) {
if (Trace)
dbgs() << "visiting uses of " << PrintReg(Reg, &ME.TRI) << "\n";
@@ -997,17 +969,14 @@ void BT::visitUsesOf(unsigned Reg) {
}
}
-
BT::RegisterCell BT::get(RegisterRef RR) const {
return ME.getCell(RR, Map);
}
-
void BT::put(RegisterRef RR, const RegisterCell &RC) {
ME.putCell(RR, RC, Map);
}
-
// Replace all references to bits from OldRR with the corresponding bits
// in NewRR.
void BT::subst(RegisterRef OldRR, RegisterRef NewRR) {
@@ -1033,7 +1002,6 @@ void BT::subst(RegisterRef OldRR, RegisterRef NewRR) {
}
}
-
// Check if the block has been "executed" during propagation. (If not, the
// block is dead, but it may still appear to be reachable.)
bool BT::reached(const MachineBasicBlock *B) const {
@@ -1047,7 +1015,6 @@ bool BT::reached(const MachineBasicBlock *B) const {
return false;
}
-
// Visit an individual instruction. This could be a newly added instruction,
// or one that has been modified by an optimization.
void BT::visit(const MachineInstr &MI) {
@@ -1061,14 +1028,12 @@ void BT::visit(const MachineInstr &MI) {
FlowQ.pop();
}
-
void BT::reset() {
EdgeExec.clear();
InstrExec.clear();
Map.clear();
}
-
void BT::run() {
reset();
assert(FlowQ.empty());
@@ -1141,4 +1106,3 @@ void BT::run() {
if (Trace)
print_cells(dbgs() << "Cells after propagation:\n");
}
-
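
Most of this file's churn is whitespace, plus s/0/false/ and s/1/true/ at the ct() and cl() call sites, whose parameter is the bit value to count. A standalone model of those two helpers (counting consecutive equal bits from the low or high end of a cell):

    #include <cassert>
    #include <cstdint>
    #include <vector>

    // Model of RegisterCell::ct()/cl(): the number of consecutive bits equal
    // to B starting from bit 0 (ct) or from the top bit (cl).
    static uint16_t ct(const std::vector<bool> &Bits, bool B) {
      const uint16_t W = static_cast<uint16_t>(Bits.size());
      uint16_t C = 0;
      while (C < W && Bits[C] == B)
        ++C;
      return C;
    }

    static uint16_t cl(const std::vector<bool> &Bits, bool B) {
      const uint16_t W = static_cast<uint16_t>(Bits.size());
      uint16_t C = 0;
      while (C < W && Bits[W - 1 - C] == B)
        ++C;
      return C;
    }

    int main() {
      std::vector<bool> V = {false, false, true, true, false};  // bit 0 first
      assert(ct(V, false) == 2);  // two known-zero low bits
      assert(cl(V, false) == 1);  // one known-zero high bit
      // eMLS/eMLU above use ct(false) to count known low zeros of each factor.
    }
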
diff --git a/lib/Target/Hexagon/BitTracker.h b/lib/Target/Hexagon/BitTracker.h
index 74cafcd00b60..48c5f2266acf 100644
--- a/lib/Target/Hexagon/BitTracker.h
+++ b/lib/Target/Hexagon/BitTracker.h
@@ -1,4 +1,4 @@
-//===--- BitTracker.h -----------------------------------------------------===//
+//===--- BitTracker.h -------------------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,24 +7,27 @@
//
//===----------------------------------------------------------------------===//
-#ifndef BITTRACKER_H
-#define BITTRACKER_H
+#ifndef LLVM_LIB_TARGET_HEXAGON_BITTRACKER_H
+#define LLVM_LIB_TARGET_HEXAGON_BITTRACKER_H
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineFunction.h"
-
+#include "llvm/CodeGen/MachineOperand.h"
+#include <cassert>
+#include <cstdint>
#include <map>
#include <queue>
#include <set>
+#include <utility>
namespace llvm {
- class ConstantInt;
- class MachineRegisterInfo;
- class MachineBasicBlock;
- class MachineInstr;
- class MachineOperand;
- class raw_ostream;
+
+class ConstantInt;
+class MachineRegisterInfo;
+class MachineBasicBlock;
+class MachineInstr;
+class raw_ostream;
struct BitTracker {
struct BitRef;
@@ -76,19 +79,19 @@ private:
CellMapType &Map;
};
-
// Abstraction of a reference to bit at position Pos from a register Reg.
struct BitTracker::BitRef {
BitRef(unsigned R = 0, uint16_t P = 0) : Reg(R), Pos(P) {}
+
bool operator== (const BitRef &BR) const {
// If Reg is 0, disregard Pos.
return Reg == BR.Reg && (Reg == 0 || Pos == BR.Pos);
}
+
unsigned Reg;
uint16_t Pos;
};
-
// Abstraction of a register reference in MachineOperand. It contains the
// register number and the subregister index.
struct BitTracker::RegisterRef {
@@ -96,10 +99,10 @@ struct BitTracker::RegisterRef {
: Reg(R), Sub(S) {}
RegisterRef(const MachineOperand &MO)
: Reg(MO.getReg()), Sub(MO.getSubReg()) {}
+
unsigned Reg, Sub;
};
-
// Value that a single bit can take. This is outside of the context of
// any register, it is more of an abstraction of the two-element set of
// possible bit values. One extension here is the "Ref" type, which
@@ -158,6 +161,7 @@ struct BitTracker::BitValue {
bool operator!= (const BitValue &V) const {
return !operator==(V);
}
+
bool is(unsigned T) const {
assert(T == 0 || T == 1);
return T == 0 ? Type == Zero
@@ -209,6 +213,7 @@ struct BitTracker::BitValue {
bool num() const {
return Type == Zero || Type == One;
}
+
operator bool() const {
assert(Type == Zero || Type == One);
return Type == One;
@@ -217,7 +222,6 @@ struct BitTracker::BitValue {
friend raw_ostream &operator<<(raw_ostream &OS, const BitValue &BV);
};
-
// This operation must be idempotent, i.e. ref(ref(V)) == ref(V).
inline BitTracker::BitValue
BitTracker::BitValue::ref(const BitValue &V) {
@@ -228,26 +232,26 @@ BitTracker::BitValue::ref(const BitValue &V) {
return self();
}
-
inline BitTracker::BitValue
BitTracker::BitValue::self(const BitRef &Self) {
return BitValue(Self.Reg, Self.Pos);
}
-
// A sequence of bits starting from index B up to and including index E.
// If E < B, the mask represents two sections: [0..E] and [B..W) where
// W is the width of the register.
struct BitTracker::BitMask {
- BitMask() : B(0), E(0) {}
+ BitMask() = default;
BitMask(uint16_t b, uint16_t e) : B(b), E(e) {}
+
uint16_t first() const { return B; }
uint16_t last() const { return E; }
+
private:
- uint16_t B, E;
+ uint16_t B = 0;
+ uint16_t E = 0;
};
-
// Representation of a register: a list of BitValues.
struct BitTracker::RegisterCell {
RegisterCell(uint16_t Width = DefaultBitN) : Bits(Width) {}
@@ -255,6 +259,7 @@ struct BitTracker::RegisterCell {
uint16_t width() const {
return Bits.size();
}
+
const BitValue &operator[](uint16_t BitN) const {
assert(BitN < Bits.size());
return Bits[BitN];
@@ -297,12 +302,10 @@ private:
friend raw_ostream &operator<<(raw_ostream &OS, const RegisterCell &RC);
};
-
inline bool BitTracker::has(unsigned Reg) const {
return Map.find(Reg) != Map.end();
}
-
inline const BitTracker::RegisterCell&
BitTracker::lookup(unsigned Reg) const {
CellMapType::const_iterator F = Map.find(Reg);
@@ -310,7 +313,6 @@ BitTracker::lookup(unsigned Reg) const {
return F->second;
}
-
inline BitTracker::RegisterCell
BitTracker::RegisterCell::self(unsigned Reg, uint16_t Width) {
RegisterCell RC(Width);
@@ -319,7 +321,6 @@ BitTracker::RegisterCell::self(unsigned Reg, uint16_t Width) {
return RC;
}
-
inline BitTracker::RegisterCell
BitTracker::RegisterCell::top(uint16_t Width) {
RegisterCell RC(Width);
@@ -328,7 +329,6 @@ BitTracker::RegisterCell::top(uint16_t Width) {
return RC;
}
-
inline BitTracker::RegisterCell
BitTracker::RegisterCell::ref(const RegisterCell &C) {
uint16_t W = C.width();
@@ -345,12 +345,13 @@ BitTracker::RegisterCell::ref(const RegisterCell &C) {
struct BitTracker::MachineEvaluator {
MachineEvaluator(const TargetRegisterInfo &T, MachineRegisterInfo &M)
: TRI(T), MRI(M) {}
- virtual ~MachineEvaluator() {}
+ virtual ~MachineEvaluator() = default;
uint16_t getRegBitWidth(const RegisterRef &RR) const;
RegisterCell getCell(const RegisterRef &RR, const CellMapType &M) const;
void putCell(const RegisterRef &RR, RegisterCell RC, CellMapType &M) const;
+
// A result of any operation should use refs to the source cells, not
// the cells directly. This function is a convenience wrapper to quickly
// generate a ref for a cell corresponding to a register reference.
@@ -435,4 +436,4 @@ struct BitTracker::MachineEvaluator {
} // end namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_HEXAGON_BITTRACKER_H
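
BitMask's wrap-around convention (E < B meaning the two sections [0..E] and [B..W)) is easy to misread; a standalone model of the coverage test, using the example implied by the comment:

    #include <cassert>
    #include <cstdint>

    // Which bits a BitTracker::BitMask(B, E) covers in a W-bit register,
    // including the wrapped case where E < B.
    static bool covers(uint16_t B, uint16_t E, uint16_t W, uint16_t Bit) {
      assert(Bit < W);
      if (B <= E)
        return Bit >= B && Bit <= E;   // one section: [B..E]
      return Bit <= E || Bit >= B;     // two sections: [0..E] and [B..W)
    }

    int main() {
      // W = 32, B = 24, E = 7: covers [0..7] and [24..31], not the middle.
      assert(covers(24, 7, 32, 0));
      assert(covers(24, 7, 32, 31));
      assert(!covers(24, 7, 32, 12));
    }
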
diff --git a/lib/Target/Hexagon/HexagonBitTracker.cpp b/lib/Target/Hexagon/HexagonBitTracker.cpp
index b78c4126e0b1..436f88dcd450 100644
--- a/lib/Target/Hexagon/HexagonBitTracker.cpp
+++ b/lib/Target/Hexagon/HexagonBitTracker.cpp
@@ -7,16 +7,30 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-
#include "Hexagon.h"
+#include "HexagonBitTracker.h"
#include "HexagonInstrInfo.h"
#include "HexagonRegisterInfo.h"
#include "HexagonTargetMachine.h"
-#include "HexagonBitTracker.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Argument.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <cstdlib>
+#include <utility>
+#include <vector>
using namespace llvm;
@@ -76,11 +90,11 @@ HexagonEvaluator::HexagonEvaluator(const HexagonRegisterInfo &tri,
}
}
-
BT::BitMask HexagonEvaluator::mask(unsigned Reg, unsigned Sub) const {
+ using namespace Hexagon;
+
if (Sub == 0)
return MachineEvaluator::mask(Reg, 0);
- using namespace Hexagon;
const TargetRegisterClass *RC = MRI.getRegClass(Reg);
unsigned ID = RC->getID();
uint16_t RW = getRegBitWidth(RegisterRef(Reg, Sub));
@@ -102,6 +116,7 @@ BT::BitMask HexagonEvaluator::mask(unsigned Reg, unsigned Sub) const {
}
namespace {
+
class RegisterRefs {
std::vector<BT::RegisterRef> Vector;
@@ -117,17 +132,21 @@ public:
}
size_t size() const { return Vector.size(); }
+
const BT::RegisterRef &operator[](unsigned n) const {
// The main purpose of this operator is to assert with bad argument.
assert(n < Vector.size());
return Vector[n];
}
};
-}
+
+} // end anonymous namespace
bool HexagonEvaluator::evaluate(const MachineInstr &MI,
const CellMapType &Inputs,
CellMapType &Outputs) const {
+ using namespace Hexagon;
+
unsigned NumDefs = 0;
// Sanity verification: there should not be any defs with subregisters.
@@ -142,7 +161,6 @@ bool HexagonEvaluator::evaluate(const MachineInstr &MI,
if (NumDefs == 0)
return false;
- using namespace Hexagon;
unsigned Opc = MI.getOpcode();
if (MI.mayLoad()) {
@@ -779,10 +797,10 @@ bool HexagonEvaluator::evaluate(const MachineInstr &MI,
case S2_cl0:
case S2_cl0p:
// Always produce a 32-bit result.
- return rr0(eCLB(rc(1), 0/*bit*/, 32), Outputs);
+ return rr0(eCLB(rc(1), false/*bit*/, 32), Outputs);
case S2_cl1:
case S2_cl1p:
- return rr0(eCLB(rc(1), 1/*bit*/, 32), Outputs);
+ return rr0(eCLB(rc(1), true/*bit*/, 32), Outputs);
case S2_clb:
case S2_clbp: {
uint16_t W1 = getRegBitWidth(Reg[1]);
@@ -794,10 +812,10 @@ bool HexagonEvaluator::evaluate(const MachineInstr &MI,
}
case S2_ct0:
case S2_ct0p:
- return rr0(eCTB(rc(1), 0/*bit*/, 32), Outputs);
+ return rr0(eCTB(rc(1), false/*bit*/, 32), Outputs);
case S2_ct1:
case S2_ct1p:
- return rr0(eCTB(rc(1), 1/*bit*/, 32), Outputs);
+ return rr0(eCTB(rc(1), true/*bit*/, 32), Outputs);
case S5_popcountp:
// TODO
break;
@@ -953,6 +971,8 @@ bool HexagonEvaluator::evaluate(const MachineInstr &BI,
bool HexagonEvaluator::evaluateLoad(const MachineInstr &MI,
const CellMapType &Inputs,
CellMapType &Outputs) const {
+ using namespace Hexagon;
+
if (TII.isPredicated(MI))
return false;
assert(MI.mayLoad() && "A load that mayn't?");
@@ -960,7 +980,6 @@ bool HexagonEvaluator::evaluateLoad(const MachineInstr &MI,
uint16_t BitNum;
bool SignEx;
- using namespace Hexagon;
switch (Opc) {
default:
@@ -1141,9 +1160,9 @@ bool HexagonEvaluator::evaluateFormalCopy(const MachineInstr &MI,
return true;
}
-
unsigned HexagonEvaluator::getNextPhysReg(unsigned PReg, unsigned Width) const {
using namespace Hexagon;
+
bool Is64 = DoubleRegsRegClass.contains(PReg);
assert(PReg == 0 || Is64 || IntRegsRegClass.contains(PReg));
@@ -1180,7 +1199,6 @@ unsigned HexagonEvaluator::getNextPhysReg(unsigned PReg, unsigned Width) const {
return (Idx64+1 < Num64) ? Phys64[Idx64+1] : 0;
}
-
unsigned HexagonEvaluator::getVirtRegFor(unsigned PReg) const {
typedef MachineRegisterInfo::livein_iterator iterator;
for (iterator I = MRI.livein_begin(), E = MRI.livein_end(); I != E; ++I) {
diff --git a/lib/Target/Hexagon/HexagonBitTracker.h b/lib/Target/Hexagon/HexagonBitTracker.h
index 9e7b1dbe298f..2cbf65e66ca6 100644
--- a/lib/Target/Hexagon/HexagonBitTracker.h
+++ b/lib/Target/Hexagon/HexagonBitTracker.h
@@ -1,4 +1,4 @@
-//===--- HexagonBitTracker.h ----------------------------------------------===//
+//===--- HexagonBitTracker.h ------------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,15 +7,17 @@
//
//===----------------------------------------------------------------------===//
-#ifndef HEXAGONBITTRACKER_H
-#define HEXAGONBITTRACKER_H
+#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONBITTRACKER_H
+#define LLVM_LIB_TARGET_HEXAGON_HEXAGONBITTRACKER_H
#include "BitTracker.h"
#include "llvm/ADT/DenseMap.h"
+#include <cstdint>
namespace llvm {
- class HexagonInstrInfo;
- class HexagonRegisterInfo;
+
+class HexagonInstrInfo;
+class HexagonRegisterInfo;
struct HexagonEvaluator : public BitTracker::MachineEvaluator {
typedef BitTracker::CellMapType CellMapType;
@@ -49,10 +51,12 @@ private:
// Type of formal parameter extension.
struct ExtType {
enum { SExt, ZExt };
- char Type;
- uint16_t Width;
- ExtType() : Type(0), Width(0) {}
+
+ ExtType() = default;
ExtType(char t, uint16_t w) : Type(t), Width(w) {}
+
+ char Type = 0;
+ uint16_t Width = 0;
};
// Map VR -> extension type.
typedef DenseMap<unsigned, ExtType> RegExtMap;
@@ -61,4 +65,4 @@ private:
} // end namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_HEXAGON_HEXAGONBITTRACKER_H
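
The ExtType change is the modernization idiom that recurs throughout this import: a hand-written default constructor is replaced by `= default` plus in-class member initializers, so both constructors draw their defaults from one place. The same idiom in isolation (names modeled on the patched struct):

    #include <cstdint>

    struct ExtTypeModel {
      enum { SExt, ZExt };

      ExtTypeModel() = default;  // picks up the initializers below
      ExtTypeModel(char t, uint16_t w) : Type(t), Width(w) {}

      char Type = 0;
      uint16_t Width = 0;
    };

    int main() {
      ExtTypeModel Def;                         // Type == 0, Width == 0
      ExtTypeModel Arg(ExtTypeModel::SExt, 32); // e.g. sign-extended to 32 bits
      return (Def.Width == 0 && Arg.Width == 32) ? 0 : 1;
    }
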
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp
index 34ce3e652995..0a7dc6b49d00 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.cpp
+++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -11,26 +11,45 @@
//
//===----------------------------------------------------------------------===//
+#include "Hexagon.h"
#include "HexagonHazardRecognizer.h"
#include "HexagonInstrInfo.h"
#include "HexagonRegisterInfo.h"
#include "HexagonSubtarget.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/DFAPacketizer.h"
#include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineInstrBundle.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <cassert>
#include <cctype>
+#include <cstdint>
+#include <cstring>
+#include <iterator>
using namespace llvm;
@@ -108,19 +127,16 @@ HexagonInstrInfo::HexagonInstrInfo(HexagonSubtarget &ST)
: HexagonGenInstrInfo(Hexagon::ADJCALLSTACKDOWN, Hexagon::ADJCALLSTACKUP),
RI() {}
-
static bool isIntRegForSubInst(unsigned Reg) {
return (Reg >= Hexagon::R0 && Reg <= Hexagon::R7) ||
(Reg >= Hexagon::R16 && Reg <= Hexagon::R23);
}
-
static bool isDblRegForSubInst(unsigned Reg, const HexagonRegisterInfo &HRI) {
return isIntRegForSubInst(HRI.getSubReg(Reg, Hexagon::isub_lo)) &&
isIntRegForSubInst(HRI.getSubReg(Reg, Hexagon::isub_hi));
}
-
/// Calculate number of instructions excluding the debug instructions.
static unsigned nonDbgMICount(MachineBasicBlock::const_instr_iterator MIB,
MachineBasicBlock::const_instr_iterator MIE) {
@@ -132,7 +148,6 @@ static unsigned nonDbgMICount(MachineBasicBlock::const_instr_iterator MIB,
return Count;
}
-
/// Find the hardware loop instruction used to set up the specified loop.
/// On Hexagon, we have two instructions used to set up the hardware loop
/// (LOOP0, LOOP1) with corresponding endloop (ENDLOOP0, ENDLOOP1) instructions
@@ -164,17 +179,16 @@ static MachineInstr *findLoopInstr(MachineBasicBlock *BB, int EndLoopOp,
return &*I;
// We've reached a different loop, which means the loop0 has been removed.
if (Opc == EndLoopOp)
- return 0;
+ return nullptr;
}
// Check the predecessors for the LOOP instruction.
MachineInstr *loop = findLoopInstr(*PB, EndLoopOp, Visited);
if (loop)
return loop;
}
- return 0;
+ return nullptr;
}
-
/// Gather register def/uses from MI.
/// This treats possible (predicated) defs as actually happening ones
/// (conservatively).
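
findLoopInstr (context above) walks backwards from the endloop: finding the matching LOOPn setup succeeds, hitting a different endloop first means the setup was deleted, and otherwise the search recurses into unvisited predecessors, now returning nullptr rather than 0 on failure. A simplified standalone model of that search (it omits the real function's branch-target checks):

    #include <cassert>
    #include <set>
    #include <vector>

    struct Block {
      std::vector<int> Ops;        // opcodes, in program order
      std::vector<Block *> Preds;
    };
    enum { LOOP0 = 1, ENDLOOP0 = 2, OTHER = 3 };

    static const int *findLoopSetup(Block *B, std::set<Block *> &Visited) {
      if (!Visited.insert(B).second)
        return nullptr;                // already searched this block
      for (auto I = B->Ops.rbegin(), E = B->Ops.rend(); I != E; ++I) {
        if (*I == LOOP0)
          return &*I;                  // found the loop setup
        if (*I == ENDLOOP0)
          return nullptr;              // different loop: setup was removed
      }
      for (Block *P : B->Preds)
        if (const int *L = findLoopSetup(P, Visited))
          return L;
      return nullptr;
    }

    int main() {
      Block Pre{{OTHER, LOOP0}, {}}, Body{{OTHER}, {&Pre}};
      std::set<Block *> V;
      assert(findLoopSetup(&Body, V) != nullptr);
    }
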
@@ -201,7 +215,6 @@ static inline void parseOperands(const MachineInstr &MI,
}
}
-
// Position dependent, so check twice for swap.
static bool isDuplexPairMatch(unsigned Ga, unsigned Gb) {
switch (Ga) {
@@ -228,8 +241,6 @@ static bool isDuplexPairMatch(unsigned Ga, unsigned Gb) {
return false;
}
-
-
/// isLoadFromStackSlot - If the specified machine instruction is a direct
/// load from a stack slot, return the virtual or physical register number of
/// the destination along with the FrameIndex of the loaded stack slot. If
@@ -280,7 +291,6 @@ unsigned HexagonInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
return 0;
}
-
/// isStoreToStackSlot - If the specified machine instruction is a direct
/// store to a stack slot, return the virtual or physical register number of
/// the source reg along with the FrameIndex of the stored stack slot. If
@@ -337,7 +347,6 @@ unsigned HexagonInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
return 0;
}
-
/// This function can analyze one/two way branching only and should (mostly) be
/// called by the target-independent side.
/// First entry is always the opcode of the branching instruction, except when
@@ -401,7 +410,7 @@ bool HexagonInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
// Delete the J2_jump if it's equivalent to a fall-through.
if (AllowModify && JumpToBlock &&
MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
- DEBUG(dbgs()<< "\nErasing the jump to successor block\n";);
+ DEBUG(dbgs() << "\nErasing the jump to successor block\n";);
I->eraseFromParent();
I = MBB.instr_end();
if (I == MBB.instr_begin())
@@ -415,7 +424,7 @@ bool HexagonInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
MachineInstr *LastInst = &*I;
MachineInstr *SecondLastInst = nullptr;
// Find one more terminator if present.
- for (;;) {
+ while (true) {
if (&*I != LastInst && !I->isBundle() && isUnpredicatedTerminator(*I)) {
if (!SecondLastInst)
SecondLastInst = &*I;
@@ -524,7 +533,6 @@ bool HexagonInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
return true;
}
-
unsigned HexagonInstrInfo::removeBranch(MachineBasicBlock &MBB,
int *BytesRemoved) const {
assert(!BytesRemoved && "code size not handled");
@@ -730,7 +738,6 @@ bool HexagonInstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB,
return nonDbgBBSize(&MBB) <= 3;
}
-
bool HexagonInstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB,
unsigned NumTCycles, unsigned ExtraTCycles, MachineBasicBlock &FMBB,
unsigned NumFCycles, unsigned ExtraFCycles, BranchProbability Probability)
@@ -738,7 +745,6 @@ bool HexagonInstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB,
return nonDbgBBSize(&TMBB) <= 3 && nonDbgBBSize(&FMBB) <= 3;
}
-
bool HexagonInstrInfo::isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
unsigned NumInstrs, BranchProbability Probability) const {
return NumInstrs <= 4;
@@ -853,7 +859,6 @@ void HexagonInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
llvm_unreachable("Unimplemented");
}
-
void HexagonInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, unsigned SrcReg, bool isKill, int FI,
const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const {
@@ -976,7 +981,6 @@ void HexagonInstrInfo::loadRegFromStackSlot(
}
}
-
static void getLiveRegsAt(LivePhysRegs &Regs, const MachineInstr &MI) {
const MachineBasicBlock &B = *MI.getParent();
Regs.addLiveOuts(B);
@@ -1307,7 +1311,6 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
return false;
}
-
// We indicate that we want to reverse the branch by
// inserting the reversed branching opcode.
bool HexagonInstrInfo::reverseBranchCondition(
@@ -1325,19 +1328,16 @@ bool HexagonInstrInfo::reverseBranchCondition(
return false;
}
-
void HexagonInstrInfo::insertNoop(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI) const {
DebugLoc DL;
BuildMI(MBB, MI, DL, get(Hexagon::A2_nop));
}
-
bool HexagonInstrInfo::isPostIncrement(const MachineInstr &MI) const {
return getAddrMode(MI) == HexagonII::PostInc;
}
-
// Returns true if an instruction is predicated irrespective of the predicate
// sense. For example, all of the following will return true.
// if (p0) R1 = add(R2, R3)
@@ -1351,7 +1351,6 @@ bool HexagonInstrInfo::isPredicated(const MachineInstr &MI) const {
return (F >> HexagonII::PredicatedPos) & HexagonII::PredicatedMask;
}
-
bool HexagonInstrInfo::PredicateInstruction(
MachineInstr &MI, ArrayRef<MachineOperand> Cond) const {
if (Cond.empty() || isNewValueJump(Cond[0].getImm()) ||
@@ -1403,14 +1402,12 @@ bool HexagonInstrInfo::PredicateInstruction(
return true;
}
-
bool HexagonInstrInfo::SubsumesPredicate(ArrayRef<MachineOperand> Pred1,
ArrayRef<MachineOperand> Pred2) const {
// TODO: Fix this
return false;
}
-
bool HexagonInstrInfo::DefinesPredicate(
MachineInstr &MI, std::vector<MachineOperand> &Pred) const {
auto &HRI = getRegisterInfo();
@@ -1427,7 +1424,6 @@ bool HexagonInstrInfo::DefinesPredicate(
return false;
}
-
bool HexagonInstrInfo::isPredicable(MachineInstr &MI) const {
return MI.getDesc().isPredicable();
}
@@ -1466,7 +1462,6 @@ bool HexagonInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
return false;
}
-
/// Measure the specified inline asm to determine an approximation of its
/// length.
/// Comments (which run till the next SeparatorString or newline) do not
@@ -1502,7 +1497,6 @@ unsigned HexagonInstrInfo::getInlineAsmLength(const char *Str,
return Length;
}
-
ScheduleHazardRecognizer*
HexagonInstrInfo::CreateTargetPostRAHazardRecognizer(
const InstrItineraryData *II, const ScheduleDAG *DAG) const {
@@ -1513,7 +1507,6 @@ HexagonInstrInfo::CreateTargetPostRAHazardRecognizer(
return TargetInstrInfo::CreateTargetPostRAHazardRecognizer(II, DAG);
}
-
/// \brief For a comparison instruction, return the source registers in
/// \p SrcReg and \p SrcReg2 if having two register operands, and the value it
/// compares against in CmpValue. Return true if the comparison instruction
@@ -1609,14 +1602,12 @@ unsigned HexagonInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
return getInstrTimingClassLatency(ItinData, MI);
}
-
DFAPacketizer *HexagonInstrInfo::CreateTargetScheduleState(
const TargetSubtargetInfo &STI) const {
const InstrItineraryData *II = STI.getInstrItineraryData();
return static_cast<const HexagonSubtarget&>(STI).createDFAPacketizer(II);
}
-
// Inspired by this pair:
// %R13<def> = L2_loadri_io %R29, 136; mem:LD4[FixedStack0]
// S2_storeri_io %R29, 132, %R1<kill>; flags: mem:ST4[FixedStack1]
@@ -1661,7 +1652,6 @@ bool HexagonInstrInfo::areMemAccessesTriviallyDisjoint(
return false;
}
-
/// If the instruction is an increment of a constant value, return the amount.
bool HexagonInstrInfo::getIncrementValue(const MachineInstr &MI,
int &Value) const {
@@ -1677,7 +1667,6 @@ bool HexagonInstrInfo::getIncrementValue(const MachineInstr &MI,
return false;
}
-
unsigned HexagonInstrInfo::createVR(MachineFunction *MF, MVT VT) const {
MachineRegisterInfo &MRI = MF->getRegInfo();
const TargetRegisterClass *TRC;
@@ -1695,18 +1684,15 @@ unsigned HexagonInstrInfo::createVR(MachineFunction *MF, MVT VT) const {
return NewReg;
}
-
bool HexagonInstrInfo::isAbsoluteSet(const MachineInstr &MI) const {
return (getAddrMode(MI) == HexagonII::AbsoluteSet);
}
-
bool HexagonInstrInfo::isAccumulator(const MachineInstr &MI) const {
const uint64_t F = MI.getDesc().TSFlags;
return((F >> HexagonII::AccumulatorPos) & HexagonII::AccumulatorMask);
}
-
bool HexagonInstrInfo::isComplex(const MachineInstr &MI) const {
const MachineFunction *MF = MI.getParent()->getParent();
const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
@@ -1727,13 +1713,11 @@ bool HexagonInstrInfo::isComplex(const MachineInstr &MI) const {
return false;
}
-
// Return true if the instruction is a compound branch instruction.
bool HexagonInstrInfo::isCompoundBranchInstr(const MachineInstr &MI) const {
return (getType(MI) == HexagonII::TypeCOMPOUND && MI.isBranch());
}
-
bool HexagonInstrInfo::isCondInst(const MachineInstr &MI) const {
return (MI.isBranch() && isPredicated(MI)) ||
isConditionalTransfer(MI) ||
@@ -1744,7 +1728,6 @@ bool HexagonInstrInfo::isCondInst(const MachineInstr &MI) const {
!isPredicatedNew(MI));
}
-
bool HexagonInstrInfo::isConditionalALU32(const MachineInstr &MI) const {
switch (MI.getOpcode()) {
case Hexagon::A2_paddf:
@@ -1802,7 +1785,6 @@ bool HexagonInstrInfo::isConditionalALU32(const MachineInstr &MI) const {
return false;
}
-
// FIXME - Function name and its functionality don't match.
// It should be renamed to hasPredNewOpcode()
bool HexagonInstrInfo::isConditionalLoad(const MachineInstr &MI) const {
@@ -1814,7 +1796,6 @@ bool HexagonInstrInfo::isConditionalLoad(const MachineInstr &MI) const {
return PNewOpcode >= 0;
}
-
// Returns true if an instruction is a conditional store.
//
// Note: It doesn't include conditional new-value stores as they can't be
@@ -1872,7 +1853,6 @@ bool HexagonInstrInfo::isConditionalStore(const MachineInstr &MI) const {
}
}
-
bool HexagonInstrInfo::isConditionalTransfer(const MachineInstr &MI) const {
switch (MI.getOpcode()) {
case Hexagon::A2_tfrt:
@@ -1893,7 +1873,6 @@ bool HexagonInstrInfo::isConditionalTransfer(const MachineInstr &MI) const {
return false;
}
-
// TODO: In order to have isExtendable for fpimm/f32Ext, we need to handle
// isFPImm and later getFPImm as well.
bool HexagonInstrInfo::isConstExtended(const MachineInstr &MI) const {
@@ -1942,7 +1921,6 @@ bool HexagonInstrInfo::isConstExtended(const MachineInstr &MI) const {
return (ImmValue < MinValue || ImmValue > MaxValue);
}
-
bool HexagonInstrInfo::isDeallocRet(const MachineInstr &MI) const {
switch (MI.getOpcode()) {
case Hexagon::L4_return :
@@ -1957,7 +1935,6 @@ bool HexagonInstrInfo::isDeallocRet(const MachineInstr &MI) const {
return false;
}
-
// Return true when ConsMI uses a register defined by ProdMI.
bool HexagonInstrInfo::isDependent(const MachineInstr &ProdMI,
const MachineInstr &ConsMI) const {
@@ -1994,7 +1971,6 @@ bool HexagonInstrInfo::isDependent(const MachineInstr &ProdMI,
return false;
}
-
// Returns true if the instruction is already a .cur.
bool HexagonInstrInfo::isDotCurInst(const MachineInstr &MI) const {
switch (MI.getOpcode()) {
@@ -2007,7 +1983,6 @@ bool HexagonInstrInfo::isDotCurInst(const MachineInstr &MI) const {
return false;
}
-
// Returns true if any one of the operands is a dot new
// insn, whether it is predicated dot new or register dot new.
bool HexagonInstrInfo::isDotNewInst(const MachineInstr &MI) const {
@@ -2017,7 +1992,6 @@ bool HexagonInstrInfo::isDotNewInst(const MachineInstr &MI) const {
return false;
}
-
/// Symmetrical. See if these two instructions are fit for duplex pair.
bool HexagonInstrInfo::isDuplexPair(const MachineInstr &MIa,
const MachineInstr &MIb) const {
@@ -2026,7 +2000,6 @@ bool HexagonInstrInfo::isDuplexPair(const MachineInstr &MIa,
return (isDuplexPairMatch(MIaG, MIbG) || isDuplexPairMatch(MIbG, MIaG));
}
-
bool HexagonInstrInfo::isEarlySourceInstr(const MachineInstr &MI) const {
if (MI.mayLoad() || MI.mayStore() || MI.isCompare())
return true;
@@ -2038,13 +2011,11 @@ bool HexagonInstrInfo::isEarlySourceInstr(const MachineInstr &MI) const {
return false;
}
-
bool HexagonInstrInfo::isEndLoopN(unsigned Opcode) const {
return (Opcode == Hexagon::ENDLOOP0 ||
Opcode == Hexagon::ENDLOOP1);
}
-
bool HexagonInstrInfo::isExpr(unsigned OpType) const {
switch(OpType) {
case MachineOperand::MO_MachineBasicBlock:
@@ -2059,7 +2030,6 @@ bool HexagonInstrInfo::isExpr(unsigned OpType) const {
}
}
-
bool HexagonInstrInfo::isExtendable(const MachineInstr &MI) const {
const MCInstrDesc &MID = MI.getDesc();
const uint64_t F = MID.TSFlags;
@@ -2079,7 +2049,6 @@ bool HexagonInstrInfo::isExtendable(const MachineInstr &MI) const {
return false;
}
-
// This returns true in two cases:
// - The OP code itself indicates that this is an extended instruction.
// - One of MOs has been marked with HMOTF_ConstExtended flag.
@@ -2098,14 +2067,12 @@ bool HexagonInstrInfo::isExtended(const MachineInstr &MI) const {
return false;
}
-
bool HexagonInstrInfo::isFloat(const MachineInstr &MI) const {
unsigned Opcode = MI.getOpcode();
const uint64_t F = get(Opcode).TSFlags;
return (F >> HexagonII::FPPos) & HexagonII::FPMask;
}
-
// No V60 HVX VMEM with A_INDIRECT.
bool HexagonInstrInfo::isHVXMemWithAIndirect(const MachineInstr &I,
const MachineInstr &J) const {
@@ -2116,7 +2083,6 @@ bool HexagonInstrInfo::isHVXMemWithAIndirect(const MachineInstr &I,
return J.isIndirectBranch() || isIndirectCall(J) || isIndirectL4Return(J);
}
-
bool HexagonInstrInfo::isIndirectCall(const MachineInstr &MI) const {
switch (MI.getOpcode()) {
case Hexagon::J2_callr :
@@ -2128,7 +2094,6 @@ bool HexagonInstrInfo::isIndirectCall(const MachineInstr &MI) const {
return false;
}
-
bool HexagonInstrInfo::isIndirectL4Return(const MachineInstr &MI) const {
switch (MI.getOpcode()) {
case Hexagon::L4_return :
@@ -2143,7 +2108,6 @@ bool HexagonInstrInfo::isIndirectL4Return(const MachineInstr &MI) const {
return false;
}
-
bool HexagonInstrInfo::isJumpR(const MachineInstr &MI) const {
switch (MI.getOpcode()) {
case Hexagon::J2_jumpr :
@@ -2158,7 +2122,6 @@ bool HexagonInstrInfo::isJumpR(const MachineInstr &MI) const {
return false;
}
-
// Return true if a given MI can accommodate a given offset.
// Use an absolute estimate as opposed to the exact number.
// TODO: This will need to be changed to use MC level
@@ -2203,7 +2166,6 @@ bool HexagonInstrInfo::isJumpWithinBranchRange(const MachineInstr &MI,
}
}
-
bool HexagonInstrInfo::isLateInstrFeedsEarlyInstr(const MachineInstr &LRMI,
const MachineInstr &ESMI) const {
bool isLate = isLateResultInstr(LRMI);
@@ -2222,7 +2184,6 @@ bool HexagonInstrInfo::isLateInstrFeedsEarlyInstr(const MachineInstr &LRMI,
return false;
}
-
bool HexagonInstrInfo::isLateResultInstr(const MachineInstr &MI) const {
switch (MI.getOpcode()) {
case TargetOpcode::EXTRACT_SUBREG:
@@ -2259,14 +2220,12 @@ bool HexagonInstrInfo::isLateResultInstr(const MachineInstr &MI) const {
return true;
}
-
bool HexagonInstrInfo::isLateSourceInstr(const MachineInstr &MI) const {
// Instructions with iclass A_CVI_VX and attribute A_CVI_LATE use a multiply
// resource, but all operands can be received late like an ALU instruction.
return MI.getDesc().getSchedClass() == Hexagon::Sched::CVI_VX_LATE;
}
-
bool HexagonInstrInfo::isLoopN(const MachineInstr &MI) const {
unsigned Opcode = MI.getOpcode();
return Opcode == Hexagon::J2_loop0i ||
@@ -2279,7 +2238,6 @@ bool HexagonInstrInfo::isLoopN(const MachineInstr &MI) const {
Opcode == Hexagon::J2_loop1rext;
}
-
bool HexagonInstrInfo::isMemOp(const MachineInstr &MI) const {
switch (MI.getOpcode()) {
default: return false;
@@ -2312,46 +2270,38 @@ bool HexagonInstrInfo::isMemOp(const MachineInstr &MI) const {
return false;
}
-
bool HexagonInstrInfo::isNewValue(const MachineInstr &MI) const {
const uint64_t F = MI.getDesc().TSFlags;
return (F >> HexagonII::NewValuePos) & HexagonII::NewValueMask;
}
-
bool HexagonInstrInfo::isNewValue(unsigned Opcode) const {
const uint64_t F = get(Opcode).TSFlags;
return (F >> HexagonII::NewValuePos) & HexagonII::NewValueMask;
}
-
bool HexagonInstrInfo::isNewValueInst(const MachineInstr &MI) const {
return isNewValueJump(MI) || isNewValueStore(MI);
}
-
bool HexagonInstrInfo::isNewValueJump(const MachineInstr &MI) const {
return isNewValue(MI) && MI.isBranch();
}
-
bool HexagonInstrInfo::isNewValueJump(unsigned Opcode) const {
return isNewValue(Opcode) && get(Opcode).isBranch() && isPredicated(Opcode);
}
-
bool HexagonInstrInfo::isNewValueStore(const MachineInstr &MI) const {
const uint64_t F = MI.getDesc().TSFlags;
return (F >> HexagonII::NVStorePos) & HexagonII::NVStoreMask;
}
-
bool HexagonInstrInfo::isNewValueStore(unsigned Opcode) const {
const uint64_t F = get(Opcode).TSFlags;
return (F >> HexagonII::NVStorePos) & HexagonII::NVStoreMask;
}
-
// Returns true if a particular operand is extendable for an instruction.
bool HexagonInstrInfo::isOperandExtended(const MachineInstr &MI,
unsigned OperandNum) const {
@@ -2360,28 +2310,24 @@ bool HexagonInstrInfo::isOperandExtended(const MachineInstr &MI,
== OperandNum;
}
-
bool HexagonInstrInfo::isPredicatedNew(const MachineInstr &MI) const {
const uint64_t F = MI.getDesc().TSFlags;
assert(isPredicated(MI));
return (F >> HexagonII::PredicatedNewPos) & HexagonII::PredicatedNewMask;
}
-
bool HexagonInstrInfo::isPredicatedNew(unsigned Opcode) const {
const uint64_t F = get(Opcode).TSFlags;
assert(isPredicated(Opcode));
return (F >> HexagonII::PredicatedNewPos) & HexagonII::PredicatedNewMask;
}
-
bool HexagonInstrInfo::isPredicatedTrue(const MachineInstr &MI) const {
const uint64_t F = MI.getDesc().TSFlags;
return !((F >> HexagonII::PredicatedFalsePos) &
HexagonII::PredicatedFalseMask);
}
-
bool HexagonInstrInfo::isPredicatedTrue(unsigned Opcode) const {
const uint64_t F = get(Opcode).TSFlags;
// Make sure that the instruction is predicated.
@@ -2390,19 +2336,16 @@ bool HexagonInstrInfo::isPredicatedTrue(unsigned Opcode) const {
HexagonII::PredicatedFalseMask);
}
-
bool HexagonInstrInfo::isPredicated(unsigned Opcode) const {
const uint64_t F = get(Opcode).TSFlags;
return (F >> HexagonII::PredicatedPos) & HexagonII::PredicatedMask;
}
-
bool HexagonInstrInfo::isPredicateLate(unsigned Opcode) const {
const uint64_t F = get(Opcode).TSFlags;
return ~(F >> HexagonII::PredicateLatePos) & HexagonII::PredicateLateMask;
}
-
bool HexagonInstrInfo::isPredictedTaken(unsigned Opcode) const {
const uint64_t F = get(Opcode).TSFlags;
assert(get(Opcode).isBranch() &&
@@ -2410,7 +2353,6 @@ bool HexagonInstrInfo::isPredictedTaken(unsigned Opcode) const {
return (F >> HexagonII::TakenPos) & HexagonII::TakenMask;
}
-
bool HexagonInstrInfo::isSaveCalleeSavedRegsCall(const MachineInstr &MI) const {
return MI.getOpcode() == Hexagon::SAVE_REGISTERS_CALL_V4 ||
MI.getOpcode() == Hexagon::SAVE_REGISTERS_CALL_V4_EXT ||
@@ -2496,13 +2438,11 @@ bool HexagonInstrInfo::isSignExtendingLoad(const MachineInstr &MI) const {
}
}
-
bool HexagonInstrInfo::isSolo(const MachineInstr &MI) const {
const uint64_t F = MI.getDesc().TSFlags;
return (F >> HexagonII::SoloPos) & HexagonII::SoloMask;
}
-
bool HexagonInstrInfo::isSpillPredRegOp(const MachineInstr &MI) const {
switch (MI.getOpcode()) {
case Hexagon::STriw_pred :
@@ -2513,7 +2453,6 @@ bool HexagonInstrInfo::isSpillPredRegOp(const MachineInstr &MI) const {
}
}
-
bool HexagonInstrInfo::isTailCall(const MachineInstr &MI) const {
if (!MI.isBranch())
return false;
@@ -2524,7 +2463,6 @@ bool HexagonInstrInfo::isTailCall(const MachineInstr &MI) const {
return false;
}
-
// Returns true when MI has a timing class TC1.
bool HexagonInstrInfo::isTC1(const MachineInstr &MI) const {
unsigned SchedClass = MI.getDesc().getSchedClass();
@@ -2544,7 +2482,6 @@ bool HexagonInstrInfo::isTC1(const MachineInstr &MI) const {
}
}
-
bool HexagonInstrInfo::isTC2(const MachineInstr &MI) const {
unsigned SchedClass = MI.getDesc().getSchedClass();
switch (SchedClass) {
@@ -2561,7 +2498,6 @@ bool HexagonInstrInfo::isTC2(const MachineInstr &MI) const {
}
}
-
bool HexagonInstrInfo::isTC2Early(const MachineInstr &MI) const {
unsigned SchedClass = MI.getDesc().getSchedClass();
switch (SchedClass) {
@@ -2582,13 +2518,11 @@ bool HexagonInstrInfo::isTC2Early(const MachineInstr &MI) const {
}
}
-
bool HexagonInstrInfo::isTC4x(const MachineInstr &MI) const {
unsigned SchedClass = MI.getDesc().getSchedClass();
return SchedClass == Hexagon::Sched::M_tc_3or4x_SLOT23;
}
-
// Schedule this ASAP.
bool HexagonInstrInfo::isToBeScheduledASAP(const MachineInstr &MI1,
const MachineInstr &MI2) const {
@@ -2608,13 +2542,11 @@ bool HexagonInstrInfo::isToBeScheduledASAP(const MachineInstr &MI1,
return false;
}
-
bool HexagonInstrInfo::isV60VectorInstruction(const MachineInstr &MI) const {
const uint64_t V = getType(MI);
return HexagonII::TypeCVI_FIRST <= V && V <= HexagonII::TypeCVI_LAST;
}
-
// Check if the Offset is a valid auto-inc imm by load/store type.
bool HexagonInstrInfo::isValidAutoIncImm(const EVT VT, const int Offset) const {
@@ -2653,7 +2585,6 @@ bool HexagonInstrInfo::isValidAutoIncImm(const EVT VT, const int Offset) const {
llvm_unreachable("Not an auto-inc opc!");
}
-
bool HexagonInstrInfo::isValidOffset(unsigned Opcode, int Offset,
bool Extend) const {
// This function is to check whether the "Offset" is in the correct range of
@@ -2808,12 +2739,10 @@ bool HexagonInstrInfo::isValidOffset(unsigned Opcode, int Offset,
"Please define it in the above switch statement!");
}
-
bool HexagonInstrInfo::isVecAcc(const MachineInstr &MI) const {
return isV60VectorInstruction(MI) && isAccumulator(MI);
}
-
bool HexagonInstrInfo::isVecALU(const MachineInstr &MI) const {
const uint64_t F = get(MI.getOpcode()).TSFlags;
const uint64_t V = ((F >> HexagonII::TypePos) & HexagonII::TypeMask);
@@ -2822,7 +2751,6 @@ bool HexagonInstrInfo::isVecALU(const MachineInstr &MI) const {
V == HexagonII::TypeCVI_VA_DV;
}
-
bool HexagonInstrInfo::isVecUsableNextPacket(const MachineInstr &ProdMI,
const MachineInstr &ConsMI) const {
if (EnableACCForwarding && isVecAcc(ProdMI) && isVecAcc(ConsMI))
@@ -2915,7 +2843,6 @@ bool HexagonInstrInfo::isZeroExtendingLoad(const MachineInstr &MI) const {
}
}
-
// Add latency to instruction.
bool HexagonInstrInfo::addLatencyToSchedule(const MachineInstr &MI1,
const MachineInstr &MI2) const {
@@ -2925,7 +2852,6 @@ bool HexagonInstrInfo::addLatencyToSchedule(const MachineInstr &MI1,
return false;
}
-
/// \brief Get the base register and byte offset of a load/store instr.
bool HexagonInstrInfo::getMemOpBaseRegImmOfs(MachineInstr &LdSt,
unsigned &BaseReg, int64_t &Offset, const TargetRegisterInfo *TRI)
@@ -2937,7 +2863,6 @@ bool HexagonInstrInfo::getMemOpBaseRegImmOfs(MachineInstr &LdSt,
return BaseReg != 0;
}
-
/// \brief Can these instructions execute at the same time in a bundle.
bool HexagonInstrInfo::canExecuteInBundle(const MachineInstr &First,
const MachineInstr &Second) const {
@@ -2959,13 +2884,11 @@ bool HexagonInstrInfo::canExecuteInBundle(const MachineInstr &First,
return false;
}
-
bool HexagonInstrInfo::doesNotReturn(const MachineInstr &CallMI) const {
unsigned Opc = CallMI.getOpcode();
return Opc == Hexagon::PS_call_nr || Opc == Hexagon::PS_callr_nr;
}
-
bool HexagonInstrInfo::hasEHLabel(const MachineBasicBlock *B) const {
for (auto &I : *B)
if (I.isEHLabel())
@@ -2973,7 +2896,6 @@ bool HexagonInstrInfo::hasEHLabel(const MachineBasicBlock *B) const {
return false;
}
-
// Returns true if an instruction can be converted into a non-extended
// equivalent instruction.
bool HexagonInstrInfo::hasNonExtEquivalent(const MachineInstr &MI) const {
@@ -3011,13 +2933,11 @@ bool HexagonInstrInfo::hasNonExtEquivalent(const MachineInstr &MI) const {
return false;
}
-
bool HexagonInstrInfo::hasPseudoInstrPair(const MachineInstr &MI) const {
return Hexagon::getRealHWInstr(MI.getOpcode(),
Hexagon::InstrType_Pseudo) >= 0;
}
-
bool HexagonInstrInfo::hasUncondBranch(const MachineBasicBlock *B)
const {
MachineBasicBlock::const_iterator I = B->getFirstTerminator(), E = B->end();
@@ -3029,7 +2949,6 @@ bool HexagonInstrInfo::hasUncondBranch(const MachineBasicBlock *B)
return false;
}
-
// Returns true if an LD insn can be promoted to a cur load.
bool HexagonInstrInfo::mayBeCurLoad(const MachineInstr &MI) const {
auto &HST = MI.getParent()->getParent()->getSubtarget<HexagonSubtarget>();
@@ -3038,14 +2957,12 @@ bool HexagonInstrInfo::mayBeCurLoad(const MachineInstr &MI) const {
HST.hasV60TOps();
}
-
// Returns true if an ST insn can be promoted to a new-value store.
bool HexagonInstrInfo::mayBeNewStore(const MachineInstr &MI) const {
const uint64_t F = MI.getDesc().TSFlags;
return (F >> HexagonII::mayNVStorePos) & HexagonII::mayNVStoreMask;
}
-
bool HexagonInstrInfo::producesStall(const MachineInstr &ProdMI,
const MachineInstr &ConsMI) const {
// There is no stall when ProdMI is not a V60 vector.
@@ -3064,7 +2981,6 @@ bool HexagonInstrInfo::producesStall(const MachineInstr &ProdMI,
return true;
}
-
bool HexagonInstrInfo::producesStall(const MachineInstr &MI,
MachineBasicBlock::const_instr_iterator BII) const {
// There is no stall when I is not a V60 vector.
@@ -3091,7 +3007,6 @@ bool HexagonInstrInfo::producesStall(const MachineInstr &MI,
return false;
}
-
bool HexagonInstrInfo::predCanBeUsedAsDotNew(const MachineInstr &MI,
unsigned PredReg) const {
for (unsigned opNum = 0; opNum < MI.getNumOperands(); opNum++) {
@@ -3106,7 +3021,6 @@ bool HexagonInstrInfo::predCanBeUsedAsDotNew(const MachineInstr &MI,
return MI.getOpcode() != Hexagon::A4_tlbmatch;
}
-
bool HexagonInstrInfo::PredOpcodeHasJMP_c(unsigned Opcode) const {
return (Opcode == Hexagon::J2_jumpt) ||
(Opcode == Hexagon::J2_jumpf) ||
@@ -3116,25 +3030,21 @@ bool HexagonInstrInfo::PredOpcodeHasJMP_c(unsigned Opcode) const {
(Opcode == Hexagon::J2_jumpfnewpt);
}
-
bool HexagonInstrInfo::predOpcodeHasNot(ArrayRef<MachineOperand> Cond) const {
if (Cond.empty() || !isPredicated(Cond[0].getImm()))
return false;
return !isPredicatedTrue(Cond[0].getImm());
}
-
short HexagonInstrInfo::getAbsoluteForm(const MachineInstr &MI) const {
return Hexagon::getAbsoluteForm(MI.getOpcode());
}
-
unsigned HexagonInstrInfo::getAddrMode(const MachineInstr &MI) const {
const uint64_t F = MI.getDesc().TSFlags;
return (F >> HexagonII::AddrModePos) & HexagonII::AddrModeMask;
}
-
// Returns the base register in a memory access (load/store). The offset is
// returned in Offset and the access size is returned in AccessSize.
unsigned HexagonInstrInfo::getBaseAndOffset(const MachineInstr &MI,
@@ -3171,7 +3081,6 @@ unsigned HexagonInstrInfo::getBaseAndOffset(const MachineInstr &MI,
return MI.getOperand(basePos).getReg();
}
-
/// Return the position of the base and offset operands for this instruction.
bool HexagonInstrInfo::getBaseAndOffsetPosition(const MachineInstr &MI,
unsigned &BasePos, unsigned &OffsetPos) const {
@@ -3203,7 +3112,6 @@ bool HexagonInstrInfo::getBaseAndOffsetPosition(const MachineInstr &MI,
return true;
}
-
// Inserts branching instructions in reverse order of their occurrence.
// e.g. jump_t t1 (i1)
// jump t2 (i2)
@@ -3265,24 +3173,20 @@ SmallVector<MachineInstr*, 2> HexagonInstrInfo::getBranchingInstrs(
return Jumpers;
}
-
short HexagonInstrInfo::getBaseWithLongOffset(short Opcode) const {
if (Opcode < 0)
return -1;
return Hexagon::getBaseWithLongOffset(Opcode);
}
-
short HexagonInstrInfo::getBaseWithLongOffset(const MachineInstr &MI) const {
return Hexagon::getBaseWithLongOffset(MI.getOpcode());
}
-
short HexagonInstrInfo::getBaseWithRegOffset(const MachineInstr &MI) const {
return Hexagon::getBaseWithRegOffset(MI.getOpcode());
}
-
// Returns Operand Index for the constant extended instruction.
unsigned HexagonInstrInfo::getCExtOpNum(const MachineInstr &MI) const {
const uint64_t F = MI.getDesc().TSFlags;
@@ -3379,7 +3283,6 @@ HexagonII::CompoundGroup HexagonInstrInfo::getCompoundCandidateGroup(
return HexagonII::HCG_None;
}
-
// Returns -1 when there is no opcode found.
unsigned HexagonInstrInfo::getCompoundOpcode(const MachineInstr &GA,
const MachineInstr &GB) const {
@@ -3398,7 +3301,6 @@ unsigned HexagonInstrInfo::getCompoundOpcode(const MachineInstr &GA,
return -1;
}
-
int HexagonInstrInfo::getCondOpcode(int Opc, bool invertPredicate) const {
enum Hexagon::PredSense inPredSense;
inPredSense = invertPredicate ? Hexagon::PredSense_false :
@@ -3410,7 +3312,6 @@ int HexagonInstrInfo::getCondOpcode(int Opc, bool invertPredicate) const {
llvm_unreachable("Unexpected predicable instruction");
}
-
// Return the cur value instruction for a given store.
int HexagonInstrInfo::getDotCurOp(const MachineInstr &MI) const {
switch (MI.getOpcode()) {
@@ -3428,8 +3329,6 @@ int HexagonInstrInfo::getDotCurOp(const MachineInstr &MI) const {
return 0;
}
-
-
// The diagram below shows the steps involved in the conversion of a predicated
// store instruction to its .new predicated new-value form.
//
@@ -3509,7 +3408,6 @@ int HexagonInstrInfo::getDotCurOp(const MachineInstr &MI) const {
// promoted. Therefore, in case of dependence check failure (due to R5) during
// next iteration, it should be converted back to its most basic form.
-
// Return the new value instruction for a given store.
int HexagonInstrInfo::getDotNewOp(const MachineInstr &MI) const {
int NVOpcode = Hexagon::getNewValueOpcode(MI.getOpcode());
@@ -3552,7 +3450,6 @@ int HexagonInstrInfo::getDotNewOp(const MachineInstr &MI) const {
return 0;
}
-
// Returns the opcode to use when converting MI, which is a conditional jump,
// into a conditional instruction which uses the .new value of the predicate.
// We also use branch probabilities to add a hint to the jump.
@@ -3579,7 +3476,6 @@ int HexagonInstrInfo::getDotNewPredJumpOp(const MachineInstr &MI,
}
}
-
// Return .new predicate version for an instruction.
int HexagonInstrInfo::getDotNewPredOp(const MachineInstr &MI,
const MachineBranchProbabilityInfo *MBPI) const {
@@ -3599,7 +3495,6 @@ int HexagonInstrInfo::getDotNewPredOp(const MachineInstr &MI,
return 0;
}
-
int HexagonInstrInfo::getDotOldOp(const int opc) const {
int NewOp = opc;
if (isPredicated(NewOp) && isPredicatedNew(NewOp)) { // Get predicate old form
@@ -3615,7 +3510,6 @@ int HexagonInstrInfo::getDotOldOp(const int opc) const {
return NewOp;
}
-
// See if instruction could potentially be a duplex candidate.
// If so, return its group. Zero otherwise.
HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup(
@@ -3960,12 +3854,10 @@ HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup(
return HexagonII::HSIG_None;
}
-
short HexagonInstrInfo::getEquivalentHWInstr(const MachineInstr &MI) const {
return Hexagon::getRealHWInstr(MI.getOpcode(), Hexagon::InstrType_Real);
}
-
// Return first non-debug instruction in the basic block.
MachineInstr *HexagonInstrInfo::getFirstNonDbgInst(MachineBasicBlock *BB)
const {
@@ -3978,7 +3870,6 @@ MachineInstr *HexagonInstrInfo::getFirstNonDbgInst(MachineBasicBlock *BB)
return nullptr;
}
-
unsigned HexagonInstrInfo::getInstrTimingClassLatency(
const InstrItineraryData *ItinData, const MachineInstr &MI) const {
// Default to one cycle for no itinerary. However, an "empty" itinerary may
@@ -4000,7 +3891,6 @@ unsigned HexagonInstrInfo::getInstrTimingClassLatency(
return Latency;
}
-
// Inverts the predication logic.
// p -> NotP
// NotP -> P
@@ -4013,7 +3903,6 @@ bool HexagonInstrInfo::getInvertedPredSense(
return true;
}
-
unsigned HexagonInstrInfo::getInvertedPredicatedOpcode(const int Opc) const {
int InvPredOpcode;
InvPredOpcode = isPredicatedTrue(Opc) ? Hexagon::getFalsePredOpcode(Opc)
@@ -4024,7 +3913,6 @@ unsigned HexagonInstrInfo::getInvertedPredicatedOpcode(const int Opc) const {
llvm_unreachable("Unexpected predicated instruction");
}
-
// Returns the max value that doesn't need to be extended.
int HexagonInstrInfo::getMaxValue(const MachineInstr &MI) const {
const uint64_t F = MI.getDesc().TSFlags;
@@ -4039,13 +3927,11 @@ int HexagonInstrInfo::getMaxValue(const MachineInstr &MI) const {
return ~(-1U << bits);
}
-
unsigned HexagonInstrInfo::getMemAccessSize(const MachineInstr &MI) const {
const uint64_t F = MI.getDesc().TSFlags;
return (F >> HexagonII::MemAccessSizePos) & HexagonII::MemAccesSizeMask;
}
-
// Returns the min value that doesn't need to be extended.
int HexagonInstrInfo::getMinValue(const MachineInstr &MI) const {
const uint64_t F = MI.getDesc().TSFlags;
@@ -4060,7 +3946,6 @@ int HexagonInstrInfo::getMinValue(const MachineInstr &MI) const {
return 0;
}
-
// Returns opcode of the non-extended equivalent instruction.
short HexagonInstrInfo::getNonExtOpcode(const MachineInstr &MI) const {
// Check if the instruction has a register form that uses register in place
@@ -4086,7 +3971,6 @@ short HexagonInstrInfo::getNonExtOpcode(const MachineInstr &MI) const {
return -1;
}
-
bool HexagonInstrInfo::getPredReg(ArrayRef<MachineOperand> Cond,
unsigned &PredReg, unsigned &PredRegPos, unsigned &PredRegFlags) const {
if (Cond.empty())
@@ -4107,17 +3991,14 @@ bool HexagonInstrInfo::getPredReg(ArrayRef<MachineOperand> Cond,
return true;
}
-
short HexagonInstrInfo::getPseudoInstrPair(const MachineInstr &MI) const {
return Hexagon::getRealHWInstr(MI.getOpcode(), Hexagon::InstrType_Pseudo);
}
-
short HexagonInstrInfo::getRegForm(const MachineInstr &MI) const {
return Hexagon::getRegForm(MI.getOpcode());
}
-
// Return the number of bytes required to encode the instruction.
// Hexagon instructions are fixed length, 4 bytes, unless they
// use a constant extender, which requires another 4 bytes.
@@ -4156,13 +4037,11 @@ unsigned HexagonInstrInfo::getSize(const MachineInstr &MI) const {
return Size;
}
-
uint64_t HexagonInstrInfo::getType(const MachineInstr &MI) const {
const uint64_t F = MI.getDesc().TSFlags;
return (F >> HexagonII::TypePos) & HexagonII::TypeMask;
}
-
unsigned HexagonInstrInfo::getUnits(const MachineInstr &MI) const {
const TargetSubtargetInfo &ST = MI.getParent()->getParent()->getSubtarget();
const InstrItineraryData &II = *ST.getInstrItineraryData();
@@ -4171,19 +4050,16 @@ unsigned HexagonInstrInfo::getUnits(const MachineInstr &MI) const {
return IS.getUnits();
}
-
unsigned HexagonInstrInfo::getValidSubTargets(const unsigned Opcode) const {
const uint64_t F = get(Opcode).TSFlags;
return (F >> HexagonII::validSubTargetPos) & HexagonII::validSubTargetMask;
}
-
// Calculate size of the basic block without debug instructions.
unsigned HexagonInstrInfo::nonDbgBBSize(const MachineBasicBlock *BB) const {
return nonDbgMICount(BB->instr_begin(), BB->instr_end());
}
-
unsigned HexagonInstrInfo::nonDbgBundleSize(
MachineBasicBlock::const_iterator BundleHead) const {
assert(BundleHead->isBundle() && "Not a bundle header");
@@ -4192,7 +4068,6 @@ unsigned HexagonInstrInfo::nonDbgBundleSize(
return nonDbgMICount(++MII, getBundleEnd(BundleHead.getInstrIterator()));
}
-
/// immediateExtend - Changes the instruction in place to one using an immediate
/// extender.
void HexagonInstrInfo::immediateExtend(MachineInstr &MI) const {
@@ -4208,7 +4083,6 @@ void HexagonInstrInfo::immediateExtend(MachineInstr &MI) const {
MO.addTargetFlag(HexagonII::HMOTF_ConstExtended);
}
-
bool HexagonInstrInfo::invertAndChangeJumpTarget(
MachineInstr &MI, MachineBasicBlock *NewTarget) const {
DEBUG(dbgs() << "\n[invertAndChangeJumpTarget] to BB#"
@@ -4229,7 +4103,6 @@ bool HexagonInstrInfo::invertAndChangeJumpTarget(
return true;
}
-
void HexagonInstrInfo::genAllInsnTimingClasses(MachineFunction &MF) const {
/* +++ The code below is used to generate a complete set of Hexagon Insn +++ */
MachineFunction::iterator A = MF.begin();
@@ -4248,7 +4121,6 @@ void HexagonInstrInfo::genAllInsnTimingClasses(MachineFunction &MF) const {
/* --- The code above is used to generate a complete set of Hexagon Insn --- */
}
-
// Inverts the predication logic.
// p -> NotP
// NotP -> P
@@ -4258,7 +4130,6 @@ bool HexagonInstrInfo::reversePredSense(MachineInstr &MI) const {
return true;
}
-
// Reverse the branch prediction.
unsigned HexagonInstrInfo::reversePrediction(unsigned Opcode) const {
int PredRevOpcode = -1;
@@ -4270,14 +4141,12 @@ unsigned HexagonInstrInfo::reversePrediction(unsigned Opcode) const {
return PredRevOpcode;
}
-
// TODO: Add more rigorous validation.
bool HexagonInstrInfo::validateBranchCond(const ArrayRef<MachineOperand> &Cond)
const {
return Cond.empty() || (Cond[0].isImm() && (Cond.size() != 1));
}
-
short HexagonInstrInfo::xformRegToImmOffset(const MachineInstr &MI) const {
return Hexagon::xformRegToImmOffset(MI.getOpcode());
}
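
The predicates above (isNewValue, isPredicatedTrue, getAddrMode, getMemAccessSize, and friends) all decode fields that tblgen packs into MCInstrDesc::TSFlags, using the same (F >> Pos) & Mask idiom. A minimal, self-contained sketch of that scheme; the positions and masks below are made up for illustration (the real values live in HexagonBaseInfo.h):

#include <cassert>
#include <cstdint>

// Hypothetical field layout; the real HexagonII positions/masks differ.
namespace DemoII {
enum : uint64_t {
  NewValuePos = 27, NewValueMask = 0x1,
  AddrModePos = 41, AddrModeMask = 0x7,
};
} // end namespace DemoII

// Decode one field the same way the predicates above do.
static uint64_t field(uint64_t TSFlags, uint64_t Pos, uint64_t Mask) {
  return (TSFlags >> Pos) & Mask;
}

int main() {
  // Encode an instruction description: new-value bit set, address mode 3.
  uint64_t F = (uint64_t{1} << DemoII::NewValuePos) |
               (uint64_t{3} << DemoII::AddrModePos);
  assert(field(F, DemoII::NewValuePos, DemoII::NewValueMask) == 1);
  assert(field(F, DemoII::AddrModePos, DemoII::AddrModeMask) == 3);
  return 0;
}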
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.h b/lib/Target/Hexagon/HexagonInstrInfo.h
index 2d184d1484e9..2358d4b7e4c0 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.h
+++ b/lib/Target/Hexagon/HexagonInstrInfo.h
@@ -16,9 +16,14 @@
#include "HexagonRegisterInfo.h"
#include "MCTargetDesc/HexagonBaseInfo.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
-#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include <cstdint>
+#include <vector>
#define GET_INSTRINFO_HEADER
#include "HexagonGenInstrInfo.inc"
@@ -29,9 +34,10 @@ struct EVT;
class HexagonSubtarget;
class HexagonInstrInfo : public HexagonGenInstrInfo {
- virtual void anchor();
const HexagonRegisterInfo RI;
+ virtual void anchor();
+
public:
explicit HexagonInstrInfo(HexagonSubtarget &ST);
@@ -260,7 +266,7 @@ public:
/// PredCost.
unsigned getInstrLatency(const InstrItineraryData *ItinData,
const MachineInstr &MI,
- unsigned *PredCost = 0) const override;
+ unsigned *PredCost = nullptr) const override;
/// Create machine specific model for scheduling.
DFAPacketizer *
@@ -378,7 +384,6 @@ public:
bool PredOpcodeHasJMP_c(unsigned Opcode) const;
bool predOpcodeHasNot(ArrayRef<MachineOperand> Cond) const;
-
short getAbsoluteForm(const MachineInstr &MI) const;
unsigned getAddrMode(const MachineInstr &MI) const;
unsigned getBaseAndOffset(const MachineInstr &MI, int &Offset,
@@ -421,13 +426,11 @@ public:
unsigned getUnits(const MachineInstr &MI) const;
unsigned getValidSubTargets(const unsigned Opcode) const;
-
/// getInstrTimingClassLatency - Compute the instruction latency of a given
/// instruction using Timing Class information, if available.
unsigned nonDbgBBSize(const MachineBasicBlock *BB) const;
unsigned nonDbgBundleSize(MachineBasicBlock::const_iterator BundleHead) const;
-
void immediateExtend(MachineInstr &MI) const;
bool invertAndChangeJumpTarget(MachineInstr &MI,
MachineBasicBlock* NewTarget) const;
@@ -438,6 +441,6 @@ public:
short xformRegToImmOffset(const MachineInstr &MI) const;
};
-}
+} // end namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_HEXAGON_HEXAGONINSTRINFO_H
diff --git a/lib/Target/Hexagon/HexagonMachineFunctionInfo.h b/lib/Target/Hexagon/HexagonMachineFunctionInfo.h
index 371b52108b9b..d83bcbc41553 100644
--- a/lib/Target/Hexagon/HexagonMachineFunctionInfo.h
+++ b/lib/Target/Hexagon/HexagonMachineFunctionInfo.h
@@ -15,33 +15,31 @@
namespace llvm {
- namespace Hexagon {
+namespace Hexagon {
+
const unsigned int StartPacket = 0x1;
const unsigned int EndPacket = 0x2;
- }
+} // end namespace Hexagon
/// Hexagon target-specific information for each MachineFunction.
class HexagonMachineFunctionInfo : public MachineFunctionInfo {
// SRetReturnReg - Some subtargets require that sret lowering includes
// returning the value of the returned struct in a register. This field
// holds the virtual register into which the sret argument is passed.
- unsigned SRetReturnReg;
- unsigned StackAlignBaseVReg; // Aligned-stack base register (virtual)
- unsigned StackAlignBasePhysReg; // (physical)
+ unsigned SRetReturnReg = 0;
+ unsigned StackAlignBaseVReg = 0; // Aligned-stack base register (virtual)
+ unsigned StackAlignBasePhysReg = 0; // (physical)
int VarArgsFrameIndex;
- bool HasClobberLR;
- bool HasEHReturn;
+ bool HasClobberLR = false;
+ bool HasEHReturn = false;
std::map<const MachineInstr*, unsigned> PacketInfo;
virtual void anchor();
public:
- HexagonMachineFunctionInfo() : SRetReturnReg(0), StackAlignBaseVReg(0),
- StackAlignBasePhysReg(0), HasClobberLR(0), HasEHReturn(false) {}
+ HexagonMachineFunctionInfo() = default;
- HexagonMachineFunctionInfo(MachineFunction &MF) : SRetReturnReg(0),
- StackAlignBaseVReg(0), StackAlignBasePhysReg(0), HasClobberLR(0),
- HasEHReturn(false) {}
+ HexagonMachineFunctionInfo(MachineFunction &MF) {}
unsigned getSRetReturnReg() const { return SRetReturnReg; }
void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; }
@@ -75,6 +73,7 @@ public:
void setStackAlignBasePhysReg(unsigned R) { StackAlignBasePhysReg = R; }
unsigned getStackAlignBasePhysReg() const { return StackAlignBasePhysReg; }
};
-} // End llvm namespace
-#endif
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_HEXAGON_HEXAGONMACHINEFUNCTIONINFO_H
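
The hunk above is a mechanical C++11 cleanup: default member initializers move the zero/false defaults onto the declarations, so both constructors stop repeating an identical init-list and the default constructor becomes = default. A small sketch of the pattern, with illustrative names:

#include <cassert>

class FunctionInfoSketch {
  // Defaults live with the declarations instead of in every constructor.
  unsigned SRetReturnReg = 0;
  bool HasEHReturn = false;

public:
  FunctionInfoSketch() = default;

  unsigned getSRetReturnReg() const { return SRetReturnReg; }
  void setSRetReturnReg(unsigned R) { SRetReturnReg = R; }
  bool hasEHReturn() const { return HasEHReturn; }
};

int main() {
  FunctionInfoSketch FI;
  assert(FI.getSRetReturnReg() == 0 && !FI.hasEHReturn());
  FI.setSRetReturnReg(5);
  assert(FI.getSRetReturnReg() == 5);
  return 0;
}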
diff --git a/lib/Target/Hexagon/HexagonTargetObjectFile.cpp b/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
index e902f600e881..c9c4f95dbaaa 100644
--- a/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
+++ b/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
@@ -10,17 +10,27 @@
// This file contains the declarations of the HexagonTargetAsmInfo properties.
//
//===----------------------------------------------------------------------===//
+
#define DEBUG_TYPE "hexagon-sdata"
-#include "HexagonTargetMachine.h"
#include "HexagonTargetObjectFile.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalObject.h"
+#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Type.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/SectionKind.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/ELF.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
using namespace llvm;
@@ -44,13 +54,21 @@ static cl::opt<bool> TraceGVPlacement("trace-gv-placement",
// (e.g. -debug and -debug-only=globallayout)
#define TRACE_TO(s, X) s << X
#ifdef NDEBUG
-#define TRACE(X) do { if (TraceGVPlacement) { TRACE_TO(errs(), X); } } while (0)
+#define TRACE(X) \
+ do { \
+ if (TraceGVPlacement) { \
+ TRACE_TO(errs(), X); \
+ } \
+ } while (false)
#else
-#define TRACE(X) \
- do { \
- if (TraceGVPlacement) { TRACE_TO(errs(), X); } \
- else { DEBUG( TRACE_TO(dbgs(), X) ); } \
- } while (0)
+#define TRACE(X) \
+ do { \
+ if (TraceGVPlacement) { \
+ TRACE_TO(errs(), X); \
+ } else { \
+ DEBUG(TRACE_TO(dbgs(), X)); \
+ } \
+ } while (false)
#endif
// Returns true if the section name is such that the symbol will be put
@@ -69,7 +87,6 @@ static bool isSmallDataSection(StringRef Sec) {
Sec.find(".scommon.") != StringRef::npos;
}
-
static const char *getSectionSuffixForSize(unsigned Size) {
switch (Size) {
default:
@@ -163,7 +180,6 @@ MCSection *HexagonTargetObjectFile::getExplicitSectionGlobal(
return TargetLoweringObjectFileELF::getExplicitSectionGlobal(GO, Kind, TM);
}
-
/// Return true if this global value should be placed into small data/bss
/// section.
bool HexagonTargetObjectFile::isGlobalInSmallSection(const GlobalObject *GO,
@@ -232,17 +248,14 @@ bool HexagonTargetObjectFile::isGlobalInSmallSection(const GlobalObject *GO,
return true;
}
-
bool HexagonTargetObjectFile::isSmallDataEnabled() const {
return SmallDataThreshold > 0;
}
-
unsigned HexagonTargetObjectFile::getSmallDataSize() const {
return SmallDataThreshold;
}
-
/// Descends any type down to "elementary" components,
/// discovering the smallest addressable one.
/// If zero is returned, the declaration will not be modified.
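
The TRACE rework earlier in this file reformats the macro but deliberately keeps the do { ... } while (false) wrapper. That wrapper is what makes a multi-statement macro behave as a single statement, so it can sit in an unbraced if/else without breaking the pairing. A self-contained illustration, with TraceEnabled standing in for the real TraceGVPlacement flag:

#include <iostream>

static bool TraceEnabled = true;

// Without the do/while, the expansion's braces plus the caller's trailing
// semicolon would detach a following 'else' from its 'if'.
#define TRACE_DEMO(X)                                                          \
  do {                                                                         \
    if (TraceEnabled) {                                                        \
      std::cerr << X;                                                          \
    }                                                                          \
  } while (false)

int main(int argc, char **) {
  if (argc > 1)
    TRACE_DEMO("have args\n");
  else
    TRACE_DEMO("no args\n");
  return 0;
}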
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp
index 5feaffe6efb9..9a09a17767a6 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp
@@ -1,5 +1,4 @@
-
-//=== HexagonMCCompound.cpp - Hexagon Compound checker -------===//
+//=== HexagonMCCompound.cpp - Hexagon Compound checker -------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,18 +10,17 @@
// This file looks at a packet and tries to form compound insns
//
//===----------------------------------------------------------------------===//
+
#include "Hexagon.h"
#include "MCTargetDesc/HexagonBaseInfo.h"
-#include "MCTargetDesc/HexagonMCShuffler.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/MC/MCAssembler.h"
+#include "MCTargetDesc/HexagonMCInstrInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCSectionELF.h"
-#include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <cstdint>
using namespace llvm;
using namespace Hexagon;
@@ -79,8 +77,7 @@ static const unsigned cmpgtn1BitOpcode[8] = {
};
// enum HexagonII::CompoundGroup
-namespace {
-unsigned getCompoundCandidateGroup(MCInst const &MI, bool IsExtended) {
+static unsigned getCompoundCandidateGroup(MCInst const &MI, bool IsExtended) {
unsigned DstReg, SrcReg, Src1Reg, Src2Reg;
switch (MI.getOpcode()) {
@@ -173,11 +170,9 @@ unsigned getCompoundCandidateGroup(MCInst const &MI, bool IsExtended) {
return HexagonII::HCG_None;
}
-}
/// getCompoundOp - Return the index from 0-7 into the above opcode lists.
-namespace {
-unsigned getCompoundOp(MCInst const &HMCI) {
+static unsigned getCompoundOp(MCInst const &HMCI) {
const MCOperand &Predicate = HMCI.getOperand(0);
unsigned PredReg = Predicate.getReg();
@@ -198,11 +193,10 @@ unsigned getCompoundOp(MCInst const &HMCI) {
return (PredReg == Hexagon::P0) ? tp0_jump_t : tp1_jump_t;
}
}
-}
-namespace {
-MCInst *getCompoundInsn(MCContext &Context, MCInst const &L, MCInst const &R) {
- MCInst *CompoundInsn = 0;
+static MCInst *getCompoundInsn(MCContext &Context, MCInst const &L,
+ MCInst const &R) {
+ MCInst *CompoundInsn = nullptr;
unsigned compoundOpcode;
MCOperand Rs, Rt;
int64_t Value;
@@ -336,12 +330,10 @@ MCInst *getCompoundInsn(MCContext &Context, MCInst const &L, MCInst const &R) {
return CompoundInsn;
}
-}
/// Non-symmetrical. See if these two instructions are fit for a compound pair.
-namespace {
-bool isOrderedCompoundPair(MCInst const &MIa, bool IsExtendedA,
- MCInst const &MIb, bool IsExtendedB) {
+static bool isOrderedCompoundPair(MCInst const &MIa, bool IsExtendedA,
+ MCInst const &MIb, bool IsExtendedB) {
unsigned MIaG = getCompoundCandidateGroup(MIa, IsExtendedA);
unsigned MIbG = getCompoundCandidateGroup(MIb, IsExtendedB);
// We have two candidates - check that this is the same register
@@ -353,10 +345,9 @@ bool isOrderedCompoundPair(MCInst const &MIa, bool IsExtendedA,
return ((MIaG == HexagonII::HCG_A && MIbG == HexagonII::HCG_B) &&
(MIa.getOperand(0).getReg() == MIb.getOperand(0).getReg()));
}
-}
-namespace {
-bool lookForCompound(MCInstrInfo const &MCII, MCContext &Context, MCInst &MCI) {
+static bool lookForCompound(MCInstrInfo const &MCII, MCContext &Context,
+ MCInst &MCI) {
assert(HexagonMCInstrInfo::isBundle(MCI));
bool JExtended = false;
for (MCInst::iterator J =
@@ -367,8 +358,7 @@ bool lookForCompound(MCInstrInfo const &MCII, MCContext &Context, MCInst &MCI) {
JExtended = true;
continue;
}
- if (llvm::HexagonMCInstrInfo::getType(MCII, *JumpInst) ==
- HexagonII::TypeJ) {
+ if (HexagonMCInstrInfo::getType(MCII, *JumpInst) == HexagonII::TypeJ) {
// Try to pair with another insn (B)undled with jump.
bool BExtended = false;
for (MCInst::iterator B =
@@ -401,7 +391,6 @@ bool lookForCompound(MCInstrInfo const &MCII, MCContext &Context, MCInst &MCI) {
}
return false;
}
-}
/// tryCompound - Given a bundle, check for compound insns; when one
/// is found, update the contents of the bundle with the compound insn.
@@ -420,6 +409,4 @@ void HexagonMCInstrInfo::tryCompound(MCInstrInfo const &MCII,
// a compound is found.
while (lookForCompound(MCII, Context, MCI))
;
-
- return;
}
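
The recurring change in this file replaces single-function anonymous namespaces with static, matching the LLVM coding standard: anonymous namespaces are reserved for type declarations, while file-local functions get internal linkage from static, which keeps the file-local nature visible at the definition itself. A minimal sketch of the transformation (getGroup is a stand-in name):

// Before:
//   namespace {
//   unsigned getGroup(int Opcode) { return Opcode & 7; }
//   } // end anonymous namespace
//
// After:
static unsigned getGroup(int Opcode) { return Opcode & 7; }

int main() { return getGroup(9) == 1 ? 0 : 1; }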
diff --git a/lib/Target/Hexagon/RDFCopy.h b/lib/Target/Hexagon/RDFCopy.h
index 517f17cc9c64..5ece11bd5ce4 100644
--- a/lib/Target/Hexagon/RDFCopy.h
+++ b/lib/Target/Hexagon/RDFCopy.h
@@ -1,4 +1,4 @@
-//===--- RDFCopy.h --------------------------------------------------------===//
+//===--- RDFCopy.h ----------------------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,23 +7,26 @@
//
//===----------------------------------------------------------------------===//
-#ifndef RDF_COPY_H
-#define RDF_COPY_H
+#ifndef LLVM_LIB_TARGET_HEXAGON_RDFCOPY_H
+#define LLVM_LIB_TARGET_HEXAGON_RDFCOPY_H
#include "RDFGraph.h"
#include <map>
#include <vector>
namespace llvm {
+
class MachineBasicBlock;
class MachineDominatorTree;
class MachineInstr;
namespace rdf {
+
struct CopyPropagation {
CopyPropagation(DataFlowGraph &dfg) : MDT(dfg.getDT()), DFG(dfg),
Trace(false) {}
- virtual ~CopyPropagation() {}
+
+ virtual ~CopyPropagation() = default;
bool run();
void trace(bool On) { Trace = On; }
@@ -49,7 +52,9 @@ namespace rdf {
void updateMap(NodeAddr<InstrNode*> IA);
bool scanBlock(MachineBasicBlock *B);
};
-} // namespace rdf
-} // namespace llvm
-#endif
+} // end namespace rdf
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_HEXAGON_RDFCOPY_H
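
For context on the CopyPropagation interface above: copy propagation records which registers currently hold copies of other registers, then resolves later uses through that record. A toy, self-contained version of the idea only; the real pass works over the RDF graph under the dominator tree, not a flat map:

#include <cassert>
#include <map>

using Reg = unsigned;

struct ToyCopyProp {
  std::map<Reg, Reg> CopyOf; // Dst -> Src for copies seen so far.

  void recordCopy(Reg Dst, Reg Src) { CopyOf[Dst] = Src; }

  // Chase the copy chain back to the original source.
  Reg resolve(Reg R) const {
    auto It = CopyOf.find(R);
    return It == CopyOf.end() ? R : resolve(It->second);
  }
};

int main() {
  ToyCopyProp CP;
  CP.recordCopy(2, 1); // r2 = r1
  CP.recordCopy(3, 2); // r3 = r2
  assert(CP.resolve(3) == 1); // a use of r3 can be rewritten to read r1
  return 0;
}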
diff --git a/lib/Target/Hexagon/RDFGraph.cpp b/lib/Target/Hexagon/RDFGraph.cpp
index 33c3f03790f3..fa272ea1a76a 100644
--- a/lib/Target/Hexagon/RDFGraph.cpp
+++ b/lib/Target/Hexagon/RDFGraph.cpp
@@ -10,16 +10,31 @@
// Target-independent, SSA-based data flow graph for register data flow (RDF).
//
#include "RDFGraph.h"
-
#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominanceFrontier.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/MC/LaneBitmask.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <cstring>
+#include <iterator>
+#include <utility>
+#include <vector>
using namespace llvm;
using namespace rdf;
@@ -88,14 +103,12 @@ raw_ostream &operator<< (raw_ostream &OS, const Print<NodeId> &P) {
return OS;
}
-namespace {
- void printRefHeader(raw_ostream &OS, const NodeAddr<RefNode*> RA,
- const DataFlowGraph &G) {
- OS << Print<NodeId>(RA.Id, G) << '<'
- << Print<RegisterRef>(RA.Addr->getRegRef(G), G) << '>';
- if (RA.Addr->getFlags() & NodeAttrs::Fixed)
- OS << '!';
- }
+static void printRefHeader(raw_ostream &OS, const NodeAddr<RefNode*> RA,
+ const DataFlowGraph &G) {
+ OS << Print<NodeId>(RA.Id, G) << '<'
+ << Print<RegisterRef>(RA.Addr->getRegRef(G), G) << '>';
+ if (RA.Addr->getFlags() & NodeAttrs::Fixed)
+ OS << '!';
}
template<>
@@ -183,9 +196,11 @@ raw_ostream &operator<< (raw_ostream &OS, const Print<NodeSet> &P) {
}
namespace {
+
template <typename T>
struct PrintListV {
PrintListV(const NodeList &L, const DataFlowGraph &G) : List(L), G(G) {}
+
typedef T Type;
const NodeList &List;
const DataFlowGraph &G;
@@ -201,7 +216,8 @@ namespace {
}
return OS;
}
-}
+
+} // end anonymous namespace
template<>
raw_ostream &operator<< (raw_ostream &OS, const Print<NodeAddr<PhiNode*>> &P) {
@@ -219,10 +235,10 @@ raw_ostream &operator<< (raw_ostream &OS,
// Print the target for calls and branches (for readability).
if (MI.isCall() || MI.isBranch()) {
MachineInstr::const_mop_iterator T =
- find_if(MI.operands(),
- [] (const MachineOperand &Op) -> bool {
- return Op.isMBB() || Op.isGlobal() || Op.isSymbol();
- });
+ llvm::find_if(MI.operands(),
+ [] (const MachineOperand &Op) -> bool {
+ return Op.isMBB() || Op.isGlobal() || Op.isSymbol();
+ });
if (T != MI.operands_end()) {
OS << ' ';
if (T->isMBB())
@@ -327,8 +343,8 @@ raw_ostream &operator<< (raw_ostream &OS,
return OS;
}
-} // namespace rdf
-} // namespace llvm
+} // end namespace rdf
+} // end namespace llvm
// Node allocation functions.
//
@@ -390,7 +406,6 @@ void NodeAllocator::clear() {
ActiveEnd = nullptr;
}
-
// Insert node NA after "this" in the circular chain.
void NodeBase::append(NodeAddr<NodeBase*> NA) {
NodeId Nx = Next;
@@ -401,7 +416,6 @@ void NodeBase::append(NodeAddr<NodeBase*> NA) {
}
}
-
// Fundamental node manipulator functions.
// Obtain the register reference from a reference node.
@@ -590,7 +604,6 @@ NodeAddr<BlockNode*> FuncNode::getEntryBlock(const DataFlowGraph &G) {
return findBlock(EntryB, G);
}
-
// Target operand information.
//
@@ -641,7 +654,6 @@ bool TargetOperandInfo::isFixedReg(const MachineInstr &In, unsigned OpNum)
return false;
}
-
RegisterRef RegisterAggr::normalize(RegisterRef RR) const {
RegisterId SuperReg = RR.Reg;
while (true) {
@@ -745,7 +757,6 @@ void RegisterAggr::print(raw_ostream &OS) const {
OS << " }";
}
-
//
// The data flow graph construction.
//
@@ -753,10 +764,9 @@ void RegisterAggr::print(raw_ostream &OS) const {
DataFlowGraph::DataFlowGraph(MachineFunction &mf, const TargetInstrInfo &tii,
const TargetRegisterInfo &tri, const MachineDominatorTree &mdt,
const MachineDominanceFrontier &mdf, const TargetOperandInfo &toi)
- : LMI(), MF(mf), TII(tii), TRI(tri), MDT(mdt), MDF(mdf), TOI(toi) {
+ : MF(mf), TII(tii), TRI(tri), MDT(mdt), MDF(mdf), TOI(toi) {
}
-
// The implementation of the definition stack.
// Each register reference has its own definition stack. In particular,
// for a register references "Reg" and "Reg:subreg" will each have their
@@ -845,7 +855,6 @@ unsigned DataFlowGraph::DefStack::nextDown(unsigned P) const {
return P;
}
-
// Register information.
// Get the list of references aliased to RR. Lane masks are ignored.
@@ -915,7 +924,6 @@ NodeAddr<NodeBase*> DataFlowGraph::cloneNode(const NodeAddr<NodeBase*> B) {
return NA;
}
-
// Allocation routines for specific node types/kinds.
NodeAddr<UseNode*> DataFlowGraph::newUse(NodeAddr<InstrNode*> Owner,
@@ -1248,7 +1256,6 @@ bool DataFlowGraph::alias(RegisterRef RA, RegisterRef RB) const {
return false;
}
-
// Clear all information in the graph.
void DataFlowGraph::reset() {
Memory.clear();
@@ -1256,7 +1263,6 @@ void DataFlowGraph::reset() {
Func = NodeAddr<FuncNode*>();
}
-
// Return the next reference node in the instruction node IA that is related
// to RA. Conceptually, two reference nodes are related if they refer to the
// same instance of a register access, but differ in flags or other minor
diff --git a/lib/Target/Hexagon/RDFGraph.h b/lib/Target/Hexagon/RDFGraph.h
index 871062ff2b05..49d78a8b22b5 100644
--- a/lib/Target/Hexagon/RDFGraph.h
+++ b/lib/Target/Hexagon/RDFGraph.h
@@ -1,4 +1,4 @@
-//===--- RDFGraph.h -------------------------------------------------------===//
+//===--- RDFGraph.h ---------------------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -221,20 +221,25 @@
// The statement s5 has two use nodes for t0: u7" and u9". The quotation
// mark " indicates that the node is a shadow.
//
-#ifndef RDF_GRAPH_H
-#define RDF_GRAPH_H
+
+#ifndef LLVM_LIB_TARGET_HEXAGON_RDFGRAPH_H
+#define LLVM_LIB_TARGET_HEXAGON_RDFGRAPH_H
#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/MC/LaneBitmask.h"
#include "llvm/Support/Allocator.h"
-#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/Timer.h"
#include "llvm/Target/TargetRegisterInfo.h"
-
+#include <cassert>
+#include <cstdint>
+#include <cstring>
#include <functional>
#include <map>
#include <set>
#include <unordered_map>
+#include <utility>
#include <vector>
// RDF uses uint32_t to refer to registers. This is to ensure that the type
@@ -243,6 +248,7 @@
static_assert(sizeof(uint32_t) == sizeof(unsigned), "Those should be equal");
namespace llvm {
+
class MachineBasicBlock;
class MachineFunction;
class MachineInstr;
@@ -252,6 +258,7 @@ namespace llvm {
class TargetInstrInfo;
namespace rdf {
+
typedef uint32_t NodeId;
typedef uint32_t RegisterId;
@@ -293,9 +300,11 @@ namespace rdf {
static uint16_t set_type(uint16_t A, uint16_t T) {
return (A & ~TypeMask) | T;
}
+
static uint16_t set_kind(uint16_t A, uint16_t K) {
return (A & ~KindMask) | K;
}
+
static uint16_t set_flags(uint16_t A, uint16_t F) {
return (A & ~FlagMask) | F;
}
@@ -326,9 +335,14 @@ namespace rdf {
};
template <typename T> struct NodeAddr {
- NodeAddr() : Addr(nullptr), Id(0) {}
+ NodeAddr() : Addr(nullptr) {}
NodeAddr(T A, NodeId I) : Addr(A), Id(I) {}
+ // Type cast (casting constructor). This is the reason for having this
+ // class instead of std::pair.
+ template <typename S> NodeAddr(const NodeAddr<S> &NA)
+ : Addr(static_cast<T>(NA.Addr)), Id(NA.Id) {}
+
bool operator== (const NodeAddr<T> &NA) const {
assert((Addr == NA.Addr) == (Id == NA.Id));
return Addr == NA.Addr;
@@ -336,13 +350,9 @@ namespace rdf {
bool operator!= (const NodeAddr<T> &NA) const {
return !operator==(NA);
}
- // Type cast (casting constructor). The reason for having this class
- // instead of std::pair.
- template <typename S> NodeAddr(const NodeAddr<S> &NA)
- : Addr(static_cast<T>(NA.Addr)), Id(NA.Id) {}
T Addr;
- NodeId Id;
+ NodeId Id = 0;
};
struct NodeBase;
@@ -366,17 +376,20 @@ namespace rdf {
struct NodeAllocator {
// Amount of storage for a single node.
enum { NodeMemSize = 32 };
+
NodeAllocator(uint32_t NPB = 4096)
: NodesPerBlock(NPB), BitsPerIndex(Log2_32(NPB)),
- IndexMask((1 << BitsPerIndex)-1), ActiveEnd(nullptr) {
+ IndexMask((1 << BitsPerIndex)-1) {
assert(isPowerOf2_32(NPB));
}
+
NodeBase *ptr(NodeId N) const {
uint32_t N1 = N-1;
uint32_t BlockN = N1 >> BitsPerIndex;
uint32_t Offset = (N1 & IndexMask) * NodeMemSize;
return reinterpret_cast<NodeBase*>(Blocks[BlockN]+Offset);
}
+
NodeId id(const NodeBase *P) const;
NodeAddr<NodeBase*> New();
void clear();
@@ -384,6 +397,7 @@ namespace rdf {
private:
void startNewBlock();
bool needNewBlock();
+
uint32_t makeId(uint32_t Block, uint32_t Index) const {
// Add 1 to the id, to avoid the id of 0, which is treated as "null".
return ((Block << BitsPerIndex) | Index) + 1;
@@ -392,7 +406,7 @@ namespace rdf {
const uint32_t NodesPerBlock;
const uint32_t BitsPerIndex;
const uint32_t IndexMask;
- char *ActiveEnd;
+ char *ActiveEnd = nullptr;
std::vector<char*> Blocks;
typedef BumpPtrAllocatorImpl<MallocAllocator, 65536> AllocatorTy;
AllocatorTy MemPool;
@@ -405,6 +419,7 @@ namespace rdf {
RegisterRef() : RegisterRef(0) {}
explicit RegisterRef(RegisterId R, LaneBitmask M = LaneBitmask::getAll())
: Reg(R), Mask(R != 0 ? M : LaneBitmask::getNone()) {}
+
operator bool() const { return Reg != 0 && Mask.any(); }
bool operator== (const RegisterRef &RR) const {
return Reg == RR.Reg && Mask == RR.Mask;
@@ -420,7 +435,8 @@ namespace rdf {
struct TargetOperandInfo {
TargetOperandInfo(const TargetInstrInfo &tii) : TII(tii) {}
- virtual ~TargetOperandInfo() {}
+ virtual ~TargetOperandInfo() = default;
+
virtual bool isPreserving(const MachineInstr &In, unsigned OpNum) const;
virtual bool isClobbering(const MachineInstr &In, unsigned OpNum) const;
virtual bool isFixedReg(const MachineInstr &In, unsigned OpNum) const;
@@ -428,7 +444,6 @@ namespace rdf {
const TargetInstrInfo &TII;
};
-
// Packed register reference. Only used for storage.
struct PackedRegisterRef {
RegisterId Reg;
@@ -442,11 +457,13 @@ namespace rdf {
template <typename T, unsigned N = 32>
struct IndexedSet {
IndexedSet() : Map() { Map.reserve(N); }
+
T get(uint32_t Idx) const {
// Index Idx corresponds to Map[Idx-1].
assert(Idx != 0 && !Map.empty() && Idx-1 < Map.size());
return Map[Idx-1];
}
+
uint32_t insert(T Val) {
// Linear search.
auto F = llvm::find(Map, Val);
@@ -455,11 +472,13 @@ namespace rdf {
Map.push_back(Val);
return Map.size(); // Return actual_index + 1.
}
+
uint32_t find(T Val) const {
auto F = llvm::find(Map, Val);
assert(F != Map.end());
return F - Map.begin();
}
+
private:
std::vector<T> Map;
};
@@ -478,12 +497,14 @@ namespace rdf {
assert(LM.any());
return LM.all() ? 0 : find(LM);
}
+
PackedRegisterRef pack(RegisterRef RR) {
return { RR.Reg, getIndexForLaneMask(RR.Mask) };
}
PackedRegisterRef pack(RegisterRef RR) const {
return { RR.Reg, getIndexForLaneMask(RR.Mask) };
}
+
RegisterRef unpack(PackedRegisterRef PR) const {
return RegisterRef(PR.Reg, getLaneMaskForIndex(PR.MaskId));
}
@@ -491,11 +512,8 @@ namespace rdf {
struct RegisterAggr {
RegisterAggr(const TargetRegisterInfo &tri)
- : Masks(), ExpAliasUnits(tri.getNumRegUnits()), CheckUnits(false),
- TRI(tri) {}
- RegisterAggr(const RegisterAggr &RG)
- : Masks(RG.Masks), ExpAliasUnits(RG.ExpAliasUnits),
- CheckUnits(RG.CheckUnits), TRI(RG.TRI) {}
+ : ExpAliasUnits(tri.getNumRegUnits()), CheckUnits(false), TRI(tri) {}
+ RegisterAggr(const RegisterAggr &RG) = default;
bool empty() const { return Masks.empty(); }
bool hasAliasOf(RegisterRef RR) const;
@@ -530,11 +548,11 @@ namespace rdf {
const TargetRegisterInfo &TRI;
};
-
struct NodeBase {
public:
// Make sure this is a POD.
NodeBase() = default;
+
uint16_t getType() const { return NodeAttrs::type(Attrs); }
uint16_t getKind() const { return NodeAttrs::kind(Attrs); }
uint16_t getFlags() const { return NodeAttrs::flags(Attrs); }
@@ -596,29 +614,36 @@ namespace rdf {
struct RefNode : public NodeBase {
RefNode() = default;
+
RegisterRef getRegRef(const DataFlowGraph &G) const;
+
MachineOperand &getOp() {
assert(!(getFlags() & NodeAttrs::PhiRef));
return *Ref.Op;
}
+
void setRegRef(RegisterRef RR, DataFlowGraph &G);
void setRegRef(MachineOperand *Op, DataFlowGraph &G);
+
NodeId getReachingDef() const {
return Ref.RD;
}
void setReachingDef(NodeId RD) {
Ref.RD = RD;
}
+
NodeId getSibling() const {
return Ref.Sib;
}
void setSibling(NodeId Sib) {
Ref.Sib = Sib;
}
+
bool isUse() const {
assert(getType() == NodeAttrs::Ref);
return getKind() == NodeAttrs::Use;
}
+
bool isDef() const {
assert(getType() == NodeAttrs::Ref);
return getKind() == NodeAttrs::Def;
@@ -702,6 +727,7 @@ namespace rdf {
MachineBasicBlock *getCode() const {
return CodeNode::getCode<MachineBasicBlock*>();
}
+
void addPhi(NodeAddr<PhiNode*> PA, const DataFlowGraph &G);
};
@@ -709,6 +735,7 @@ namespace rdf {
MachineFunction *getCode() const {
return CodeNode::getCode<MachineFunction*>();
}
+
NodeAddr<BlockNode*> findBlock(const MachineBasicBlock *BB,
const DataFlowGraph &G) const;
NodeAddr<BlockNode*> getEntryBlock(const DataFlowGraph &G);
@@ -723,6 +750,7 @@ namespace rdf {
template <typename T> T ptr(NodeId N) const {
return static_cast<T>(ptr(N));
}
+
NodeId id(const NodeBase *P) const;
template <typename T> NodeAddr<T> addr(NodeId N) const {
@@ -738,13 +766,17 @@ namespace rdf {
struct DefStack {
DefStack() = default;
+
bool empty() const { return Stack.empty() || top() == bottom(); }
+
private:
typedef NodeAddr<DefNode*> value_type;
struct Iterator {
typedef DefStack::value_type value_type;
+
Iterator &up() { Pos = DS.nextUp(Pos); return *this; }
Iterator &down() { Pos = DS.nextDown(Pos); return *this; }
+
value_type operator*() const {
assert(Pos >= 1);
return DS.Stack[Pos-1];
@@ -755,14 +787,17 @@ namespace rdf {
}
bool operator==(const Iterator &It) const { return Pos == It.Pos; }
bool operator!=(const Iterator &It) const { return Pos != It.Pos; }
+
private:
Iterator(const DefStack &S, bool Top);
+
// Pos-1 is the index in the StorageType object that corresponds to
// the top of the DefStack.
const DefStack &DS;
unsigned Pos;
friend struct DefStack;
};
+
public:
typedef Iterator iterator;
iterator top() const { return Iterator(*this, true); }
@@ -773,14 +808,18 @@ namespace rdf {
void pop();
void start_block(NodeId N);
void clear_block(NodeId N);
+
private:
friend struct Iterator;
typedef std::vector<value_type> StorageType;
+
bool isDelimiter(const StorageType::value_type &P, NodeId N = 0) const {
return (P.Addr == nullptr) && (N == 0 || P.Id == N);
}
+
unsigned nextUp(unsigned P) const;
unsigned nextDown(unsigned P) const;
+
StorageType Stack;
};
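
A sketch of how this delimiter-based DefStack works: start_block pushes a delimiter entry (the Addr == nullptr case that isDelimiter tests), and clear_block pops defs down to, and including, the matching delimiter. This toy uses integer ids in place of NodeAddr values, purely for illustration:

#include <cassert>
#include <utility>
#include <vector>

struct ToyDefStack {
  // first != 0: delimiter for block 'first'; first == 0: def with id 'second'.
  std::vector<std::pair<unsigned, unsigned>> Stack;

  void startBlock(unsigned B) { Stack.emplace_back(B, 0u); }
  void pushDef(unsigned DefId) { Stack.emplace_back(0u, DefId); }

  void clearBlock(unsigned B) {
    while (!Stack.empty() && Stack.back().first != B)
      Stack.pop_back(); // drop defs from the block being left
    if (!Stack.empty())
      Stack.pop_back(); // drop the delimiter itself
  }
};

int main() {
  ToyDefStack DS;
  DS.startBlock(1);
  DS.pushDef(10);
  DS.startBlock(2);
  DS.pushDef(20);
  DS.clearBlock(2); // leaving block 2 exposes block 1's defs again
  assert(DS.Stack.back().second == 10);
  return 0;
}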
@@ -819,6 +858,7 @@ namespace rdf {
if (RemoveFromOwner)
removeFromOwner(UA);
}
+
void unlinkDef(NodeAddr<DefNode*> DA, bool RemoveFromOwner) {
unlinkDefDF(DA);
if (RemoveFromOwner)
@@ -831,23 +871,28 @@ namespace rdf {
return BA.Addr->getType() == NodeAttrs::Ref &&
BA.Addr->getKind() == Kind;
}
+
template <uint16_t Kind>
static bool IsCode(const NodeAddr<NodeBase*> BA) {
return BA.Addr->getType() == NodeAttrs::Code &&
BA.Addr->getKind() == Kind;
}
+
static bool IsDef(const NodeAddr<NodeBase*> BA) {
return BA.Addr->getType() == NodeAttrs::Ref &&
BA.Addr->getKind() == NodeAttrs::Def;
}
+
static bool IsUse(const NodeAddr<NodeBase*> BA) {
return BA.Addr->getType() == NodeAttrs::Ref &&
BA.Addr->getKind() == NodeAttrs::Use;
}
+
static bool IsPhi(const NodeAddr<NodeBase*> BA) {
return BA.Addr->getType() == NodeAttrs::Code &&
BA.Addr->getKind() == NodeAttrs::Phi;
}
+
static bool IsPreservingDef(const NodeAddr<DefNode*> DA) {
uint16_t Flags = DA.Addr->getFlags();
return (Flags & NodeAttrs::Preserving) && !(Flags & NodeAttrs::Undef);
@@ -902,6 +947,7 @@ namespace rdf {
void unlinkUseDF(NodeAddr<UseNode*> UA);
void unlinkDefDF(NodeAddr<DefNode*> DA);
+
void removeFromOwner(NodeAddr<RefNode*> RA) {
NodeAddr<InstrNode*> IA = RA.Addr->getOwner(*this);
IA.Addr->removeMember(RA, *this);
@@ -967,7 +1013,6 @@ namespace rdf {
return MM;
}
-
// Optionally print the lane mask, if it is not ~0.
struct PrintLaneMaskOpt {
PrintLaneMaskOpt(LaneBitmask M) : Mask(M) {}
@@ -991,7 +1036,9 @@ namespace rdf {
PrintNode(const NodeAddr<T> &x, const DataFlowGraph &g)
: Print<NodeAddr<T>>(x, g) {}
};
-} // namespace rdf
-} // namespace llvm
-#endif // RDF_GRAPH_H
+} // end namespace rdf
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_HEXAGON_RDFGRAPH_H
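
The NodeAddr reshuffle above moves the templated "casting constructor" next to the other constructors; that constructor is the stated reason the struct exists instead of a plain std::pair, since it lets a NodeAddr<RefNode*> convert to a NodeAddr<NodeBase*> while carrying the id along. A self-contained sketch with the node types reduced to stubs:

#include <cassert>

struct NodeBase {};
struct RefNode : NodeBase {};

template <typename T> struct NodeAddrSketch {
  NodeAddrSketch() : Addr(nullptr) {}
  NodeAddrSketch(T A, unsigned I) : Addr(A), Id(I) {}

  // Casting constructor, as in the hunk above.
  template <typename S>
  NodeAddrSketch(const NodeAddrSketch<S> &NA)
      : Addr(static_cast<T>(NA.Addr)), Id(NA.Id) {}

  T Addr;
  unsigned Id = 0;
};

int main() {
  RefNode R;
  NodeAddrSketch<RefNode *> RA(&R, 42);
  NodeAddrSketch<NodeBase *> BA = RA; // converts the pointer, keeps the id
  assert(BA.Id == 42 && BA.Addr == &R);
  return 0;
}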
diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
index 6f0fdddd7d55..92d3c001df94 100644
--- a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
@@ -28,6 +28,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Type.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
@@ -43,6 +44,11 @@ bool MipsSEDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
return MipsDAGToDAGISel::runOnMachineFunction(MF);
}
+void MipsSEDAGToDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<DominatorTreeWrapperPass>();
+ SelectionDAGISel::getAnalysisUsage(AU);
+}
+
void MipsSEDAGToDAGISel::addDSPCtrlRegOperands(bool IsDef, MachineInstr &MI,
MachineFunction &MF) {
MachineInstrBuilder MIB(MF, &MI);
diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.h b/lib/Target/Mips/MipsSEISelDAGToDAG.h
index 2a8e5877e848..f89a350cab04 100644
--- a/lib/Target/Mips/MipsSEISelDAGToDAG.h
+++ b/lib/Target/Mips/MipsSEISelDAGToDAG.h
@@ -28,6 +28,8 @@ private:
bool runOnMachineFunction(MachineFunction &MF) override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
void addDSPCtrlRegOperands(bool IsDef, MachineInstr &MI,
MachineFunction &MF);
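
The Mips change wires a new analysis dependency through the legacy pass manager: getAnalysisUsage declares what the pass requires (here the dominator tree) and then chains to the base class so inherited requirements are preserved. A toy analogue of that protocol; the class and analysis names here are illustrative, not LLVM API:

#include <cassert>
#include <set>
#include <string>

struct AnalysisUsageSketch {
  std::set<std::string> Required;
  void addRequired(const std::string &Name) { Required.insert(Name); }
};

struct PassSketch {
  virtual ~PassSketch() = default;
  virtual void getAnalysisUsage(AnalysisUsageSketch &AU) const {}
};

struct ISelLikePass : PassSketch {
  void getAnalysisUsage(AnalysisUsageSketch &AU) const override {
    AU.addRequired("DominatorTree");  // what the hunk above adds
    PassSketch::getAnalysisUsage(AU); // then defer to the base class
  }
};

int main() {
  AnalysisUsageSketch AU;
  ISelLikePass P;
  P.getAnalysisUsage(AU);
  assert(AU.Required.count("DominatorTree") == 1);
  return 0;
}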
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index aa3ffde24b99..2b9195b095e1 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -3981,40 +3981,46 @@ static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
static bool isFunctionGlobalAddress(SDValue Callee);
static bool
-resideInSameModule(SDValue Callee, Reloc::Model RelMod) {
+resideInSameSection(const Function *Caller, SDValue Callee,
+ const TargetMachine &TM) {
// If !G, Callee can be an external symbol.
GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
- if (!G) return false;
+ if (!G)
+ return false;
const GlobalValue *GV = G->getGlobal();
-
- if (GV->isDeclaration()) return false;
-
- switch(GV->getLinkage()) {
- default: llvm_unreachable("unknow linkage type");
- case GlobalValue::AvailableExternallyLinkage:
- case GlobalValue::ExternalWeakLinkage:
+ if (!GV->isStrongDefinitionForLinker())
return false;
- // Callee with weak linkage is allowed if it has hidden or protected
- // visibility
- case GlobalValue::LinkOnceAnyLinkage:
- case GlobalValue::LinkOnceODRLinkage: // e.g. c++ inline functions
- case GlobalValue::WeakAnyLinkage:
- case GlobalValue::WeakODRLinkage: // e.g. c++ template instantiation
- if (GV->hasDefaultVisibility())
+ // Any explicitly-specified sections and section prefixes must also match.
+ // Also, if we're using -ffunction-sections, then each function is always in
+ // a different section (the same is true for COMDAT functions).
+ if (TM.getFunctionSections() || GV->hasComdat() || Caller->hasComdat() ||
+ GV->getSection() != Caller->getSection())
+ return false;
+ if (const auto *F = dyn_cast<Function>(GV)) {
+ if (F->getSectionPrefix() != Caller->getSectionPrefix())
return false;
-
- case GlobalValue::ExternalLinkage:
- case GlobalValue::InternalLinkage:
- case GlobalValue::PrivateLinkage:
- break;
}
- // With '-fPIC', calling default visiblity function need insert 'nop' after
- // function call, no matter that function resides in same module or not, so
- // we treat it as in different module.
- if (RelMod == Reloc::PIC_ && GV->hasDefaultVisibility())
+ // If the callee might be interposed, then we can't assume the ultimate call
+ // target will be in the same section. Even in cases where we can assume that
+ // interposition won't happen, in any case where the linker might insert a
+ // stub to allow for interposition, we must generate code as though
+ // interposition might occur. To understand why this matters, consider a
+ // situation where: a -> b -> c where the arrows indicate calls. b and c are
+ // in the same section, but a is in a different module (i.e. has a different
+ // TOC base pointer). If the linker allows for interposition between b and c,
+ // then it will generate a stub for the call edge between b and c which will
+ // save the TOC pointer into the designated stack slot allocated by b. If we
+ // return true here, and therefore allow a tail call between b and c, that
+ // stack slot won't exist and the b -> c stub will end up saving b's TOC base
+ // pointer into the stack slot allocated by a (where the a -> b stub saved
+ // a's TOC base pointer). If we're not considering a tail call, but rather,
+ // whether a nop is needed after the call instruction in b, because the linker
+ // will insert a stub, it might complain about a missing nop if we omit it
+ // (although many don't complain in this case).
+ if (!TM.shouldAssumeDSOLocal(*Caller->getParent(), GV))
return false;
return true;
@@ -4130,11 +4136,11 @@ PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
!isa<ExternalSymbolSDNode>(Callee))
return false;
- // Check if Callee resides in the same module, because for now, PPC64 SVR4 ABI
- // (ELFv1/ELFv2) doesn't allow tail calls to a symbol resides in another
- // module.
+ // Check if Callee resides in the same section, because for now, PPC64 SVR4
+ // ABI (ELFv1/ELFv2) doesn't allow tail calls to a symbol that resides in
+ // another section.
// ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
- if (!resideInSameModule(Callee, getTargetMachine().getRelocationModel()))
+ if (!resideInSameSection(MF.getFunction(), Callee, getTargetMachine()))
return false;
// TCO allows altering callee ABI, so we don't have to check further.
@@ -4592,14 +4598,6 @@ PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain,
return CallOpc;
}
-static
-bool isLocalCall(const SDValue &Callee)
-{
- if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
- return G->getGlobal()->isStrongDefinitionForLinker();
- return false;
-}
-
SDValue PPCTargetLowering::LowerCallResult(
SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
@@ -4701,6 +4699,7 @@ SDValue PPCTargetLowering::FinishCall(
// stack frame. If caller and callee belong to the same module (and have the
// same TOC), the NOP will remain unchanged.
+ MachineFunction &MF = DAG.getMachineFunction();
if (!isTailCall && Subtarget.isSVR4ABI() && Subtarget.isPPC64() &&
!isPatchPoint) {
if (CallOpc == PPCISD::BCTRL) {
@@ -4724,11 +4723,11 @@ SDValue PPCTargetLowering::FinishCall(
// The address needs to go after the chain input but before the flag (or
// any other variadic arguments).
Ops.insert(std::next(Ops.begin()), AddTOC);
- } else if ((CallOpc == PPCISD::CALL) &&
- (!isLocalCall(Callee) ||
- DAG.getTarget().getRelocationModel() == Reloc::PIC_))
+ } else if (CallOpc == PPCISD::CALL &&
+ !resideInSameSection(MF.getFunction(), Callee, DAG.getTarget())) {
// Otherwise insert NOP for non-local calls.
CallOpc = PPCISD::CALL_NOP;
+ }
}
Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
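
The resideInSameSection predicate introduced above can be restated compactly: a tail call (or a nop-less local call) is only safe when the callee is a strong, dso-local definition landing in exactly the caller's section. A toy restatement over a plain descriptor; field names are illustrative, the real checks consult GlobalValue and TargetMachine:

#include <cassert>
#include <string>

struct FuncDesc {
  bool StrongDef = true;  // isStrongDefinitionForLinker()
  bool HasComdat = false; // COMDAT functions get their own sections
  bool DSOLocal = true;   // shouldAssumeDSOLocal(...): no interposition
  std::string Section, SectionPrefix;
};

static bool resideInSameSectionSketch(const FuncDesc &Caller,
                                      const FuncDesc &Callee,
                                      bool FunctionSections) {
  if (!Callee.StrongDef)
    return false;
  if (FunctionSections || Callee.HasComdat || Caller.HasComdat ||
      Callee.Section != Caller.Section ||
      Callee.SectionPrefix != Caller.SectionPrefix)
    return false;
  return Callee.DSOLocal; // interposable callees defeat the assumption
}

int main() {
  FuncDesc A, B;
  assert(resideInSameSectionSketch(A, B, /*FunctionSections=*/false));
  assert(!resideInSameSectionSketch(A, B, true)); // one section per function
  B.Section = ".text.hot";
  assert(!resideInSameSectionSketch(A, B, false));
  return 0;
}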
diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp
index d42e1187ce64..e1825ca1eda1 100644
--- a/lib/Target/X86/X86AsmPrinter.cpp
+++ b/lib/Target/X86/X86AsmPrinter.cpp
@@ -70,7 +70,7 @@ bool X86AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
EmitFunctionBody();
// Emit the XRay table for this function.
- EmitXRayTable();
+ emitXRayTable();
// We didn't modify anything.
return false;
diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp
index 1deefe1231ca..cd690442bb9f 100644
--- a/lib/Target/X86/X86FrameLowering.cpp
+++ b/lib/Target/X86/X86FrameLowering.cpp
@@ -373,6 +373,10 @@ int X86FrameLowering::mergeSPUpdates(MachineBasicBlock &MBB,
MachineBasicBlock::iterator PI = doMergeWithPrevious ? std::prev(MBBI) : MBBI;
MachineBasicBlock::iterator NI = doMergeWithPrevious ? nullptr
: std::next(MBBI);
+ PI = skipDebugInstructionsBackward(PI, MBB.begin());
+ if (NI != nullptr)
+ NI = skipDebugInstructionsForward(NI, MBB.end());
+
unsigned Opc = PI->getOpcode();
int Offset = 0;
@@ -2586,6 +2590,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
uint64_t Amount = !reserveCallFrame ? I->getOperand(0).getImm() : 0;
uint64_t InternalAmt = (isDestroy || Amount) ? I->getOperand(1).getImm() : 0;
I = MBB.erase(I);
+ auto InsertPos = skipDebugInstructionsForward(I, MBB.end());
if (!reserveCallFrame) {
// If the stack pointer can be changed after prologue, turn the
@@ -2615,7 +2620,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
if (HasDwarfEHHandlers && !isDestroy &&
MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences())
- BuildCFI(MBB, I, DL,
+ BuildCFI(MBB, InsertPos, DL,
MCCFIInstruction::createGnuArgsSize(nullptr, Amount));
if (Amount == 0)
@@ -2629,7 +2634,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
// If this is a callee-pop calling convention, emit a CFA adjust for
// the amount the callee popped.
if (isDestroy && InternalAmt && DwarfCFI && !hasFP(MF))
- BuildCFI(MBB, I, DL,
+ BuildCFI(MBB, InsertPos, DL,
MCCFIInstruction::createAdjustCfaOffset(nullptr, -InternalAmt));
// Add Amount to SP to destroy a frame, or subtract to setup.
@@ -2640,13 +2645,13 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
// Merge with any previous or following adjustment instruction. Note: the
// instructions merged with here do not have CFI, so their stack
// adjustments do not feed into CfaAdjustment.
- StackAdjustment += mergeSPUpdates(MBB, I, true);
- StackAdjustment += mergeSPUpdates(MBB, I, false);
+ StackAdjustment += mergeSPUpdates(MBB, InsertPos, true);
+ StackAdjustment += mergeSPUpdates(MBB, InsertPos, false);
if (StackAdjustment) {
if (!(Fn->optForMinSize() &&
- adjustStackWithPops(MBB, I, DL, StackAdjustment)))
- BuildStackAdjustment(MBB, I, DL, StackAdjustment,
+ adjustStackWithPops(MBB, InsertPos, DL, StackAdjustment)))
+ BuildStackAdjustment(MBB, InsertPos, DL, StackAdjustment,
/*InEpilogue=*/false);
}
}
@@ -2662,8 +2667,9 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
// TODO: When not using precise CFA, we also need to adjust for the
// InternalAmt here.
if (CfaAdjustment) {
- BuildCFI(MBB, I, DL, MCCFIInstruction::createAdjustCfaOffset(
- nullptr, CfaAdjustment));
+ BuildCFI(MBB, InsertPos, DL,
+ MCCFIInstruction::createAdjustCfaOffset(nullptr,
+ CfaAdjustment));
}
}
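The InsertPos threading above exists so that stray DBG_VALUE instructions
between the call-frame pseudo and the neighbouring stack adjustment cannot
perturb CFI placement or SP-update merging in -g builds. A simplified model
of the two skipping helpers, an illustrative sketch rather than LLVM's actual
implementation:

    // Roughly what skipDebugInstructionsForward/Backward do: hop over
    // debug-only instructions so debug info never moves insertion points.
    template <typename Iter>
    Iter skipDebugForward(Iter It, Iter End) {
      while (It != End && It->isDebugValue())
        ++It;
      return It;
    }
    template <typename Iter>
    Iter skipDebugBackward(Iter It, Iter Begin) {
      while (It != Begin && It->isDebugValue())
        --It;
      return It;
    }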
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index b293dfa98f82..fd2189397279 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -11474,6 +11474,10 @@ static SDValue lowerV2X128VectorShuffle(const SDLoc &DL, MVT VT, SDValue V1,
const SmallBitVector &Zeroable,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
+ SmallVector<int, 4> WidenedMask;
+ if (!canWidenShuffleElements(Mask, WidenedMask))
+ return SDValue();
+
// TODO: If minimizing size and one of the inputs is a zero vector and the
// zero vector has only one use, we could use a VPERM2X128 to save the
// instruction bytes needed to explicitly generate the zero vector.
@@ -11521,15 +11525,10 @@ static SDValue lowerV2X128VectorShuffle(const SDLoc &DL, MVT VT, SDValue V1,
// [6] - ignore
// [7] - zero high half of destination
- int MaskLO = Mask[0];
- if (MaskLO == SM_SentinelUndef)
- MaskLO = Mask[1] == SM_SentinelUndef ? 0 : Mask[1];
-
- int MaskHI = Mask[2];
- if (MaskHI == SM_SentinelUndef)
- MaskHI = Mask[3] == SM_SentinelUndef ? 0 : Mask[3];
+ int MaskLO = WidenedMask[0] < 0 ? 0 : WidenedMask[0];
+ int MaskHI = WidenedMask[1] < 0 ? 0 : WidenedMask[1];
- unsigned PermMask = MaskLO / 2 | (MaskHI / 2) << 4;
+ unsigned PermMask = MaskLO | (MaskHI << 4);
// If either input is a zero vector, replace it with an undef input.
// Shuffle mask values < 4 are selecting elements of V1.
@@ -11538,16 +11537,16 @@ static SDValue lowerV2X128VectorShuffle(const SDLoc &DL, MVT VT, SDValue V1,
// selecting the zero vector and setting the zero mask bit.
if (IsV1Zero) {
V1 = DAG.getUNDEF(VT);
- if (MaskLO < 4)
+ if (MaskLO < 2)
PermMask = (PermMask & 0xf0) | 0x08;
- if (MaskHI < 4)
+ if (MaskHI < 2)
PermMask = (PermMask & 0x0f) | 0x80;
}
if (IsV2Zero) {
V2 = DAG.getUNDEF(VT);
- if (MaskLO >= 4)
+ if (MaskLO >= 2)
PermMask = (PermMask & 0xf0) | 0x08;
- if (MaskHI >= 4)
+ if (MaskHI >= 2)
PermMask = (PermMask & 0x0f) | 0x80;
}
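With the mask pre-widened to 128-bit lanes, the immediate construction above
reduces to a direct bit-pack. A worked example (the helper name and
standalone form are mine; the values follow the bit assignments documented
earlier in this hunk):

    #include <cassert>
    // WidenedMask entries select 128-bit lanes: 0-1 from V1, 2-3 from V2;
    // negative entries are undef and default to lane 0.
    unsigned vperm2x128Imm(int MaskLO, int MaskHI) {
      MaskLO = MaskLO < 0 ? 0 : MaskLO;
      MaskHI = MaskHI < 0 ? 0 : MaskHI;
      return unsigned(MaskLO) | unsigned(MaskHI) << 4; // bits [1:0], [5:4]
    }
    int main() {
      assert(vperm2x128Imm(0, 2) == 0x20); // low(V1) : low(V2)
      assert(vperm2x128Imm(1, 3) == 0x31); // high(V1) : high(V2)
    }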
@@ -12012,11 +12011,9 @@ static SDValue lowerV4F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
assert(V2.getSimpleValueType() == MVT::v4f64 && "Bad operand type!");
assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");
- SmallVector<int, 4> WidenedMask;
- if (canWidenShuffleElements(Mask, WidenedMask))
- if (SDValue V = lowerV2X128VectorShuffle(DL, MVT::v4f64, V1, V2, Mask,
- Zeroable, Subtarget, DAG))
- return V;
+ if (SDValue V = lowerV2X128VectorShuffle(DL, MVT::v4f64, V1, V2, Mask,
+ Zeroable, Subtarget, DAG))
+ return V;
if (V2.isUndef()) {
// Check for being able to broadcast a single element.
@@ -12107,11 +12104,9 @@ static SDValue lowerV4I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");
assert(Subtarget.hasAVX2() && "We can only lower v4i64 with AVX2!");
- SmallVector<int, 4> WidenedMask;
- if (canWidenShuffleElements(Mask, WidenedMask))
- if (SDValue V = lowerV2X128VectorShuffle(DL, MVT::v4i64, V1, V2, Mask,
- Zeroable, Subtarget, DAG))
- return V;
+ if (SDValue V = lowerV2X128VectorShuffle(DL, MVT::v4i64, V1, V2, Mask,
+ Zeroable, Subtarget, DAG))
+ return V;
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v4i64, V1, V2, Mask,
Zeroable, Subtarget, DAG))
@@ -12605,33 +12600,72 @@ static SDValue lowerV4X128VectorShuffle(const SDLoc &DL, MVT VT,
if (!canWidenShuffleElements(Mask, WidenedMask))
return SDValue();
+ // Check for patterns which can be matched with a single insert of a 256-bit
+ // subvector.
+ bool OnlyUsesV1 = isShuffleEquivalent(V1, V2, Mask,
+ {0, 1, 2, 3, 0, 1, 2, 3});
+ if (OnlyUsesV1 || isShuffleEquivalent(V1, V2, Mask,
+ {0, 1, 2, 3, 8, 9, 10, 11})) {
+ MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(), 4);
+ SDValue LoV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V1,
+ DAG.getIntPtrConstant(0, DL));
+ SDValue HiV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT,
+ OnlyUsesV1 ? V1 : V2,
+ DAG.getIntPtrConstant(0, DL));
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LoV, HiV);
+ }
+
+ assert(WidenedMask.size() == 4);
+
+ // See if this is an insertion of the lower 128-bits of V2 into V1.
+ bool IsInsert = true;
+ int V2Index = -1;
+ for (int i = 0; i < 4; ++i) {
+ assert(WidenedMask[i] >= -1);
+ if (WidenedMask[i] < 0)
+ continue;
+
+ // Make sure all V1 subvectors are in place.
+ if (WidenedMask[i] < 4) {
+ if (WidenedMask[i] != i) {
+ IsInsert = false;
+ break;
+ }
+ } else {
+ // Make sure we only have a single V2 index and it's the lowest 128 bits.
+ if (V2Index >= 0 || WidenedMask[i] != 4) {
+ IsInsert = false;
+ break;
+ }
+ V2Index = i;
+ }
+ }
+ if (IsInsert && V2Index >= 0) {
+ MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(), 2);
+ SDValue Subvec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V2,
+ DAG.getIntPtrConstant(0, DL));
+ return insert128BitVector(V1, Subvec, V2Index * 2, DAG, DL);
+ }
+
+ // Try to lower to vshuf64x2/vshuf32x4.
SDValue Ops[2] = {DAG.getUNDEF(VT), DAG.getUNDEF(VT)};
+ unsigned PermMask = 0;
// Ensure elements come from the same Op.
- int MaxOp1Index = VT.getVectorNumElements()/2 - 1;
- for (int i = 0, Size = WidenedMask.size(); i < Size; ++i) {
- if (WidenedMask[i] == SM_SentinelZero)
- return SDValue();
- if (WidenedMask[i] == SM_SentinelUndef)
+ for (int i = 0; i < 4; ++i) {
+ assert(WidenedMask[i] >= -1);
+ if (WidenedMask[i] < 0)
continue;
- SDValue Op = WidenedMask[i] > MaxOp1Index ? V2 : V1;
- unsigned OpIndex = (i < Size/2) ? 0 : 1;
+ SDValue Op = WidenedMask[i] >= 4 ? V2 : V1;
+ unsigned OpIndex = i / 2;
if (Ops[OpIndex].isUndef())
Ops[OpIndex] = Op;
else if (Ops[OpIndex] != Op)
return SDValue();
- }
- // Form a 128-bit permutation.
- // Convert the 64-bit shuffle mask selection values into 128-bit selection
- // bits defined by a vshuf64x2 instruction's immediate control byte.
- unsigned PermMask = 0, Imm = 0;
- unsigned ControlBitsNum = WidenedMask.size() / 2;
-
- for (int i = 0, Size = WidenedMask.size(); i < Size; ++i) {
- // Use first element in place of undef mask.
- Imm = (WidenedMask[i] == SM_SentinelUndef) ? 0 : WidenedMask[i];
- PermMask |= (Imm % WidenedMask.size()) << (i * ControlBitsNum);
+ // Convert the 128-bit shuffle mask selection values into 128-bit selection
+ // bits defined by a vshuf64x2 instruction's immediate control byte.
+ PermMask |= (WidenedMask[i] % 4) << (i * 2);
}
return DAG.getNode(X86ISD::SHUF128, DL, VT, Ops[0], Ops[1],
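A worked instance of the new SHUF128 immediate packing, two bits per 128-bit
destination lane as computed in the loop above (the standalone wrapper is
mine, for illustration):

    #include <cassert>
    // PermMask packs (WidenedMask[i] % 4) into bits [2*i+1 : 2*i].
    unsigned shuf128Imm(const int (&WidenedMask)[4]) {
      unsigned PermMask = 0;
      for (int i = 0; i < 4; ++i)
        if (WidenedMask[i] >= 0) // undef lanes contribute nothing
          PermMask |= (WidenedMask[i] % 4) << (i * 2);
      return PermMask;
    }
    int main() {
      int Mask[4] = {0, 1, 4, 5}; // lanes {0,1} of V1, lanes {0,1} of V2
      assert(shuf128Imm(Mask) == 0x44); // 0b01000100
    }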
@@ -13051,10 +13085,10 @@ static SDValue lower1BitVectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
static bool canonicalizeShuffleMaskWithCommute(ArrayRef<int> Mask) {
int NumElements = Mask.size();
- int NumV1Elements = 0, NumV2Elements = 0, NumSentinelElements = 0;
+ int NumV1Elements = 0, NumV2Elements = 0;
for (int M : Mask)
if (M < 0)
- ++NumSentinelElements;
+ continue;
else if (M < NumElements)
++NumV1Elements;
else
@@ -18660,8 +18694,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget
Mask, PassThru, Subtarget, DAG);
}
case INTR_TYPE_3OP_IMM8_MASK:
- case INTR_TYPE_3OP_MASK:
- case INSERT_SUBVEC: {
+ case INTR_TYPE_3OP_MASK: {
SDValue Src1 = Op.getOperand(1);
SDValue Src2 = Op.getOperand(2);
SDValue Src3 = Op.getOperand(3);
@@ -18670,13 +18703,6 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget
if (IntrData->Type == INTR_TYPE_3OP_IMM8_MASK)
Src3 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Src3);
- else if (IntrData->Type == INSERT_SUBVEC) {
- // imm should be adapted to ISD::INSERT_SUBVECTOR behavior
- assert(isa<ConstantSDNode>(Src3) && "Expected a ConstantSDNode here!");
- unsigned Imm = cast<ConstantSDNode>(Src3)->getZExtValue();
- Imm *= Src2.getSimpleValueType().getVectorNumElements();
- Src3 = DAG.getTargetConstant(Imm, dl, MVT::i32);
- }
// We specify 2 possible opcodes for intrinsics with rounding modes.
// First, we check if the intrinsic may have non-default rounding mode,
@@ -28693,6 +28719,29 @@ static bool combineBitcastForMaskedOp(SDValue OrigOp, SelectionDAG &DAG,
return BitcastAndCombineShuffle(Opcode, Op.getOperand(0), Op.getOperand(1),
Op.getOperand(2));
}
+ case ISD::INSERT_SUBVECTOR: {
+ unsigned EltSize = EltVT.getSizeInBits();
+ if (EltSize != 32 && EltSize != 64)
+ return false;
+ MVT OpEltVT = Op.getSimpleValueType().getVectorElementType();
+ // Only change element size, not type.
+ if (VT.isInteger() != OpEltVT.isInteger())
+ return false;
+ uint64_t Imm = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
+ Imm = (Imm * OpEltVT.getSizeInBits()) / EltSize;
+ SDValue Op0 = DAG.getBitcast(VT, Op.getOperand(0));
+ DCI.AddToWorklist(Op0.getNode());
+ // Op1 needs to be bitcasted to a smaller vector with the same element type.
+ SDValue Op1 = Op.getOperand(1);
+ MVT Op1VT = MVT::getVectorVT(EltVT,
+ Op1.getSimpleValueType().getSizeInBits() / EltSize);
+ Op1 = DAG.getBitcast(Op1VT, Op1);
+ DCI.AddToWorklist(Op1.getNode());
+ DCI.CombineTo(OrigOp.getNode(),
+ DAG.getNode(Opcode, DL, VT, Op0, Op1,
+ DAG.getConstant(Imm, DL, MVT::i8)));
+ return true;
+ }
}
return false;
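The index rescaling in the new INSERT_SUBVECTOR case converts the insertion
index from the operation's element width to the masked operation's 32- or
64-bit element width. A worked instance, with values of my choosing:

    #include <cassert>
    int main() {
      // Bitcasting a v8i64 subvector insert to v16i32: the index must be
      // rescaled from 64-bit elements to 32-bit elements.
      unsigned Imm = 2, OpEltBits = 64, EltBits = 32;
      assert(Imm * OpEltBits / EltBits == 4); // element 2 of v8i64 is
                                              // element 4 of v16i32
    }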
@@ -31784,6 +31833,83 @@ static SDValue combineFaddFsub(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+/// Attempt to pre-truncate inputs to arithmetic ops if it will simplify
+/// the codegen.
+/// e.g. TRUNC( BINOP( X, Y ) ) --> BINOP( TRUNC( X ), TRUNC( Y ) )
+static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget,
+ SDLoc &DL) {
+ assert(N->getOpcode() == ISD::TRUNCATE && "Wrong opcode");
+ SDValue Src = N->getOperand(0);
+ unsigned Opcode = Src.getOpcode();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ EVT VT = N->getValueType(0);
+ EVT SrcVT = Src.getValueType();
+
+ auto IsRepeatedOpOrOneUseConstant = [](SDValue Op0, SDValue Op1) {
+ // TODO: Add extra cases where we can truncate both inputs for the
+ // cost of one (or none).
+ // e.g. TRUNC( BINOP( EXT( X ), EXT( Y ) ) ) --> BINOP( X, Y )
+ if (Op0 == Op1)
+ return true;
+
+ SDValue BC0 = peekThroughOneUseBitcasts(Op0);
+ SDValue BC1 = peekThroughOneUseBitcasts(Op1);
+ return ISD::isBuildVectorOfConstantSDNodes(BC0.getNode()) ||
+ ISD::isBuildVectorOfConstantSDNodes(BC1.getNode());
+ };
+
+ auto TruncateArithmetic = [&](SDValue N0, SDValue N1) {
+ SDValue Trunc0 = DAG.getNode(ISD::TRUNCATE, DL, VT, N0);
+ SDValue Trunc1 = DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
+ return DAG.getNode(Opcode, DL, VT, Trunc0, Trunc1);
+ };
+
+ // Don't combine if the operation has other uses.
+ if (!N->isOnlyUserOf(Src.getNode()))
+ return SDValue();
+
+ // Only support vector truncation for now.
+ // TODO: i64 scalar math would benefit as well.
+ if (!VT.isVector())
+ return SDValue();
+
+ // In most cases it's only worth pre-truncating if we're only facing the cost
+ // of one truncation, i.e. if one of the inputs will constant fold or the
+ // input is repeated.
+ switch (Opcode) {
+ case ISD::AND:
+ case ISD::XOR:
+ case ISD::OR: {
+ SDValue Op0 = Src.getOperand(0);
+ SDValue Op1 = Src.getOperand(1);
+ if (TLI.isOperationLegalOrPromote(Opcode, VT) &&
+ IsRepeatedOpOrOneUseConstant(Op0, Op1))
+ return TruncateArithmetic(Op0, Op1);
+ break;
+ }
+
+ case ISD::MUL:
+ // X86 is rubbish at scalar and vector i64 multiplies (until AVX512DQ) - it's
+ // better to truncate if we have the chance.
+ if (SrcVT.getScalarType() == MVT::i64 && TLI.isOperationLegal(Opcode, VT) &&
+ !TLI.isOperationLegal(Opcode, SrcVT))
+ return TruncateArithmetic(Src.getOperand(0), Src.getOperand(1));
+ LLVM_FALLTHROUGH;
+ case ISD::ADD: {
+ SDValue Op0 = Src.getOperand(0);
+ SDValue Op1 = Src.getOperand(1);
+ if (TLI.isOperationLegal(Opcode, VT) &&
+ IsRepeatedOpOrOneUseConstant(Op0, Op1))
+ return TruncateArithmetic(Op0, Op1);
+ break;
+ }
+ }
+
+ return SDValue();
+}
+
/// Truncate a group of v4i32 into v16i8/v8i16 using X86ISD::PACKUS.
static SDValue
combineVectorTruncationWithPACKUS(SDNode *N, SelectionDAG &DAG,
@@ -31970,6 +32096,10 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG,
SDValue Src = N->getOperand(0);
SDLoc DL(N);
+ // Attempt to pre-truncate inputs to arithmetic ops instead.
+ if (SDValue V = combineTruncatedArithmetic(N, DAG, Subtarget, DL))
+ return V;
+
// Try to detect AVG pattern first.
if (SDValue Avg = detectAVGPattern(Src, VT, DAG, Subtarget, DL))
return Avg;
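The pre-truncation combine added above is sound because truncation
distributes over these opcodes: the discarded high bits of an add, mul or
bitwise op never influence the retained low bits. A standalone scalar
demonstration (ordinary C++, not LLVM code):

    #include <cassert>
    #include <cstdint>
    int main() {
      uint64_t X = 0x123456789ABCDEF0ULL, Y = 0x0FEDCBA987654321ULL;
      // trunc(binop(X, Y)) == binop(trunc(X), trunc(Y))
      assert(uint32_t(X + Y) == uint32_t(uint32_t(X) + uint32_t(Y)));
      assert(uint32_t(X * Y) == uint32_t(uint32_t(X) * uint32_t(Y)));
      assert(uint32_t(X & Y) == (uint32_t(X) & uint32_t(Y)));
      assert(uint32_t(X ^ Y) == (uint32_t(X) ^ uint32_t(Y)));
    }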
diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td
index da7437ea0ccb..908053e1342d 100644
--- a/lib/Target/X86/X86InstrAVX512.td
+++ b/lib/Target/X86/X86InstrAVX512.td
@@ -650,33 +650,6 @@ multiclass vextract_for_size<int Opcode,
From.ZSuffix # "rrkz")
To.KRCWM:$mask, From.RC:$src1,
(EXTRACT_get_vextract_imm To.RC:$ext))>;
-
- // Intrinsic call with masking.
- def : Pat<(!cast<Intrinsic>("int_x86_avx512_mask_vextract" # To.EltTypeName #
- "x" # To.NumElts # "_" # From.Size)
- From.RC:$src1, (iPTR imm:$idx), To.RC:$src0, To.MRC:$mask),
- (!cast<Instruction>(NAME # To.EltSize # "x" # To.NumElts #
- From.ZSuffix # "rrk")
- To.RC:$src0,
- (COPY_TO_REGCLASS To.MRC:$mask, To.KRCWM),
- From.RC:$src1, imm:$idx)>;
-
- // Intrinsic call with zero-masking.
- def : Pat<(!cast<Intrinsic>("int_x86_avx512_mask_vextract" # To.EltTypeName #
- "x" # To.NumElts # "_" # From.Size)
- From.RC:$src1, (iPTR imm:$idx), To.ImmAllZerosV, To.MRC:$mask),
- (!cast<Instruction>(NAME # To.EltSize # "x" # To.NumElts #
- From.ZSuffix # "rrkz")
- (COPY_TO_REGCLASS To.MRC:$mask, To.KRCWM),
- From.RC:$src1, imm:$idx)>;
-
- // Intrinsic call without masking.
- def : Pat<(!cast<Intrinsic>("int_x86_avx512_mask_vextract" # To.EltTypeName #
- "x" # To.NumElts # "_" # From.Size)
- From.RC:$src1, (iPTR imm:$idx), To.ImmAllZerosV, (i8 -1)),
- (!cast<Instruction>(NAME # To.EltSize # "x" # To.NumElts #
- From.ZSuffix # "rr")
- From.RC:$src1, imm:$idx)>;
}
// Codegen pattern for the alternative types
@@ -6871,18 +6844,18 @@ let Defs = [EFLAGS], Predicates = [HasAVX512] in {
VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
}
let isCodeGenOnly = 1 in {
- defm Int_VUCOMISSZ : sse12_ord_cmp<0x2E, VR128X, X86ucomi, v4f32, f128mem,
- load, "ucomiss">, PS, EVEX, VEX_LIG,
+ defm Int_VUCOMISSZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v4f32, ssmem,
+ sse_load_f32, "ucomiss">, PS, EVEX, VEX_LIG,
EVEX_CD8<32, CD8VT1>;
- defm Int_VUCOMISDZ : sse12_ord_cmp<0x2E, VR128X, X86ucomi, v2f64, f128mem,
- load, "ucomisd">, PD, EVEX,
+ defm Int_VUCOMISDZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v2f64, sdmem,
+ sse_load_f64, "ucomisd">, PD, EVEX,
VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
- defm Int_VCOMISSZ : sse12_ord_cmp<0x2F, VR128X, X86comi, v4f32, f128mem,
- load, "comiss">, PS, EVEX, VEX_LIG,
+ defm Int_VCOMISSZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v4f32, ssmem,
+ sse_load_f32, "comiss">, PS, EVEX, VEX_LIG,
EVEX_CD8<32, CD8VT1>;
- defm Int_VCOMISDZ : sse12_ord_cmp<0x2F, VR128X, X86comi, v2f64, f128mem,
- load, "comisd">, PD, EVEX,
+ defm Int_VCOMISDZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v2f64, sdmem,
+ sse_load_f64, "comisd">, PD, EVEX,
VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
}
}
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 9d6a89363044..4cd6ae563f03 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -2373,6 +2373,23 @@ multiclass sse12_ord_cmp<bits<8> opc, RegisterClass RC, SDNode OpNode,
Sched<[WriteFAddLd, ReadAfterLd]>;
}
+// sse12_ord_cmp_int - Intrinsic version of sse12_ord_cmp
+multiclass sse12_ord_cmp_int<bits<8> opc, RegisterClass RC, SDNode OpNode,
+ ValueType vt, Operand memop,
+ ComplexPattern mem_cpat, string OpcodeStr> {
+ def rr: SI<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
+ [(set EFLAGS, (OpNode (vt RC:$src1), RC:$src2))],
+ IIC_SSE_COMIS_RR>,
+ Sched<[WriteFAdd]>;
+ def rm: SI<opc, MRMSrcMem, (outs), (ins RC:$src1, memop:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
+ [(set EFLAGS, (OpNode (vt RC:$src1),
+ mem_cpat:$src2))],
+ IIC_SSE_COMIS_RM>,
+ Sched<[WriteFAddLd, ReadAfterLd]>;
+}
+
let Defs = [EFLAGS] in {
defm VUCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32,
"ucomiss">, PS, VEX, VEX_LIG;
@@ -2386,15 +2403,15 @@ let Defs = [EFLAGS] in {
}
let isCodeGenOnly = 1 in {
- defm Int_VUCOMISS : sse12_ord_cmp<0x2E, VR128, X86ucomi, v4f32, f128mem,
- load, "ucomiss">, PS, VEX;
- defm Int_VUCOMISD : sse12_ord_cmp<0x2E, VR128, X86ucomi, v2f64, f128mem,
- load, "ucomisd">, PD, VEX;
-
- defm Int_VCOMISS : sse12_ord_cmp<0x2F, VR128, X86comi, v4f32, f128mem,
- load, "comiss">, PS, VEX;
- defm Int_VCOMISD : sse12_ord_cmp<0x2F, VR128, X86comi, v2f64, f128mem,
- load, "comisd">, PD, VEX;
+ defm Int_VUCOMISS : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v4f32, ssmem,
+ sse_load_f32, "ucomiss">, PS, VEX;
+ defm Int_VUCOMISD : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v2f64, sdmem,
+ sse_load_f64, "ucomisd">, PD, VEX;
+
+ defm Int_VCOMISS : sse12_ord_cmp_int<0x2F, VR128, X86comi, v4f32, ssmem,
+ sse_load_f32, "comiss">, PS, VEX;
+ defm Int_VCOMISD : sse12_ord_cmp_int<0x2F, VR128, X86comi, v2f64, sdmem,
+ sse_load_f64, "comisd">, PD, VEX;
}
defm UCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32,
"ucomiss">, PS;
@@ -2409,15 +2426,15 @@ let Defs = [EFLAGS] in {
}
let isCodeGenOnly = 1 in {
- defm Int_UCOMISS : sse12_ord_cmp<0x2E, VR128, X86ucomi, v4f32, f128mem,
- load, "ucomiss">, PS;
- defm Int_UCOMISD : sse12_ord_cmp<0x2E, VR128, X86ucomi, v2f64, f128mem,
- load, "ucomisd">, PD;
-
- defm Int_COMISS : sse12_ord_cmp<0x2F, VR128, X86comi, v4f32, f128mem, load,
- "comiss">, PS;
- defm Int_COMISD : sse12_ord_cmp<0x2F, VR128, X86comi, v2f64, f128mem, load,
- "comisd">, PD;
+ defm Int_UCOMISS : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v4f32, ssmem,
+ sse_load_f32, "ucomiss">, PS;
+ defm Int_UCOMISD : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v2f64, sdmem,
+ sse_load_f64, "ucomisd">, PD;
+
+ defm Int_COMISS : sse12_ord_cmp_int<0x2F, VR128, X86comi, v4f32, ssmem,
+ sse_load_f32, "comiss">, PS;
+ defm Int_COMISD : sse12_ord_cmp_int<0x2F, VR128, X86comi, v2f64, sdmem,
+ sse_load_f64, "comisd">, PD;
}
} // Defs = [EFLAGS]
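The switch to ssmem/sse_load_f32 (and sdmem/sse_load_f64) above means the
intrinsic forms of (u)comiss and (u)comisd can now only fold a scalar-sized
load rather than a full 16-byte one. As I read the patch, the observable
effect at the intrinsics level is along these lines:

    #include <immintrin.h>
    // Folding the 4-byte load of *p straight into ucomiss avoids a 16-byte
    // access, which matters when p is the last float before an unmapped page.
    int ucomi_eq(__m128 a, const float *p) {
      __m128 b = _mm_load_ss(p);   // scalar 4-byte load
      return _mm_ucomieq_ss(a, b); // lowers to ucomiss
    }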
diff --git a/lib/Target/X86/X86InstrTablesInfo.h b/lib/Target/X86/X86InstrTablesInfo.h
index 5d2af829028a..415a891bfd97 100755
--- a/lib/Target/X86/X86InstrTablesInfo.h
+++ b/lib/Target/X86/X86InstrTablesInfo.h
@@ -1,4 +1,4 @@
-//===-- X86AVX512Info.h - X86 Instruction Tables Information ----*- C++ -*-===//
+//===-- X86InstrTablesInfo.h - X86 Instruction Tables -----------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -25,8 +25,7 @@ struct X86EvexToVexCompressTableEntry {
// X86 EVEX encoded instructions that have a VEX 128 encoding
// (table format: <EVEX opcode, VEX-128 opcode>).
-static const X86EvexToVexCompressTableEntry
- X86EvexToVex128CompressTable[] = {
+static const X86EvexToVexCompressTableEntry X86EvexToVex128CompressTable[] = {
// EVEX scalar with corresponding VEX.
{ X86::Int_VCOMISDZrm , X86::Int_VCOMISDrm },
{ X86::Int_VCOMISDZrr , X86::Int_VCOMISDrr },
@@ -250,20 +249,20 @@ static const X86EvexToVexCompressTableEntry
{ X86::VUCOMISDZrr , X86::VUCOMISDrr },
{ X86::VUCOMISSZrm , X86::VUCOMISSrm },
{ X86::VUCOMISSZrr , X86::VUCOMISSrr },
-
+
{ X86::VMOV64toPQIZrr , X86::VMOV64toPQIrr },
{ X86::VMOV64toSDZrr , X86::VMOV64toSDrr },
{ X86::VMOVDI2PDIZrm , X86::VMOVDI2PDIrm },
{ X86::VMOVDI2PDIZrr , X86::VMOVDI2PDIrr },
{ X86::VMOVLHPSZrr , X86::VMOVLHPSrr },
- { X86::VMOVHLPSZrr , X86::VMOVHLPSrr },
+ { X86::VMOVHLPSZrr , X86::VMOVHLPSrr },
{ X86::VMOVPDI2DIZmr , X86::VMOVPDI2DImr },
{ X86::VMOVPDI2DIZrr , X86::VMOVPDI2DIrr },
{ X86::VMOVPQI2QIZmr , X86::VMOVPQI2QImr },
{ X86::VMOVPQIto64Zrr , X86::VMOVPQIto64rr },
{ X86::VMOVQI2PQIZrm , X86::VMOVQI2PQIrm },
{ X86::VMOVZPQILo2PQIZrr , X86::VMOVZPQILo2PQIrr },
-
+
{ X86::VPEXTRBZmr , X86::VPEXTRBmr },
{ X86::VPEXTRBZrr , X86::VPEXTRBrr },
{ X86::VPEXTRDZmr , X86::VPEXTRDmr },
@@ -272,7 +271,7 @@ static const X86EvexToVexCompressTableEntry
{ X86::VPEXTRQZrr , X86::VPEXTRQrr },
{ X86::VPEXTRWZmr , X86::VPEXTRWmr },
{ X86::VPEXTRWZrr , X86::VPEXTRWri },
-
+
{ X86::VPINSRBZrm , X86::VPINSRBrm },
{ X86::VPINSRBZrr , X86::VPINSRBrr },
{ X86::VPINSRDZrm , X86::VPINSRDrm },
@@ -294,7 +293,7 @@ static const X86EvexToVexCompressTableEntry
{ X86::VANDPDZ128rm , X86::VANDPDrm },
{ X86::VANDPDZ128rr , X86::VANDPDrr },
{ X86::VANDPSZ128rm , X86::VANDPSrm },
- { X86::VANDPSZ128rr , X86::VANDPSrr },
+ { X86::VANDPSZ128rr , X86::VANDPSrr },
{ X86::VBROADCASTSSZ128m , X86::VBROADCASTSSrm },
{ X86::VBROADCASTSSZ128r , X86::VBROADCASTSSrr },
{ X86::VBROADCASTSSZ128r_s , X86::VBROADCASTSSrr },
@@ -414,8 +413,8 @@ static const X86EvexToVexCompressTableEntry
{ X86::VMOVAPDZ128rm , X86::VMOVAPDrm },
{ X86::VMOVAPDZ128rr , X86::VMOVAPDrr },
{ X86::VMOVAPDZ128rr_REV , X86::VMOVAPDrr_REV },
- { X86::VMOVAPSZ128mr , X86::VMOVAPSmr },
- { X86::VMOVAPSZ128rm , X86::VMOVAPSrm },
+ { X86::VMOVAPSZ128mr , X86::VMOVAPSmr },
+ { X86::VMOVAPSZ128rm , X86::VMOVAPSrm },
{ X86::VMOVAPSZ128rr , X86::VMOVAPSrr },
{ X86::VMOVAPSZ128rr_REV , X86::VMOVAPSrr_REV },
{ X86::VMOVDDUPZ128rm , X86::VMOVDDUPrm },
@@ -464,8 +463,8 @@ static const X86EvexToVexCompressTableEntry
{ X86::VMOVUPDZ128rm , X86::VMOVUPDrm },
{ X86::VMOVUPDZ128rr , X86::VMOVUPDrr },
{ X86::VMOVUPDZ128rr_REV , X86::VMOVUPDrr_REV },
- { X86::VMOVUPSZ128mr , X86::VMOVUPSmr },
- { X86::VMOVUPSZ128rm , X86::VMOVUPSrm },
+ { X86::VMOVUPSZ128mr , X86::VMOVUPSmr },
+ { X86::VMOVUPSZ128rm , X86::VMOVUPSrm },
{ X86::VMOVUPSZ128rr , X86::VMOVUPSrr },
{ X86::VMOVUPSZ128rr_REV , X86::VMOVUPSrr_REV },
{ X86::VMULPDZ128rm , X86::VMULPDrm },
@@ -520,9 +519,9 @@ static const X86EvexToVexCompressTableEntry
{ X86::VPBROADCASTBZ128r , X86::VPBROADCASTBrr },
{ X86::VPBROADCASTDZ128m , X86::VPBROADCASTDrm },
{ X86::VPBROADCASTDZ128r , X86::VPBROADCASTDrr },
- { X86::VPBROADCASTQZ128m , X86::VPBROADCASTQrm },
- { X86::VPBROADCASTQZ128r , X86::VPBROADCASTQrr },
- { X86::VPBROADCASTWZ128m , X86::VPBROADCASTWrm },
+ { X86::VPBROADCASTQZ128m , X86::VPBROADCASTQrm },
+ { X86::VPBROADCASTQZ128r , X86::VPBROADCASTQrr },
+ { X86::VPBROADCASTWZ128m , X86::VPBROADCASTWrm },
{ X86::VPBROADCASTWZ128r , X86::VPBROADCASTWrr },
{ X86::VPERMILPDZ128mi , X86::VPERMILPDmi },
{ X86::VPERMILPDZ128ri , X86::VPERMILPDri },
@@ -583,7 +582,7 @@ static const X86EvexToVexCompressTableEntry
{ X86::VPMOVZXWDZ128rm , X86::VPMOVZXWDrm },
{ X86::VPMOVZXWDZ128rr , X86::VPMOVZXWDrr },
{ X86::VPMOVZXWQZ128rm , X86::VPMOVZXWQrm },
- { X86::VPMOVZXWQZ128rr , X86::VPMOVZXWQrr },
+ { X86::VPMOVZXWQZ128rr , X86::VPMOVZXWQrr },
{ X86::VPMULDQZ128rm , X86::VPMULDQrm },
{ X86::VPMULDQZ128rr , X86::VPMULDQrr },
{ X86::VPMULHRSWZ128rm , X86::VPMULHRSWrm },
@@ -612,10 +611,10 @@ static const X86EvexToVexCompressTableEntry
{ X86::VPSHUFHWZ128ri , X86::VPSHUFHWri },
{ X86::VPSHUFLWZ128mi , X86::VPSHUFLWmi },
{ X86::VPSHUFLWZ128ri , X86::VPSHUFLWri },
- { X86::VPSLLDQZ128rr , X86::VPSLLDQri },
+ { X86::VPSLLDQZ128rr , X86::VPSLLDQri },
{ X86::VPSLLDZ128ri , X86::VPSLLDri },
{ X86::VPSLLDZ128rm , X86::VPSLLDrm },
- { X86::VPSLLDZ128rr , X86::VPSLLDrr },
+ { X86::VPSLLDZ128rr , X86::VPSLLDrr },
{ X86::VPSLLQZ128ri , X86::VPSLLQri },
{ X86::VPSLLQZ128rm , X86::VPSLLQrm },
{ X86::VPSLLQZ128rr , X86::VPSLLQrr },
@@ -713,8 +712,7 @@ static const X86EvexToVexCompressTableEntry
// X86 EVEX encoded instructions that have a VEX 256 encoding
// (table format: <EVEX opcode, VEX-256 opcode>).
- static const X86EvexToVexCompressTableEntry
- X86EvexToVex256CompressTable[] = {
+ static const X86EvexToVexCompressTableEntry X86EvexToVex256CompressTable[] = {
{ X86::VADDPDZ256rm , X86::VADDPDYrm },
{ X86::VADDPDZ256rr , X86::VADDPDYrr },
{ X86::VADDPSZ256rm , X86::VADDPSYrm },
@@ -727,11 +725,11 @@ static const X86EvexToVexCompressTableEntry
{ X86::VANDPDZ256rr , X86::VANDPDYrr },
{ X86::VANDPSZ256rm , X86::VANDPSYrm },
{ X86::VANDPSZ256rr , X86::VANDPSYrr },
- { X86::VBROADCASTSDZ256m , X86::VBROADCASTSDYrm },
- { X86::VBROADCASTSDZ256r , X86::VBROADCASTSDYrr },
- { X86::VBROADCASTSDZ256r_s , X86::VBROADCASTSDYrr },
+ { X86::VBROADCASTSDZ256m , X86::VBROADCASTSDYrm },
+ { X86::VBROADCASTSDZ256r , X86::VBROADCASTSDYrr },
+ { X86::VBROADCASTSDZ256r_s , X86::VBROADCASTSDYrr },
{ X86::VBROADCASTSSZ256m , X86::VBROADCASTSSYrm },
- { X86::VBROADCASTSSZ256r , X86::VBROADCASTSSYrr },
+ { X86::VBROADCASTSSZ256r , X86::VBROADCASTSSYrr },
{ X86::VBROADCASTSSZ256r_s , X86::VBROADCASTSSYrr },
{ X86::VCVTDQ2PDZ256rm , X86::VCVTDQ2PDYrm },
{ X86::VCVTDQ2PDZ256rr , X86::VCVTDQ2PDYrr },
@@ -757,6 +755,14 @@ static const X86EvexToVexCompressTableEntry
{ X86::VDIVPDZ256rr , X86::VDIVPDYrr },
{ X86::VDIVPSZ256rm , X86::VDIVPSYrm },
{ X86::VDIVPSZ256rr , X86::VDIVPSYrr },
+ { X86::VEXTRACTF32x4Z256mr , X86::VEXTRACTF128mr },
+ { X86::VEXTRACTF64x2Z256mr , X86::VEXTRACTF128mr },
+ { X86::VEXTRACTF32x4Z256rr , X86::VEXTRACTF128rr },
+ { X86::VEXTRACTF64x2Z256rr , X86::VEXTRACTF128rr },
+ { X86::VEXTRACTI32x4Z256mr , X86::VEXTRACTI128mr },
+ { X86::VEXTRACTI64x2Z256mr , X86::VEXTRACTI128mr },
+ { X86::VEXTRACTI32x4Z256rr , X86::VEXTRACTI128rr },
+ { X86::VEXTRACTI64x2Z256rr , X86::VEXTRACTI128rr },
{ X86::VFMADD132PDZ256m , X86::VFMADD132PDYm },
{ X86::VFMADD132PDZ256r , X86::VFMADD132PDYr },
{ X86::VFMADD132PSZ256m , X86::VFMADD132PSYm },
@@ -829,6 +835,14 @@ static const X86EvexToVexCompressTableEntry
{ X86::VFNMSUB231PDZ256r , X86::VFNMSUB231PDYr },
{ X86::VFNMSUB231PSZ256m , X86::VFNMSUB231PSYm },
{ X86::VFNMSUB231PSZ256r , X86::VFNMSUB231PSYr },
+ { X86::VINSERTF32x4Z256rm , X86::VINSERTF128rm },
+ { X86::VINSERTF64x2Z256rm , X86::VINSERTF128rm },
+ { X86::VINSERTF32x4Z256rr , X86::VINSERTF128rr },
+ { X86::VINSERTF64x2Z256rr , X86::VINSERTF128rr },
+ { X86::VINSERTI32x4Z256rm , X86::VINSERTI128rm },
+ { X86::VINSERTI64x2Z256rm , X86::VINSERTI128rm },
+ { X86::VINSERTI32x4Z256rr , X86::VINSERTI128rr },
+ { X86::VINSERTI64x2Z256rr , X86::VINSERTI128rr },
{ X86::VMAXCPDZ256rm , X86::VMAXCPDYrm },
{ X86::VMAXCPDZ256rr , X86::VMAXCPDYrr },
{ X86::VMAXCPSZ256rm , X86::VMAXCPSYrm },
@@ -849,8 +863,8 @@ static const X86EvexToVexCompressTableEntry
{ X86::VMOVAPDZ256rm , X86::VMOVAPDYrm },
{ X86::VMOVAPDZ256rr , X86::VMOVAPDYrr },
{ X86::VMOVAPDZ256rr_REV , X86::VMOVAPDYrr_REV },
- { X86::VMOVAPSZ256mr , X86::VMOVAPSYmr },
- { X86::VMOVAPSZ256rm , X86::VMOVAPSYrm },
+ { X86::VMOVAPSZ256mr , X86::VMOVAPSYmr },
+ { X86::VMOVAPSZ256rm , X86::VMOVAPSYrm },
{ X86::VMOVAPSZ256rr , X86::VMOVAPSYrr },
{ X86::VMOVAPSZ256rr_REV , X86::VMOVAPSYrr_REV },
{ X86::VMOVDDUPZ256rm , X86::VMOVDDUPYrm },
@@ -943,14 +957,14 @@ static const X86EvexToVexCompressTableEntry
{ X86::VPAVGBZ256rr , X86::VPAVGBYrr },
{ X86::VPAVGWZ256rm , X86::VPAVGWYrm },
{ X86::VPAVGWZ256rr , X86::VPAVGWYrr },
- { X86::VPBROADCASTBZ256m , X86::VPBROADCASTBYrm },
- { X86::VPBROADCASTBZ256r , X86::VPBROADCASTBYrr },
- { X86::VPBROADCASTDZ256m , X86::VPBROADCASTDYrm },
- { X86::VPBROADCASTDZ256r , X86::VPBROADCASTDYrr },
- { X86::VPBROADCASTQZ256m , X86::VPBROADCASTQYrm },
- { X86::VPBROADCASTQZ256r , X86::VPBROADCASTQYrr },
- { X86::VPBROADCASTWZ256m , X86::VPBROADCASTWYrm },
- { X86::VPBROADCASTWZ256r , X86::VPBROADCASTWYrr },
+ { X86::VPBROADCASTBZ256m , X86::VPBROADCASTBYrm },
+ { X86::VPBROADCASTBZ256r , X86::VPBROADCASTBYrr },
+ { X86::VPBROADCASTDZ256m , X86::VPBROADCASTDYrm },
+ { X86::VPBROADCASTDZ256r , X86::VPBROADCASTDYrr },
+ { X86::VPBROADCASTQZ256m , X86::VPBROADCASTQYrm },
+ { X86::VPBROADCASTQZ256r , X86::VPBROADCASTQYrr },
+ { X86::VPBROADCASTWZ256m , X86::VPBROADCASTWYrm },
+ { X86::VPBROADCASTWZ256r , X86::VPBROADCASTWYrr },
{ X86::VPERMDZ256rm , X86::VPERMDYrm },
{ X86::VPERMDZ256rr , X86::VPERMDYrr },
{ X86::VPERMILPDZ256mi , X86::VPERMILPDYmi },
@@ -1050,7 +1064,7 @@ static const X86EvexToVexCompressTableEntry
{ X86::VPSLLDQZ256rr , X86::VPSLLDQYri },
{ X86::VPSLLDZ256ri , X86::VPSLLDYri },
{ X86::VPSLLDZ256rm , X86::VPSLLDYrm },
- { X86::VPSLLDZ256rr , X86::VPSLLDYrr },
+ { X86::VPSLLDZ256rr , X86::VPSLLDYrr },
{ X86::VPSLLQZ256ri , X86::VPSLLQYri },
{ X86::VPSLLQZ256rm , X86::VPSLLQYrm },
{ X86::VPSLLQZ256rr , X86::VPSLLQYrr },
@@ -1060,7 +1074,7 @@ static const X86EvexToVexCompressTableEntry
{ X86::VPSLLVQZ256rr , X86::VPSLLVQYrr },
{ X86::VPSLLWZ256ri , X86::VPSLLWYri },
{ X86::VPSLLWZ256rm , X86::VPSLLWYrm },
- { X86::VPSLLWZ256rr , X86::VPSLLWYrr },
+ { X86::VPSLLWZ256rr , X86::VPSLLWYrr },
{ X86::VPSRADZ256ri , X86::VPSRADYri },
{ X86::VPSRADZ256rm , X86::VPSRADYrm },
{ X86::VPSRADZ256rr , X86::VPSRADYrr },
@@ -1072,7 +1086,7 @@ static const X86EvexToVexCompressTableEntry
{ X86::VPSRLDQZ256rr , X86::VPSRLDQYri },
{ X86::VPSRLDZ256ri , X86::VPSRLDYri },
{ X86::VPSRLDZ256rm , X86::VPSRLDYrm },
- { X86::VPSRLDZ256rr , X86::VPSRLDYrr },
+ { X86::VPSRLDZ256rr , X86::VPSRLDYrr },
{ X86::VPSRLQZ256ri , X86::VPSRLQYri },
{ X86::VPSRLQZ256rm , X86::VPSRLQYrm },
{ X86::VPSRLQZ256rr , X86::VPSRLQYrr },
@@ -1145,4 +1159,4 @@ static const X86EvexToVexCompressTableEntry
{ X86::VXORPSZ256rr , X86::VXORPSYrr },
};
-#endif
\ No newline at end of file
+#endif
+#endif
diff --git a/lib/Target/X86/X86IntrinsicsInfo.h b/lib/Target/X86/X86IntrinsicsInfo.h
index df47b4ad583d..63a02af02faa 100644
--- a/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/lib/Target/X86/X86IntrinsicsInfo.h
@@ -34,7 +34,7 @@ enum IntrinsicType : uint16_t {
INTR_TYPE_SCALAR_MASK_RM, INTR_TYPE_3OP_SCALAR_MASK_RM,
COMPRESS_EXPAND_IN_REG, COMPRESS_TO_MEM, BRCST_SUBVEC_TO_VEC, BRCST32x2_TO_VEC,
TRUNCATE_TO_MEM_VI8, TRUNCATE_TO_MEM_VI16, TRUNCATE_TO_MEM_VI32,
- EXPAND_FROM_MEM, INSERT_SUBVEC,
+ EXPAND_FROM_MEM,
TERLOG_OP_MASK, TERLOG_OP_MASKZ, BROADCASTM, KUNPCK, FIXUPIMM, FIXUPIMM_MASKZ, FIXUPIMMS,
FIXUPIMMS_MASKZ, CONVERT_MASK_TO_VEC, CONVERT_TO_MASK
};
@@ -795,30 +795,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::VGETMANTS, 0),
X86_INTRINSIC_DATA(avx512_mask_getmant_ss, INTR_TYPE_3OP_SCALAR_MASK_RM,
X86ISD::VGETMANTS, 0),
- X86_INTRINSIC_DATA(avx512_mask_insertf32x4_256, INSERT_SUBVEC,
- ISD::INSERT_SUBVECTOR, 0),
- X86_INTRINSIC_DATA(avx512_mask_insertf32x4_512, INSERT_SUBVEC,
- ISD::INSERT_SUBVECTOR, 0),
- X86_INTRINSIC_DATA(avx512_mask_insertf32x8_512, INSERT_SUBVEC,
- ISD::INSERT_SUBVECTOR, 0),
- X86_INTRINSIC_DATA(avx512_mask_insertf64x2_256, INSERT_SUBVEC,
- ISD::INSERT_SUBVECTOR, 0),
- X86_INTRINSIC_DATA(avx512_mask_insertf64x2_512, INSERT_SUBVEC,
- ISD::INSERT_SUBVECTOR, 0),
- X86_INTRINSIC_DATA(avx512_mask_insertf64x4_512, INSERT_SUBVEC,
- ISD::INSERT_SUBVECTOR, 0),
- X86_INTRINSIC_DATA(avx512_mask_inserti32x4_256, INSERT_SUBVEC,
- ISD::INSERT_SUBVECTOR, 0),
- X86_INTRINSIC_DATA(avx512_mask_inserti32x4_512, INSERT_SUBVEC,
- ISD::INSERT_SUBVECTOR, 0),
- X86_INTRINSIC_DATA(avx512_mask_inserti32x8_512, INSERT_SUBVEC,
- ISD::INSERT_SUBVECTOR, 0),
- X86_INTRINSIC_DATA(avx512_mask_inserti64x2_256, INSERT_SUBVEC,
- ISD::INSERT_SUBVECTOR, 0),
- X86_INTRINSIC_DATA(avx512_mask_inserti64x2_512, INSERT_SUBVEC,
- ISD::INSERT_SUBVECTOR, 0),
- X86_INTRINSIC_DATA(avx512_mask_inserti64x4_512, INSERT_SUBVEC,
- ISD::INSERT_SUBVECTOR, 0),
X86_INTRINSIC_DATA(avx512_mask_lzcnt_d_128, INTR_TYPE_1OP_MASK,
ISD::CTLZ, 0),
X86_INTRINSIC_DATA(avx512_mask_lzcnt_d_256, INTR_TYPE_1OP_MASK,
diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp
index 2f69df064e7f..a38a4b30b77d 100644
--- a/lib/Target/X86/X86MCInstLower.cpp
+++ b/lib/Target/X86/X86MCInstLower.cpp
@@ -1115,56 +1115,6 @@ void X86AsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI, X86MCInstLo
OutStreamer->EmitInstruction(TC, getSubtargetInfo());
}
-void X86AsmPrinter::EmitXRayTable() {
- if (Sleds.empty())
- return;
-
- auto PrevSection = OutStreamer->getCurrentSectionOnly();
- auto Fn = MF->getFunction();
- MCSection *Section = nullptr;
- if (Subtarget->isTargetELF()) {
- if (Fn->hasComdat()) {
- Section = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS,
- ELF::SHF_ALLOC | ELF::SHF_GROUP, 0,
- Fn->getComdat()->getName());
- } else {
- Section = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS,
- ELF::SHF_ALLOC);
- }
- } else if (Subtarget->isTargetMachO()) {
- Section = OutContext.getMachOSection("__DATA", "xray_instr_map", 0,
- SectionKind::getReadOnlyWithRel());
- } else {
- llvm_unreachable("Unsupported target");
- }
-
- // Before we switch over, we force a reference to a label inside the
- // xray_instr_map section. Since EmitXRayTable() is always called just
- // before the function's end, we assume that this is happening after the
- // last return instruction.
- //
- // We then align the reference to 16 byte boundaries, which we determined
- // experimentally to be beneficial to avoid causing decoder stalls.
- MCSymbol *Tmp = OutContext.createTempSymbol("xray_synthetic_", true);
- OutStreamer->EmitCodeAlignment(16);
- OutStreamer->EmitSymbolValue(Tmp, 8, false);
- OutStreamer->SwitchSection(Section);
- OutStreamer->EmitLabel(Tmp);
- for (const auto &Sled : Sleds) {
- OutStreamer->EmitSymbolValue(Sled.Sled, 8);
- OutStreamer->EmitSymbolValue(CurrentFnSym, 8);
- auto Kind = static_cast<uint8_t>(Sled.Kind);
- OutStreamer->EmitBytes(
- StringRef(reinterpret_cast<const char *>(&Kind), 1));
- OutStreamer->EmitBytes(
- StringRef(reinterpret_cast<const char *>(&Sled.AlwaysInstrument), 1));
- OutStreamer->EmitZeros(14);
- }
- OutStreamer->SwitchSection(PrevSection);
-
- Sleds.clear();
-}
-
// Returns instruction preceding MBBI in MachineFunction.
// If MBBI is the first instruction of the first basic block, returns null.
static MachineBasicBlock::const_iterator
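For reference, each record the deleted EmitXRayTable wrote into
xray_instr_map occupied 32 bytes. The layout below is reconstructed from the
EmitSymbolValue/EmitBytes/EmitZeros calls above; the struct name is mine, not
LLVM's:

    #include <cstdint>
    // 8 + 8 + 1 + 1 + 14 = 32 bytes per sled entry.
    struct XRaySledRecord {
      uint64_t SledAddress;      // label planted at the patchable sled
      uint64_t FunctionAddress;  // CurrentFnSym, the enclosing function
      uint8_t  Kind;             // SledKind: entry, exit or tail call
      uint8_t  AlwaysInstrument; // from the function's XRay attributes
      uint8_t  Padding[14];      // EmitZeros(14) pads to 32 bytes
    };
    static_assert(sizeof(XRaySledRecord) == 32, "matches emitted layout");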
diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp
index 2b0e672d56f2..d7792e296a58 100644
--- a/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -598,198 +598,136 @@ int X86TTIImpl::getArithmeticInstrCost(
int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
Type *SubTp) {
-
- if (Kind == TTI::SK_Reverse) {
+ if (Kind == TTI::SK_Reverse || Kind == TTI::SK_Alternate) {
+ // 64-bit packed float vectors (v2f32) are widened to type v4f32.
+ // 64-bit packed integer vectors (v2i32) are promoted to type v2i64.
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
static const CostTblEntry AVX512VBMIShuffleTbl[] = {
- { ISD::VECTOR_SHUFFLE, MVT::v64i8, 1 }, // vpermb
- { ISD::VECTOR_SHUFFLE, MVT::v32i8, 1 } // vpermb
+ { TTI::SK_Reverse, MVT::v64i8, 1 }, // vpermb
+ { TTI::SK_Reverse, MVT::v32i8, 1 } // vpermb
};
if (ST->hasVBMI())
- if (const auto *Entry = CostTableLookup(AVX512VBMIShuffleTbl,
- ISD::VECTOR_SHUFFLE, LT.second))
+ if (const auto *Entry =
+ CostTableLookup(AVX512VBMIShuffleTbl, Kind, LT.second))
return LT.first * Entry->Cost;
static const CostTblEntry AVX512BWShuffleTbl[] = {
- { ISD::VECTOR_SHUFFLE, MVT::v32i16, 1 }, // vpermw
- { ISD::VECTOR_SHUFFLE, MVT::v16i16, 1 }, // vpermw
- { ISD::VECTOR_SHUFFLE, MVT::v64i8, 6 } // vextracti64x4 + 2*vperm2i128
- // + 2*pshufb + vinserti64x4
+ { TTI::SK_Reverse, MVT::v32i16, 1 }, // vpermw
+ { TTI::SK_Reverse, MVT::v16i16, 1 }, // vpermw
+ { TTI::SK_Reverse, MVT::v64i8, 6 } // vextracti64x4 + 2*vperm2i128
+ // + 2*pshufb + vinserti64x4
};
if (ST->hasBWI())
- if (const auto *Entry = CostTableLookup(AVX512BWShuffleTbl,
- ISD::VECTOR_SHUFFLE, LT.second))
+ if (const auto *Entry =
+ CostTableLookup(AVX512BWShuffleTbl, Kind, LT.second))
return LT.first * Entry->Cost;
static const CostTblEntry AVX512ShuffleTbl[] = {
- { ISD::VECTOR_SHUFFLE, MVT::v8f64, 1 }, // vpermpd
- { ISD::VECTOR_SHUFFLE, MVT::v16f32, 1 }, // vpermps
- { ISD::VECTOR_SHUFFLE, MVT::v8i64, 1 }, // vpermq
- { ISD::VECTOR_SHUFFLE, MVT::v16i32, 1 }, // vpermd
+ { TTI::SK_Reverse, MVT::v8f64, 1 }, // vpermpd
+ { TTI::SK_Reverse, MVT::v16f32, 1 }, // vpermps
+ { TTI::SK_Reverse, MVT::v8i64, 1 }, // vpermq
+ { TTI::SK_Reverse, MVT::v16i32, 1 }, // vpermd
};
if (ST->hasAVX512())
if (const auto *Entry =
- CostTableLookup(AVX512ShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second))
+ CostTableLookup(AVX512ShuffleTbl, Kind, LT.second))
return LT.first * Entry->Cost;
static const CostTblEntry AVX2ShuffleTbl[] = {
- { ISD::VECTOR_SHUFFLE, MVT::v4f64, 1 }, // vpermpd
- { ISD::VECTOR_SHUFFLE, MVT::v8f32, 1 }, // vpermps
- { ISD::VECTOR_SHUFFLE, MVT::v4i64, 1 }, // vpermq
- { ISD::VECTOR_SHUFFLE, MVT::v8i32, 1 }, // vpermd
- { ISD::VECTOR_SHUFFLE, MVT::v16i16, 2 }, // vperm2i128 + pshufb
- { ISD::VECTOR_SHUFFLE, MVT::v32i8, 2 } // vperm2i128 + pshufb
+ { TTI::SK_Reverse, MVT::v4f64, 1 }, // vpermpd
+ { TTI::SK_Reverse, MVT::v8f32, 1 }, // vpermps
+ { TTI::SK_Reverse, MVT::v4i64, 1 }, // vpermq
+ { TTI::SK_Reverse, MVT::v8i32, 1 }, // vpermd
+ { TTI::SK_Reverse, MVT::v16i16, 2 }, // vperm2i128 + pshufb
+ { TTI::SK_Reverse, MVT::v32i8, 2 }, // vperm2i128 + pshufb
+
+ { TTI::SK_Alternate, MVT::v16i16, 1 }, // vpblendw
+ { TTI::SK_Alternate, MVT::v32i8, 1 } // vpblendvb
};
if (ST->hasAVX2())
- if (const auto *Entry =
- CostTableLookup(AVX2ShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second))
+ if (const auto *Entry = CostTableLookup(AVX2ShuffleTbl, Kind, LT.second))
return LT.first * Entry->Cost;
static const CostTblEntry AVX1ShuffleTbl[] = {
- { ISD::VECTOR_SHUFFLE, MVT::v4f64, 2 }, // vperm2f128 + vpermilpd
- { ISD::VECTOR_SHUFFLE, MVT::v8f32, 2 }, // vperm2f128 + vpermilps
- { ISD::VECTOR_SHUFFLE, MVT::v4i64, 2 }, // vperm2f128 + vpermilpd
- { ISD::VECTOR_SHUFFLE, MVT::v8i32, 2 }, // vperm2f128 + vpermilps
- { ISD::VECTOR_SHUFFLE, MVT::v16i16, 4 }, // vextractf128 + 2*pshufb
- // + vinsertf128
- { ISD::VECTOR_SHUFFLE, MVT::v32i8, 4 } // vextractf128 + 2*pshufb
- // + vinsertf128
+ { TTI::SK_Reverse, MVT::v4f64, 2 }, // vperm2f128 + vpermilpd
+ { TTI::SK_Reverse, MVT::v8f32, 2 }, // vperm2f128 + vpermilps
+ { TTI::SK_Reverse, MVT::v4i64, 2 }, // vperm2f128 + vpermilpd
+ { TTI::SK_Reverse, MVT::v8i32, 2 }, // vperm2f128 + vpermilps
+ { TTI::SK_Reverse, MVT::v16i16, 4 }, // vextractf128 + 2*pshufb
+ // + vinsertf128
+ { TTI::SK_Reverse, MVT::v32i8, 4 }, // vextractf128 + 2*pshufb
+ // + vinsertf128
+
+ { TTI::SK_Alternate, MVT::v4i64, 1 }, // vblendpd
+ { TTI::SK_Alternate, MVT::v4f64, 1 }, // vblendpd
+ { TTI::SK_Alternate, MVT::v8i32, 1 }, // vblendps
+ { TTI::SK_Alternate, MVT::v8f32, 1 }, // vblendps
+ { TTI::SK_Alternate, MVT::v16i16, 3 }, // vpand + vpandn + vpor
+ { TTI::SK_Alternate, MVT::v32i8, 3 } // vpand + vpandn + vpor
};
if (ST->hasAVX())
- if (const auto *Entry =
- CostTableLookup(AVX1ShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second))
+ if (const auto *Entry = CostTableLookup(AVX1ShuffleTbl, Kind, LT.second))
+ return LT.first * Entry->Cost;
+
+ static const CostTblEntry SSE41ShuffleTbl[] = {
+ { TTI::SK_Alternate, MVT::v2i64, 1 }, // pblendw
+ { TTI::SK_Alternate, MVT::v2f64, 1 }, // movsd
+ { TTI::SK_Alternate, MVT::v4i32, 1 }, // pblendw
+ { TTI::SK_Alternate, MVT::v4f32, 1 }, // blendps
+ { TTI::SK_Alternate, MVT::v8i16, 1 }, // pblendw
+ { TTI::SK_Alternate, MVT::v16i8, 1 } // pblendvb
+ };
+
+ if (ST->hasSSE41())
+ if (const auto *Entry = CostTableLookup(SSE41ShuffleTbl, Kind, LT.second))
return LT.first * Entry->Cost;
static const CostTblEntry SSSE3ShuffleTbl[] = {
- { ISD::VECTOR_SHUFFLE, MVT::v8i16, 1 }, // pshufb
- { ISD::VECTOR_SHUFFLE, MVT::v16i8, 1 } // pshufb
+ { TTI::SK_Reverse, MVT::v8i16, 1 }, // pshufb
+ { TTI::SK_Reverse, MVT::v16i8, 1 }, // pshufb
+
+ { TTI::SK_Alternate, MVT::v8i16, 3 }, // pshufb + pshufb + por
+ { TTI::SK_Alternate, MVT::v16i8, 3 } // pshufb + pshufb + por
};
if (ST->hasSSSE3())
- if (const auto *Entry =
- CostTableLookup(SSSE3ShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second))
+ if (const auto *Entry = CostTableLookup(SSSE3ShuffleTbl, Kind, LT.second))
return LT.first * Entry->Cost;
static const CostTblEntry SSE2ShuffleTbl[] = {
- { ISD::VECTOR_SHUFFLE, MVT::v2f64, 1 }, // shufpd
- { ISD::VECTOR_SHUFFLE, MVT::v2i64, 1 }, // pshufd
- { ISD::VECTOR_SHUFFLE, MVT::v4i32, 1 }, // pshufd
- { ISD::VECTOR_SHUFFLE, MVT::v8i16, 3 }, // pshuflw + pshufhw + pshufd
- { ISD::VECTOR_SHUFFLE, MVT::v16i8, 9 } // 2*pshuflw + 2*pshufhw
- // + 2*pshufd + 2*unpck + packus
+ { TTI::SK_Reverse, MVT::v2f64, 1 }, // shufpd
+ { TTI::SK_Reverse, MVT::v2i64, 1 }, // pshufd
+ { TTI::SK_Reverse, MVT::v4i32, 1 }, // pshufd
+ { TTI::SK_Reverse, MVT::v8i16, 3 }, // pshuflw + pshufhw + pshufd
+ { TTI::SK_Reverse, MVT::v16i8, 9 }, // 2*pshuflw + 2*pshufhw
+ // + 2*pshufd + 2*unpck + packus
+
+ { TTI::SK_Alternate, MVT::v2i64, 1 }, // movsd
+ { TTI::SK_Alternate, MVT::v2f64, 1 }, // movsd
+ { TTI::SK_Alternate, MVT::v4i32, 2 }, // 2*shufps
+ { TTI::SK_Alternate, MVT::v8i16, 3 }, // pand + pandn + por
+ { TTI::SK_Alternate, MVT::v16i8, 3 } // pand + pandn + por
};
if (ST->hasSSE2())
- if (const auto *Entry =
- CostTableLookup(SSE2ShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second))
+ if (const auto *Entry = CostTableLookup(SSE2ShuffleTbl, Kind, LT.second))
return LT.first * Entry->Cost;
static const CostTblEntry SSE1ShuffleTbl[] = {
- { ISD::VECTOR_SHUFFLE, MVT::v4f32, 1 }, // shufps
+ { TTI::SK_Reverse, MVT::v4f32, 1 }, // shufps
+ { TTI::SK_Alternate, MVT::v4f32, 2 } // 2*shufps
};
if (ST->hasSSE1())
- if (const auto *Entry =
- CostTableLookup(SSE1ShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second))
+ if (const auto *Entry = CostTableLookup(SSE1ShuffleTbl, Kind, LT.second))
return LT.first * Entry->Cost;
- } else if (Kind == TTI::SK_Alternate) {
- // 64-bit packed float vectors (v2f32) are widened to type v4f32.
- // 64-bit packed integer vectors (v2i32) are promoted to type v2i64.
- std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
-
- // The backend knows how to generate a single VEX.256 version of
- // instruction VPBLENDW if the target supports AVX2.
- if (ST->hasAVX2() && LT.second == MVT::v16i16)
- return LT.first;
-
- static const CostTblEntry AVXAltShuffleTbl[] = {
- {ISD::VECTOR_SHUFFLE, MVT::v4i64, 1}, // vblendpd
- {ISD::VECTOR_SHUFFLE, MVT::v4f64, 1}, // vblendpd
-
- {ISD::VECTOR_SHUFFLE, MVT::v8i32, 1}, // vblendps
- {ISD::VECTOR_SHUFFLE, MVT::v8f32, 1}, // vblendps
-
- // This shuffle is custom lowered into a sequence of:
- // 2x vextractf128 , 2x vpblendw , 1x vinsertf128
- {ISD::VECTOR_SHUFFLE, MVT::v16i16, 5},
-
- // This shuffle is custom lowered into a long sequence of:
- // 2x vextractf128 , 4x vpshufb , 2x vpor , 1x vinsertf128
- {ISD::VECTOR_SHUFFLE, MVT::v32i8, 9}
- };
-
- if (ST->hasAVX())
- if (const auto *Entry = CostTableLookup(AVXAltShuffleTbl,
- ISD::VECTOR_SHUFFLE, LT.second))
- return LT.first * Entry->Cost;
-
- static const CostTblEntry SSE41AltShuffleTbl[] = {
- // These are lowered into movsd.
- {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1},
- {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1},
-
- // packed float vectors with four elements are lowered into BLENDI dag
- // nodes. A v4i32/v4f32 BLENDI generates a single 'blendps'/'blendpd'.
- {ISD::VECTOR_SHUFFLE, MVT::v4i32, 1},
- {ISD::VECTOR_SHUFFLE, MVT::v4f32, 1},
-
- // This shuffle generates a single pshufw.
- {ISD::VECTOR_SHUFFLE, MVT::v8i16, 1},
-
- // There is no instruction that matches a v16i8 alternate shuffle.
- // The backend will expand it into the sequence 'pshufb + pshufb + or'.
- {ISD::VECTOR_SHUFFLE, MVT::v16i8, 3}
- };
-
- if (ST->hasSSE41())
- if (const auto *Entry = CostTableLookup(SSE41AltShuffleTbl, ISD::VECTOR_SHUFFLE,
- LT.second))
- return LT.first * Entry->Cost;
-
- static const CostTblEntry SSSE3AltShuffleTbl[] = {
- {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1}, // movsd
- {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1}, // movsd
-
- // SSE3 doesn't have 'blendps'. The following shuffles are expanded into
- // the sequence 'shufps + pshufd'
- {ISD::VECTOR_SHUFFLE, MVT::v4i32, 2},
- {ISD::VECTOR_SHUFFLE, MVT::v4f32, 2},
-
- {ISD::VECTOR_SHUFFLE, MVT::v8i16, 3}, // pshufb + pshufb + or
- {ISD::VECTOR_SHUFFLE, MVT::v16i8, 3} // pshufb + pshufb + or
- };
-
- if (ST->hasSSSE3())
- if (const auto *Entry = CostTableLookup(SSSE3AltShuffleTbl,
- ISD::VECTOR_SHUFFLE, LT.second))
- return LT.first * Entry->Cost;
-
- static const CostTblEntry SSEAltShuffleTbl[] = {
- {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1}, // movsd
- {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1}, // movsd
-
- {ISD::VECTOR_SHUFFLE, MVT::v4i32, 2}, // shufps + pshufd
- {ISD::VECTOR_SHUFFLE, MVT::v4f32, 2}, // shufps + pshufd
-
- // This is expanded into a long sequence of four extract + four insert.
- {ISD::VECTOR_SHUFFLE, MVT::v8i16, 8}, // 4 x pextrw + 4 pinsrw.
-
- // 8 x (pinsrw + pextrw + and + movb + movzb + or)
- {ISD::VECTOR_SHUFFLE, MVT::v16i8, 48}
- };
-
- // Fall-back (SSE3 and SSE2).
- if (const auto *Entry = CostTableLookup(SSEAltShuffleTbl,
- ISD::VECTOR_SHUFFLE, LT.second))
- return LT.first * Entry->Cost;
-
} else if (Kind == TTI::SK_PermuteTwoSrc) {
// We assume that source and destination have the same vector type.
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);