author     Dimitry Andric <dim@FreeBSD.org>    2018-07-28 10:51:19 +0000
committer  Dimitry Andric <dim@FreeBSD.org>    2018-07-28 10:51:19 +0000
commit     eb11fae6d08f479c0799db45860a98af528fa6e7 (patch)
tree       44d492a50c8c1a7eb8e2d17ea3360ec4d066f042 /lib/Target/Hexagon
parent     b8a2042aa938069e862750553db0e4d82d25822c (diff)
Diffstat (limited to 'lib/Target/Hexagon')
-rw-r--r--  lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp | 83
-rw-r--r--  lib/Target/Hexagon/BitTracker.cpp | 24
-rw-r--r--  lib/Target/Hexagon/BitTracker.h | 12
-rw-r--r--  lib/Target/Hexagon/CMakeLists.txt | 7
-rw-r--r--  lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp | 60
-rw-r--r--  lib/Target/Hexagon/Hexagon.h | 2
-rw-r--r--  lib/Target/Hexagon/Hexagon.td | 70
-rw-r--r--  lib/Target/Hexagon/HexagonAsmPrinter.cpp | 97
-rwxr-xr-x  lib/Target/Hexagon/HexagonAsmPrinter.h | 16
-rw-r--r--  lib/Target/Hexagon/HexagonBitSimplify.cpp | 23
-rw-r--r--  lib/Target/Hexagon/HexagonBitTracker.cpp | 2
-rw-r--r--  lib/Target/Hexagon/HexagonBlockRanges.cpp | 16
-rw-r--r--  lib/Target/Hexagon/HexagonBranchRelaxation.cpp | 19
-rw-r--r--  lib/Target/Hexagon/HexagonCallingConv.td | 134
-rw-r--r--  lib/Target/Hexagon/HexagonCommonGEP.cpp | 89
-rw-r--r--  lib/Target/Hexagon/HexagonConstExtenders.cpp | 190
-rw-r--r--  lib/Target/Hexagon/HexagonConstPropagation.cpp | 80
-rw-r--r--  lib/Target/Hexagon/HexagonCopyToCombine.cpp | 14
-rw-r--r--  lib/Target/Hexagon/HexagonDepArch.td | 12
-rw-r--r--  lib/Target/Hexagon/HexagonDepDecoders.h | 13
-rw-r--r--  lib/Target/Hexagon/HexagonDepIICScalar.td | 1209
-rw-r--r--  lib/Target/Hexagon/HexagonDepInstrInfo.td | 292
-rw-r--r--  lib/Target/Hexagon/HexagonDepMappings.td | 1
-rw-r--r--  lib/Target/Hexagon/HexagonEarlyIfConv.cpp | 111
-rw-r--r--  lib/Target/Hexagon/HexagonExpandCondsets.cpp | 68
-rw-r--r--  lib/Target/Hexagon/HexagonFixupHwLoops.cpp | 20
-rw-r--r--  lib/Target/Hexagon/HexagonFrameLowering.cpp | 66
-rw-r--r--  lib/Target/Hexagon/HexagonGatherPacketize.cpp | 2
-rw-r--r--  lib/Target/Hexagon/HexagonGenInsert.cpp | 25
-rw-r--r--  lib/Target/Hexagon/HexagonGenMux.cpp | 14
-rw-r--r--  lib/Target/Hexagon/HexagonGenPredicate.cpp | 18
-rw-r--r--  lib/Target/Hexagon/HexagonHardwareLoops.cpp | 74
-rw-r--r--  lib/Target/Hexagon/HexagonHazardRecognizer.cpp | 54
-rw-r--r--  lib/Target/Hexagon/HexagonHazardRecognizer.h | 17
-rw-r--r--  lib/Target/Hexagon/HexagonISelDAGToDAG.cpp | 508
-rw-r--r--  lib/Target/Hexagon/HexagonISelDAGToDAG.h | 12
-rw-r--r--  lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp | 272
-rw-r--r--  lib/Target/Hexagon/HexagonISelLowering.cpp | 1937
-rw-r--r--  lib/Target/Hexagon/HexagonISelLowering.h | 104
-rw-r--r--  lib/Target/Hexagon/HexagonISelLoweringHVX.cpp | 1333
-rw-r--r--  lib/Target/Hexagon/HexagonInstrFormatsV60.td | 2
-rw-r--r--  lib/Target/Hexagon/HexagonInstrInfo.cpp | 387
-rw-r--r--  lib/Target/Hexagon/HexagonInstrInfo.h | 26
-rw-r--r--  lib/Target/Hexagon/HexagonIntrinsics.td | 11
-rw-r--r--  lib/Target/Hexagon/HexagonIntrinsicsV5.td | 2
-rw-r--r--  lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp | 141
-rw-r--r--  lib/Target/Hexagon/HexagonMachineScheduler.cpp | 469
-rw-r--r--  lib/Target/Hexagon/HexagonMachineScheduler.h | 52
-rw-r--r--  lib/Target/Hexagon/HexagonMapAsm2IntrinV65.gen.td | 154
-rw-r--r--  lib/Target/Hexagon/HexagonNewValueJump.cpp | 52
-rw-r--r--  lib/Target/Hexagon/HexagonOptAddrMode.cpp | 254
-rw-r--r--  lib/Target/Hexagon/HexagonPatterns.td | 799
-rw-r--r--  lib/Target/Hexagon/HexagonPatternsHVX.td | 497
-rw-r--r--  lib/Target/Hexagon/HexagonPseudo.td | 96
-rw-r--r--  lib/Target/Hexagon/HexagonRegisterInfo.cpp | 65
-rw-r--r--  lib/Target/Hexagon/HexagonRegisterInfo.h | 10
-rw-r--r--  lib/Target/Hexagon/HexagonRegisterInfo.td | 143
-rw-r--r--  lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp | 15
-rw-r--r--  lib/Target/Hexagon/HexagonSplitDouble.cpp | 120
-rw-r--r--  lib/Target/Hexagon/HexagonStoreWidening.cpp | 22
-rw-r--r--  lib/Target/Hexagon/HexagonSubtarget.cpp | 139
-rw-r--r--  lib/Target/Hexagon/HexagonSubtarget.h | 64
-rw-r--r--  lib/Target/Hexagon/HexagonTargetMachine.cpp | 23
-rw-r--r--  lib/Target/Hexagon/HexagonTargetObjectFile.cpp | 26
-rw-r--r--  lib/Target/Hexagon/HexagonTargetTransformInfo.cpp | 252
-rw-r--r--  lib/Target/Hexagon/HexagonTargetTransformInfo.h | 82
-rw-r--r--  lib/Target/Hexagon/HexagonVExtract.cpp | 166
-rw-r--r--  lib/Target/Hexagon/HexagonVLIWPacketizer.cpp | 85
-rw-r--r--  lib/Target/Hexagon/HexagonVLIWPacketizer.h | 2
-rw-r--r--  lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp | 40
-rw-r--r--  lib/Target/Hexagon/HexagonVectorPrint.cpp | 17
-rw-r--r--  lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp | 79
-rw-r--r--  lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h | 2
-rw-r--r--  lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp | 8
-rw-r--r--  lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp | 19
-rw-r--r--  lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp | 1133
-rw-r--r--  lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h | 45
-rw-r--r--  lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp | 28
-rw-r--r--  lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp | 38
-rw-r--r--  lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp | 31
-rw-r--r--  lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.h | 6
-rw-r--r--  lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp | 33
-rw-r--r--  lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h | 6
-rw-r--r--  lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp | 13
-rw-r--r--  lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp | 13
-rw-r--r--  lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h | 9
-rw-r--r--  lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp | 6
-rw-r--r--  lib/Target/Hexagon/RDFCopy.cpp | 5
-rw-r--r--  lib/Target/Hexagon/RDFDeadCode.cpp | 2
-rw-r--r--  lib/Target/Hexagon/RDFGraph.cpp | 4
-rw-r--r--  lib/Target/Hexagon/RDFLiveness.cpp | 10
-rw-r--r--  lib/Target/Hexagon/RDFLiveness.h | 4
-rw-r--r--  lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp | 2
93 files changed, 7457 insertions, 5352 deletions
diff --git a/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp b/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
index 387296c69c39..92bda224f3dc 100644
--- a/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
+++ b/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
@@ -118,7 +118,6 @@ class HexagonAsmParser : public MCTargetAsmParser {
bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
bool ParseDirectiveSubsection(SMLoc L);
- bool ParseDirectiveValue(unsigned Size, SMLoc L);
bool ParseDirectiveComm(bool IsLocal, SMLoc L);
bool RegisterMatchesArch(unsigned MatchNum) const;
@@ -165,6 +164,10 @@ public:
MCB.setOpcode(Hexagon::BUNDLE);
setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits()));
+ Parser.addAliasForDirective(".half", ".2byte");
+ Parser.addAliasForDirective(".hword", ".2byte");
+ Parser.addAliasForDirective(".word", ".4byte");
+
MCAsmParserExtension::Initialize(_Parser);
}
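
With these aliases registered, the generic AsmParser handles the data directives itself: for example, .word 0x12345678 is now parsed as .4byte 0x12345678, and .half 1 or .hword 1 as .2byte 1. This is what allows the hand-rolled ParseDirectiveValue further below to be deleted.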
@@ -462,9 +465,9 @@ void HexagonOperand::print(raw_ostream &OS) const {
}
bool HexagonAsmParser::finishBundle(SMLoc IDLoc, MCStreamer &Out) {
- DEBUG(dbgs() << "Bundle:");
- DEBUG(MCB.dump_pretty(dbgs()));
- DEBUG(dbgs() << "--\n");
+ LLVM_DEBUG(dbgs() << "Bundle:");
+ LLVM_DEBUG(MCB.dump_pretty(dbgs()));
+ LLVM_DEBUG(dbgs() << "--\n");
MCB.setLoc(IDLoc);
// Check the bundle for errors.
@@ -506,16 +509,19 @@ bool HexagonAsmParser::matchBundleOptions() {
"supported with this architecture";
StringRef Option = Parser.getTok().getString();
auto IDLoc = Parser.getTok().getLoc();
- if (Option.compare_lower("endloop0") == 0)
+ if (Option.compare_lower("endloop01") == 0) {
+ HexagonMCInstrInfo::setInnerLoop(MCB);
+ HexagonMCInstrInfo::setOuterLoop(MCB);
+ } else if (Option.compare_lower("endloop0") == 0) {
HexagonMCInstrInfo::setInnerLoop(MCB);
- else if (Option.compare_lower("endloop1") == 0)
+ } else if (Option.compare_lower("endloop1") == 0) {
HexagonMCInstrInfo::setOuterLoop(MCB);
- else if (Option.compare_lower("mem_noshuf") == 0)
+ } else if (Option.compare_lower("mem_noshuf") == 0) {
if (getSTI().getFeatureBits()[Hexagon::FeatureMemNoShuf])
HexagonMCInstrInfo::setMemReorderDisabled(MCB);
else
return getParser().Error(IDLoc, MemNoShuffMsg);
- else
+ } else
return getParser().Error(IDLoc, llvm::Twine("'") + Option +
"' is not a valid bundle option");
Lex();
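
With the new combined option, a packet ending in :endloop01 (assuming that spelling, by analogy with :endloop0 and :endloop1) sets both the inner-loop and outer-loop bits in one step instead of being rejected as an unknown bundle option.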
@@ -554,9 +560,9 @@ bool HexagonAsmParser::matchOneInstruction(MCInst &MCI, SMLoc IDLoc,
canonicalizeImmediates(MCI);
result = processInstruction(MCI, InstOperands, IDLoc);
- DEBUG(dbgs() << "Insn:");
- DEBUG(MCI.dump_pretty(dbgs()));
- DEBUG(dbgs() << "\n\n");
+ LLVM_DEBUG(dbgs() << "Insn:");
+ LLVM_DEBUG(MCI.dump_pretty(dbgs()));
+ LLVM_DEBUG(dbgs() << "\n\n");
MCI.setLoc(IDLoc);
}
@@ -648,11 +654,6 @@ bool HexagonAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
/// ParseDirective parses the Hexagon specific directives
bool HexagonAsmParser::ParseDirective(AsmToken DirectiveID) {
StringRef IDVal = DirectiveID.getIdentifier();
- if ((IDVal.lower() == ".word") || (IDVal.lower() == ".4byte"))
- return ParseDirectiveValue(4, DirectiveID.getLoc());
- if (IDVal.lower() == ".short" || IDVal.lower() == ".hword" ||
- IDVal.lower() == ".half")
- return ParseDirectiveValue(2, DirectiveID.getLoc());
if (IDVal.lower() == ".falign")
return ParseDirectiveFalign(256, DirectiveID.getLoc());
if ((IDVal.lower() == ".lcomm") || (IDVal.lower() == ".lcommon"))
@@ -720,39 +721,6 @@ bool HexagonAsmParser::ParseDirectiveFalign(unsigned Size, SMLoc L) {
return false;
}
-/// ::= .word [ expression (, expression)* ]
-bool HexagonAsmParser::ParseDirectiveValue(unsigned Size, SMLoc L) {
- if (getLexer().isNot(AsmToken::EndOfStatement)) {
- while (true) {
- const MCExpr *Value;
- SMLoc ExprLoc = L;
- if (getParser().parseExpression(Value))
- return true;
-
- // Special case constant expressions to match code generator.
- if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) {
- assert(Size <= 8 && "Invalid size");
- uint64_t IntValue = MCE->getValue();
- if (!isUIntN(8 * Size, IntValue) && !isIntN(8 * Size, IntValue))
- return Error(ExprLoc, "literal value out of range for directive");
- getStreamer().EmitIntValue(IntValue, Size);
- } else
- getStreamer().EmitValue(Value, Size);
-
- if (getLexer().is(AsmToken::EndOfStatement))
- break;
-
- // FIXME: Improve diagnostic.
- if (getLexer().isNot(AsmToken::Comma))
- return TokError("unexpected token in directive");
- Lex();
- }
- }
-
- Lex();
- return false;
-}
-
// This is largely a copy of AsmParser's ParseDirectiveComm extended to
// accept a 3rd argument, AccessAlignment which indicates the smallest
// memory access made to the symbol, expressed in bytes. If no
@@ -1293,9 +1261,9 @@ unsigned HexagonAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp,
return Match_Success;
}
- DEBUG(dbgs() << "Unmatched Operand:");
- DEBUG(Op->dump());
- DEBUG(dbgs() << "\n");
+ LLVM_DEBUG(dbgs() << "Unmatched Operand:");
+ LLVM_DEBUG(Op->dump());
+ LLVM_DEBUG(dbgs() << "\n");
return Match_InvalidOperand;
}
@@ -1333,6 +1301,17 @@ int HexagonAsmParser::processInstruction(MCInst &Inst,
}
break;
+ case Hexagon::J2_trap1:
+ if (!getSTI().getFeatureBits()[Hexagon::ArchV65]) {
+ MCOperand &Rx = Inst.getOperand(0);
+ MCOperand &Ry = Inst.getOperand(1);
+ if (Rx.getReg() != Hexagon::R0 || Ry.getReg() != Hexagon::R0) {
+ Error(IDLoc, "trap1 can only have register r0 as operand");
+ return Match_InvalidOperand;
+ }
+ }
+ break;
+
case Hexagon::A2_iconst: {
Inst.setOpcode(Hexagon::A2_addi);
MCOperand Reg = Inst.getOperand(0);
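
The trap1 check above means that on pre-V65 subtargets any trap1 whose register operand is not r0 is rejected: assuming the two-operand form trap1(Rx,#u8), trap1(r0,#2) still assembles while trap1(r2,#2) is diagnosed with the error shown.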
diff --git a/lib/Target/Hexagon/BitTracker.cpp b/lib/Target/Hexagon/BitTracker.cpp
index 15d6a05a0078..69529b0d1162 100644
--- a/lib/Target/Hexagon/BitTracker.cpp
+++ b/lib/Target/Hexagon/BitTracker.cpp
@@ -779,15 +779,18 @@ bool BT::UseQueueType::Cmp::operator()(const MachineInstr *InstA,
return BA->getNumber() > BB->getNumber();
}
- MachineBasicBlock::const_iterator ItA = InstA->getIterator();
- MachineBasicBlock::const_iterator ItB = InstB->getIterator();
- MachineBasicBlock::const_iterator End = BA->end();
- while (ItA != End) {
- if (ItA == ItB)
- return false; // ItA was before ItB.
- ++ItA;
- }
- return true;
+ auto getDist = [this] (const MachineInstr *MI) {
+ auto F = Dist.find(MI);
+ if (F != Dist.end())
+ return F->second;
+ MachineBasicBlock::const_iterator I = MI->getParent()->begin();
+ MachineBasicBlock::const_iterator E = MI->getIterator();
+ unsigned D = std::distance(I, E);
+ Dist.insert(std::make_pair(MI, D));
+ return D;
+ };
+
+ return getDist(InstA) > getDist(InstB);
}
// Main W-Z implementation.
@@ -840,7 +843,7 @@ void BT::visitPHI(const MachineInstr &PI) {
void BT::visitNonBranch(const MachineInstr &MI) {
if (Trace)
dbgs() << "Visit MI(" << printMBBReference(*MI.getParent()) << "): " << MI;
- if (MI.isDebugValue())
+ if (MI.isDebugInstr())
return;
assert(!MI.isBranch() && "Unexpected branch instruction");
@@ -1138,6 +1141,7 @@ void BT::run() {
runEdgeQueue(BlockScanned);
runUseQueue();
}
+ UseQ.reset();
if (Trace)
print_cells(dbgs() << "Cells after propagation:\n");
diff --git a/lib/Target/Hexagon/BitTracker.h b/lib/Target/Hexagon/BitTracker.h
index 5df6b61710f6..058225c0d812 100644
--- a/lib/Target/Hexagon/BitTracker.h
+++ b/lib/Target/Hexagon/BitTracker.h
@@ -13,6 +13,7 @@
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include <cassert>
#include <cstdint>
@@ -28,7 +29,6 @@ class ConstantInt;
class MachineRegisterInfo;
class MachineBasicBlock;
class MachineFunction;
-class MachineInstr;
class raw_ostream;
class TargetRegisterClass;
class TargetRegisterInfo;
@@ -73,6 +73,8 @@ private:
// Priority queue of instructions using modified registers, ordered by
// their relative position in a basic block.
struct UseQueueType {
+ UseQueueType() : Uses(Dist) {}
+
unsigned size() const {
return Uses.size();
}
@@ -90,12 +92,18 @@ private:
Set.erase(front());
Uses.pop();
}
+ void reset() {
+ Dist.clear();
+ }
private:
struct Cmp {
+ Cmp(DenseMap<const MachineInstr*,unsigned> &Map) : Dist(Map) {}
bool operator()(const MachineInstr *MI, const MachineInstr *MJ) const;
+ DenseMap<const MachineInstr*,unsigned> &Dist;
};
std::priority_queue<MachineInstr*, std::vector<MachineInstr*>, Cmp> Uses;
- DenseSet<MachineInstr*> Set; // Set to avoid adding duplicate entries.
+ DenseSet<const MachineInstr*> Set; // Set to avoid adding duplicate entries.
+ DenseMap<const MachineInstr*,unsigned> Dist;
};
void reset();
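
An aside on the pattern above: a std::priority_queue whose comparator keeps a reference to a map owned by the enclosing class, plus an explicit reset() of that map between runs. A minimal standalone sketch of the same idea (types and names hypothetical, not from this patch):

  #include <queue>
  #include <unordered_map>
  #include <vector>

  struct Item;

  // The comparator borrows externally owned positions; the real code
  // computes and caches a position on first query instead of using at().
  struct ByPos {
    explicit ByPos(std::unordered_map<const Item*, unsigned> &M) : Pos(M) {}
    bool operator()(const Item *A, const Item *B) const {
      return Pos.at(A) > Pos.at(B);    // smallest position comes out first
    }
    std::unordered_map<const Item*, unsigned> &Pos;
  };

  struct UseQueue {
    UseQueue() : Uses(ByPos(Pos)) {}   // bind the comparator to the member map
    void reset() { Pos.clear(); }      // drop cached positions between runs
    std::unordered_map<const Item*, unsigned> Pos;   // must precede Uses
    std::priority_queue<Item*, std::vector<Item*>, ByPos> Uses;
  };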
diff --git a/lib/Target/Hexagon/CMakeLists.txt b/lib/Target/Hexagon/CMakeLists.txt
index 1c36093923ac..a9f606c54eb1 100644
--- a/lib/Target/Hexagon/CMakeLists.txt
+++ b/lib/Target/Hexagon/CMakeLists.txt
@@ -2,6 +2,7 @@ set(LLVM_TARGET_DEFINITIONS Hexagon.td)
tablegen(LLVM HexagonGenAsmMatcher.inc -gen-asm-matcher)
tablegen(LLVM HexagonGenAsmWriter.inc -gen-asm-writer)
+tablegen(LLVM HexagonGenCallingConv.inc -gen-callingconv)
tablegen(LLVM HexagonGenDAGISel.inc -gen-dag-isel)
tablegen(LLVM HexagonGenDFAPacketizer.inc -gen-dfa-packetizer)
tablegen(LLVM HexagonGenDisassemblerTables.inc -gen-disassembler)
@@ -9,6 +10,7 @@ tablegen(LLVM HexagonGenInstrInfo.inc -gen-instr-info)
tablegen(LLVM HexagonGenMCCodeEmitter.inc -gen-emitter)
tablegen(LLVM HexagonGenRegisterInfo.inc -gen-register-info)
tablegen(LLVM HexagonGenSubtargetInfo.inc -gen-subtarget)
+
add_public_tablegen_target(HexagonCommonTableGen)
add_llvm_target(HexagonCodeGen
@@ -59,6 +61,7 @@ add_llvm_target(HexagonCodeGen
HexagonTargetTransformInfo.cpp
HexagonVectorLoopCarriedReuse.cpp
HexagonVectorPrint.cpp
+ HexagonVExtract.cpp
HexagonVLIWPacketizer.cpp
RDFCopy.cpp
RDFDeadCode.cpp
@@ -68,7 +71,7 @@ add_llvm_target(HexagonCodeGen
)
add_subdirectory(AsmParser)
-add_subdirectory(TargetInfo)
-add_subdirectory(MCTargetDesc)
add_subdirectory(Disassembler)
+add_subdirectory(MCTargetDesc)
+add_subdirectory(TargetInfo)
diff --git a/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp b/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
index 481b692ae8bf..1a619ebda84e 100644
--- a/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
+++ b/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
@@ -40,7 +40,7 @@ using DecodeStatus = MCDisassembler::DecodeStatus;
namespace {
-/// \brief Hexagon disassembler for all Hexagon platforms.
+/// Hexagon disassembler for all Hexagon platforms.
class HexagonDisassembler : public MCDisassembler {
public:
std::unique_ptr<MCInstrInfo const> const MCII;
@@ -127,12 +127,18 @@ static DecodeStatus DecodeHvxQRRegisterClass(MCInst &Inst, unsigned RegNo,
static DecodeStatus DecodeCtrRegsRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
const void *Decoder);
+static DecodeStatus DecodeGuestRegsRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
static DecodeStatus DecodeModRegsRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
const void *Decoder);
static DecodeStatus DecodeCtrRegs64RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
const void *Decoder);
+static DecodeStatus DecodeGuestRegs64RegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
static DecodeStatus unsignedImmDecoder(MCInst &MI, unsigned tmp,
uint64_t Address, const void *Decoder);
@@ -783,3 +789,55 @@ static DecodeStatus brtargetDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
HexagonMCInstrInfo::addConstant(MI, Extended, Disassembler.getContext());
return MCDisassembler::Success;
}
+
+static DecodeStatus DecodeGuestRegsRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t /*Address*/,
+ const void *Decoder) {
+ using namespace Hexagon;
+
+ static const MCPhysReg GuestRegDecoderTable[] = {
+ /* 0 */ GELR, GSR, GOSP, G3,
+ /* 4 */ G4, G5, G6, G7,
+ /* 8 */ G8, G9, G10, G11,
+ /* 12 */ G12, G13, G14, G15,
+ /* 16 */ GPMUCNT4, GPMUCNT5, GPMUCNT6, GPMUCNT7,
+ /* 20 */ G20, G21, G22, G23,
+ /* 24 */ GPCYCLELO, GPCYCLEHI, GPMUCNT0, GPMUCNT1,
+ /* 28 */ GPMUCNT2, GPMUCNT3, G30, G31
+ };
+
+ if (RegNo >= array_lengthof(GuestRegDecoderTable))
+ return MCDisassembler::Fail;
+ if (GuestRegDecoderTable[RegNo] == Hexagon::NoRegister)
+ return MCDisassembler::Fail;
+
+ unsigned Register = GuestRegDecoderTable[RegNo];
+ Inst.addOperand(MCOperand::createReg(Register));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeGuestRegs64RegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t /*Address*/,
+ const void *Decoder) {
+ using namespace Hexagon;
+
+ static const MCPhysReg GuestReg64DecoderTable[] = {
+ /* 0 */ G1_0, 0, G3_2, 0,
+ /* 4 */ G5_4, 0, G7_6, 0,
+ /* 8 */ G9_8, 0, G11_10, 0,
+ /* 12 */ G13_12, 0, G15_14, 0,
+ /* 16 */ G17_16, 0, G19_18, 0,
+ /* 20 */ G21_20, 0, G23_22, 0,
+ /* 24 */ G25_24, 0, G27_26, 0,
+ /* 28 */ G29_28, 0, G31_30, 0
+ };
+
+ if (RegNo >= array_lengthof(GuestReg64DecoderTable))
+ return MCDisassembler::Fail;
+ if (GuestReg64DecoderTable[RegNo] == Hexagon::NoRegister)
+ return MCDisassembler::Fail;
+
+ unsigned Register = GuestReg64DecoderTable[RegNo];
+ Inst.addOperand(MCOperand::createReg(Register));
+ return MCDisassembler::Success;
+}
diff --git a/lib/Target/Hexagon/Hexagon.h b/lib/Target/Hexagon/Hexagon.h
index 66b387b62c6c..6ec52d18cdc4 100644
--- a/lib/Target/Hexagon/Hexagon.h
+++ b/lib/Target/Hexagon/Hexagon.h
@@ -49,7 +49,7 @@
namespace llvm {
class HexagonTargetMachine;
- /// \brief Creates a Hexagon-specific Target Transformation Info pass.
+ /// Creates a Hexagon-specific Target Transformation Info pass.
ImmutablePass *createHexagonTargetTransformInfoPass(const HexagonTargetMachine *TM);
} // end namespace llvm;
diff --git a/lib/Target/Hexagon/Hexagon.td b/lib/Target/Hexagon/Hexagon.td
index 6292e2a7a4ea..69e263a425f8 100644
--- a/lib/Target/Hexagon/Hexagon.td
+++ b/lib/Target/Hexagon/Hexagon.td
@@ -36,32 +36,36 @@ def ExtensionHVXV62: SubtargetFeature<"hvxv62", "HexagonHVXVersion",
def ExtensionHVXV65: SubtargetFeature<"hvxv65", "HexagonHVXVersion",
"Hexagon::ArchEnum::V65", "Hexagon HVX instructions",
[ExtensionHVX,ExtensionHVXV60, ExtensionHVXV62]>;
-def ExtensionHVX64B
- : SubtargetFeature<"hvx-length64b", "UseHVX64BOps", "true",
- "Hexagon HVX 64B instructions", [ExtensionHVX]>;
-def ExtensionHVX128B
- : SubtargetFeature<"hvx-length128b", "UseHVX128BOps", "true",
- "Hexagon HVX 128B instructions", [ExtensionHVX]>;
-
-// This is an alias to ExtensionHVX128B to accept the hvx-double as
-// an acceptable subtarget feature.
-def ExtensionHVXDbl
- : SubtargetFeature<"hvx-double", "UseHVX128BOps", "true",
- "Hexagon HVX 128B instructions", [ExtensionHVX128B]>;
+def ExtensionHVX64B: SubtargetFeature<"hvx-length64b", "UseHVX64BOps",
+ "true", "Hexagon HVX 64B instructions", [ExtensionHVX]>;
+def ExtensionHVX128B: SubtargetFeature<"hvx-length128b", "UseHVX128BOps",
+ "true", "Hexagon HVX 128B instructions", [ExtensionHVX]>;
+
+def FeaturePackets: SubtargetFeature<"packets", "UsePackets", "true",
+ "Support for instruction packets">;
def FeatureLongCalls: SubtargetFeature<"long-calls", "UseLongCalls", "true",
"Use constant-extended calls">;
def FeatureMemNoShuf: SubtargetFeature<"mem_noshuf", "HasMemNoShuf", "false",
"Supports mem_noshuf feature">;
-def FeatureDuplex : SubtargetFeature<"duplex", "EnableDuplex", "true",
+def FeatureMemops: SubtargetFeature<"memops", "UseMemops", "true",
+ "Use memop instructions">;
+def FeatureNVJ: SubtargetFeature<"nvj", "UseNewValueJumps", "true",
+ "Support for new-value jumps", [FeaturePackets]>;
+def FeatureNVS: SubtargetFeature<"nvs", "UseNewValueStores", "true",
+ "Support for new-value stores", [FeaturePackets]>;
+def FeatureSmallData: SubtargetFeature<"small-data", "UseSmallData", "true",
+ "Allow GP-relative addressing of global variables">;
+def FeatureDuplex: SubtargetFeature<"duplex", "EnableDuplex", "true",
"Enable generation of duplex instruction">;
+def FeatureReservedR19: SubtargetFeature<"reserved-r19", "ReservedR19",
+ "true", "Reserve register R19">;
//===----------------------------------------------------------------------===//
// Hexagon Instruction Predicate Definitions.
//===----------------------------------------------------------------------===//
-def UseMEMOP : Predicate<"HST->useMemOps()">;
-def IEEERndNearV5T : Predicate<"HST->modeIEEERndNear()">;
+def UseMEMOPS : Predicate<"HST->useMemops()">;
def UseHVX64B : Predicate<"HST->useHVX64BOps()">,
AssemblerPredicate<"ExtensionHVX64B">;
def UseHVX128B : Predicate<"HST->useHVX128BOps()">,
@@ -75,10 +79,8 @@ def UseHVXV62 : Predicate<"HST->useHVXOps()">,
def UseHVXV65 : Predicate<"HST->useHVXOps()">,
AssemblerPredicate<"ExtensionHVXV65">;
-def Hvx64 : HwMode<"+hvx-length64b">;
-def Hvx64old : HwMode<"-hvx-double">;
-def Hvx128 : HwMode<"+hvx-length128b">;
-def Hvx128old : HwMode<"+hvx-double">;
+def Hvx64: HwMode<"+hvx-length64b">;
+def Hvx128: HwMode<"+hvx-length128b">;
//===----------------------------------------------------------------------===//
// Classes used for relation maps.
@@ -300,8 +302,10 @@ include "HexagonDepITypes.td"
include "HexagonInstrFormats.td"
include "HexagonDepInstrFormats.td"
include "HexagonDepInstrInfo.td"
+include "HexagonCallingConv.td"
include "HexagonPseudo.td"
include "HexagonPatterns.td"
+include "HexagonPatternsHVX.td"
include "HexagonPatternsV65.td"
include "HexagonDepMappings.td"
include "HexagonIntrinsics.td"
@@ -318,19 +322,34 @@ class Proc<string Name, SchedMachineModel Model,
list<SubtargetFeature> Features>
: ProcessorModel<Name, Model, Features>;
+def : Proc<"generic", HexagonModelV60,
+ [ArchV4, ArchV5, ArchV55, ArchV60,
+ FeatureDuplex, FeatureMemops, FeatureNVJ, FeatureNVS,
+ FeaturePackets, FeatureSmallData]>;
def : Proc<"hexagonv4", HexagonModelV4,
- [ArchV4, FeatureDuplex]>;
+ [ArchV4,
+ FeatureDuplex, FeatureMemops, FeatureNVJ, FeatureNVS,
+ FeaturePackets, FeatureSmallData]>;
def : Proc<"hexagonv5", HexagonModelV4,
- [ArchV4, ArchV5, FeatureDuplex]>;
+ [ArchV4, ArchV5,
+ FeatureDuplex, FeatureMemops, FeatureNVJ, FeatureNVS,
+ FeaturePackets, FeatureSmallData]>;
def : Proc<"hexagonv55", HexagonModelV55,
- [ArchV4, ArchV5, ArchV55, FeatureDuplex]>;
+ [ArchV4, ArchV5, ArchV55,
+ FeatureDuplex, FeatureMemops, FeatureNVJ, FeatureNVS,
+ FeaturePackets, FeatureSmallData]>;
def : Proc<"hexagonv60", HexagonModelV60,
- [ArchV4, ArchV5, ArchV55, ArchV60, FeatureDuplex]>;
+ [ArchV4, ArchV5, ArchV55, ArchV60,
+ FeatureDuplex, FeatureMemops, FeatureNVJ, FeatureNVS,
+ FeaturePackets, FeatureSmallData]>;
def : Proc<"hexagonv62", HexagonModelV62,
- [ArchV4, ArchV5, ArchV55, ArchV60, ArchV62, FeatureDuplex]>;
+ [ArchV4, ArchV5, ArchV55, ArchV60, ArchV62,
+ FeatureDuplex, FeatureMemops, FeatureNVJ, FeatureNVS,
+ FeaturePackets, FeatureSmallData]>;
def : Proc<"hexagonv65", HexagonModelV65,
[ArchV4, ArchV5, ArchV55, ArchV60, ArchV62, ArchV65,
- FeatureMemNoShuf, FeatureDuplex]>;
+ FeatureDuplex, FeatureMemNoShuf, FeatureMemops, FeatureNVJ,
+ FeatureNVS, FeaturePackets, FeatureSmallData]>;
//===----------------------------------------------------------------------===//
// Declare the target which we are implementing
@@ -357,4 +376,5 @@ def Hexagon : Target {
let AssemblyParsers = [HexagonAsmParser];
let AssemblyParserVariants = [HexagonAsmParserVariant];
let AssemblyWriters = [HexagonAsmWriter];
+ let AllowRegisterRenaming = 1;
}
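
With the per-CPU feature lists split out above, individual capabilities can presumably be toggled from the command line, e.g. llc -march=hexagon -mcpu=hexagonv62 -mattr=-packets (hypothetical invocation) to target a V62 core without packetization; since FeatureNVJ and FeatureNVS imply FeaturePackets, clearing packets clears them as well.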
diff --git a/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/lib/Target/Hexagon/HexagonAsmPrinter.cpp
index 68b1fe6bf4b1..0ac83ea7c5fc 100644
--- a/lib/Target/Hexagon/HexagonAsmPrinter.cpp
+++ b/lib/Target/Hexagon/HexagonAsmPrinter.cpp
@@ -62,10 +62,6 @@ void HexagonLowerToMC(const MCInstrInfo &MCII, const MachineInstr *MI,
#define DEBUG_TYPE "asm-printer"
-static cl::opt<bool> AlignCalls(
- "hexagon-align-calls", cl::Hidden, cl::init(true),
- cl::desc("Insert falign after call instruction for Hexagon target"));
-
// Given a scalar register return its pair.
inline static unsigned getHexagonRegisterPair(unsigned Reg,
const MCRegisterInfo *RI) {
@@ -76,16 +72,13 @@ inline static unsigned getHexagonRegisterPair(unsigned Reg,
return Pair;
}
-HexagonAsmPrinter::HexagonAsmPrinter(TargetMachine &TM,
- std::unique_ptr<MCStreamer> Streamer)
- : AsmPrinter(TM, std::move(Streamer)) {}
-
void HexagonAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
raw_ostream &O) {
const MachineOperand &MO = MI->getOperand(OpNo);
switch (MO.getType()) {
- default: llvm_unreachable ("<unknown operand type>");
+ default:
+ llvm_unreachable ("<unknown operand type>");
case MachineOperand::MO_Register:
O << HexagonInstPrinter::getRegisterName(MO.getReg());
return;
@@ -112,8 +105,8 @@ void HexagonAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
// for the case in which the basic block is reachable by a fall through but
// through an indirect from a jump table. In this case, the jump table
// will contain a label not defined by AsmPrinter.
-bool HexagonAsmPrinter::
-isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const {
+bool HexagonAsmPrinter::isBlockOnlyReachableByFallthrough(
+ const MachineBasicBlock *MBB) const {
if (MBB->hasAddressTaken())
return false;
return AsmPrinter::isBlockOnlyReachableByFallthrough(MBB);
@@ -167,7 +160,8 @@ bool HexagonAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
}
bool HexagonAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
- unsigned OpNo, unsigned AsmVariant,
+ unsigned OpNo,
+ unsigned AsmVariant,
const char *ExtraCode,
raw_ostream &O) {
if (ExtraCode && ExtraCode[0])
@@ -183,10 +177,10 @@ bool HexagonAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
if (Offset.isImm()) {
if (Offset.getImm())
- O << " + #" << Offset.getImm();
- }
- else
+ O << "+#" << Offset.getImm();
+ } else {
llvm_unreachable("Unimplemented");
+ }
return false;
}
@@ -285,7 +279,8 @@ void HexagonAsmPrinter::HexagonProcessInstruction(MCInst &Inst,
unsigned VectorSize = HRI.getRegSizeInBits(Hexagon::HvxVRRegClass) / 8;
switch (Inst.getOpcode()) {
- default: return;
+ default:
+ return;
case Hexagon::A2_iconst: {
Inst.setOpcode(Hexagon::A2_addi);
@@ -300,30 +295,40 @@ void HexagonAsmPrinter::HexagonProcessInstruction(MCInst &Inst,
break;
}
- case Hexagon::A2_tfrf:
+ case Hexagon::A2_tfrf: {
+ const MCConstantExpr *Zero = MCConstantExpr::create(0, OutContext);
Inst.setOpcode(Hexagon::A2_paddif);
- Inst.addOperand(MCOperand::createExpr(MCConstantExpr::create(0, OutContext)));
+ Inst.addOperand(MCOperand::createExpr(Zero));
break;
+ }
- case Hexagon::A2_tfrt:
+ case Hexagon::A2_tfrt: {
+ const MCConstantExpr *Zero = MCConstantExpr::create(0, OutContext);
Inst.setOpcode(Hexagon::A2_paddit);
- Inst.addOperand(MCOperand::createExpr(MCConstantExpr::create(0, OutContext)));
+ Inst.addOperand(MCOperand::createExpr(Zero));
break;
+ }
- case Hexagon::A2_tfrfnew:
+ case Hexagon::A2_tfrfnew: {
+ const MCConstantExpr *Zero = MCConstantExpr::create(0, OutContext);
Inst.setOpcode(Hexagon::A2_paddifnew);
- Inst.addOperand(MCOperand::createExpr(MCConstantExpr::create(0, OutContext)));
+ Inst.addOperand(MCOperand::createExpr(Zero));
break;
+ }
- case Hexagon::A2_tfrtnew:
+ case Hexagon::A2_tfrtnew: {
+ const MCConstantExpr *Zero = MCConstantExpr::create(0, OutContext);
Inst.setOpcode(Hexagon::A2_padditnew);
- Inst.addOperand(MCOperand::createExpr(MCConstantExpr::create(0, OutContext)));
+ Inst.addOperand(MCOperand::createExpr(Zero));
break;
+ }
- case Hexagon::A2_zxtb:
+ case Hexagon::A2_zxtb: {
+ const MCConstantExpr *C255 = MCConstantExpr::create(255, OutContext);
Inst.setOpcode(Hexagon::A2_andir);
- Inst.addOperand(MCOperand::createExpr(MCConstantExpr::create(255, OutContext)));
+ Inst.addOperand(MCOperand::createExpr(C255));
break;
+ }
// "$dst = CONST64(#$src1)",
case Hexagon::CONST64:
@@ -525,10 +530,12 @@ void HexagonAsmPrinter::HexagonProcessInstruction(MCInst &Inst,
bool Success = MO.getExpr()->evaluateAsAbsolute(Imm);
if (Success && Imm < 0) {
const MCExpr *MOne = MCConstantExpr::create(-1, OutContext);
- TmpInst.addOperand(MCOperand::createExpr(HexagonMCExpr::create(MOne, OutContext)));
+ const HexagonMCExpr *E = HexagonMCExpr::create(MOne, OutContext);
+ TmpInst.addOperand(MCOperand::createExpr(E));
} else {
const MCExpr *Zero = MCConstantExpr::create(0, OutContext);
- TmpInst.addOperand(MCOperand::createExpr(HexagonMCExpr::create(Zero, OutContext)));
+ const HexagonMCExpr *E = HexagonMCExpr::create(Zero, OutContext);
+ TmpInst.addOperand(MCOperand::createExpr(E));
}
TmpInst.addOperand(MO);
MappedInst = TmpInst;
@@ -569,9 +576,9 @@ void HexagonAsmPrinter::HexagonProcessInstruction(MCInst &Inst,
MO.setReg(High);
// Add a new operand for the second register in the pair.
MappedInst.addOperand(MCOperand::createReg(Low));
- MappedInst.setOpcode((Inst.getOpcode() == Hexagon::A2_tfrptnew)
- ? Hexagon::C2_ccombinewnewt
- : Hexagon::C2_ccombinewnewf);
+ MappedInst.setOpcode(Inst.getOpcode() == Hexagon::A2_tfrptnew
+ ? Hexagon::C2_ccombinewnewt
+ : Hexagon::C2_ccombinewnewf);
return;
}
@@ -615,6 +622,7 @@ void HexagonAsmPrinter::HexagonProcessInstruction(MCInst &Inst,
MappedInst = TmpInst;
return;
}
+
case Hexagon::V6_vdd0: {
MCInst TmpInst;
assert (Inst.getOperand(0).isReg() &&
@@ -627,6 +635,7 @@ void HexagonAsmPrinter::HexagonProcessInstruction(MCInst &Inst,
MappedInst = TmpInst;
return;
}
+
case Hexagon::V6_vL32Ub_pi:
case Hexagon::V6_vL32b_cur_pi:
case Hexagon::V6_vL32b_nt_cur_pi:
@@ -735,12 +744,10 @@ void HexagonAsmPrinter::HexagonProcessInstruction(MCInst &Inst,
case Hexagon::V6_vS32b_srls_pi:
MappedInst = ScaleVectorOffset(Inst, 2, VectorSize, OutContext);
return;
-
}
}
-/// printMachineInstruction -- Print out a single Hexagon MI in Darwin syntax to
-/// the current output stream.
+/// Print out a single Hexagon MI to the current output stream.
void HexagonAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MCInst MCB;
MCB.setOpcode(Hexagon::BUNDLE);
@@ -748,21 +755,27 @@ void HexagonAsmPrinter::EmitInstruction(const MachineInstr *MI) {
const MCInstrInfo &MCII = *Subtarget->getInstrInfo();
if (MI->isBundle()) {
+ assert(Subtarget->usePackets() && "Support for packets is disabled");
const MachineBasicBlock* MBB = MI->getParent();
MachineBasicBlock::const_instr_iterator MII = MI->getIterator();
for (++MII; MII != MBB->instr_end() && MII->isInsideBundle(); ++MII)
- if (!MII->isDebugValue() && !MII->isImplicitDef())
+ if (!MII->isDebugInstr() && !MII->isImplicitDef())
HexagonLowerToMC(MCII, &*MII, MCB, *this);
- }
- else
+ } else {
HexagonLowerToMC(MCII, MI, MCB, *this);
+ }
+
+ const MachineFunction &MF = *MI->getParent()->getParent();
+ const auto &HII = *MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
+ if (MI->isBundle() && HII.getBundleNoShuf(*MI))
+ HexagonMCInstrInfo::setMemReorderDisabled(MCB);
- bool Ok = HexagonMCInstrInfo::canonicalizePacket(
- MCII, *Subtarget, OutStreamer->getContext(), MCB, nullptr);
- assert(Ok);
- (void)Ok;
- if(HexagonMCInstrInfo::bundleSize(MCB) == 0)
+ MCContext &Ctx = OutStreamer->getContext();
+ bool Ok = HexagonMCInstrInfo::canonicalizePacket(MCII, *Subtarget, Ctx,
+ MCB, nullptr);
+ assert(Ok); (void)Ok;
+ if (HexagonMCInstrInfo::bundleSize(MCB) == 0)
return;
OutStreamer->EmitInstruction(MCB, getSubtargetInfo());
}
diff --git a/lib/Target/Hexagon/HexagonAsmPrinter.h b/lib/Target/Hexagon/HexagonAsmPrinter.h
index 4b8865672cf4..d0629d173a65 100755
--- a/lib/Target/Hexagon/HexagonAsmPrinter.h
+++ b/lib/Target/Hexagon/HexagonAsmPrinter.h
@@ -18,7 +18,8 @@
#include "HexagonSubtarget.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineFunction.h"
-#include <memory>
+#include "llvm/MC/MCStreamer.h"
+#include <utility>
namespace llvm {
@@ -32,7 +33,8 @@ class TargetMachine;
public:
explicit HexagonAsmPrinter(TargetMachine &TM,
- std::unique_ptr<MCStreamer> Streamer);
+ std::unique_ptr<MCStreamer> Streamer)
+ : AsmPrinter(TM, std::move(Streamer)) {}
bool runOnMachineFunction(MachineFunction &Fn) override {
Subtarget = &Fn.getSubtarget<HexagonSubtarget>();
@@ -43,13 +45,11 @@ class TargetMachine;
return "Hexagon Assembly Printer";
}
- bool isBlockOnlyReachableByFallthrough(
- const MachineBasicBlock *MBB) const override;
+ bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB)
+ const override;
void EmitInstruction(const MachineInstr *MI) override;
-
- void HexagonProcessInstruction(MCInst &Inst,
- const MachineInstr &MBB);
+ void HexagonProcessInstruction(MCInst &Inst, const MachineInstr &MBB);
void printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O);
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
@@ -58,8 +58,6 @@ class TargetMachine;
bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
unsigned AsmVariant, const char *ExtraCode,
raw_ostream &OS) override;
-
- static const char *getRegisterName(unsigned RegNo);
};
} // end namespace llvm
diff --git a/lib/Target/Hexagon/HexagonBitSimplify.cpp b/lib/Target/Hexagon/HexagonBitSimplify.cpp
index 9e73766b6fdc..4791b067aa8d 100644
--- a/lib/Target/Hexagon/HexagonBitSimplify.cpp
+++ b/lib/Target/Hexagon/HexagonBitSimplify.cpp
@@ -184,9 +184,7 @@ namespace {
public:
static char ID;
- HexagonBitSimplify() : MachineFunctionPass(ID) {
- initializeHexagonBitSimplifyPass(*PassRegistry::getPassRegistry());
- }
+ HexagonBitSimplify() : MachineFunctionPass(ID) {}
StringRef getPassName() const override {
return "Hexagon bit simplification";
@@ -257,10 +255,10 @@ namespace {
char HexagonBitSimplify::ID = 0;
-INITIALIZE_PASS_BEGIN(HexagonBitSimplify, "hexbit",
+INITIALIZE_PASS_BEGIN(HexagonBitSimplify, "hexagon-bit-simplify",
"Hexagon bit simplification", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
-INITIALIZE_PASS_END(HexagonBitSimplify, "hexbit",
+INITIALIZE_PASS_END(HexagonBitSimplify, "hexagon-bit-simplify",
"Hexagon bit simplification", false, false)
bool HexagonBitSimplify::visitBlock(MachineBasicBlock &B, Transformation &T,
@@ -622,7 +620,7 @@ bool HexagonBitSimplify::getUsedBitsInStore(unsigned Opc, BitVector &Bits,
// operand may be a subregister of a larger register, while Bits would
// correspond to the larger register in its entirety. Because of that,
// the parameter Begin can be used to indicate which bit of Bits should be
-// considered the LSB of of the operand.
+// considered the LSB of the operand.
bool HexagonBitSimplify::getUsedBits(unsigned Opc, unsigned OpN,
BitVector &Bits, uint16_t Begin, const HexagonInstrInfo &HII) {
using namespace Hexagon;
@@ -2452,7 +2450,7 @@ bool BitSimplification::simplifyExtractLow(MachineInstr *MI,
if (Len == RW)
return false;
- DEBUG({
+ LLVM_DEBUG({
dbgs() << __func__ << " on reg: " << printReg(RD.Reg, &HRI, RD.Sub)
<< ", MI: " << *MI;
dbgs() << "Cell: " << RC << '\n';
@@ -2646,7 +2644,7 @@ bool HexagonBitSimplify::runOnMachineFunction(MachineFunction &MF) {
const HexagonEvaluator HE(HRI, MRI, HII, MF);
BitTracker BT(HE, MF);
- DEBUG(BT.trace(true));
+ LLVM_DEBUG(BT.trace(true));
BT.run();
MachineBasicBlock &Entry = MF.front();
@@ -2977,7 +2975,8 @@ void HexagonLoopRescheduling::moveGroup(InstrGroup &G, MachineBasicBlock &LB,
}
bool HexagonLoopRescheduling::processLoop(LoopCand &C) {
- DEBUG(dbgs() << "Processing loop in " << printMBBReference(*C.LB) << "\n");
+ LLVM_DEBUG(dbgs() << "Processing loop in " << printMBBReference(*C.LB)
+ << "\n");
std::vector<PhiInfo> Phis;
for (auto &I : *C.LB) {
if (!I.isPHI())
@@ -3001,7 +3000,7 @@ bool HexagonLoopRescheduling::processLoop(LoopCand &C) {
Phis.push_back(PhiInfo(I, *C.LB));
}
- DEBUG({
+ LLVM_DEBUG({
dbgs() << "Phis: {";
for (auto &I : Phis) {
dbgs() << ' ' << printReg(I.DefR, HRI) << "=phi("
@@ -3122,7 +3121,7 @@ bool HexagonLoopRescheduling::processLoop(LoopCand &C) {
Groups.push_back(G);
}
- DEBUG({
+ LLVM_DEBUG({
for (unsigned i = 0, n = Groups.size(); i < n; ++i) {
InstrGroup &G = Groups[i];
dbgs() << "Group[" << i << "] inp: "
@@ -3190,7 +3189,7 @@ bool HexagonLoopRescheduling::runOnMachineFunction(MachineFunction &MF) {
MRI = &MF.getRegInfo();
const HexagonEvaluator HE(*HRI, *MRI, *HII, MF);
BitTracker BT(HE, MF);
- DEBUG(BT.trace(true));
+ LLVM_DEBUG(BT.trace(true));
BT.run();
BTP = &BT;
diff --git a/lib/Target/Hexagon/HexagonBitTracker.cpp b/lib/Target/Hexagon/HexagonBitTracker.cpp
index b6e220beb0c6..e13cfd3f655a 100644
--- a/lib/Target/Hexagon/HexagonBitTracker.cpp
+++ b/lib/Target/Hexagon/HexagonBitTracker.cpp
@@ -325,7 +325,7 @@ bool HexagonEvaluator::evaluate(const MachineInstr &MI,
int FI = op(1).getIndex();
int Off = op(2).getImm();
unsigned A = MFI.getObjectAlignment(FI) + std::abs(Off);
- unsigned L = Log2_32(A);
+ unsigned L = countTrailingZeros(A);
RegisterCell RC = RegisterCell::self(Reg[0].Reg, W0);
RC.fill(0, L, BT::BitValue::Zero);
return rr0(RC, Outputs);
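
A quick worked check of this change (illustrative numbers, not from the patch): with object alignment 8 and |Off| = 4, A = 12 = 0b1100. The old Log2_32(12) = 3 would claim three known-zero low bits, but only two are guaranteed; countTrailingZeros(12) = 2 is exactly the number of provably zero bits.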
diff --git a/lib/Target/Hexagon/HexagonBlockRanges.cpp b/lib/Target/Hexagon/HexagonBlockRanges.cpp
index ff915ca59dae..48a4505458ae 100644
--- a/lib/Target/Hexagon/HexagonBlockRanges.cpp
+++ b/lib/Target/Hexagon/HexagonBlockRanges.cpp
@@ -85,7 +85,7 @@ void HexagonBlockRanges::RangeList::unionize(bool MergeAdjacent) {
if (empty())
return;
- std::sort(begin(), end());
+ llvm::sort(begin(), end());
iterator Iter = begin();
while (Iter != end()-1) {
@@ -160,7 +160,7 @@ HexagonBlockRanges::InstrIndexMap::InstrIndexMap(MachineBasicBlock &B)
IndexType Idx = IndexType::First;
First = Idx;
for (auto &In : B) {
- if (In.isDebugValue())
+ if (In.isDebugInstr())
continue;
assert(getIndex(&In) == IndexType::None && "Instruction already in map");
Map.insert(std::make_pair(Idx, &In));
@@ -314,7 +314,7 @@ void HexagonBlockRanges::computeInitialLiveRanges(InstrIndexMap &IndexMap,
RegisterSet Defs, Clobbers;
for (auto &In : B) {
- if (In.isDebugValue())
+ if (In.isDebugInstr())
continue;
IndexType Index = IndexMap.getIndex(&In);
// Process uses first.
@@ -422,10 +422,10 @@ void HexagonBlockRanges::computeInitialLiveRanges(InstrIndexMap &IndexMap,
HexagonBlockRanges::RegToRangeMap HexagonBlockRanges::computeLiveMap(
InstrIndexMap &IndexMap) {
RegToRangeMap LiveMap;
- DEBUG(dbgs() << __func__ << ": index map\n" << IndexMap << '\n');
+ LLVM_DEBUG(dbgs() << __func__ << ": index map\n" << IndexMap << '\n');
computeInitialLiveRanges(IndexMap, LiveMap);
- DEBUG(dbgs() << __func__ << ": live map\n"
- << PrintRangeMap(LiveMap, TRI) << '\n');
+ LLVM_DEBUG(dbgs() << __func__ << ": live map\n"
+ << PrintRangeMap(LiveMap, TRI) << '\n');
return LiveMap;
}
@@ -486,8 +486,8 @@ HexagonBlockRanges::RegToRangeMap HexagonBlockRanges::computeDeadMap(
if (TargetRegisterInfo::isVirtualRegister(P.first.Reg))
addDeadRanges(P.first);
- DEBUG(dbgs() << __func__ << ": dead map\n"
- << PrintRangeMap(DeadMap, TRI) << '\n');
+ LLVM_DEBUG(dbgs() << __func__ << ": dead map\n"
+ << PrintRangeMap(DeadMap, TRI) << '\n');
return DeadMap;
}
diff --git a/lib/Target/Hexagon/HexagonBranchRelaxation.cpp b/lib/Target/Hexagon/HexagonBranchRelaxation.cpp
index 84af4b14b9f7..2fa7888dd02b 100644
--- a/lib/Target/Hexagon/HexagonBranchRelaxation.cpp
+++ b/lib/Target/Hexagon/HexagonBranchRelaxation.cpp
@@ -90,7 +90,7 @@ FunctionPass *llvm::createHexagonBranchRelaxation() {
}
bool HexagonBranchRelaxation::runOnMachineFunction(MachineFunction &MF) {
- DEBUG(dbgs() << "****** Hexagon Branch Relaxation ******\n");
+ LLVM_DEBUG(dbgs() << "****** Hexagon Branch Relaxation ******\n");
auto &HST = MF.getSubtarget<HexagonSubtarget>();
HII = HST.getInstrInfo();
@@ -114,8 +114,12 @@ void HexagonBranchRelaxation::computeOffset(MachineFunction &MF,
InstOffset = (InstOffset + ByteAlign) & ~(ByteAlign);
}
OffsetMap[&B] = InstOffset;
- for (auto &MI : B.instrs())
+ for (auto &MI : B.instrs()) {
InstOffset += HII->getSize(MI);
+ // Assume that all extendable branches will be extended.
+ if (MI.isBranch() && HII->isExtendable(MI))
+ InstOffset += HEXAGON_INSTR_SIZE;
+ }
}
}
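
This offset computation is deliberately pessimistic: every extendable branch is budgeted one extra constant-extender word (HEXAGON_INSTR_SIZE, 4 bytes), so a block containing two such branches is assumed 8 bytes longer than its current encoding. Overestimating distances can only trigger an unneeded extension, never leave a branch out of range.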
@@ -145,6 +149,9 @@ bool HexagonBranchRelaxation::isJumpOutOfRange(MachineInstr &MI,
if (FirstTerm == B.instr_end())
return false;
+ if (HII->isExtended(MI))
+ return false;
+
unsigned InstOffset = BlockToInstOffset[&B];
unsigned Distance = 0;
@@ -193,14 +200,14 @@ bool HexagonBranchRelaxation::reGenerateBranch(MachineFunction &MF,
for (auto &MI : B) {
if (!MI.isBranch() || !isJumpOutOfRange(MI, BlockToInstOffset))
continue;
- DEBUG(dbgs() << "Long distance jump. isExtendable("
- << HII->isExtendable(MI) << ") isConstExtended("
- << HII->isConstExtended(MI) << ") " << MI);
+ LLVM_DEBUG(dbgs() << "Long distance jump. isExtendable("
+ << HII->isExtendable(MI) << ") isConstExtended("
+ << HII->isConstExtended(MI) << ") " << MI);
// Since we have not merged HW loops relaxation into
// this code (yet), soften our approach for the moment.
if (!HII->isExtendable(MI) && !HII->isExtended(MI)) {
- DEBUG(dbgs() << "\tUnderimplemented relax branch instruction.\n");
+ LLVM_DEBUG(dbgs() << "\tUnderimplemented relax branch instruction.\n");
} else {
// Find which operand is expandable.
int ExtOpNum = HII->getCExtOpNum(MI);
diff --git a/lib/Target/Hexagon/HexagonCallingConv.td b/lib/Target/Hexagon/HexagonCallingConv.td
new file mode 100644
index 000000000000..ed2f87570d6b
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonCallingConv.td
@@ -0,0 +1,134 @@
+//===- HexagonCallingConv.td ----------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+class CCIfArgIsVarArg<CCAction A>
+ : CCIf<"State.isVarArg() && "
+ "ValNo >= static_cast<HexagonCCState&>(State)"
+ ".getNumNamedVarArgParams()", A>;
+
+def CC_HexagonStack: CallingConv<[
+ CCIfType<[i32,v2i16,v4i8],
+ CCAssignToStack<4,4>>,
+ CCIfType<[i64,v2i32,v4i16,v8i8],
+ CCAssignToStack<8,8>>
+]>;
+
+def CC_Hexagon: CallingConv<[
+ CCIfType<[i1,i8,i16],
+ CCPromoteToType<i32>>,
+ CCIfType<[f32],
+ CCBitConvertToType<i32>>,
+ CCIfType<[f64],
+ CCBitConvertToType<i64>>,
+
+ CCIfByVal<
+ CCPassByVal<8,8>>,
+ CCIfArgIsVarArg<
+ CCDelegateTo<CC_HexagonStack>>,
+
+ // Pass split values in pairs, allocate odd register if necessary.
+ CCIfType<[i32],
+ CCIfSplit<
+ CCCustom<"CC_SkipOdd">>>,
+
+ CCIfType<[i32,v2i16,v4i8],
+ CCAssignToReg<[R0,R1,R2,R3,R4,R5]>>,
+ // Make sure to allocate any skipped 32-bit register, so it does not get
+ // allocated to a subsequent 32-bit value.
+ CCIfType<[i64,v2i32,v4i16,v8i8],
+ CCCustom<"CC_SkipOdd">>,
+ CCIfType<[i64,v2i32,v4i16,v8i8],
+ CCAssignToReg<[D0,D1,D2]>>,
+
+ CCDelegateTo<CC_HexagonStack>
+]>;
+
+def RetCC_Hexagon: CallingConv<[
+ CCIfType<[i1,i8,i16],
+ CCPromoteToType<i32>>,
+ CCIfType<[f32],
+ CCBitConvertToType<i32>>,
+ CCIfType<[f64],
+ CCBitConvertToType<i64>>,
+
+ // Small structures are returned in a pair of registers, (which is
+ // always r1:0). In such case, what is returned are two i32 values
+ // without any additional information (in ArgFlags) stating that
+ // they are parts of a structure. Because of that there is no way
+ // to differentiate that situation from an attempt to return two
+ // values, so always assign R0 and R1.
+ CCIfSplit<
+ CCAssignToReg<[R0,R1]>>,
+ CCIfType<[i32,v2i16,v4i8],
+ CCAssignToReg<[R0,R1]>>,
+ CCIfType<[i64,v2i32,v4i16,v8i8],
+ CCAssignToReg<[D0]>>
+]>;
+
+
+class CCIfHvx64<CCAction A>
+ : CCIf<"State.getMachineFunction().getSubtarget<HexagonSubtarget>()"
+ ".useHVX64BOps()", A>;
+
+class CCIfHvx128<CCAction A>
+ : CCIf<"State.getMachineFunction().getSubtarget<HexagonSubtarget>()"
+ ".useHVX128BOps()", A>;
+
+def CC_Hexagon_HVX: CallingConv<[
+ // HVX 64-byte mode
+ CCIfHvx64<
+ CCIfType<[v16i32,v32i16,v64i8],
+ CCAssignToReg<[V0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15]>>>,
+ CCIfHvx64<
+ CCIfType<[v32i32,v64i16,v128i8],
+ CCAssignToReg<[W0,W1,W2,W3,W4,W5,W6,W7]>>>,
+ CCIfHvx64<
+ CCIfType<[v16i32,v32i16,v64i8],
+ CCAssignToStack<64,64>>>,
+ CCIfHvx64<
+ CCIfType<[v32i32,v64i16,v128i8],
+ CCAssignToStack<128,64>>>,
+
+ // HVX 128-byte mode
+ CCIfHvx128<
+ CCIfType<[v32i32,v64i16,v128i8],
+ CCAssignToReg<[V0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15]>>>,
+ CCIfHvx128<
+ CCIfType<[v64i32,v128i16,v256i8],
+ CCAssignToReg<[W0,W1,W2,W3,W4,W5,W6,W7]>>>,
+ CCIfHvx128<
+ CCIfType<[v32i32,v64i16,v128i8],
+ CCAssignToStack<128,128>>>,
+ CCIfHvx128<
+ CCIfType<[v64i32,v128i16,v256i8],
+ CCAssignToStack<256,128>>>,
+
+ CCDelegateTo<CC_Hexagon>
+]>;
+
+def RetCC_Hexagon_HVX: CallingConv<[
+ // HVX 64-byte mode
+ CCIfHvx64<
+ CCIfType<[v16i32,v32i16,v64i8],
+ CCAssignToReg<[V0]>>>,
+ CCIfHvx64<
+ CCIfType<[v32i32,v64i16,v128i8],
+ CCAssignToReg<[W0]>>>,
+
+ // HVX 128-byte mode
+ CCIfHvx128<
+ CCIfType<[v32i32,v64i16,v128i8],
+ CCAssignToReg<[V0]>>>,
+ CCIfHvx128<
+ CCIfType<[v64i32,v128i16,v256i8],
+ CCAssignToReg<[W0]>>>,
+
+ CCDelegateTo<RetCC_Hexagon>
+]>;
+
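
The CC_SkipOdd custom handler named above is defined elsewhere in this commit (in HexagonISelLowering.cpp). For illustration only, a plausible sketch of such a handler, assuming the standard CCCustomFn signature of this LLVM version:

  // "Skip the odd register": never assigns the value itself, only burns one
  // register when the next free one is odd, so that a following 64-bit pair
  // starts on an even register number.
  static bool CC_SkipOdd(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
                         CCValAssign::LocInfo &LocInfo,
                         ISD::ArgFlagsTy &ArgFlags, CCState &State) {
    static const MCPhysReg ArgRegs[] = {
      Hexagon::R0, Hexagon::R1, Hexagon::R2,
      Hexagon::R3, Hexagon::R4, Hexagon::R5
    };
    const unsigned NumArgRegs = array_lengthof(ArgRegs);
    unsigned RegNum = State.getFirstUnallocated(ArgRegs);

    if (RegNum != NumArgRegs && RegNum % 2 == 1)
      State.AllocateReg(ArgRegs[RegNum]);

    // Returning false means "not handled here": the value itself is then
    // allocated by the next matching rule in the CallingConv list.
    return false;
  }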
diff --git a/lib/Target/Hexagon/HexagonCommonGEP.cpp b/lib/Target/Hexagon/HexagonCommonGEP.cpp
index 7e3d049d337f..f315e24eba62 100644
--- a/lib/Target/Hexagon/HexagonCommonGEP.cpp
+++ b/lib/Target/Hexagon/HexagonCommonGEP.cpp
@@ -16,6 +16,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/PostDominators.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
@@ -36,7 +37,6 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
@@ -342,7 +342,7 @@ bool HexagonCommonGEP::isHandledGepForm(GetElementPtrInst *GepI) {
void HexagonCommonGEP::processGepInst(GetElementPtrInst *GepI,
ValueToNodeMap &NM) {
- DEBUG(dbgs() << "Visiting GEP: " << *GepI << '\n');
+ LLVM_DEBUG(dbgs() << "Visiting GEP: " << *GepI << '\n');
GepNode *N = new (*Mem) GepNode;
Value *PtrOp = GepI->getPointerOperand();
uint32_t InBounds = GepI->isInBounds() ? GepNode::InBounds : 0;
@@ -426,7 +426,7 @@ void HexagonCommonGEP::collect() {
}
}
- DEBUG(dbgs() << "Gep nodes after initial collection:\n" << Nodes);
+ LLVM_DEBUG(dbgs() << "Gep nodes after initial collection:\n" << Nodes);
}
static void invert_find_roots(const NodeVect &Nodes, NodeChildrenMap &NCM,
@@ -575,7 +575,7 @@ void HexagonCommonGEP::common() {
}
}
- DEBUG({
+ LLVM_DEBUG({
dbgs() << "Gep node equality:\n";
for (NodePairSet::iterator I = Eq.begin(), E = Eq.end(); I != E; ++I)
dbgs() << "{ " << I->first << ", " << I->second << " }\n";
@@ -642,7 +642,7 @@ void HexagonCommonGEP::common() {
N->Parent = Rep;
}
- DEBUG(dbgs() << "Gep nodes after commoning:\n" << Nodes);
+ LLVM_DEBUG(dbgs() << "Gep nodes after commoning:\n" << Nodes);
// Finally, erase the nodes that are no longer used.
NodeSet Erase;
@@ -662,35 +662,35 @@ void HexagonCommonGEP::common() {
NodeVect::iterator NewE = remove_if(Nodes, in_set(Erase));
Nodes.resize(std::distance(Nodes.begin(), NewE));
- DEBUG(dbgs() << "Gep nodes after post-commoning cleanup:\n" << Nodes);
+ LLVM_DEBUG(dbgs() << "Gep nodes after post-commoning cleanup:\n" << Nodes);
}
template <typename T>
static BasicBlock *nearest_common_dominator(DominatorTree *DT, T &Blocks) {
- DEBUG({
- dbgs() << "NCD of {";
- for (typename T::iterator I = Blocks.begin(), E = Blocks.end();
- I != E; ++I) {
- if (!*I)
- continue;
- BasicBlock *B = cast<BasicBlock>(*I);
- dbgs() << ' ' << B->getName();
- }
- dbgs() << " }\n";
- });
+ LLVM_DEBUG({
+ dbgs() << "NCD of {";
+ for (typename T::iterator I = Blocks.begin(), E = Blocks.end(); I != E;
+ ++I) {
+ if (!*I)
+ continue;
+ BasicBlock *B = cast<BasicBlock>(*I);
+ dbgs() << ' ' << B->getName();
+ }
+ dbgs() << " }\n";
+ });
- // Allow null basic blocks in Blocks. In such cases, return nullptr.
- typename T::iterator I = Blocks.begin(), E = Blocks.end();
- if (I == E || !*I)
+ // Allow null basic blocks in Blocks. In such cases, return nullptr.
+ typename T::iterator I = Blocks.begin(), E = Blocks.end();
+ if (I == E || !*I)
+ return nullptr;
+ BasicBlock *Dom = cast<BasicBlock>(*I);
+ while (++I != E) {
+ BasicBlock *B = cast_or_null<BasicBlock>(*I);
+ Dom = B ? DT->findNearestCommonDominator(Dom, B) : nullptr;
+ if (!Dom)
return nullptr;
- BasicBlock *Dom = cast<BasicBlock>(*I);
- while (++I != E) {
- BasicBlock *B = cast_or_null<BasicBlock>(*I);
- Dom = B ? DT->findNearestCommonDominator(Dom, B) : nullptr;
- if (!Dom)
- return nullptr;
}
- DEBUG(dbgs() << "computed:" << Dom->getName() << '\n');
+ LLVM_DEBUG(dbgs() << "computed:" << Dom->getName() << '\n');
return Dom;
}
@@ -753,7 +753,7 @@ static bool is_empty(const BasicBlock *B) {
BasicBlock *HexagonCommonGEP::recalculatePlacement(GepNode *Node,
NodeChildrenMap &NCM, NodeToValueMap &Loc) {
- DEBUG(dbgs() << "Loc for node:" << Node << '\n');
+ LLVM_DEBUG(dbgs() << "Loc for node:" << Node << '\n');
// Recalculate the placement for Node, assuming that the locations of
// its children in Loc are valid.
// Return nullptr if there is no valid placement for Node (for example, it
@@ -820,7 +820,7 @@ BasicBlock *HexagonCommonGEP::recalculatePlacement(GepNode *Node,
BasicBlock *HexagonCommonGEP::recalculatePlacementRec(GepNode *Node,
NodeChildrenMap &NCM, NodeToValueMap &Loc) {
- DEBUG(dbgs() << "LocRec begin for node:" << Node << '\n');
+ LLVM_DEBUG(dbgs() << "LocRec begin for node:" << Node << '\n');
// Recalculate the placement of Node, after recursively recalculating the
// placements of all its children.
NodeChildrenMap::iterator CF = NCM.find(Node);
@@ -830,7 +830,7 @@ BasicBlock *HexagonCommonGEP::recalculatePlacementRec(GepNode *Node,
recalculatePlacementRec(*I, NCM, Loc);
}
BasicBlock *LB = recalculatePlacement(Node, NCM, Loc);
- DEBUG(dbgs() << "LocRec end for node:" << Node << '\n');
+ LLVM_DEBUG(dbgs() << "LocRec end for node:" << Node << '\n');
return LB;
}
@@ -952,8 +952,8 @@ namespace {
void HexagonCommonGEP::separateChainForNode(GepNode *Node, Use *U,
NodeToValueMap &Loc) {
User *R = U->getUser();
- DEBUG(dbgs() << "Separating chain for node (" << Node << ") user: "
- << *R << '\n');
+ LLVM_DEBUG(dbgs() << "Separating chain for node (" << Node << ") user: " << *R
+ << '\n');
BasicBlock *PB = cast<Instruction>(R)->getParent();
GepNode *N = Node;
@@ -996,7 +996,7 @@ void HexagonCommonGEP::separateChainForNode(GepNode *Node, Use *U,
// Should at least have U in NewUs.
NewNode->Flags |= GepNode::Used;
- DEBUG(dbgs() << "new node: " << NewNode << " " << *NewNode << '\n');
+ LLVM_DEBUG(dbgs() << "new node: " << NewNode << " " << *NewNode << '\n');
assert(!NewUs.empty());
Uses[NewNode] = NewUs;
}
@@ -1007,7 +1007,7 @@ void HexagonCommonGEP::separateConstantChains(GepNode *Node,
NodeSet Ns;
nodes_for_root(Node, NCM, Ns);
- DEBUG(dbgs() << "Separating constant chains for node: " << Node << '\n');
+ LLVM_DEBUG(dbgs() << "Separating constant chains for node: " << Node << '\n');
// Collect all used nodes together with the uses from loads and stores,
// where the GEP node could be folded into the load/store instruction.
NodeToUsesMap FNs; // Foldable nodes.
@@ -1044,7 +1044,7 @@ void HexagonCommonGEP::separateConstantChains(GepNode *Node,
FNs.insert(std::make_pair(N, LSs));
}
- DEBUG(dbgs() << "Nodes with foldable users:\n" << FNs);
+ LLVM_DEBUG(dbgs() << "Nodes with foldable users:\n" << FNs);
for (NodeToUsesMap::iterator I = FNs.begin(), E = FNs.end(); I != E; ++I) {
GepNode *N = I->first;
@@ -1066,32 +1066,33 @@ void HexagonCommonGEP::computeNodePlacement(NodeToValueMap &Loc) {
for (NodeVect::iterator I = Roots.begin(), E = Roots.end(); I != E; ++I)
recalculatePlacementRec(*I, NCM, Loc);
- DEBUG(dbgs() << "Initial node placement:\n" << LocationAsBlock(Loc));
+ LLVM_DEBUG(dbgs() << "Initial node placement:\n" << LocationAsBlock(Loc));
if (OptEnableInv) {
for (NodeVect::iterator I = Roots.begin(), E = Roots.end(); I != E; ++I)
adjustForInvariance(*I, NCM, Loc);
- DEBUG(dbgs() << "Node placement after adjustment for invariance:\n"
- << LocationAsBlock(Loc));
+ LLVM_DEBUG(dbgs() << "Node placement after adjustment for invariance:\n"
+ << LocationAsBlock(Loc));
}
if (OptEnableConst) {
for (NodeVect::iterator I = Roots.begin(), E = Roots.end(); I != E; ++I)
separateConstantChains(*I, NCM, Loc);
}
- DEBUG(dbgs() << "Node use information:\n" << Uses);
+ LLVM_DEBUG(dbgs() << "Node use information:\n" << Uses);
// At the moment, there is no further refinement of the initial placement.
// Such a refinement could include splitting the nodes if they are placed
// too far from some of its users.
- DEBUG(dbgs() << "Final node placement:\n" << LocationAsBlock(Loc));
+ LLVM_DEBUG(dbgs() << "Final node placement:\n" << LocationAsBlock(Loc));
}
Value *HexagonCommonGEP::fabricateGEP(NodeVect &NA, BasicBlock::iterator At,
BasicBlock *LocB) {
- DEBUG(dbgs() << "Fabricating GEP in " << LocB->getName()
- << " for nodes:\n" << NA);
+ LLVM_DEBUG(dbgs() << "Fabricating GEP in " << LocB->getName()
+ << " for nodes:\n"
+ << NA);
unsigned Num = NA.size();
GepNode *RN = NA[0];
assert((RN->Flags & GepNode::Root) && "Creating GEP for non-root");
@@ -1128,7 +1129,7 @@ Value *HexagonCommonGEP::fabricateGEP(NodeVect &NA, BasicBlock::iterator At,
Type *ElTy = cast<PointerType>(InpTy->getScalarType())->getElementType();
NewInst = GetElementPtrInst::Create(ElTy, Input, A, "cgep", &*At);
NewInst->setIsInBounds(RN->Flags & GepNode::InBounds);
- DEBUG(dbgs() << "new GEP: " << *NewInst << '\n');
+ LLVM_DEBUG(dbgs() << "new GEP: " << *NewInst << '\n');
Input = NewInst;
} while (nax <= Num);
@@ -1161,7 +1162,7 @@ void HexagonCommonGEP::getAllUsersForNode(GepNode *Node, ValueVect &Values,
}
void HexagonCommonGEP::materialize(NodeToValueMap &Loc) {
- DEBUG(dbgs() << "Nodes before materialization:\n" << Nodes << '\n');
+ LLVM_DEBUG(dbgs() << "Nodes before materialization:\n" << Nodes << '\n');
NodeChildrenMap NCM;
NodeVect Roots;
// Compute the inversion again, since computing placement could alter
diff --git a/lib/Target/Hexagon/HexagonConstExtenders.cpp b/lib/Target/Hexagon/HexagonConstExtenders.cpp
index 294a6da69f51..cbce61bc63c9 100644
--- a/lib/Target/Hexagon/HexagonConstExtenders.cpp
+++ b/lib/Target/Hexagon/HexagonConstExtenders.cpp
@@ -39,31 +39,57 @@ namespace llvm {
FunctionPass *createHexagonConstExtenders();
}
+static int32_t adjustUp(int32_t V, uint8_t A, uint8_t O) {
+ assert(isPowerOf2_32(A));
+ int32_t U = (V & -A) + O;
+ return U >= V ? U : U+A;
+}
+
+static int32_t adjustDown(int32_t V, uint8_t A, uint8_t O) {
+ assert(isPowerOf2_32(A));
+ int32_t U = (V & -A) + O;
+ return U <= V ? U : U-A;
+}
+
namespace {
struct OffsetRange {
+ // The range of values between Min and Max that are of form Align*N+Offset,
+ // for some integer N. Min and Max are required to be of that form as well,
+ // except in the case of an empty range.
int32_t Min = INT_MIN, Max = INT_MAX;
uint8_t Align = 1;
+ uint8_t Offset = 0;
OffsetRange() = default;
- OffsetRange(int32_t L, int32_t H, uint8_t A)
- : Min(L), Max(H), Align(A) {}
+ OffsetRange(int32_t L, int32_t H, uint8_t A, uint8_t O = 0)
+ : Min(L), Max(H), Align(A), Offset(O) {}
OffsetRange &intersect(OffsetRange A) {
- Align = std::max(Align, A.Align);
- Min = std::max(Min, A.Min);
- Max = std::min(Max, A.Max);
+ if (Align < A.Align)
+ std::swap(*this, A);
+
+ // Align >= A.Align.
+ if (Offset >= A.Offset && (Offset - A.Offset) % A.Align == 0) {
+ Min = adjustUp(std::max(Min, A.Min), Align, Offset);
+ Max = adjustDown(std::min(Max, A.Max), Align, Offset);
+ } else {
+ // Make an empty range.
+ Min = 0;
+ Max = -1;
+ }
// Canonicalize empty ranges.
if (Min > Max)
std::tie(Min, Max, Align) = std::make_tuple(0, -1, 1);
return *this;
}
OffsetRange &shift(int32_t S) {
- assert(alignTo(std::abs(S), Align) == uint64_t(std::abs(S)));
Min += S;
Max += S;
+ Offset = (Offset+S) % Align;
return *this;
}
OffsetRange &extendBy(int32_t D) {
// If D < 0, extend Min, otherwise extend Max.
+ assert(D % Align == 0);
if (D < 0)
Min = (INT_MIN-D < Min) ? Min+D : INT_MIN;
else
@@ -74,7 +100,7 @@ namespace {
return Min > Max;
}
bool contains(int32_t V) const {
- return Min <= V && V <= Max && (V % Align) == 0;
+ return Min <= V && V <= Max && (V-Offset) % Align == 0;
}
bool operator==(const OffsetRange &R) const {
return Min == R.Min && Max == R.Max && Align == R.Align;
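A hand-worked example of the new range semantics, written in the
[Min,Max]aAlign+Offset notation that the updated operator<< below prints
(values are illustrative only):

    OffsetRange X(2, 98, 4, 2);              // [2,98]a4+2
    X.intersect(OffsetRange(10, 50, 2, 0));  // intersect with [10,50]a2+0
    // Offsets are compatible since (2 - 0) % 2 == 0, so the coarser stride
    // wins: Min = adjustUp(10, 4, 2) = 10, Max = adjustDown(50, 4, 2) = 50,
    // leaving X == [10,50]a4+2.
    X.shift(3);                              // X == [13,53]a4+1,
    // since both endpoints move by 3 and Offset becomes (2 + 3) % 4 = 1.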
@@ -408,7 +434,8 @@ namespace {
raw_ostream &operator<< (raw_ostream &OS, const OffsetRange &OR) {
if (OR.Min > OR.Max)
OS << '!';
- OS << '[' << OR.Min << ',' << OR.Max << "]a" << unsigned(OR.Align);
+ OS << '[' << OR.Min << ',' << OR.Max << "]a" << unsigned(OR.Align)
+ << '+' << unsigned(OR.Offset);
return OS;
}
@@ -703,9 +730,21 @@ bool HCE::ExtRoot::operator< (const HCE::ExtRoot &ER) const {
}
case MachineOperand::MO_ExternalSymbol:
return StringRef(V.SymbolName) < StringRef(ER.V.SymbolName);
- case MachineOperand::MO_GlobalAddress:
- assert(V.GV->hasName() && ER.V.GV->hasName());
- return V.GV->getName() < ER.V.GV->getName();
+ case MachineOperand::MO_GlobalAddress: {
+ // Global values may not have names, so compare their positions
+ // in the parent module.
+ const Module &M = *V.GV->getParent();
+ auto FindPos = [&M] (const GlobalValue &V) {
+ unsigned P = 0;
+ for (const GlobalValue &T : M.global_values()) {
+ if (&T == &V)
+ return P;
+ P++;
+ }
+ llvm_unreachable("Global value not found in module");
+ };
+ return FindPos(*V.GV) < FindPos(*ER.V.GV);
+ }
case MachineOperand::MO_BlockAddress: {
const BasicBlock *ThisB = V.BA->getBasicBlock();
const BasicBlock *OtherB = ER.V.BA->getBasicBlock();
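FindPos gives a deterministic ordering keyed on a global's position in the
module rather than on its name, at the cost of a linear scan per comparison.
Schematically, for a hypothetical module declaring globals @A, @B, @C in
that order:

    // FindPos(@A) == 0, FindPos(@B) == 1, FindPos(@C) == 2,
    // so ExtRoot(@B) < ExtRoot(@C) holds even when the globals are
    // unnamed (e.g. private unnamed_addr constants).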
@@ -999,15 +1038,19 @@ unsigned HCE::getDirectRegReplacement(unsigned ExtOpc) const {
return 0;
}
-// Return the allowable deviation from the current value of Rb which the
+// Return the allowable deviation from the current value of Rb (i.e. the
+// range of values that can be added to the current value) which the
// instruction MI can accommodate.
// The instruction MI is a user of register Rb, which is defined via an
// extender. It may be possible for MI to be tweaked to work for a register
// defined with a slightly different value. For example
-// ... = L2_loadrub_io Rb, 0
+// ... = L2_loadrub_io Rb, 1
// can be modified to be
-// ... = L2_loadrub_io Rb', 1
-// if Rb' = Rb-1.
+// ... = L2_loadrub_io Rb', 0
+// if Rb' = Rb+1.
+// The range for Rb would be [Min+1, Max+1], where [Min, Max] is a range
+// for L2_loadrub with offset 0. That means that Rb could be replaced with
+// Rc, where Rc-Rb belongs to [Min+1, Max+1].
OffsetRange HCE::getOffsetRange(Register Rb, const MachineInstr &MI) const {
unsigned Opc = MI.getOpcode();
// Instructions that are constant-extended may be replaced with something
@@ -1109,6 +1152,13 @@ void HCE::recordExtender(MachineInstr &MI, unsigned OpNum) {
bool IsLoad = MI.mayLoad();
bool IsStore = MI.mayStore();
+ // Fixed stack slots have negative indexes, and they cannot be used
+ // with TRI::stackSlot2Index and TRI::index2StackSlot. This is somewhat
+ // unfortunate, but should not happen often.
+ for (MachineOperand &Op : MI.operands())
+ if (Op.isFI() && Op.getIndex() < 0)
+ return;
+
if (IsLoad || IsStore) {
unsigned AM = HII->getAddrMode(MI);
switch (AM) {
@@ -1220,7 +1270,7 @@ void HCE::assignInits(const ExtRoot &ER, unsigned Begin, unsigned End,
if (!ED.IsDef)
continue;
ExtValue EV(ED);
- DEBUG(dbgs() << " =" << I << ". " << EV << " " << ED << '\n');
+ LLVM_DEBUG(dbgs() << " =" << I << ". " << EV << " " << ED << '\n');
assert(ED.Rd.Reg != 0);
Ranges[I-Begin] = getOffsetRange(ED.Rd).shift(EV.Offset);
// A2_tfrsi is a special case: it will be replaced with A2_addi, which
@@ -1240,7 +1290,7 @@ void HCE::assignInits(const ExtRoot &ER, unsigned Begin, unsigned End,
if (ED.IsDef)
continue;
ExtValue EV(ED);
- DEBUG(dbgs() << " " << I << ". " << EV << " " << ED << '\n');
+ LLVM_DEBUG(dbgs() << " " << I << ". " << EV << " " << ED << '\n');
OffsetRange Dev = getOffsetRange(ED);
Ranges[I-Begin].intersect(Dev.shift(EV.Offset));
}
@@ -1252,7 +1302,7 @@ void HCE::assignInits(const ExtRoot &ER, unsigned Begin, unsigned End,
for (unsigned I = Begin; I != End; ++I)
RangeMap[Ranges[I-Begin]].insert(I);
- DEBUG({
+ LLVM_DEBUG({
dbgs() << "Ranges\n";
for (unsigned I = Begin; I != End; ++I)
dbgs() << " " << I << ". " << Ranges[I-Begin] << '\n';
@@ -1280,11 +1330,17 @@ void HCE::assignInits(const ExtRoot &ER, unsigned Begin, unsigned End,
SmallVector<RangeTree::Node*,8> Nodes;
Tree.order(Nodes);
- auto MaxAlign = [](const SmallVectorImpl<RangeTree::Node*> &Nodes) {
- uint8_t Align = 1;
- for (RangeTree::Node *N : Nodes)
- Align = std::max(Align, N->Range.Align);
- return Align;
+ auto MaxAlign = [](const SmallVectorImpl<RangeTree::Node*> &Nodes,
+ uint8_t Align, uint8_t Offset) {
+ for (RangeTree::Node *N : Nodes) {
+ if (N->Range.Align <= Align || N->Range.Offset < Offset)
+ continue;
+ if ((N->Range.Offset - Offset) % Align != 0)
+ continue;
+ Align = N->Range.Align;
+ Offset = N->Range.Offset;
+ }
+ return std::make_pair(Align, Offset);
};
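A hand-traced run of the revised MaxAlign, with node ranges invented for
illustration. Note that the scan is order-sensitive: each node is tested
against the running (Align, Offset) pair accumulated so far.

    // Start: (Align, Offset) = (1, 0); nodes carry ranges [..]a2+0, [..]a4+2.
    //   a2+0: 2 > 1, 0 >= 0, (0 - 0) % 1 == 0  ->  pair becomes (2, 0)
    //   a4+2: 4 > 2, 2 >= 0, (2 - 0) % 2 == 0  ->  pair becomes (4, 2)
    // MaxAlign returns {4, 2}.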
// Construct the set of all potential definition points from the endpoints
@@ -1294,14 +1350,14 @@ void HCE::assignInits(const ExtRoot &ER, unsigned Begin, unsigned End,
std::set<int32_t> CandSet;
for (RangeTree::Node *N : Nodes) {
const OffsetRange &R = N->Range;
- uint8_t A0 = MaxAlign(Tree.nodesWith(R.Min, false));
+ auto P0 = MaxAlign(Tree.nodesWith(R.Min, false), R.Align, R.Offset);
CandSet.insert(R.Min);
- if (R.Align < A0)
- CandSet.insert(R.Min < 0 ? -alignDown(-R.Min, A0) : alignTo(R.Min, A0));
- uint8_t A1 = MaxAlign(Tree.nodesWith(R.Max, false));
+ if (R.Align < P0.first)
+ CandSet.insert(adjustUp(R.Min, P0.first, P0.second));
+ auto P1 = MaxAlign(Tree.nodesWith(R.Max, false), R.Align, R.Offset);
CandSet.insert(R.Max);
- if (R.Align < A1)
- CandSet.insert(R.Max < 0 ? -alignTo(-R.Max, A1) : alignDown(R.Max, A1));
+ if (R.Align < P1.first)
+ CandSet.insert(adjustDown(R.Max, P1.first, P1.second));
}
// Build the assignment map: candidate C -> { list of extender indexes }.
@@ -1340,7 +1396,7 @@ void HCE::assignInits(const ExtRoot &ER, unsigned Begin, unsigned End,
}
}
- DEBUG(dbgs() << "IMap (before fixup) = " << PrintIMap(IMap, *HRI));
+ LLVM_DEBUG(dbgs() << "IMap (before fixup) = " << PrintIMap(IMap, *HRI));
// There is some ambiguity in what initializer should be used, if the
// descriptor's subexpression is non-trivial: it can be the entire
@@ -1359,10 +1415,50 @@ void HCE::assignInits(const ExtRoot &ER, unsigned Begin, unsigned End,
AssignmentMap::iterator F = IMap.find({EV, ExtExpr()});
if (F == IMap.end())
continue;
+
// Finally, check if all extenders have the same value as the initializer.
- auto SameValue = [&EV,this](unsigned I) {
+ // Make sure that extenders that are a part of a stack address are not
+ // merged with those that aren't. Stack addresses need an offset field
+ // (to be used by frame index elimination), while non-stack expressions
+ // can be replaced with forms (such as rr) that do not have such a field.
+ // Example:
+ //
+ // Collected 3 extenders
+ // =2. imm:0 off:32968 bb#2: %7 = ## + __ << 0, def
+ // 0. imm:0 off:267 bb#0: __ = ## + SS#1 << 0
+ // 1. imm:0 off:267 bb#1: __ = ## + SS#1 << 0
+ // Ranges
+ // 0. [-756,267]a1+0
+ // 1. [-756,267]a1+0
+ // 2. [201,65735]a1+0
+ // RangeMap
+ // [-756,267]a1+0 -> 0 1
+ // [201,65735]a1+0 -> 2
+ // IMap (before fixup) = {
+ // [imm:0 off:267, ## + __ << 0] -> { 2 }
+ // [imm:0 off:267, ## + SS#1 << 0] -> { 0 1 }
+ // }
+ // IMap (after fixup) = {
+ // [imm:0 off:267, ## + __ << 0] -> { 2 0 1 }
+ // [imm:0 off:267, ## + SS#1 << 0] -> { }
+ // }
+ // Inserted def in bb#0 for initializer: [imm:0 off:267, ## + __ << 0]
+ // %12:intregs = A2_tfrsi 267
+ //
+ // The result was
+ // %12:intregs = A2_tfrsi 267
+ // S4_pstorerbt_rr %3, %12, %stack.1, 0, killed %4
+ // Which became
+ // r0 = #267
+ // if (p0.new) memb(r0+r29<<#4) = r2
+
+ bool IsStack = any_of(F->second, [this](unsigned I) {
+ return Extenders[I].Expr.Rs.isSlot();
+ });
+ auto SameValue = [&EV,this,IsStack](unsigned I) {
const ExtDesc &ED = Extenders[I];
- return ExtValue(ED).Offset == EV.Offset;
+ return ED.Expr.Rs.isSlot() == IsStack &&
+ ExtValue(ED).Offset == EV.Offset;
};
if (all_of(P.second, SameValue)) {
F->second.insert(P.second.begin(), P.second.end());
@@ -1370,7 +1466,7 @@ void HCE::assignInits(const ExtRoot &ER, unsigned Begin, unsigned End,
}
}
- DEBUG(dbgs() << "IMap (after fixup) = " << PrintIMap(IMap, *HRI));
+ LLVM_DEBUG(dbgs() << "IMap (after fixup) = " << PrintIMap(IMap, *HRI));
}
void HCE::calculatePlacement(const ExtenderInit &ExtI, const IndexList &Refs,
@@ -1473,9 +1569,9 @@ HCE::Register HCE::insertInitializer(Loc DefL, const ExtenderInit &ExtI) {
assert(InitI);
(void)InitI;
- DEBUG(dbgs() << "Inserted def in bb#" << MBB.getNumber()
- << " for initializer: " << PrintInit(ExtI, *HRI)
- << "\n " << *InitI);
+ LLVM_DEBUG(dbgs() << "Inserted def in bb#" << MBB.getNumber()
+ << " for initializer: " << PrintInit(ExtI, *HRI) << "\n "
+ << *InitI);
return { DefR, 0 };
}
@@ -1618,7 +1714,7 @@ bool HCE::replaceInstrExpr(const ExtDesc &ED, const ExtenderInit &ExtI,
assert(IdxOpc == Hexagon::A2_addi);
// Clamp Diff to the 16 bit range.
- int32_t D = isInt<16>(Diff) ? Diff : (Diff > 32767 ? 32767 : -32767);
+ int32_t D = isInt<16>(Diff) ? Diff : (Diff > 0 ? 32767 : -32768);
BuildMI(MBB, At, dl, HII->get(IdxOpc))
.add(MI.getOperand(0))
.add(MachineOperand(ExtR))
@@ -1626,11 +1722,13 @@ bool HCE::replaceInstrExpr(const ExtDesc &ED, const ExtenderInit &ExtI,
Diff -= D;
#ifndef NDEBUG
// Make sure the output is within allowable range for uses.
+ // "Diff" is a difference in the "opposite direction", i.e. Ext - DefV,
+ // not DefV - Ext, as getOffsetRange would calculate.
OffsetRange Uses = getOffsetRange(MI.getOperand(0));
- if (!Uses.contains(Diff))
- dbgs() << "Diff: " << Diff << " out of range " << Uses
+ if (!Uses.contains(-Diff))
+ dbgs() << "Diff: " << -Diff << " out of range " << Uses
<< " for " << MI;
- assert(Uses.contains(Diff));
+ assert(Uses.contains(-Diff));
#endif
MBB.erase(MI);
return true;
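The clamp fix above matters for large negative differences. A quick check of
the arithmetic (numbers chosen for illustration):

    // Diff = -40000:
    //   before: D = -32767 (one short of INT16_MIN), residue = -7233
    //   after:  D = -32768 (true INT16_MIN),         residue = -7232
    // Either way the residue must still be absorbed by the use, which the
    // sign-corrected assertion now verifies via Uses.contains(-Diff).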
@@ -1726,8 +1824,8 @@ bool HCE::replaceInstr(unsigned Idx, Register ExtR, const ExtenderInit &ExtI) {
ExtValue EV(ED);
int32_t Diff = EV.Offset - DefV.Offset;
const MachineInstr &MI = *ED.UseMI;
- DEBUG(dbgs() << __func__ << " Idx:" << Idx << " ExtR:"
- << PrintRegister(ExtR, *HRI) << " Diff:" << Diff << '\n');
+ LLVM_DEBUG(dbgs() << __func__ << " Idx:" << Idx << " ExtR:"
+ << PrintRegister(ExtR, *HRI) << " Diff:" << Diff << '\n');
// These two addressing modes must be converted into indexed forms
// regardless of what the initializer looks like.
@@ -1833,7 +1931,7 @@ const MachineOperand &HCE::getStoredValueOp(const MachineInstr &MI) const {
bool HCE::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(MF.getFunction()))
return false;
- DEBUG(MF.print(dbgs() << "Before " << getPassName() << '\n', nullptr));
+ LLVM_DEBUG(MF.print(dbgs() << "Before " << getPassName() << '\n', nullptr));
HII = MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
HRI = MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
@@ -1842,13 +1940,13 @@ bool HCE::runOnMachineFunction(MachineFunction &MF) {
AssignmentMap IMap;
collect(MF);
- std::sort(Extenders.begin(), Extenders.end(),
+ llvm::sort(Extenders.begin(), Extenders.end(),
[](const ExtDesc &A, const ExtDesc &B) {
return ExtValue(A) < ExtValue(B);
});
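llvm::sort is a drop-in wrapper around std::sort; in expensive-checks builds
it shuffles the input before sorting, so any hidden reliance on the
accidental ordering of equivalent ExtDesc entries surfaces as test
nondeterminism rather than going unnoticed.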
bool Changed = false;
- DEBUG(dbgs() << "Collected " << Extenders.size() << " extenders\n");
+ LLVM_DEBUG(dbgs() << "Collected " << Extenders.size() << " extenders\n");
for (unsigned I = 0, E = Extenders.size(); I != E; ) {
unsigned B = I;
const ExtRoot &T = Extenders[B].getOp();
@@ -1860,7 +1958,7 @@ bool HCE::runOnMachineFunction(MachineFunction &MF) {
Changed |= replaceExtenders(IMap);
}
- DEBUG({
+ LLVM_DEBUG({
if (Changed)
MF.print(dbgs() << "After " << getPassName() << '\n', nullptr);
else
diff --git a/lib/Target/Hexagon/HexagonConstPropagation.cpp b/lib/Target/Hexagon/HexagonConstPropagation.cpp
index 8ac96f3a4bfa..8f22a71dc1f3 100644
--- a/lib/Target/Hexagon/HexagonConstPropagation.cpp
+++ b/lib/Target/Hexagon/HexagonConstPropagation.cpp
@@ -617,7 +617,7 @@ void MachineConstPropagator::CellMap::print(raw_ostream &os,
void MachineConstPropagator::visitPHI(const MachineInstr &PN) {
const MachineBasicBlock *MB = PN.getParent();
unsigned MBN = MB->getNumber();
- DEBUG(dbgs() << "Visiting FI(" << printMBBReference(*MB) << "): " << PN);
+ LLVM_DEBUG(dbgs() << "Visiting FI(" << printMBBReference(*MB) << "): " << PN);
const MachineOperand &MD = PN.getOperand(0);
Register DefR(MD);
@@ -642,8 +642,8 @@ Bottomize:
const MachineBasicBlock *PB = PN.getOperand(i+1).getMBB();
unsigned PBN = PB->getNumber();
if (!EdgeExec.count(CFGEdge(PBN, MBN))) {
- DEBUG(dbgs() << " edge " << printMBBReference(*PB) << "->"
- << printMBBReference(*MB) << " not executable\n");
+ LLVM_DEBUG(dbgs() << " edge " << printMBBReference(*PB) << "->"
+ << printMBBReference(*MB) << " not executable\n");
continue;
}
const MachineOperand &SO = PN.getOperand(i);
@@ -658,8 +658,9 @@ Bottomize:
LatticeCell SrcC;
bool Eval = MCE.evaluate(UseR, Cells.get(UseR.Reg), SrcC);
- DEBUG(dbgs() << " edge from " << printMBBReference(*PB) << ": "
- << printReg(UseR.Reg, &MCE.TRI, UseR.SubReg) << SrcC << '\n');
+ LLVM_DEBUG(dbgs() << " edge from " << printMBBReference(*PB) << ": "
+ << printReg(UseR.Reg, &MCE.TRI, UseR.SubReg) << SrcC
+ << '\n');
Changed |= Eval ? DefC.meet(SrcC)
: DefC.setBottom();
Cells.update(DefR.Reg, DefC);
@@ -671,11 +672,11 @@ Bottomize:
}
void MachineConstPropagator::visitNonBranch(const MachineInstr &MI) {
- DEBUG(dbgs() << "Visiting MI(" << printMBBReference(*MI.getParent())
- << "): " << MI);
+ LLVM_DEBUG(dbgs() << "Visiting MI(" << printMBBReference(*MI.getParent())
+ << "): " << MI);
CellMap Outputs;
bool Eval = MCE.evaluate(MI, Cells, Outputs);
- DEBUG({
+ LLVM_DEBUG({
if (Eval) {
dbgs() << " outputs:";
for (auto &I : Outputs)
@@ -713,7 +714,7 @@ void MachineConstPropagator::visitNonBranch(const MachineInstr &MI) {
}
}
-// \brief Starting at a given branch, visit remaining branches in the block.
+// Starting at a given branch, visit remaining branches in the block.
// Traverse over the subsequent branches for as long as the preceding one
// can fall through. Add all the possible targets to the flow work queue,
// including the potential fall-through to the layout-successor block.
@@ -728,8 +729,8 @@ void MachineConstPropagator::visitBranchesFrom(const MachineInstr &BrI) {
while (It != End) {
const MachineInstr &MI = *It;
InstrExec.insert(&MI);
- DEBUG(dbgs() << "Visiting " << (EvalOk ? "BR" : "br") << "("
- << printMBBReference(B) << "): " << MI);
+ LLVM_DEBUG(dbgs() << "Visiting " << (EvalOk ? "BR" : "br") << "("
+ << printMBBReference(B) << "): " << MI);
// Do not evaluate subsequent branches if the evaluation of any of the
// previous branches failed. Keep iterating over the branches only
// to mark them as executable.
@@ -763,23 +764,23 @@ void MachineConstPropagator::visitBranchesFrom(const MachineInstr &BrI) {
// last one set "FallsThru", then add an edge to the layout successor
// to the targets.
Targets.clear();
- DEBUG(dbgs() << " failed to evaluate a branch...adding all CFG "
- "successors\n");
+ LLVM_DEBUG(dbgs() << " failed to evaluate a branch...adding all CFG "
+ "successors\n");
for (const MachineBasicBlock *SB : B.successors())
Targets.insert(SB);
}
for (const MachineBasicBlock *TB : Targets) {
unsigned TBN = TB->getNumber();
- DEBUG(dbgs() << " pushing edge " << printMBBReference(B) << " -> "
- << printMBBReference(*TB) << "\n");
+ LLVM_DEBUG(dbgs() << " pushing edge " << printMBBReference(B) << " -> "
+ << printMBBReference(*TB) << "\n");
FlowQ.push(CFGEdge(MBN, TBN));
}
}
void MachineConstPropagator::visitUsesOf(unsigned Reg) {
- DEBUG(dbgs() << "Visiting uses of " << printReg(Reg, &MCE.TRI)
- << Cells.get(Reg) << '\n');
+ LLVM_DEBUG(dbgs() << "Visiting uses of " << printReg(Reg, &MCE.TRI)
+ << Cells.get(Reg) << '\n');
for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) {
// Do not process non-executable instructions. They can become executable
// later (via a flow-edge in the work queue). In such a case, the instruc-
@@ -799,7 +800,7 @@ bool MachineConstPropagator::computeBlockSuccessors(const MachineBasicBlock *MB,
SetVector<const MachineBasicBlock*> &Targets) {
MachineBasicBlock::const_iterator FirstBr = MB->end();
for (const MachineInstr &MI : *MB) {
- if (MI.isDebugValue())
+ if (MI.isDebugInstr())
continue;
if (MI.isBranch()) {
FirstBr = MI.getIterator();
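isDebugInstr() is a strict superset of isDebugValue(); at this point in the
tree it expands to roughly the following (a sketch from memory -- see
MachineInstr.h for the authoritative definition):

    bool isDebugInstr() const { return isDebugValue() || isDebugLabel(); }

So the mechanical isDebugValue -> isDebugInstr replacements in this file and
in HexagonCopyToCombine.cpp below skip DBG_LABEL pseudo-instructions as well
as DBG_VALUE.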
@@ -814,7 +815,7 @@ bool MachineConstPropagator::computeBlockSuccessors(const MachineBasicBlock *MB,
for (MachineBasicBlock::const_iterator I = FirstBr; I != End; ++I) {
const MachineInstr &MI = *I;
// Can there be debug instructions between branches?
- if (MI.isDebugValue())
+ if (MI.isDebugInstr())
continue;
if (!InstrExec.count(&MI))
continue;
@@ -870,10 +871,10 @@ void MachineConstPropagator::propagate(MachineFunction &MF) {
CFGEdge Edge = FlowQ.front();
FlowQ.pop();
- DEBUG(dbgs() << "Picked edge "
- << printMBBReference(*MF.getBlockNumbered(Edge.first)) << "->"
- << printMBBReference(*MF.getBlockNumbered(Edge.second))
- << '\n');
+ LLVM_DEBUG(
+ dbgs() << "Picked edge "
+ << printMBBReference(*MF.getBlockNumbered(Edge.first)) << "->"
+ << printMBBReference(*MF.getBlockNumbered(Edge.second)) << '\n');
if (Edge.first != EntryNum)
if (EdgeExec.count(Edge))
continue;
@@ -896,7 +897,7 @@ void MachineConstPropagator::propagate(MachineFunction &MF) {
// If the successor block just became executable, visit all instructions.
// To see if this is the first time we're visiting it, check the first
// non-debug instruction to see if it is executable.
- while (It != End && It->isDebugValue())
+ while (It != End && It->isDebugInstr())
++It;
assert(It == End || !It->isPHI());
// If this block has been visited, go on to the next one.
@@ -905,7 +906,7 @@ void MachineConstPropagator::propagate(MachineFunction &MF) {
// For now, scan all non-branch instructions. Branches require different
// processing.
while (It != End && !It->isBranch()) {
- if (!It->isDebugValue()) {
+ if (!It->isDebugInstr()) {
InstrExec.insert(&*It);
visitNonBranch(*It);
}
@@ -927,7 +928,7 @@ void MachineConstPropagator::propagate(MachineFunction &MF) {
}
} // while (FlowQ)
- DEBUG({
+ LLVM_DEBUG({
dbgs() << "Cells after propagation:\n";
Cells.print(dbgs(), MCE.TRI);
dbgs() << "Dead CFG edges:\n";
@@ -1042,7 +1043,7 @@ bool MachineConstPropagator::rewrite(MachineFunction &MF) {
// This is the constant propagation algorithm as described by Wegman-Zadeck.
// Most of the terminology comes from there.
bool MachineConstPropagator::run(MachineFunction &MF) {
- DEBUG(MF.print(dbgs() << "Starting MachineConstPropagator\n", nullptr));
+ LLVM_DEBUG(MF.print(dbgs() << "Starting MachineConstPropagator\n", nullptr));
MRI = &MF.getRegInfo();
@@ -1054,7 +1055,7 @@ bool MachineConstPropagator::run(MachineFunction &MF) {
propagate(MF);
bool Changed = rewrite(MF);
- DEBUG({
+ LLVM_DEBUG({
dbgs() << "End of MachineConstPropagator (Changed=" << Changed << ")\n";
if (Changed)
MF.print(dbgs(), nullptr);
@@ -1880,10 +1881,7 @@ namespace {
public:
static char ID;
- HexagonConstPropagation() : MachineFunctionPass(ID) {
- PassRegistry &Registry = *PassRegistry::getPassRegistry();
- initializeHexagonConstPropagationPass(Registry);
- }
+ HexagonConstPropagation() : MachineFunctionPass(ID) {}
StringRef getPassName() const override {
return "Hexagon Constant Propagation";
@@ -1903,8 +1901,8 @@ namespace {
char HexagonConstPropagation::ID = 0;
-INITIALIZE_PASS(HexagonConstPropagation, "hcp", "Hexagon Constant Propagation",
- false, false)
+INITIALIZE_PASS(HexagonConstPropagation, "hexagon-constp",
+ "Hexagon Constant Propagation", false, false)
HexagonConstEvaluator::HexagonConstEvaluator(MachineFunction &Fn)
: MachineConstEvaluator(Fn),
@@ -2022,6 +2020,8 @@ bool HexagonConstEvaluator::evaluate(const MachineInstr &MI,
case Hexagon::A2_combineii: // combine(#s8Ext, #s8)
case Hexagon::A4_combineii: // combine(#s8, #u6Ext)
{
+ if (!MI.getOperand(1).isImm() || !MI.getOperand(2).isImm())
+ return false;
uint64_t Hi = MI.getOperand(1).getImm();
uint64_t Lo = MI.getOperand(2).getImm();
uint64_t Res = (Hi << 32) | (Lo & 0xFFFFFFFF);
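The new isImm() guards here (and the matching ones in evaluateHexLogical
further down) defend against operands that are not plain immediates, for
which getImm() would assert. Schematic MIR of the kind of input that now
bails out cleanly (hypothetical example):

    %0:doubleregs = A2_combineii @some_global, 0
    // evaluate() returns false rather than calling getImm() on @some_global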
@@ -2631,6 +2631,8 @@ bool HexagonConstEvaluator::evaluateHexLogical(const MachineInstr &MI,
Eval = evaluateANDrr(R1, Register(Src2), Inputs, RC);
break;
case Hexagon::A2_andir: {
+ if (!Src2.isImm())
+ return false;
APInt A(32, Src2.getImm(), true);
Eval = evaluateANDri(R1, A, Inputs, RC);
break;
@@ -2640,6 +2642,8 @@ bool HexagonConstEvaluator::evaluateHexLogical(const MachineInstr &MI,
Eval = evaluateORrr(R1, Register(Src2), Inputs, RC);
break;
case Hexagon::A2_orir: {
+ if (!Src2.isImm())
+ return false;
APInt A(32, Src2.getImm(), true);
Eval = evaluateORri(R1, A, Inputs, RC);
break;
@@ -2775,7 +2779,7 @@ bool HexagonConstEvaluator::rewriteHexConstDefs(MachineInstr &MI,
AllDefs = false;
// Some diagnostics.
- // DEBUG({...}) gets confused with all this code as an argument.
+ // LLVM_DEBUG({...}) gets confused with all this code as an argument.
#ifndef NDEBUG
bool Debugging = DebugFlag && isCurrentDebugType(DEBUG_TYPE);
if (Debugging) {
@@ -2920,7 +2924,7 @@ bool HexagonConstEvaluator::rewriteHexConstDefs(MachineInstr &MI,
ChangedNum++;
}
- DEBUG({
+ LLVM_DEBUG({
if (!NewInstrs.empty()) {
MachineFunction &MF = *MI.getParent()->getParent();
dbgs() << "In function: " << MF.getName() << "\n";
@@ -3087,7 +3091,7 @@ bool HexagonConstEvaluator::rewriteHexConstUses(MachineInstr &MI,
MO.setIsKill(false);
}
- DEBUG({
+ LLVM_DEBUG({
if (NewMI) {
dbgs() << "Rewrite: for " << MI;
if (NewMI != &MI)
@@ -3127,7 +3131,7 @@ bool HexagonConstEvaluator::rewriteHexBranch(MachineInstr &BrI,
if (BrI.getOpcode() == Hexagon::J2_jump)
return false;
- DEBUG(dbgs() << "Rewrite(" << printMBBReference(B) << "):" << BrI);
+ LLVM_DEBUG(dbgs() << "Rewrite(" << printMBBReference(B) << "):" << BrI);
bool Rewritten = false;
if (NumTargets > 0) {
assert(!FallsThru && "This should have been checked before");
diff --git a/lib/Target/Hexagon/HexagonCopyToCombine.cpp b/lib/Target/Hexagon/HexagonCopyToCombine.cpp
index 087a77203fcb..fccde96d8a32 100644
--- a/lib/Target/Hexagon/HexagonCopyToCombine.cpp
+++ b/lib/Target/Hexagon/HexagonCopyToCombine.cpp
@@ -300,7 +300,7 @@ bool HexagonCopyToCombine::isSafeToMoveTogether(MachineInstr &I1,
// * reads I2's def reg
// * or has unmodelled side effects
// we can't move I2 across it.
- if (I->isDebugValue())
+ if (I->isDebugInstr())
continue;
if (isUnsafeToMoveAcross(*I, I2UseReg, I2DestReg, TRI)) {
@@ -358,7 +358,7 @@ bool HexagonCopyToCombine::isSafeToMoveTogether(MachineInstr &I1,
// to remove the implicit killed %d4 operand. For now, we are
// conservative and disallow the move.
// we can't move I1 across it.
- if (MI.isDebugValue()) {
+ if (MI.isDebugInstr()) {
if (MI.readsRegister(I1DestReg, TRI)) // Move this instruction after I2.
DbgMItoMove.push_back(&MI);
continue;
@@ -396,7 +396,7 @@ void
HexagonCopyToCombine::findPotentialNewifiableTFRs(MachineBasicBlock &BB) {
DenseMap<unsigned, MachineInstr *> LastDef;
for (MachineInstr &MI : BB) {
- if (MI.isDebugValue())
+ if (MI.isDebugInstr())
continue;
// Mark TFRs that feed a potential new value store as such.
@@ -423,7 +423,7 @@ HexagonCopyToCombine::findPotentialNewifiableTFRs(MachineBasicBlock &BB) {
MachineBasicBlock::iterator It(DefInst);
unsigned NumInstsToDef = 0;
while (&*It != &MI) {
- if (!It->isDebugValue())
+ if (!It->isDebugInstr())
++NumInstsToDef;
++It;
}
@@ -489,7 +489,7 @@ bool HexagonCopyToCombine::runOnMachineFunction(MachineFunction &MF) {
MI != End;) {
MachineInstr &I1 = *MI++;
- if (I1.isDebugValue())
+ if (I1.isDebugInstr())
continue;
// Don't combine a TFR whose user could be newified (instructions that
@@ -526,7 +526,7 @@ MachineInstr *HexagonCopyToCombine::findPairable(MachineInstr &I1,
bool &DoInsertAtI1,
bool AllowC64) {
MachineBasicBlock::iterator I2 = std::next(MachineBasicBlock::iterator(I1));
- while (I2 != I1.getParent()->end() && I2->isDebugValue())
+ while (I2 != I1.getParent()->end() && I2->isDebugInstr())
++I2;
unsigned I1DestReg = I1.getOperand(0).getReg();
@@ -649,7 +649,7 @@ void HexagonCopyToCombine::emitConst64(MachineBasicBlock::iterator &InsertPt,
unsigned DoubleDestReg,
MachineOperand &HiOperand,
MachineOperand &LoOperand) {
- DEBUG(dbgs() << "Found a CONST64\n");
+ LLVM_DEBUG(dbgs() << "Found a CONST64\n");
DebugLoc DL = InsertPt->getDebugLoc();
MachineBasicBlock *BB = InsertPt->getParent();
diff --git a/lib/Target/Hexagon/HexagonDepArch.td b/lib/Target/Hexagon/HexagonDepArch.td
index 87dcd966f2ed..3594379aa841 100644
--- a/lib/Target/Hexagon/HexagonDepArch.td
+++ b/lib/Target/Hexagon/HexagonDepArch.td
@@ -11,14 +11,14 @@
def ArchV65: SubtargetFeature<"v65", "HexagonArchVersion", "Hexagon::ArchEnum::V65", "Enable Hexagon V65 architecture">;
-def HasV65T : Predicate<"HST->hasV65TOps()">, AssemblerPredicate<"ArchV65">;
+def HasV65 : Predicate<"HST->hasV65Ops()">, AssemblerPredicate<"ArchV65">;
def ArchV62: SubtargetFeature<"v62", "HexagonArchVersion", "Hexagon::ArchEnum::V62", "Enable Hexagon V62 architecture">;
-def HasV62T : Predicate<"HST->hasV62TOps()">, AssemblerPredicate<"ArchV62">;
+def HasV62 : Predicate<"HST->hasV62Ops()">, AssemblerPredicate<"ArchV62">;
def ArchV60: SubtargetFeature<"v60", "HexagonArchVersion", "Hexagon::ArchEnum::V60", "Enable Hexagon V60 architecture">;
-def HasV60T : Predicate<"HST->hasV60TOps()">, AssemblerPredicate<"ArchV60">;
+def HasV60 : Predicate<"HST->hasV60Ops()">, AssemblerPredicate<"ArchV60">;
def ArchV55: SubtargetFeature<"v55", "HexagonArchVersion", "Hexagon::ArchEnum::V55", "Enable Hexagon V55 architecture">;
-def HasV55T : Predicate<"HST->hasV55TOps()">, AssemblerPredicate<"ArchV55">;
+def HasV55 : Predicate<"HST->hasV55Ops()">, AssemblerPredicate<"ArchV55">;
def ArchV4: SubtargetFeature<"v4", "HexagonArchVersion", "Hexagon::ArchEnum::V4", "Enable Hexagon V4 architecture">;
-def HasV4T : Predicate<"HST->hasV4TOps()">, AssemblerPredicate<"ArchV4">;
+def HasV4 : Predicate<"HST->hasV4Ops()">, AssemblerPredicate<"ArchV4">;
def ArchV5: SubtargetFeature<"v5", "HexagonArchVersion", "Hexagon::ArchEnum::V5", "Enable Hexagon V5 architecture">;
-def HasV5T : Predicate<"HST->hasV5TOps()">, AssemblerPredicate<"ArchV5">;
+def HasV5 : Predicate<"HST->hasV5Ops()">, AssemblerPredicate<"ArchV5">;
diff --git a/lib/Target/Hexagon/HexagonDepDecoders.h b/lib/Target/Hexagon/HexagonDepDecoders.h
deleted file mode 100644
index 020362a95909..000000000000
--- a/lib/Target/Hexagon/HexagonDepDecoders.h
+++ /dev/null
@@ -1,13 +0,0 @@
-//===- HexagonDepDecoders.h -----------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-// Automatically generated file, please consult code owner before editing.
-//===----------------------------------------------------------------------===//
-
-
-
diff --git a/lib/Target/Hexagon/HexagonDepIICScalar.td b/lib/Target/Hexagon/HexagonDepIICScalar.td
index 083ec7753e04..931504b56ccb 100644
--- a/lib/Target/Hexagon/HexagonDepIICScalar.td
+++ b/lib/Target/Hexagon/HexagonDepIICScalar.td
@@ -10,21 +10,17 @@
//===----------------------------------------------------------------------===//
-def tc_0077f68c : InstrItinClass;
def tc_00afc57e : InstrItinClass;
def tc_00e7c26e : InstrItinClass;
def tc_03220ffa : InstrItinClass;
def tc_038a1342 : InstrItinClass;
def tc_04c9decc : InstrItinClass;
def tc_05b6c987 : InstrItinClass;
-def tc_0a2b8c7c : InstrItinClass;
def tc_0cd51c76 : InstrItinClass;
def tc_0dc560de : InstrItinClass;
def tc_0fc1ae07 : InstrItinClass;
def tc_10b97e27 : InstrItinClass;
-def tc_128f96e3 : InstrItinClass;
def tc_1372bca1 : InstrItinClass;
-def tc_1432937d : InstrItinClass;
def tc_14cd4cfa : InstrItinClass;
def tc_15411484 : InstrItinClass;
def tc_16d0d8d5 : InstrItinClass;
@@ -32,18 +28,14 @@ def tc_181af5d0 : InstrItinClass;
def tc_1853ea6d : InstrItinClass;
def tc_1b82a277 : InstrItinClass;
def tc_1b9c9ee5 : InstrItinClass;
-def tc_1c0005f9 : InstrItinClass;
def tc_1d5a38a8 : InstrItinClass;
def tc_1e856f58 : InstrItinClass;
-def tc_20280784 : InstrItinClass;
def tc_234a11a5 : InstrItinClass;
def tc_238d91d2 : InstrItinClass;
def tc_29175780 : InstrItinClass;
-def tc_29641329 : InstrItinClass;
def tc_2a160009 : InstrItinClass;
def tc_2b2f4060 : InstrItinClass;
def tc_2b6f77c6 : InstrItinClass;
-def tc_2e00db30 : InstrItinClass;
def tc_2f185f5c : InstrItinClass;
def tc_2fc0c436 : InstrItinClass;
def tc_351fed2d : InstrItinClass;
@@ -71,22 +63,19 @@ def tc_51b866be : InstrItinClass;
def tc_523fcf30 : InstrItinClass;
def tc_5274e61a : InstrItinClass;
def tc_52d7bbea : InstrItinClass;
-def tc_53173427 : InstrItinClass;
def tc_53bc8a6a : InstrItinClass;
def tc_53bdb2f6 : InstrItinClass;
def tc_540fdfbc : InstrItinClass;
def tc_55050d58 : InstrItinClass;
-def tc_56d25411 : InstrItinClass;
def tc_57288781 : InstrItinClass;
def tc_594ab548 : InstrItinClass;
+def tc_59a01ead : InstrItinClass;
def tc_5acef64a : InstrItinClass;
def tc_5ba5997d : InstrItinClass;
def tc_5eb851fc : InstrItinClass;
def tc_5f6847a1 : InstrItinClass;
def tc_60571023 : InstrItinClass;
def tc_609d2efe : InstrItinClass;
-def tc_60d76817 : InstrItinClass;
-def tc_60f5738d : InstrItinClass;
def tc_63fe3df7 : InstrItinClass;
def tc_66888ded : InstrItinClass;
def tc_6792d5ff : InstrItinClass;
@@ -96,6 +85,7 @@ def tc_6aa5711a : InstrItinClass;
def tc_6ac37025 : InstrItinClass;
def tc_6ebb4a12 : InstrItinClass;
def tc_6efc556e : InstrItinClass;
+def tc_6fa4db47 : InstrItinClass;
def tc_73043bf4 : InstrItinClass;
def tc_746baa8e : InstrItinClass;
def tc_74e47fd9 : InstrItinClass;
@@ -103,18 +93,16 @@ def tc_7934b9df : InstrItinClass;
def tc_7a830544 : InstrItinClass;
def tc_7f881c76 : InstrItinClass;
def tc_84df2cd3 : InstrItinClass;
-def tc_85523bcb : InstrItinClass;
def tc_855b0b61 : InstrItinClass;
def tc_87735c3b : InstrItinClass;
-def tc_88fa1a78 : InstrItinClass;
def tc_897d1a9d : InstrItinClass;
def tc_8b15472a : InstrItinClass;
-def tc_8bb285ec : InstrItinClass;
def tc_8fd5f294 : InstrItinClass;
def tc_8fe6b782 : InstrItinClass;
def tc_90f3e30c : InstrItinClass;
def tc_976ddc4f : InstrItinClass;
def tc_97743097 : InstrItinClass;
+def tc_994333cd : InstrItinClass;
def tc_999d32db : InstrItinClass;
def tc_99be14ca : InstrItinClass;
def tc_9c00ce8d : InstrItinClass;
@@ -133,7 +121,6 @@ def tc_adb14c66 : InstrItinClass;
def tc_b13761ae : InstrItinClass;
def tc_b166348b : InstrItinClass;
def tc_b44c6e2a : InstrItinClass;
-def tc_b5a33b22 : InstrItinClass;
def tc_b77c481f : InstrItinClass;
def tc_b7dd427e : InstrItinClass;
def tc_b9488031 : InstrItinClass;
@@ -141,7 +128,6 @@ def tc_b9c0b731 : InstrItinClass;
def tc_b9c4623f : InstrItinClass;
def tc_bad2bcaf : InstrItinClass;
def tc_bcc96cee : InstrItinClass;
-def tc_bd90564c : InstrItinClass;
def tc_bde7aaf4 : InstrItinClass;
def tc_be706f30 : InstrItinClass;
def tc_c2f7d806 : InstrItinClass;
@@ -166,24 +152,20 @@ def tc_d9f95eef : InstrItinClass;
def tc_daa058fa : InstrItinClass;
def tc_dbdffe3d : InstrItinClass;
def tc_e0739b8c : InstrItinClass;
-def tc_e1e0a2dc : InstrItinClass;
def tc_e1e99bfa : InstrItinClass;
def tc_e216a5db : InstrItinClass;
def tc_e421e012 : InstrItinClass;
-def tc_e6b38e01 : InstrItinClass;
def tc_e7624c08 : InstrItinClass;
def tc_e7d02c66 : InstrItinClass;
def tc_e913dc32 : InstrItinClass;
def tc_e9c822f7 : InstrItinClass;
def tc_e9fae2d6 : InstrItinClass;
-def tc_ef20db1c : InstrItinClass;
def tc_ef52ed71 : InstrItinClass;
def tc_ef84f62f : InstrItinClass;
def tc_f2704b9a : InstrItinClass;
def tc_f3eaa14b : InstrItinClass;
def tc_f47d212f : InstrItinClass;
def tc_f49e76f4 : InstrItinClass;
-def tc_f4f43fb5 : InstrItinClass;
def tc_f7dd9c9f : InstrItinClass;
def tc_f86c328a : InstrItinClass;
def tc_f8eeed7a : InstrItinClass;
@@ -192,21 +174,17 @@ def tc_ff9ee76e : InstrItinClass;
class DepScalarItinV4 {
list<InstrItinData> DepScalarItinV4_list = [
- InstrItinData <tc_0077f68c, [InstrStage<1, [SLOT0]>]>,
InstrItinData <tc_00afc57e, [InstrStage<1, [SLOT2, SLOT3]>]>,
InstrItinData <tc_00e7c26e, [InstrStage<1, [SLOT0]>]>,
InstrItinData <tc_03220ffa, [InstrStage<1, [SLOT0, SLOT1]>]>,
InstrItinData <tc_038a1342, [InstrStage<1, [SLOT2, SLOT3]>]>,
InstrItinData <tc_04c9decc, [InstrStage<1, [SLOT2, SLOT3]>]>,
InstrItinData <tc_05b6c987, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_0a2b8c7c, [InstrStage<1, [SLOT0]>]>,
InstrItinData <tc_0cd51c76, [InstrStage<1, [SLOT0, SLOT1]>]>,
InstrItinData <tc_0dc560de, [InstrStage<1, [SLOT0, SLOT1]>]>,
InstrItinData <tc_0fc1ae07, [InstrStage<1, [SLOT0]>]>,
InstrItinData <tc_10b97e27, [InstrStage<1, [SLOT2]>]>,
- InstrItinData <tc_128f96e3, [InstrStage<1, [SLOT0]>]>,
InstrItinData <tc_1372bca1, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_1432937d, [InstrStage<1, [SLOT2]>]>,
InstrItinData <tc_14cd4cfa, [InstrStage<1, [SLOT2]>]>,
InstrItinData <tc_15411484, [InstrStage<1, [SLOT2]>]>,
InstrItinData <tc_16d0d8d5, [InstrStage<1, [SLOT2, SLOT3]>]>,
@@ -214,18 +192,14 @@ class DepScalarItinV4 {
InstrItinData <tc_1853ea6d, [InstrStage<1, [SLOT2, SLOT3]>]>,
InstrItinData <tc_1b82a277, [InstrStage<1, [SLOT0, SLOT1]>]>,
InstrItinData <tc_1b9c9ee5, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_1c0005f9, [InstrStage<1, [SLOT3]>]>,
InstrItinData <tc_1d5a38a8, [InstrStage<1, [SLOT0, SLOT1]>]>,
InstrItinData <tc_1e856f58, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_20280784, [InstrStage<1, [SLOT0]>]>,
InstrItinData <tc_234a11a5, [InstrStage<1, [SLOT2, SLOT3]>]>,
InstrItinData <tc_238d91d2, [InstrStage<1, [SLOT0, SLOT1]>]>,
InstrItinData <tc_29175780, [InstrStage<1, [SLOT3]>]>,
- InstrItinData <tc_29641329, [InstrStage<1, [SLOT3]>]>,
InstrItinData <tc_2a160009, [InstrStage<1, [SLOT0]>]>,
InstrItinData <tc_2b2f4060, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
InstrItinData <tc_2b6f77c6, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_2e00db30, [InstrStage<1, [SLOT0]>]>,
InstrItinData <tc_2f185f5c, [InstrStage<1, [SLOT2, SLOT3]>]>,
InstrItinData <tc_2fc0c436, [InstrStage<1, [SLOT0, SLOT1]>]>,
InstrItinData <tc_351fed2d, [InstrStage<1, [SLOT2, SLOT3]>]>,
@@ -253,22 +227,19 @@ class DepScalarItinV4 {
InstrItinData <tc_523fcf30, [InstrStage<1, [SLOT2, SLOT3]>]>,
InstrItinData <tc_5274e61a, [InstrStage<1, [SLOT0, SLOT1]>]>,
InstrItinData <tc_52d7bbea, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
- InstrItinData <tc_53173427, [InstrStage<1, [SLOT3]>]>,
InstrItinData <tc_53bc8a6a, [InstrStage<1, [SLOT2, SLOT3]>]>,
InstrItinData <tc_53bdb2f6, [InstrStage<1, [SLOT0]>]>,
InstrItinData <tc_540fdfbc, [InstrStage<1, [SLOT2, SLOT3]>]>,
InstrItinData <tc_55050d58, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_56d25411, [InstrStage<1, [SLOT2]>]>,
InstrItinData <tc_57288781, [InstrStage<1, [SLOT0, SLOT1]>]>,
InstrItinData <tc_594ab548, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_59a01ead, [InstrStage<1, [SLOT2]>]>,
InstrItinData <tc_5acef64a, [InstrStage<1, [SLOT0, SLOT1]>]>,
InstrItinData <tc_5ba5997d, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
InstrItinData <tc_5eb851fc, [InstrStage<1, [SLOT0]>]>,
InstrItinData <tc_5f6847a1, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
InstrItinData <tc_60571023, [InstrStage<1, [SLOT2, SLOT3]>]>,
InstrItinData <tc_609d2efe, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_60d76817, [InstrStage<1, [SLOT3]>]>,
- InstrItinData <tc_60f5738d, [InstrStage<1, [SLOT3]>]>,
InstrItinData <tc_63fe3df7, [InstrStage<1, [SLOT0, SLOT1]>]>,
InstrItinData <tc_66888ded, [InstrStage<1, [SLOT0, SLOT1]>]>,
InstrItinData <tc_6792d5ff, [InstrStage<1, [SLOT2, SLOT3]>]>,
@@ -278,6 +249,7 @@ class DepScalarItinV4 {
InstrItinData <tc_6ac37025, [InstrStage<1, [SLOT0]>]>,
InstrItinData <tc_6ebb4a12, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
InstrItinData <tc_6efc556e, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData <tc_6fa4db47, [InstrStage<1, [SLOT3]>]>,
InstrItinData <tc_73043bf4, [InstrStage<1, [SLOT3]>]>,
InstrItinData <tc_746baa8e, [InstrStage<1, [SLOT0]>]>,
InstrItinData <tc_74e47fd9, [InstrStage<1, [SLOT0, SLOT1]>]>,
@@ -285,18 +257,16 @@ class DepScalarItinV4 {
InstrItinData <tc_7a830544, [InstrStage<1, [SLOT2, SLOT3]>]>,
InstrItinData <tc_7f881c76, [InstrStage<1, [SLOT0, SLOT1]>]>,
InstrItinData <tc_84df2cd3, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_85523bcb, [InstrStage<1, [SLOT3]>]>,
InstrItinData <tc_855b0b61, [InstrStage<1, [SLOT2, SLOT3]>]>,
InstrItinData <tc_87735c3b, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_88fa1a78, [InstrStage<1, [SLOT3]>]>,
InstrItinData <tc_897d1a9d, [InstrStage<1, [SLOT2, SLOT3]>]>,
InstrItinData <tc_8b15472a, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_8bb285ec, [InstrStage<1, [SLOT0]>]>,
InstrItinData <tc_8fd5f294, [InstrStage<1, [SLOT2, SLOT3]>]>,
InstrItinData <tc_8fe6b782, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
InstrItinData <tc_90f3e30c, [InstrStage<1, [SLOT0, SLOT1]>]>,
InstrItinData <tc_976ddc4f, [InstrStage<1, [SLOT2, SLOT3]>]>,
InstrItinData <tc_97743097, [InstrStage<1, [SLOT2]>]>,
+ InstrItinData <tc_994333cd, [InstrStage<1, [SLOT3]>]>,
InstrItinData <tc_999d32db, [InstrStage<1, [SLOT2]>]>,
InstrItinData <tc_99be14ca, [InstrStage<1, [SLOT2, SLOT3]>]>,
InstrItinData <tc_9c00ce8d, [InstrStage<1, [SLOT2, SLOT3]>]>,
@@ -315,7 +285,6 @@ class DepScalarItinV4 {
InstrItinData <tc_b13761ae, [InstrStage<1, [SLOT2]>]>,
InstrItinData <tc_b166348b, [InstrStage<1, [SLOT0]>]>,
InstrItinData <tc_b44c6e2a, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_b5a33b22, [InstrStage<1, [SLOT2]>]>,
InstrItinData <tc_b77c481f, [InstrStage<1, [SLOT0, SLOT1]>]>,
InstrItinData <tc_b7dd427e, [InstrStage<1, [SLOT0, SLOT1]>]>,
InstrItinData <tc_b9488031, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
@@ -323,7 +292,6 @@ class DepScalarItinV4 {
InstrItinData <tc_b9c4623f, [InstrStage<1, [SLOT3]>]>,
InstrItinData <tc_bad2bcaf, [InstrStage<1, [SLOT0, SLOT1]>]>,
InstrItinData <tc_bcc96cee, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_bd90564c, [InstrStage<1, [SLOT0]>]>,
InstrItinData <tc_bde7aaf4, [InstrStage<1, [SLOT0]>]>,
InstrItinData <tc_be706f30, [InstrStage<1, [SLOT2, SLOT3]>]>,
InstrItinData <tc_c2f7d806, [InstrStage<1, [SLOT2, SLOT3]>]>,
@@ -348,24 +316,20 @@ class DepScalarItinV4 {
InstrItinData <tc_daa058fa, [InstrStage<1, [SLOT0]>]>,
InstrItinData <tc_dbdffe3d, [InstrStage<1, [SLOT2, SLOT3]>]>,
InstrItinData <tc_e0739b8c, [InstrStage<1, [SLOT2]>]>,
- InstrItinData <tc_e1e0a2dc, [InstrStage<1, [SLOT2]>]>,
InstrItinData <tc_e1e99bfa, [InstrStage<1, [SLOT2, SLOT3]>]>,
InstrItinData <tc_e216a5db, [InstrStage<1, [SLOT0]>]>,
InstrItinData <tc_e421e012, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_e6b38e01, [InstrStage<1, [SLOT3]>]>,
InstrItinData <tc_e7624c08, [InstrStage<1, [SLOT0]>]>,
InstrItinData <tc_e7d02c66, [InstrStage<1, [SLOT0]>]>,
InstrItinData <tc_e913dc32, [InstrStage<1, [SLOT2, SLOT3]>]>,
InstrItinData <tc_e9c822f7, [InstrStage<1, [SLOT0, SLOT1]>]>,
InstrItinData <tc_e9fae2d6, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_ef20db1c, [InstrStage<1, [SLOT3]>]>,
InstrItinData <tc_ef52ed71, [InstrStage<1, [SLOT0, SLOT1]>]>,
InstrItinData <tc_ef84f62f, [InstrStage<1, [SLOT2, SLOT3]>]>,
InstrItinData <tc_f2704b9a, [InstrStage<1, [SLOT2, SLOT3]>]>,
InstrItinData <tc_f3eaa14b, [InstrStage<1, [SLOT2, SLOT3]>]>,
InstrItinData <tc_f47d212f, [InstrStage<1, [SLOT0, SLOT1]>]>,
InstrItinData <tc_f49e76f4, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_f4f43fb5, [InstrStage<1, [SLOT0]>]>,
InstrItinData <tc_f7dd9c9f, [InstrStage<1, [SLOT0]>]>,
InstrItinData <tc_f86c328a, [InstrStage<1, [SLOT0, SLOT1]>]>,
InstrItinData <tc_f8eeed7a, [InstrStage<1, [SLOT2, SLOT3]>]>,
@@ -375,21 +339,17 @@ class DepScalarItinV4 {
class DepScalarItinV5 {
list<InstrItinData> DepScalarItinV5_list = [
- InstrItinData <tc_0077f68c, [InstrStage<1, [SLOT0]>]>,
InstrItinData <tc_00afc57e, [InstrStage<1, [SLOT2, SLOT3]>]>,
InstrItinData <tc_00e7c26e, [InstrStage<1, [SLOT0]>]>,
InstrItinData <tc_03220ffa, [InstrStage<1, [SLOT0, SLOT1]>]>,
InstrItinData <tc_038a1342, [InstrStage<1, [SLOT2, SLOT3]>]>,
InstrItinData <tc_04c9decc, [InstrStage<1, [SLOT2, SLOT3]>]>,
InstrItinData <tc_05b6c987, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_0a2b8c7c, [InstrStage<1, [SLOT0]>]>,
InstrItinData <tc_0cd51c76, [InstrStage<1, [SLOT0, SLOT1]>]>,
InstrItinData <tc_0dc560de, [InstrStage<1, [SLOT0, SLOT1]>]>,
InstrItinData <tc_0fc1ae07, [InstrStage<1, [SLOT0]>]>,
InstrItinData <tc_10b97e27, [InstrStage<1, [SLOT2]>]>,
- InstrItinData <tc_128f96e3, [InstrStage<1, [SLOT0]>]>,
InstrItinData <tc_1372bca1, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_1432937d, [InstrStage<1, [SLOT2]>]>,
InstrItinData <tc_14cd4cfa, [InstrStage<1, [SLOT2]>]>,
InstrItinData <tc_15411484, [InstrStage<1, [SLOT2]>]>,
InstrItinData <tc_16d0d8d5, [InstrStage<1, [SLOT2, SLOT3]>]>,
@@ -397,18 +357,14 @@ class DepScalarItinV5 {
InstrItinData <tc_1853ea6d, [InstrStage<1, [SLOT2, SLOT3]>]>,
InstrItinData <tc_1b82a277, [InstrStage<1, [SLOT0, SLOT1]>]>,
InstrItinData <tc_1b9c9ee5, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_1c0005f9, [InstrStage<1, [SLOT3]>]>,
InstrItinData <tc_1d5a38a8, [InstrStage<1, [SLOT0, SLOT1]>]>,
InstrItinData <tc_1e856f58, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_20280784, [InstrStage<1, [SLOT0]>]>,
InstrItinData <tc_234a11a5, [InstrStage<1, [SLOT2, SLOT3]>]>,
InstrItinData <tc_238d91d2, [InstrStage<1, [SLOT0, SLOT1]>]>,
InstrItinData <tc_29175780, [InstrStage<1, [SLOT3]>]>,
- InstrItinData <tc_29641329, [InstrStage<1, [SLOT3]>]>,
InstrItinData <tc_2a160009, [InstrStage<1, [SLOT0]>]>,
InstrItinData <tc_2b2f4060, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
InstrItinData <tc_2b6f77c6, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_2e00db30, [InstrStage<1, [SLOT0]>]>,
InstrItinData <tc_2f185f5c, [InstrStage<1, [SLOT2, SLOT3]>]>,
InstrItinData <tc_2fc0c436, [InstrStage<1, [SLOT0, SLOT1]>]>,
InstrItinData <tc_351fed2d, [InstrStage<1, [SLOT2, SLOT3]>]>,
@@ -436,22 +392,19 @@ class DepScalarItinV5 {
InstrItinData <tc_523fcf30, [InstrStage<1, [SLOT2, SLOT3]>]>,
InstrItinData <tc_5274e61a, [InstrStage<1, [SLOT0, SLOT1]>]>,
InstrItinData <tc_52d7bbea, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
- InstrItinData <tc_53173427, [InstrStage<1, [SLOT3]>]>,
InstrItinData <tc_53bc8a6a, [InstrStage<1, [SLOT2, SLOT3]>]>,
InstrItinData <tc_53bdb2f6, [InstrStage<1, [SLOT0]>]>,
InstrItinData <tc_540fdfbc, [InstrStage<1, [SLOT2, SLOT3]>]>,
InstrItinData <tc_55050d58, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_56d25411, [InstrStage<1, [SLOT2]>]>,
InstrItinData <tc_57288781, [InstrStage<1, [SLOT0, SLOT1]>]>,
InstrItinData <tc_594ab548, [InstrStage<1, [SLOT0]>]>,
+ InstrItinData <tc_59a01ead, [InstrStage<1, [SLOT2]>]>,
InstrItinData <tc_5acef64a, [InstrStage<1, [SLOT0, SLOT1]>]>,
InstrItinData <tc_5ba5997d, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
InstrItinData <tc_5eb851fc, [InstrStage<1, [SLOT0]>]>,
InstrItinData <tc_5f6847a1, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
InstrItinData <tc_60571023, [InstrStage<1, [SLOT2, SLOT3]>]>,
InstrItinData <tc_609d2efe, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_60d76817, [InstrStage<1, [SLOT3]>]>,
- InstrItinData <tc_60f5738d, [InstrStage<1, [SLOT3]>]>,
InstrItinData <tc_63fe3df7, [InstrStage<1, [SLOT0, SLOT1]>]>,
InstrItinData <tc_66888ded, [InstrStage<1, [SLOT0, SLOT1]>]>,
InstrItinData <tc_6792d5ff, [InstrStage<1, [SLOT2, SLOT3]>]>,
@@ -461,6 +414,7 @@ class DepScalarItinV5 {
InstrItinData <tc_6ac37025, [InstrStage<1, [SLOT0]>]>,
InstrItinData <tc_6ebb4a12, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
InstrItinData <tc_6efc556e, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData <tc_6fa4db47, [InstrStage<1, [SLOT3]>]>,
InstrItinData <tc_73043bf4, [InstrStage<1, [SLOT3]>]>,
InstrItinData <tc_746baa8e, [InstrStage<1, [SLOT0]>]>,
InstrItinData <tc_74e47fd9, [InstrStage<1, [SLOT0, SLOT1]>]>,
@@ -468,18 +422,16 @@ class DepScalarItinV5 {
InstrItinData <tc_7a830544, [InstrStage<1, [SLOT2, SLOT3]>]>,
InstrItinData <tc_7f881c76, [InstrStage<1, [SLOT0, SLOT1]>]>,
InstrItinData <tc_84df2cd3, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_85523bcb, [InstrStage<1, [SLOT3]>]>,
InstrItinData <tc_855b0b61, [InstrStage<1, [SLOT2, SLOT3]>]>,
InstrItinData <tc_87735c3b, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_88fa1a78, [InstrStage<1, [SLOT3]>]>,
InstrItinData <tc_897d1a9d, [InstrStage<1, [SLOT2, SLOT3]>]>,
InstrItinData <tc_8b15472a, [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData <tc_8bb285ec, [InstrStage<1, [SLOT0]>]>,
InstrItinData <tc_8fd5f294, [InstrStage<1, [SLOT2, SLOT3]>]>,
InstrItinData <tc_8fe6b782, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
InstrItinData <tc_90f3e30c, [InstrStage<1, [SLOT0, SLOT1]>]>,
InstrItinData <tc_976ddc4f, [InstrStage<1, [SLOT2, SLOT3]>]>,
InstrItinData <tc_97743097, [InstrStage<1, [SLOT2]>]>,
+ InstrItinData <tc_994333cd, [InstrStage<1, [SLOT3]>]>,
InstrItinData <tc_999d32db, [InstrStage<1, [SLOT2]>]>,
InstrItinData <tc_99be14ca, [InstrStage<1, [SLOT2, SLOT3]>]>,
InstrItinData <tc_9c00ce8d, [InstrStage<1, [SLOT2, SLOT3]>]>,
@@ -498,7 +450,6 @@ class DepScalarItinV5 {
InstrItinData <tc_b13761ae, [InstrStage<1, [SLOT2]>]>,
InstrItinData <tc_b166348b, [InstrStage<1, [SLOT0]>]>,
InstrItinData <tc_b44c6e2a, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_b5a33b22, [InstrStage<1, [SLOT2]>]>,
InstrItinData <tc_b77c481f, [InstrStage<1, [SLOT0, SLOT1]>]>,
InstrItinData <tc_b7dd427e, [InstrStage<1, [SLOT0, SLOT1]>]>,
InstrItinData <tc_b9488031, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
@@ -506,7 +457,6 @@ class DepScalarItinV5 {
InstrItinData <tc_b9c4623f, [InstrStage<1, [SLOT3]>]>,
InstrItinData <tc_bad2bcaf, [InstrStage<1, [SLOT0, SLOT1]>]>,
InstrItinData <tc_bcc96cee, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_bd90564c, [InstrStage<1, [SLOT0]>]>,
InstrItinData <tc_bde7aaf4, [InstrStage<1, [SLOT0]>]>,
InstrItinData <tc_be706f30, [InstrStage<1, [SLOT2, SLOT3]>]>,
InstrItinData <tc_c2f7d806, [InstrStage<1, [SLOT2, SLOT3]>]>,
@@ -531,24 +481,20 @@ class DepScalarItinV5 {
InstrItinData <tc_daa058fa, [InstrStage<1, [SLOT0]>]>,
InstrItinData <tc_dbdffe3d, [InstrStage<1, [SLOT2, SLOT3]>]>,
InstrItinData <tc_e0739b8c, [InstrStage<1, [SLOT2]>]>,
- InstrItinData <tc_e1e0a2dc, [InstrStage<1, [SLOT2]>]>,
InstrItinData <tc_e1e99bfa, [InstrStage<1, [SLOT2, SLOT3]>]>,
InstrItinData <tc_e216a5db, [InstrStage<1, [SLOT0]>]>,
InstrItinData <tc_e421e012, [InstrStage<1, [SLOT0]>]>,
- InstrItinData <tc_e6b38e01, [InstrStage<1, [SLOT3]>]>,
InstrItinData <tc_e7624c08, [InstrStage<1, [SLOT0]>]>,
InstrItinData <tc_e7d02c66, [InstrStage<1, [SLOT0]>]>,
InstrItinData <tc_e913dc32, [InstrStage<1, [SLOT2, SLOT3]>]>,
InstrItinData <tc_e9c822f7, [InstrStage<1, [SLOT0, SLOT1]>]>,
InstrItinData <tc_e9fae2d6, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_ef20db1c, [InstrStage<1, [SLOT3]>]>,
InstrItinData <tc_ef52ed71, [InstrStage<1, [SLOT0, SLOT1]>]>,
InstrItinData <tc_ef84f62f, [InstrStage<1, [SLOT2, SLOT3]>]>,
InstrItinData <tc_f2704b9a, [InstrStage<1, [SLOT2, SLOT3]>]>,
InstrItinData <tc_f3eaa14b, [InstrStage<1, [SLOT2, SLOT3]>]>,
InstrItinData <tc_f47d212f, [InstrStage<1, [SLOT0, SLOT1]>]>,
InstrItinData <tc_f49e76f4, [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData <tc_f4f43fb5, [InstrStage<1, [SLOT0]>]>,
InstrItinData <tc_f7dd9c9f, [InstrStage<1, [SLOT0]>]>,
InstrItinData <tc_f86c328a, [InstrStage<1, [SLOT0, SLOT1]>]>,
InstrItinData <tc_f8eeed7a, [InstrStage<1, [SLOT2, SLOT3]>]>,
@@ -558,10 +504,6 @@ class DepScalarItinV5 {
class DepScalarItinV55 {
list<InstrItinData> DepScalarItinV55_list = [
- InstrItinData <tc_0077f68c, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [2],
- [Hex_FWD]>,
-
InstrItinData <tc_00afc57e, /*tc_2*/
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
@@ -586,10 +528,6 @@ class DepScalarItinV55 {
[InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_0a2b8c7c, /*tc_3stall*/
- [InstrStage<1, [SLOT0]>], [4, 1],
- [Hex_FWD, Hex_FWD]>,
-
InstrItinData <tc_0cd51c76, /*tc_ld*/
[InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -606,18 +544,10 @@ class DepScalarItinV55 {
[InstrStage<1, [SLOT2]>], [2, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_128f96e3, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1, 2],
- [Hex_FWD, Hex_FWD]>,
-
InstrItinData <tc_1372bca1, /*tc_3stall*/
[InstrStage<1, [SLOT0]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1432937d, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [1, 1],
- [Hex_FWD, Hex_FWD]>,
-
InstrItinData <tc_14cd4cfa, /*tc_2early*/
[InstrStage<1, [SLOT2]>], [2],
[Hex_FWD]>,
@@ -646,10 +576,6 @@ class DepScalarItinV55 {
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1c0005f9, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [4, 1],
- [Hex_FWD, Hex_FWD]>,
-
InstrItinData <tc_1d5a38a8, /*tc_ld*/
[InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -658,10 +584,6 @@ class DepScalarItinV55 {
[InstrStage<1, [SLOT2, SLOT3]>], [3, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_20280784, /*tc_3stall*/
- [InstrStage<1, [SLOT0]>], [4, 1],
- [Hex_FWD, Hex_FWD]>,
-
InstrItinData <tc_234a11a5, /*tc_3x*/
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
@@ -674,10 +596,6 @@ class DepScalarItinV55 {
[InstrStage<1, [SLOT3]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_29641329, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [4, 1, 1],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
-
InstrItinData <tc_2a160009, /*tc_2early*/
[InstrStage<1, [SLOT0]>], [],
[]>,
@@ -690,10 +608,6 @@ class DepScalarItinV55 {
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_2e00db30, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [],
- []>,
-
InstrItinData <tc_2f185f5c, /*tc_2early*/
[InstrStage<1, [SLOT2, SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
@@ -802,10 +716,6 @@ class DepScalarItinV55 {
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
[]>,
- InstrItinData <tc_53173427, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [1, 1],
- [Hex_FWD, Hex_FWD]>,
-
InstrItinData <tc_53bc8a6a, /*tc_2early*/
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -822,10 +732,6 @@ class DepScalarItinV55 {
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_56d25411, /*tc_3stall*/
- [InstrStage<1, [SLOT2]>], [4, 1],
- [Hex_FWD, Hex_FWD]>,
-
InstrItinData <tc_57288781, /*tc_st*/
[InstrStage<1, [SLOT0, SLOT1]>], [1, 2],
[Hex_FWD, Hex_FWD]>,
@@ -834,6 +740,10 @@ class DepScalarItinV55 {
[InstrStage<1, [SLOT0]>], [2, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_59a01ead, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_5acef64a, /*tc_ld*/
[InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -858,14 +768,6 @@ class DepScalarItinV55 {
[InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_60d76817, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [],
- []>,
-
- InstrItinData <tc_60f5738d, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [1],
- [Hex_FWD]>,
-
InstrItinData <tc_63fe3df7, /*tc_ld*/
[InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 3, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -902,6 +804,10 @@ class DepScalarItinV55 {
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
[]>,
+ InstrItinData <tc_6fa4db47, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_73043bf4, /*tc_2early*/
[InstrStage<1, [SLOT3]>], [1, 2],
[Hex_FWD, Hex_FWD]>,
@@ -930,10 +836,6 @@ class DepScalarItinV55 {
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_85523bcb, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [4, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
-
InstrItinData <tc_855b0b61, /*tc_2early*/
[InstrStage<1, [SLOT2, SLOT3]>], [1, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -942,10 +844,6 @@ class DepScalarItinV55 {
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_88fa1a78, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [4, 1],
- [Hex_FWD, Hex_FWD]>,
-
InstrItinData <tc_897d1a9d, /*tc_1*/
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -954,10 +852,6 @@ class DepScalarItinV55 {
[InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_8bb285ec, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1],
- [Hex_FWD]>,
-
InstrItinData <tc_8fd5f294, /*tc_3x*/
[InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -978,6 +872,10 @@ class DepScalarItinV55 {
[InstrStage<1, [SLOT2]>], [2, 1],
[Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_994333cd, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_999d32db, /*tc_2early*/
[InstrStage<1, [SLOT2]>], [1],
[Hex_FWD]>,
@@ -1050,10 +948,6 @@ class DepScalarItinV55 {
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b5a33b22, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [3, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
-
InstrItinData <tc_b77c481f, /*tc_ld*/
[InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -1082,10 +976,6 @@ class DepScalarItinV55 {
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_bd90564c, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1, 2],
- [Hex_FWD, Hex_FWD]>,
-
InstrItinData <tc_bde7aaf4, /*tc_3stall*/
[InstrStage<1, [SLOT0]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -1182,10 +1072,6 @@ class DepScalarItinV55 {
[InstrStage<1, [SLOT2]>], [2, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e1e0a2dc, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [],
- []>,
-
InstrItinData <tc_e1e99bfa, /*tc_2early*/
[InstrStage<1, [SLOT2, SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
@@ -1198,10 +1084,6 @@ class DepScalarItinV55 {
[InstrStage<1, [SLOT0]>], [3, 1, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e6b38e01, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [4, 2],
- [Hex_FWD, Hex_FWD]>,
-
InstrItinData <tc_e7624c08, /*tc_3stall*/
[InstrStage<1, [SLOT0]>], [3],
[Hex_FWD]>,
@@ -1222,10 +1104,6 @@ class DepScalarItinV55 {
[InstrStage<1, [SLOT2, SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_ef20db1c, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [4, 2],
- [Hex_FWD, Hex_FWD]>,
-
InstrItinData <tc_ef52ed71, /*tc_ld*/
[InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -1250,10 +1128,6 @@ class DepScalarItinV55 {
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f4f43fb5, /*tc_ld*/
- [InstrStage<1, [SLOT0]>], [4, 1, 1],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
-
InstrItinData <tc_f7dd9c9f, /*tc_st*/
[InstrStage<1, [SLOT0]>], [1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -1278,10 +1152,6 @@ class DepScalarItinV55 {
class DepScalarItinV60 {
list<InstrItinData> DepScalarItinV60_list = [
- InstrItinData <tc_0077f68c, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [2],
- [Hex_FWD]>,
-
InstrItinData <tc_00afc57e, /*tc_2*/
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
@@ -1306,10 +1176,6 @@ class DepScalarItinV60 {
[InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_0a2b8c7c, /*tc_3stall*/
- [InstrStage<1, [SLOT0]>], [4, 1],
- [Hex_FWD, Hex_FWD]>,
-
InstrItinData <tc_0cd51c76, /*tc_ld*/
[InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -1326,18 +1192,10 @@ class DepScalarItinV60 {
[InstrStage<1, [SLOT2]>], [2, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_128f96e3, /*tc_3stall*/
- [InstrStage<1, [SLOT0]>], [1, 1],
- [Hex_FWD, Hex_FWD]>,
-
InstrItinData <tc_1372bca1, /*tc_3stall*/
[InstrStage<1, [SLOT0]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1432937d, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [1, 1],
- [Hex_FWD, Hex_FWD]>,
-
InstrItinData <tc_14cd4cfa, /*tc_2early*/
[InstrStage<1, [SLOT2]>], [2],
[Hex_FWD]>,
@@ -1366,10 +1224,6 @@ class DepScalarItinV60 {
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1c0005f9, /*tc_3stall*/
- [InstrStage<1, [SLOT3]>], [4, 1],
- [Hex_FWD, Hex_FWD]>,
-
InstrItinData <tc_1d5a38a8, /*tc_ld*/
[InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -1378,10 +1232,6 @@ class DepScalarItinV60 {
[InstrStage<1, [SLOT2, SLOT3]>], [3, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_20280784, /*tc_3stall*/
- [InstrStage<1, [SLOT0]>], [4, 1],
- [Hex_FWD, Hex_FWD]>,
-
InstrItinData <tc_234a11a5, /*tc_2*/
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
@@ -1394,10 +1244,6 @@ class DepScalarItinV60 {
[InstrStage<1, [SLOT3]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_29641329, /*tc_3stall*/
- [InstrStage<1, [SLOT3]>], [4, 1, 1],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
-
InstrItinData <tc_2a160009, /*tc_2early*/
[InstrStage<1, [SLOT0]>], [],
[]>,
@@ -1410,10 +1256,6 @@ class DepScalarItinV60 {
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_2e00db30, /*tc_3stall*/
- [InstrStage<1, [SLOT0]>], [],
- []>,
-
InstrItinData <tc_2f185f5c, /*tc_2early*/
[InstrStage<1, [SLOT2, SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
@@ -1522,10 +1364,6 @@ class DepScalarItinV60 {
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
[]>,
- InstrItinData <tc_53173427, /*tc_3stall*/
- [InstrStage<1, [SLOT3]>], [1, 1],
- [Hex_FWD, Hex_FWD]>,
-
InstrItinData <tc_53bc8a6a, /*tc_2early*/
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -1542,10 +1380,6 @@ class DepScalarItinV60 {
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_56d25411, /*tc_3stall*/
- [InstrStage<1, [SLOT2]>], [4, 1],
- [Hex_FWD, Hex_FWD]>,
-
InstrItinData <tc_57288781, /*tc_st*/
[InstrStage<1, [SLOT0, SLOT1]>], [1, 2],
[Hex_FWD, Hex_FWD]>,
@@ -1554,6 +1388,10 @@ class DepScalarItinV60 {
[InstrStage<1, [SLOT0]>], [2, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_59a01ead, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_5acef64a, /*tc_ld*/
[InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -1578,14 +1416,6 @@ class DepScalarItinV60 {
[InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_60d76817, /*tc_3stall*/
- [InstrStage<1, [SLOT3]>], [],
- []>,
-
- InstrItinData <tc_60f5738d, /*tc_3stall*/
- [InstrStage<1, [SLOT3]>], [1],
- [Hex_FWD]>,
-
InstrItinData <tc_63fe3df7, /*tc_ld*/
[InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 3, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -1622,6 +1452,10 @@ class DepScalarItinV60 {
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
[]>,
+ InstrItinData <tc_6fa4db47, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_73043bf4, /*tc_2early*/
[InstrStage<1, [SLOT3]>], [1, 2],
[Hex_FWD, Hex_FWD]>,
@@ -1650,10 +1484,6 @@ class DepScalarItinV60 {
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_85523bcb, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [4, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
-
InstrItinData <tc_855b0b61, /*tc_2early*/
[InstrStage<1, [SLOT2, SLOT3]>], [1, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -1662,10 +1492,6 @@ class DepScalarItinV60 {
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_88fa1a78, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [4, 1],
- [Hex_FWD, Hex_FWD]>,
-
InstrItinData <tc_897d1a9d, /*tc_1*/
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -1674,10 +1500,6 @@ class DepScalarItinV60 {
[InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_8bb285ec, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1],
- [Hex_FWD]>,
-
InstrItinData <tc_8fd5f294, /*tc_3x*/
[InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -1698,6 +1520,10 @@ class DepScalarItinV60 {
[InstrStage<1, [SLOT2]>], [2, 1],
[Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_994333cd, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_999d32db, /*tc_2early*/
[InstrStage<1, [SLOT2]>], [1],
[Hex_FWD]>,
@@ -1770,10 +1596,6 @@ class DepScalarItinV60 {
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b5a33b22, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [3, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
-
InstrItinData <tc_b77c481f, /*tc_ld*/
[InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -1802,10 +1624,6 @@ class DepScalarItinV60 {
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_bd90564c, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1, 2],
- [Hex_FWD, Hex_FWD]>,
-
InstrItinData <tc_bde7aaf4, /*tc_newvjump*/
[InstrStage<1, [SLOT0]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -1902,10 +1720,6 @@ class DepScalarItinV60 {
[InstrStage<1, [SLOT2]>], [2, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e1e0a2dc, /*tc_3stall*/
- [InstrStage<1, [SLOT2]>], [],
- []>,
-
InstrItinData <tc_e1e99bfa, /*tc_2early*/
[InstrStage<1, [SLOT2, SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
@@ -1918,10 +1732,6 @@ class DepScalarItinV60 {
[InstrStage<1, [SLOT0]>], [3, 1, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e6b38e01, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [4, 2],
- [Hex_FWD, Hex_FWD]>,
-
InstrItinData <tc_e7624c08, /*tc_newvjump*/
[InstrStage<1, [SLOT0]>], [3],
[Hex_FWD]>,
@@ -1942,10 +1752,6 @@ class DepScalarItinV60 {
[InstrStage<1, [SLOT2, SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_ef20db1c, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [4, 1],
- [Hex_FWD, Hex_FWD]>,
-
InstrItinData <tc_ef52ed71, /*tc_ld*/
[InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -1970,10 +1776,6 @@ class DepScalarItinV60 {
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f4f43fb5, /*tc_ld*/
- [InstrStage<1, [SLOT0]>], [4, 1, 1],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
-
InstrItinData <tc_f7dd9c9f, /*tc_st*/
[InstrStage<1, [SLOT0]>], [1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -1996,765 +1798,8 @@ class DepScalarItinV60 {
];
}
-class DepScalarItinV60se {
- list<InstrItinData> DepScalarItinV60se_list = [
- InstrItinData <tc_0077f68c, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [2],
- [Hex_FWD]>,
-
- InstrItinData <tc_00afc57e, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
- [Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_00e7c26e, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1],
- [Hex_FWD]>,
-
- InstrItinData <tc_03220ffa, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_038a1342, /*tc_4x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_04c9decc, /*tc_3stall*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_05b6c987, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_0a2b8c7c, /*tc_3stall*/
- [InstrStage<1, [SLOT0]>], [4, 1],
- [Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_0cd51c76, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_0dc560de, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_0fc1ae07, /*tc_ld*/
- [InstrStage<1, [SLOT0]>], [2],
- [Hex_FWD]>,
-
- InstrItinData <tc_10b97e27, /*tc_2early*/
- [InstrStage<1, [SLOT2], 0>,
- InstrStage<1, [CVI_ST]>], [2, 1],
- [Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_128f96e3, /*tc_3stall*/
- [InstrStage<1, [SLOT0]>], [1, 1],
- [Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_1372bca1, /*tc_3stall*/
- [InstrStage<1, [SLOT0]>], [4, 1, 1],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_1432937d, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [1, 1],
- [Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_14cd4cfa, /*tc_2early*/
- [InstrStage<1, [SLOT2], 0>,
- InstrStage<1, [CVI_ST]>], [2],
- [Hex_FWD]>,
-
- InstrItinData <tc_15411484, /*tc_2early*/
- [InstrStage<1, [SLOT2], 0>,
- InstrStage<1, [CVI_ST]>], [1],
- [Hex_FWD]>,
-
- InstrItinData <tc_16d0d8d5, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_181af5d0, /*tc_2early*/
- [InstrStage<1, [SLOT2], 0>,
- InstrStage<1, [CVI_ST]>], [3, 1],
- [Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_1853ea6d, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_1b82a277, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3],
- [Hex_FWD]>,
-
- InstrItinData <tc_1b9c9ee5, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_1c0005f9, /*tc_3stall*/
- [InstrStage<1, [SLOT3]>], [4, 1],
- [Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_1d5a38a8, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_1e856f58, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 1, 1],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_20280784, /*tc_3stall*/
- [InstrStage<1, [SLOT0]>], [4, 1],
- [Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_234a11a5, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
- [Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_238d91d2, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [2, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_29175780, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [4, 2],
- [Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_29641329, /*tc_3stall*/
- [InstrStage<1, [SLOT3]>], [4, 1, 1],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_2a160009, /*tc_2early*/
- [InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST]>], [],
- []>,
-
- InstrItinData <tc_2b2f4060, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 3, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_2b6f77c6, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_2e00db30, /*tc_3stall*/
- [InstrStage<1, [SLOT0]>], [],
- []>,
-
- InstrItinData <tc_2f185f5c, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_ST]>], [2, 2],
- [Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_2fc0c436, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_351fed2d, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 1],
- [Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_3669266a, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_ST]>], [2],
- [Hex_FWD]>,
-
- InstrItinData <tc_367f7f3d, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [],
- []>,
-
- InstrItinData <tc_36c68ad1, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [],
- []>,
-
- InstrItinData <tc_395dc00f, /*tc_newvjump*/
- [InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST]>], [3, 3, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_3bc2c5d3, /*tc_newvjump*/
- [InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST]>], [2],
- [Hex_FWD]>,
-
- InstrItinData <tc_3cb8ea06, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_ST]>], [1, 2],
- [Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_3d04548d, /*tc_newvjump*/
- [InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST]>], [3, 2],
- [Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_3da80ba5, /*tc_ld*/
- [InstrStage<1, [SLOT0]>], [1],
- [Hex_FWD]>,
-
- InstrItinData <tc_3e07fb90, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_41d5298e, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_4403ca65, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_44126683, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_452f85af, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2],
- [Hex_FWD]>,
-
- InstrItinData <tc_481e5e5c, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_49eb22c8, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_ST]>], [3, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_4ca572d4, /*tc_3stall*/
- [InstrStage<1, [SLOT3]>], [],
- []>,
-
- InstrItinData <tc_4d9914c9, /*tc_ld*/
- [InstrStage<1, [SLOT0]>], [1, 2],
- [Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_4d99bca9, /*tc_4x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_4f7cd700, /*tc_3stall*/
- [InstrStage<1, [SLOT3]>], [2, 1],
- [Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_513bef45, /*tc_newvjump*/
- [InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST]>], [3, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_51b866be, /*tc_newvjump*/
- [InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST]>], [3, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_523fcf30, /*tc_3stall*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 1, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_5274e61a, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_52d7bbea, /*tc_2early*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_ST]>], [],
- []>,
-
- InstrItinData <tc_53173427, /*tc_3stall*/
- [InstrStage<1, [SLOT3]>], [1, 1],
- [Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_53bc8a6a, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_53bdb2f6, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_540fdfbc, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_55050d58, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_56d25411, /*tc_3stall*/
- [InstrStage<1, [SLOT2]>], [4, 1],
- [Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_57288781, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [1, 2],
- [Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_594ab548, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [2, 1, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_5acef64a, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_5ba5997d, /*tc_2*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_5eb851fc, /*tc_newvjump*/
- [InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST]>], [2, 3, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_5f6847a1, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 3, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_60571023, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_609d2efe, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_60d76817, /*tc_3stall*/
- [InstrStage<1, [SLOT3]>], [],
- []>,
-
- InstrItinData <tc_60f5738d, /*tc_3stall*/
- [InstrStage<1, [SLOT3]>], [1],
- [Hex_FWD]>,
-
- InstrItinData <tc_63fe3df7, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 3, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_66888ded, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_6792d5ff, /*tc_4x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [5, 1, 1],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_681a2300, /*tc_3stall*/
- [InstrStage<1, [SLOT2], 0>,
- InstrStage<1, [CVI_ST]>], [2],
- [Hex_FWD]>,
-
- InstrItinData <tc_68cb12ce, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2],
- [Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_6aa5711a, /*tc_ld*/
- [InstrStage<1, [SLOT0]>], [4, 1],
- [Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_6ac37025, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [2, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_6ebb4a12, /*tc_2early*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_6efc556e, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
- []>,
-
- InstrItinData <tc_73043bf4, /*tc_2early*/
- [InstrStage<1, [SLOT3], 0>,
- InstrStage<1, [CVI_ST]>], [1, 2],
- [Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_746baa8e, /*tc_newvjump*/
- [InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST]>], [3, 2],
- [Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_74e47fd9, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 3, 1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_7934b9df, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [2, 1],
- [Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_7a830544, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_7f881c76, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_84df2cd3, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_85523bcb, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [4, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_855b0b61, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_ST]>], [1, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_87735c3b, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_88fa1a78, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [4, 1],
- [Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_897d1a9d, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_8b15472a, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_8bb285ec, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1],
- [Hex_FWD]>,
-
- InstrItinData <tc_8fd5f294, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_8fe6b782, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_90f3e30c, /*tc_2early*/
- [InstrStage<1, [SLOT0, SLOT1]>], [1, 2],
- [Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_976ddc4f, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_97743097, /*tc_2early*/
- [InstrStage<1, [SLOT2], 0>,
- InstrStage<1, [CVI_ST]>], [2, 1],
- [Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_999d32db, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [1],
- [Hex_FWD]>,
-
- InstrItinData <tc_99be14ca, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_ST]>], [1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_9c00ce8d, /*tc_4x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1, 1],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_9c98e8af, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2],
- [Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_9d5941c7, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 1, 2, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_9ef61e5c, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_9faf76ae, /*tc_2early*/
- [InstrStage<1, [SLOT2], 0>,
- InstrStage<1, [CVI_ST]>], [1],
- [Hex_FWD]>,
-
- InstrItinData <tc_9fdb5406, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_a21dc435, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [4, 1],
- [Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_a27582fa, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_ST]>], [2],
- [Hex_FWD]>,
-
- InstrItinData <tc_a46f0df5, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_ST]>], [3, 2],
- [Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_a788683e, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [2, 2],
- [Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_a8acdac0, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1, 2, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_a904d137, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 2],
- [Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_adb14c66, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [2, 1, 1, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_b13761ae, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [],
- []>,
-
- InstrItinData <tc_b166348b, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1, 1, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_b44c6e2a, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_b5a33b22, /*tc_2early*/
- [InstrStage<1, [SLOT2], 0>,
- InstrStage<1, [CVI_ST]>], [3, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_b77c481f, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_b7dd427e, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_b9488031, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_b9c0b731, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_b9c4623f, /*tc_2*/
- [InstrStage<1, [SLOT3]>], [4, 2],
- [Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_bad2bcaf, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_bcc96cee, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 1],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_bd90564c, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1, 2],
- [Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_bde7aaf4, /*tc_newvjump*/
- [InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST]>], [3, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_be706f30, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
- [Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_c2f7d806, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
- [Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_c5e2426d, /*tc_3stall*/
- [InstrStage<1, [SLOT3]>], [2, 2],
- [Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_c6aa82f7, /*tc_2early*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 1, 1],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_c6ce9b3f, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_c6ebf8dd, /*tc_3stall*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_c74f796f, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_c82dc1ff, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [1],
- [Hex_FWD]>,
-
- InstrItinData <tc_caaebcba, /*tc_3stall*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 1, 1, 1],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_cd7374a0, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_cde8b071, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
- [Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_cf47a43f, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_cf59f215, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [2, 2],
- [Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_d088982c, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
- [Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_d1090e34, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2],
- [Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_d24b2d85, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 3, 1, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_d580173f, /*tc_4x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_d6bf0472, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_d9709180, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [1, 1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_d9f95eef, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 2, 1, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_daa058fa, /*tc_3stall*/
- [InstrStage<1, [SLOT0]>], [1, 1],
- [Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_dbdffe3d, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_e0739b8c, /*tc_2early*/
- [InstrStage<1, [SLOT2], 0>,
- InstrStage<1, [CVI_ST]>], [2, 1],
- [Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_e1e0a2dc, /*tc_3stall*/
- [InstrStage<1, [SLOT2], 0>,
- InstrStage<1, [CVI_ST]>], [],
- []>,
-
- InstrItinData <tc_e1e99bfa, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_ST]>], [2, 2],
- [Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_e216a5db, /*tc_ld*/
- [InstrStage<1, [SLOT0]>], [4, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_e421e012, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 1, 1, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_e6b38e01, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [4, 2],
- [Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_e7624c08, /*tc_newvjump*/
- [InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST]>], [3],
- [Hex_FWD]>,
-
- InstrItinData <tc_e7d02c66, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [3, 1, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_e913dc32, /*tc_3x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_e9c822f7, /*tc_1*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3],
- [Hex_FWD]>,
-
- InstrItinData <tc_e9fae2d6, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3], 0>,
- InstrStage<1, [CVI_ST]>], [2, 2],
- [Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_ef20db1c, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [4, 1],
- [Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_ef52ed71, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_ef84f62f, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_f2704b9a, /*tc_2early*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2],
- [Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_f3eaa14b, /*tc_4x*/
- [InstrStage<1, [SLOT2, SLOT3]>], [5, 1],
- [Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_f47d212f, /*tc_ld*/
- [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_f49e76f4, /*tc_2*/
- [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_f4f43fb5, /*tc_ld*/
- [InstrStage<1, [SLOT0]>], [4, 1, 1],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_f7dd9c9f, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1, 2, 3],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_f86c328a, /*tc_st*/
- [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_f8eeed7a, /*tc_1*/
- [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
-
- InstrItinData <tc_fcab4871, /*tc_newvjump*/
- [InstrStage<1, [SLOT0], 0>,
- InstrStage<1, [CVI_ST]>], [],
- []>,
-
- InstrItinData <tc_ff9ee76e, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [2, 3],
- [Hex_FWD, Hex_FWD]>
- ];
-}
-
class DepScalarItinV62 {
list<InstrItinData> DepScalarItinV62_list = [
- InstrItinData <tc_0077f68c, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [2],
- [Hex_FWD]>,
-
InstrItinData <tc_00afc57e, /*tc_2*/
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
@@ -2779,10 +1824,6 @@ class DepScalarItinV62 {
[InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_0a2b8c7c, /*tc_3stall*/
- [InstrStage<1, [SLOT0]>], [4, 1],
- [Hex_FWD, Hex_FWD]>,
-
InstrItinData <tc_0cd51c76, /*tc_ld*/
[InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -2799,18 +1840,10 @@ class DepScalarItinV62 {
[InstrStage<1, [SLOT2]>], [2, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_128f96e3, /*tc_3stall*/
- [InstrStage<1, [SLOT0]>], [1, 1],
- [Hex_FWD, Hex_FWD]>,
-
InstrItinData <tc_1372bca1, /*tc_3stall*/
[InstrStage<1, [SLOT0]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1432937d, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [1, 1],
- [Hex_FWD, Hex_FWD]>,
-
InstrItinData <tc_14cd4cfa, /*tc_2early*/
[InstrStage<1, [SLOT2]>], [2],
[Hex_FWD]>,
@@ -2839,10 +1872,6 @@ class DepScalarItinV62 {
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1c0005f9, /*tc_3stall*/
- [InstrStage<1, [SLOT3]>], [4, 1],
- [Hex_FWD, Hex_FWD]>,
-
InstrItinData <tc_1d5a38a8, /*tc_ld*/
[InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -2851,10 +1880,6 @@ class DepScalarItinV62 {
[InstrStage<1, [SLOT2, SLOT3]>], [3, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_20280784, /*tc_3stall*/
- [InstrStage<1, [SLOT0]>], [4, 1],
- [Hex_FWD, Hex_FWD]>,
-
InstrItinData <tc_234a11a5, /*tc_2*/
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
@@ -2867,10 +1892,6 @@ class DepScalarItinV62 {
[InstrStage<1, [SLOT3]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_29641329, /*tc_3stall*/
- [InstrStage<1, [SLOT3]>], [4, 1, 1],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
-
InstrItinData <tc_2a160009, /*tc_2early*/
[InstrStage<1, [SLOT0]>], [],
[]>,
@@ -2883,10 +1904,6 @@ class DepScalarItinV62 {
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_2e00db30, /*tc_3stall*/
- [InstrStage<1, [SLOT0]>], [],
- []>,
-
InstrItinData <tc_2f185f5c, /*tc_3*/
[InstrStage<1, [SLOT2, SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
@@ -2995,10 +2012,6 @@ class DepScalarItinV62 {
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
[]>,
- InstrItinData <tc_53173427, /*tc_3stall*/
- [InstrStage<1, [SLOT3]>], [1, 1],
- [Hex_FWD, Hex_FWD]>,
-
InstrItinData <tc_53bc8a6a, /*tc_2early*/
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -3015,10 +2028,6 @@ class DepScalarItinV62 {
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_56d25411, /*tc_3stall*/
- [InstrStage<1, [SLOT2]>], [4, 1],
- [Hex_FWD, Hex_FWD]>,
-
InstrItinData <tc_57288781, /*tc_st*/
[InstrStage<1, [SLOT0, SLOT1]>], [1, 2],
[Hex_FWD, Hex_FWD]>,
@@ -3027,6 +2036,10 @@ class DepScalarItinV62 {
[InstrStage<1, [SLOT0]>], [2, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_59a01ead, /*tc_2early*/
+ [InstrStage<1, [SLOT2]>], [3, 2, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_5acef64a, /*tc_ld*/
[InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -3051,14 +2064,6 @@ class DepScalarItinV62 {
[InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_60d76817, /*tc_3stall*/
- [InstrStage<1, [SLOT3]>], [],
- []>,
-
- InstrItinData <tc_60f5738d, /*tc_3stall*/
- [InstrStage<1, [SLOT3]>], [1],
- [Hex_FWD]>,
-
InstrItinData <tc_63fe3df7, /*tc_ld*/
[InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 3, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -3095,6 +2100,10 @@ class DepScalarItinV62 {
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
[]>,
+ InstrItinData <tc_6fa4db47, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_73043bf4, /*tc_2early*/
[InstrStage<1, [SLOT3]>], [1, 2],
[Hex_FWD, Hex_FWD]>,
@@ -3123,10 +2132,6 @@ class DepScalarItinV62 {
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_85523bcb, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [4, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
-
InstrItinData <tc_855b0b61, /*tc_2early*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [1, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -3135,10 +2140,6 @@ class DepScalarItinV62 {
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_88fa1a78, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [4, 1],
- [Hex_FWD, Hex_FWD]>,
-
InstrItinData <tc_897d1a9d, /*tc_2*/
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -3147,10 +2148,6 @@ class DepScalarItinV62 {
[InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_8bb285ec, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1],
- [Hex_FWD]>,
-
InstrItinData <tc_8fd5f294, /*tc_3x*/
[InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -3167,6 +2164,10 @@ class DepScalarItinV62 {
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_994333cd, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_97743097, /*tc_2early*/
[InstrStage<1, [SLOT2]>], [2, 1],
[Hex_FWD, Hex_FWD]>,
@@ -3243,10 +2244,6 @@ class DepScalarItinV62 {
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b5a33b22, /*tc_2early*/
- [InstrStage<1, [SLOT2]>], [3, 2, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
-
InstrItinData <tc_b77c481f, /*tc_ld*/
[InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -3275,10 +2272,6 @@ class DepScalarItinV62 {
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_bd90564c, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1, 2],
- [Hex_FWD, Hex_FWD]>,
-
InstrItinData <tc_bde7aaf4, /*tc_newvjump*/
[InstrStage<1, [SLOT0]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -3375,10 +2368,6 @@ class DepScalarItinV62 {
[InstrStage<1, [SLOT2]>], [2, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e1e0a2dc, /*tc_3stall*/
- [InstrStage<1, [SLOT2]>], [],
- []>,
-
InstrItinData <tc_e1e99bfa, /*tc_2early*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
@@ -3391,10 +2380,6 @@ class DepScalarItinV62 {
[InstrStage<1, [SLOT0]>], [3, 1, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e6b38e01, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [4, 2],
- [Hex_FWD, Hex_FWD]>,
-
InstrItinData <tc_e7624c08, /*tc_newvjump*/
[InstrStage<1, [SLOT0]>], [3],
[Hex_FWD]>,
@@ -3415,10 +2400,6 @@ class DepScalarItinV62 {
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_ef20db1c, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [4, 1],
- [Hex_FWD, Hex_FWD]>,
-
InstrItinData <tc_ef52ed71, /*tc_ld*/
[InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -3443,10 +2424,6 @@ class DepScalarItinV62 {
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f4f43fb5, /*tc_ld*/
- [InstrStage<1, [SLOT0]>], [4, 1, 1],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
-
InstrItinData <tc_f7dd9c9f, /*tc_st*/
[InstrStage<1, [SLOT0]>], [1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -3471,10 +2448,6 @@ class DepScalarItinV62 {
class DepScalarItinV65 {
list<InstrItinData> DepScalarItinV65_list = [
- InstrItinData <tc_0077f68c, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [2],
- [Hex_FWD]>,
-
InstrItinData <tc_00afc57e, /*tc_2*/
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
@@ -3499,10 +2472,6 @@ class DepScalarItinV65 {
[InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_0a2b8c7c, /*tc_3stall*/
- [InstrStage<1, [SLOT0]>], [4, 1],
- [Hex_FWD, Hex_FWD]>,
-
InstrItinData <tc_0cd51c76, /*tc_ld*/
[InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -3519,18 +2488,10 @@ class DepScalarItinV65 {
[InstrStage<1, [SLOT2]>], [2, 1],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_128f96e3, /*tc_3stall*/
- [InstrStage<1, [SLOT0]>], [1, 1],
- [Hex_FWD, Hex_FWD]>,
-
InstrItinData <tc_1372bca1, /*tc_3stall*/
[InstrStage<1, [SLOT0]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1432937d, /*tc_3stall*/
- [InstrStage<1, [SLOT2]>], [1, 1],
- [Hex_FWD, Hex_FWD]>,
-
InstrItinData <tc_14cd4cfa, /*tc_2early*/
[InstrStage<1, [SLOT2]>], [2],
[Hex_FWD]>,
@@ -3559,10 +2520,6 @@ class DepScalarItinV65 {
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_1c0005f9, /*tc_3stall*/
- [InstrStage<1, [SLOT3]>], [4, 1],
- [Hex_FWD, Hex_FWD]>,
-
InstrItinData <tc_1d5a38a8, /*tc_ld*/
[InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -3571,10 +2528,6 @@ class DepScalarItinV65 {
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_20280784, /*tc_3stall*/
- [InstrStage<1, [SLOT0]>], [4, 1],
- [Hex_FWD, Hex_FWD]>,
-
InstrItinData <tc_234a11a5, /*tc_2*/
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
@@ -3587,10 +2540,6 @@ class DepScalarItinV65 {
[InstrStage<1, [SLOT3]>], [4, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_29641329, /*tc_3stall*/
- [InstrStage<1, [SLOT3]>], [4, 1, 1],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
-
InstrItinData <tc_2a160009, /*tc_2early*/
[InstrStage<1, [SLOT0]>], [],
[]>,
@@ -3603,10 +2552,6 @@ class DepScalarItinV65 {
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_2e00db30, /*tc_3stall*/
- [InstrStage<1, [SLOT0]>], [],
- []>,
-
InstrItinData <tc_2f185f5c, /*tc_3*/
[InstrStage<1, [SLOT2, SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
@@ -3715,10 +2660,6 @@ class DepScalarItinV65 {
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
[]>,
- InstrItinData <tc_53173427, /*tc_3stall*/
- [InstrStage<1, [SLOT3]>], [1, 1],
- [Hex_FWD, Hex_FWD]>,
-
InstrItinData <tc_53bc8a6a, /*tc_1*/
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -3735,10 +2676,6 @@ class DepScalarItinV65 {
[InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_56d25411, /*tc_3stall*/
- [InstrStage<1, [SLOT2]>], [4, 1],
- [Hex_FWD, Hex_FWD]>,
-
InstrItinData <tc_57288781, /*tc_st*/
[InstrStage<1, [SLOT0, SLOT1]>], [1, 2],
[Hex_FWD, Hex_FWD]>,
@@ -3747,6 +2684,10 @@ class DepScalarItinV65 {
[InstrStage<1, [SLOT0]>], [2, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_59a01ead, /*tc_3stall*/
+ [InstrStage<1, [SLOT2]>], [4, 1, 2],
+ [Hex_FWD, Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_5acef64a, /*tc_ld*/
[InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -3771,14 +2712,6 @@ class DepScalarItinV65 {
[InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_60d76817, /*tc_3stall*/
- [InstrStage<1, [SLOT3]>], [],
- []>,
-
- InstrItinData <tc_60f5738d, /*tc_3stall*/
- [InstrStage<1, [SLOT3]>], [1],
- [Hex_FWD]>,
-
InstrItinData <tc_63fe3df7, /*tc_latepredldaia*/
[InstrStage<1, [SLOT0, SLOT1]>], [4, 4, 3, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -3815,6 +2748,10 @@ class DepScalarItinV65 {
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [],
[]>,
+ InstrItinData <tc_6fa4db47, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 2],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_73043bf4, /*tc_1*/
[InstrStage<1, [SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
@@ -3843,10 +2780,6 @@ class DepScalarItinV65 {
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_85523bcb, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [4, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
-
InstrItinData <tc_855b0b61, /*tc_1*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -3855,10 +2788,6 @@ class DepScalarItinV65 {
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_88fa1a78, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [4, 1],
- [Hex_FWD, Hex_FWD]>,
-
InstrItinData <tc_897d1a9d, /*tc_2*/
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -3867,10 +2796,6 @@ class DepScalarItinV65 {
[InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_8bb285ec, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1],
- [Hex_FWD]>,
-
InstrItinData <tc_8fd5f294, /*tc_3x*/
[InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -3891,6 +2816,10 @@ class DepScalarItinV65 {
[InstrStage<1, [SLOT2]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
+ InstrItinData <tc_994333cd, /*tc_3x*/
+ [InstrStage<1, [SLOT3]>], [4, 1],
+ [Hex_FWD, Hex_FWD]>,
+
InstrItinData <tc_999d32db, /*tc_3stall*/
[InstrStage<1, [SLOT2]>], [1],
[Hex_FWD]>,
@@ -3963,10 +2892,6 @@ class DepScalarItinV65 {
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_b5a33b22, /*tc_3stall*/
- [InstrStage<1, [SLOT2]>], [4, 1, 2],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
-
InstrItinData <tc_b77c481f, /*tc_ld*/
[InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -3995,10 +2920,6 @@ class DepScalarItinV65 {
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 1],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_bd90564c, /*tc_st*/
- [InstrStage<1, [SLOT0]>], [1, 2],
- [Hex_FWD, Hex_FWD]>,
-
InstrItinData <tc_bde7aaf4, /*tc_newvjump*/
[InstrStage<1, [SLOT0]>], [3, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -4095,10 +3016,6 @@ class DepScalarItinV65 {
[InstrStage<1, [SLOT2]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e1e0a2dc, /*tc_3stall*/
- [InstrStage<1, [SLOT2]>], [],
- []>,
-
InstrItinData <tc_e1e99bfa, /*tc_1*/
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
@@ -4111,10 +3028,6 @@ class DepScalarItinV65 {
[InstrStage<1, [SLOT0]>], [3, 1, 1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_e6b38e01, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [4, 2],
- [Hex_FWD, Hex_FWD]>,
-
InstrItinData <tc_e7624c08, /*tc_newvjump*/
[InstrStage<1, [SLOT0]>], [3],
[Hex_FWD]>,
@@ -4135,10 +3048,6 @@ class DepScalarItinV65 {
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2],
[Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_ef20db1c, /*tc_3x*/
- [InstrStage<1, [SLOT3]>], [4, 1],
- [Hex_FWD, Hex_FWD]>,
-
InstrItinData <tc_ef52ed71, /*tc_ld*/
[InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2],
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>,
@@ -4163,10 +3072,6 @@ class DepScalarItinV65 {
[InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
- InstrItinData <tc_f4f43fb5, /*tc_ld*/
- [InstrStage<1, [SLOT0]>], [4, 1, 1],
- [Hex_FWD, Hex_FWD, Hex_FWD]>,
-
InstrItinData <tc_f7dd9c9f, /*tc_st*/
[InstrStage<1, [SLOT0]>], [1, 2, 3],
[Hex_FWD, Hex_FWD, Hex_FWD]>,
diff --git a/lib/Target/Hexagon/HexagonDepInstrInfo.td b/lib/Target/Hexagon/HexagonDepInstrInfo.td
index 6e16762ac0eb..b6824fa33106 100644
--- a/lib/Target/Hexagon/HexagonDepInstrInfo.td
+++ b/lib/Target/Hexagon/HexagonDepInstrInfo.td
@@ -991,7 +991,7 @@ def A2_roundsat : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = round($Rss32):sat",
-tc_c2f7d806, TypeS_2op>, Enc_90cd8b, Requires<[HasV5T]> {
+tc_c2f7d806, TypeS_2op>, Enc_90cd8b, Requires<[HasV5]> {
let Inst{13-5} = 0b000000001;
let Inst{31-21} = 0b10001000110;
let hasNewValue = 1;
@@ -3301,7 +3301,7 @@ def A5_ACS : HInst<
(outs DoubleRegs:$Rxx32, PredRegs:$Pe4),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32,$Pe4 = vacsh($Rss32,$Rtt32)",
-tc_caaebcba, TypeM>, Enc_831a7d, Requires<[HasV55T]> {
+tc_caaebcba, TypeM>, Enc_831a7d, Requires<[HasV55]> {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010101;
@@ -3314,7 +3314,7 @@ def A5_vaddhubs : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rd32 = vaddhub($Rss32,$Rtt32):sat",
-tc_2b6f77c6, TypeS_3op>, Enc_d2216a, Requires<[HasV5T]> {
+tc_2b6f77c6, TypeS_3op>, Enc_d2216a, Requires<[HasV5]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000001010;
@@ -3327,7 +3327,7 @@ def A6_vcmpbeq_notany : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Pd4 = !any8(vcmpb.eq($Rss32,$Rtt32))",
-tc_55050d58, TypeALU64>, Enc_fcf7a7, Requires<[HasV65T]> {
+tc_55050d58, TypeALU64>, Enc_fcf7a7, Requires<[HasV65]> {
let Inst{7-2} = 0b001000;
let Inst{13-13} = 0b1;
let Inst{31-21} = 0b11010010000;
@@ -3336,7 +3336,7 @@ def A6_vminub_RdP : HInst<
(outs DoubleRegs:$Rdd32, PredRegs:$Pe4),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32,$Pe4 = vminub($Rtt32,$Rss32)",
-tc_ef84f62f, TypeM>, Enc_d2c7f1, Requires<[HasV62T]> {
+tc_ef84f62f, TypeM>, Enc_d2c7f1, Requires<[HasV62]> {
let Inst{7-7} = 0b0;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010111;
@@ -4059,7 +4059,7 @@ def F2_conv_d2df : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = convert_d2df($Rss32)",
-tc_f3eaa14b, TypeS_2op>, Enc_b9c5fb, Requires<[HasV5T]> {
+tc_f3eaa14b, TypeS_2op>, Enc_b9c5fb, Requires<[HasV5]> {
let Inst{13-5} = 0b000000011;
let Inst{31-21} = 0b10000000111;
let isFP = 1;
@@ -4069,7 +4069,7 @@ def F2_conv_d2sf : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = convert_d2sf($Rss32)",
-tc_f3eaa14b, TypeS_2op>, Enc_90cd8b, Requires<[HasV5T]> {
+tc_f3eaa14b, TypeS_2op>, Enc_90cd8b, Requires<[HasV5]> {
let Inst{13-5} = 0b000000001;
let Inst{31-21} = 0b10001000010;
let hasNewValue = 1;
@@ -4081,7 +4081,7 @@ def F2_conv_df2d : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = convert_df2d($Rss32)",
-tc_f3eaa14b, TypeS_2op>, Enc_b9c5fb, Requires<[HasV5T]> {
+tc_f3eaa14b, TypeS_2op>, Enc_b9c5fb, Requires<[HasV5]> {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b10000000111;
let isFP = 1;
@@ -4091,7 +4091,7 @@ def F2_conv_df2d_chop : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = convert_df2d($Rss32):chop",
-tc_f3eaa14b, TypeS_2op>, Enc_b9c5fb, Requires<[HasV5T]> {
+tc_f3eaa14b, TypeS_2op>, Enc_b9c5fb, Requires<[HasV5]> {
let Inst{13-5} = 0b000000110;
let Inst{31-21} = 0b10000000111;
let isFP = 1;
@@ -4101,7 +4101,7 @@ def F2_conv_df2sf : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = convert_df2sf($Rss32)",
-tc_f3eaa14b, TypeS_2op>, Enc_90cd8b, Requires<[HasV5T]> {
+tc_f3eaa14b, TypeS_2op>, Enc_90cd8b, Requires<[HasV5]> {
let Inst{13-5} = 0b000000001;
let Inst{31-21} = 0b10001000000;
let hasNewValue = 1;
@@ -4113,7 +4113,7 @@ def F2_conv_df2ud : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = convert_df2ud($Rss32)",
-tc_f3eaa14b, TypeS_2op>, Enc_b9c5fb, Requires<[HasV5T]> {
+tc_f3eaa14b, TypeS_2op>, Enc_b9c5fb, Requires<[HasV5]> {
let Inst{13-5} = 0b000000001;
let Inst{31-21} = 0b10000000111;
let isFP = 1;
@@ -4123,7 +4123,7 @@ def F2_conv_df2ud_chop : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = convert_df2ud($Rss32):chop",
-tc_f3eaa14b, TypeS_2op>, Enc_b9c5fb, Requires<[HasV5T]> {
+tc_f3eaa14b, TypeS_2op>, Enc_b9c5fb, Requires<[HasV5]> {
let Inst{13-5} = 0b000000111;
let Inst{31-21} = 0b10000000111;
let isFP = 1;
@@ -4133,7 +4133,7 @@ def F2_conv_df2uw : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = convert_df2uw($Rss32)",
-tc_f3eaa14b, TypeS_2op>, Enc_90cd8b, Requires<[HasV5T]> {
+tc_f3eaa14b, TypeS_2op>, Enc_90cd8b, Requires<[HasV5]> {
let Inst{13-5} = 0b000000001;
let Inst{31-21} = 0b10001000011;
let hasNewValue = 1;
@@ -4145,7 +4145,7 @@ def F2_conv_df2uw_chop : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = convert_df2uw($Rss32):chop",
-tc_f3eaa14b, TypeS_2op>, Enc_90cd8b, Requires<[HasV5T]> {
+tc_f3eaa14b, TypeS_2op>, Enc_90cd8b, Requires<[HasV5]> {
let Inst{13-5} = 0b000000001;
let Inst{31-21} = 0b10001000101;
let hasNewValue = 1;
@@ -4157,7 +4157,7 @@ def F2_conv_df2w : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = convert_df2w($Rss32)",
-tc_f3eaa14b, TypeS_2op>, Enc_90cd8b, Requires<[HasV5T]> {
+tc_f3eaa14b, TypeS_2op>, Enc_90cd8b, Requires<[HasV5]> {
let Inst{13-5} = 0b000000001;
let Inst{31-21} = 0b10001000100;
let hasNewValue = 1;
@@ -4169,7 +4169,7 @@ def F2_conv_df2w_chop : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = convert_df2w($Rss32):chop",
-tc_f3eaa14b, TypeS_2op>, Enc_90cd8b, Requires<[HasV5T]> {
+tc_f3eaa14b, TypeS_2op>, Enc_90cd8b, Requires<[HasV5]> {
let Inst{13-5} = 0b000000001;
let Inst{31-21} = 0b10001000111;
let hasNewValue = 1;
@@ -4181,7 +4181,7 @@ def F2_conv_sf2d : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
"$Rdd32 = convert_sf2d($Rs32)",
-tc_f3eaa14b, TypeS_2op>, Enc_3a3d62, Requires<[HasV5T]> {
+tc_f3eaa14b, TypeS_2op>, Enc_3a3d62, Requires<[HasV5]> {
let Inst{13-5} = 0b000000100;
let Inst{31-21} = 0b10000100100;
let isFP = 1;
@@ -4191,7 +4191,7 @@ def F2_conv_sf2d_chop : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
"$Rdd32 = convert_sf2d($Rs32):chop",
-tc_f3eaa14b, TypeS_2op>, Enc_3a3d62, Requires<[HasV5T]> {
+tc_f3eaa14b, TypeS_2op>, Enc_3a3d62, Requires<[HasV5]> {
let Inst{13-5} = 0b000000110;
let Inst{31-21} = 0b10000100100;
let isFP = 1;
@@ -4201,7 +4201,7 @@ def F2_conv_sf2df : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
"$Rdd32 = convert_sf2df($Rs32)",
-tc_f3eaa14b, TypeS_2op>, Enc_3a3d62, Requires<[HasV5T]> {
+tc_f3eaa14b, TypeS_2op>, Enc_3a3d62, Requires<[HasV5]> {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b10000100100;
let isFP = 1;
@@ -4211,7 +4211,7 @@ def F2_conv_sf2ud : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
"$Rdd32 = convert_sf2ud($Rs32)",
-tc_f3eaa14b, TypeS_2op>, Enc_3a3d62, Requires<[HasV5T]> {
+tc_f3eaa14b, TypeS_2op>, Enc_3a3d62, Requires<[HasV5]> {
let Inst{13-5} = 0b000000011;
let Inst{31-21} = 0b10000100100;
let isFP = 1;
@@ -4221,7 +4221,7 @@ def F2_conv_sf2ud_chop : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
"$Rdd32 = convert_sf2ud($Rs32):chop",
-tc_f3eaa14b, TypeS_2op>, Enc_3a3d62, Requires<[HasV5T]> {
+tc_f3eaa14b, TypeS_2op>, Enc_3a3d62, Requires<[HasV5]> {
let Inst{13-5} = 0b000000101;
let Inst{31-21} = 0b10000100100;
let isFP = 1;
@@ -4231,7 +4231,7 @@ def F2_conv_sf2uw : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = convert_sf2uw($Rs32)",
-tc_f3eaa14b, TypeS_2op>, Enc_5e2823, Requires<[HasV5T]> {
+tc_f3eaa14b, TypeS_2op>, Enc_5e2823, Requires<[HasV5]> {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b10001011011;
let hasNewValue = 1;
@@ -4243,7 +4243,7 @@ def F2_conv_sf2uw_chop : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = convert_sf2uw($Rs32):chop",
-tc_f3eaa14b, TypeS_2op>, Enc_5e2823, Requires<[HasV5T]> {
+tc_f3eaa14b, TypeS_2op>, Enc_5e2823, Requires<[HasV5]> {
let Inst{13-5} = 0b000000001;
let Inst{31-21} = 0b10001011011;
let hasNewValue = 1;
@@ -4255,7 +4255,7 @@ def F2_conv_sf2w : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = convert_sf2w($Rs32)",
-tc_f3eaa14b, TypeS_2op>, Enc_5e2823, Requires<[HasV5T]> {
+tc_f3eaa14b, TypeS_2op>, Enc_5e2823, Requires<[HasV5]> {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b10001011100;
let hasNewValue = 1;
@@ -4267,7 +4267,7 @@ def F2_conv_sf2w_chop : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = convert_sf2w($Rs32):chop",
-tc_f3eaa14b, TypeS_2op>, Enc_5e2823, Requires<[HasV5T]> {
+tc_f3eaa14b, TypeS_2op>, Enc_5e2823, Requires<[HasV5]> {
let Inst{13-5} = 0b000000001;
let Inst{31-21} = 0b10001011100;
let hasNewValue = 1;
@@ -4279,7 +4279,7 @@ def F2_conv_ud2df : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32),
"$Rdd32 = convert_ud2df($Rss32)",
-tc_f3eaa14b, TypeS_2op>, Enc_b9c5fb, Requires<[HasV5T]> {
+tc_f3eaa14b, TypeS_2op>, Enc_b9c5fb, Requires<[HasV5]> {
let Inst{13-5} = 0b000000010;
let Inst{31-21} = 0b10000000111;
let isFP = 1;
@@ -4289,7 +4289,7 @@ def F2_conv_ud2sf : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = convert_ud2sf($Rss32)",
-tc_f3eaa14b, TypeS_2op>, Enc_90cd8b, Requires<[HasV5T]> {
+tc_f3eaa14b, TypeS_2op>, Enc_90cd8b, Requires<[HasV5]> {
let Inst{13-5} = 0b000000001;
let Inst{31-21} = 0b10001000001;
let hasNewValue = 1;
@@ -4301,7 +4301,7 @@ def F2_conv_uw2df : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
"$Rdd32 = convert_uw2df($Rs32)",
-tc_f3eaa14b, TypeS_2op>, Enc_3a3d62, Requires<[HasV5T]> {
+tc_f3eaa14b, TypeS_2op>, Enc_3a3d62, Requires<[HasV5]> {
let Inst{13-5} = 0b000000001;
let Inst{31-21} = 0b10000100100;
let isFP = 1;
@@ -4311,7 +4311,7 @@ def F2_conv_uw2sf : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = convert_uw2sf($Rs32)",
-tc_f3eaa14b, TypeS_2op>, Enc_5e2823, Requires<[HasV5T]> {
+tc_f3eaa14b, TypeS_2op>, Enc_5e2823, Requires<[HasV5]> {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b10001011001;
let hasNewValue = 1;
@@ -4323,7 +4323,7 @@ def F2_conv_w2df : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
"$Rdd32 = convert_w2df($Rs32)",
-tc_f3eaa14b, TypeS_2op>, Enc_3a3d62, Requires<[HasV5T]> {
+tc_f3eaa14b, TypeS_2op>, Enc_3a3d62, Requires<[HasV5]> {
let Inst{13-5} = 0b000000010;
let Inst{31-21} = 0b10000100100;
let isFP = 1;
@@ -4333,7 +4333,7 @@ def F2_conv_w2sf : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = convert_w2sf($Rs32)",
-tc_f3eaa14b, TypeS_2op>, Enc_5e2823, Requires<[HasV5T]> {
+tc_f3eaa14b, TypeS_2op>, Enc_5e2823, Requires<[HasV5]> {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b10001011010;
let hasNewValue = 1;
@@ -4345,7 +4345,7 @@ def F2_dfclass : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, u5_0Imm:$Ii),
"$Pd4 = dfclass($Rss32,#$Ii)",
-tc_7a830544, TypeALU64>, Enc_1f19b5, Requires<[HasV5T]> {
+tc_7a830544, TypeALU64>, Enc_1f19b5, Requires<[HasV5]> {
let Inst{4-2} = 0b100;
let Inst{13-10} = 0b0000;
let Inst{31-21} = 0b11011100100;
@@ -4356,7 +4356,7 @@ def F2_dfcmpeq : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Pd4 = dfcmp.eq($Rss32,$Rtt32)",
-tc_1e856f58, TypeALU64>, Enc_fcf7a7, Requires<[HasV5T]> {
+tc_1e856f58, TypeALU64>, Enc_fcf7a7, Requires<[HasV5]> {
let Inst{7-2} = 0b000000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010010111;
@@ -4368,7 +4368,7 @@ def F2_dfcmpge : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Pd4 = dfcmp.ge($Rss32,$Rtt32)",
-tc_1e856f58, TypeALU64>, Enc_fcf7a7, Requires<[HasV5T]> {
+tc_1e856f58, TypeALU64>, Enc_fcf7a7, Requires<[HasV5]> {
let Inst{7-2} = 0b010000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010010111;
@@ -4380,7 +4380,7 @@ def F2_dfcmpgt : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Pd4 = dfcmp.gt($Rss32,$Rtt32)",
-tc_1e856f58, TypeALU64>, Enc_fcf7a7, Requires<[HasV5T]> {
+tc_1e856f58, TypeALU64>, Enc_fcf7a7, Requires<[HasV5]> {
let Inst{7-2} = 0b001000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010010111;
@@ -4392,7 +4392,7 @@ def F2_dfcmpuo : HInst<
(outs PredRegs:$Pd4),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Pd4 = dfcmp.uo($Rss32,$Rtt32)",
-tc_1e856f58, TypeALU64>, Enc_fcf7a7, Requires<[HasV5T]> {
+tc_1e856f58, TypeALU64>, Enc_fcf7a7, Requires<[HasV5]> {
let Inst{7-2} = 0b011000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11010010111;
@@ -4404,7 +4404,7 @@ def F2_dfimm_n : HInst<
(outs DoubleRegs:$Rdd32),
(ins u10_0Imm:$Ii),
"$Rdd32 = dfmake(#$Ii):neg",
-tc_234a11a5, TypeALU64>, Enc_e6c957, Requires<[HasV5T]> {
+tc_234a11a5, TypeALU64>, Enc_e6c957, Requires<[HasV5]> {
let Inst{20-16} = 0b00000;
let Inst{31-22} = 0b1101100101;
let prefersSlot3 = 1;
@@ -4413,7 +4413,7 @@ def F2_dfimm_p : HInst<
(outs DoubleRegs:$Rdd32),
(ins u10_0Imm:$Ii),
"$Rdd32 = dfmake(#$Ii):pos",
-tc_234a11a5, TypeALU64>, Enc_e6c957, Requires<[HasV5T]> {
+tc_234a11a5, TypeALU64>, Enc_e6c957, Requires<[HasV5]> {
let Inst{20-16} = 0b00000;
let Inst{31-22} = 0b1101100100;
let prefersSlot3 = 1;
@@ -4422,7 +4422,7 @@ def F2_sfadd : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = sfadd($Rs32,$Rt32)",
-tc_6792d5ff, TypeM>, Enc_5ab2be, Requires<[HasV5T]> {
+tc_6792d5ff, TypeM>, Enc_5ab2be, Requires<[HasV5]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101011000;
@@ -4436,7 +4436,7 @@ def F2_sfclass : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, u5_0Imm:$Ii),
"$Pd4 = sfclass($Rs32,#$Ii)",
-tc_7a830544, TypeS_2op>, Enc_83ee64, Requires<[HasV5T]> {
+tc_7a830544, TypeS_2op>, Enc_83ee64, Requires<[HasV5]> {
let Inst{7-2} = 0b000000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10000101111;
@@ -4447,7 +4447,7 @@ def F2_sfcmpeq : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = sfcmp.eq($Rs32,$Rt32)",
-tc_1e856f58, TypeS_3op>, Enc_c2b48e, Requires<[HasV5T]> {
+tc_1e856f58, TypeS_3op>, Enc_c2b48e, Requires<[HasV5]> {
let Inst{7-2} = 0b011000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000111111;
@@ -4459,7 +4459,7 @@ def F2_sfcmpge : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = sfcmp.ge($Rs32,$Rt32)",
-tc_1e856f58, TypeS_3op>, Enc_c2b48e, Requires<[HasV5T]> {
+tc_1e856f58, TypeS_3op>, Enc_c2b48e, Requires<[HasV5]> {
let Inst{7-2} = 0b000000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000111111;
@@ -4471,7 +4471,7 @@ def F2_sfcmpgt : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = sfcmp.gt($Rs32,$Rt32)",
-tc_1e856f58, TypeS_3op>, Enc_c2b48e, Requires<[HasV5T]> {
+tc_1e856f58, TypeS_3op>, Enc_c2b48e, Requires<[HasV5]> {
let Inst{7-2} = 0b100000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000111111;
@@ -4483,7 +4483,7 @@ def F2_sfcmpuo : HInst<
(outs PredRegs:$Pd4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Pd4 = sfcmp.uo($Rs32,$Rt32)",
-tc_1e856f58, TypeS_3op>, Enc_c2b48e, Requires<[HasV5T]> {
+tc_1e856f58, TypeS_3op>, Enc_c2b48e, Requires<[HasV5]> {
let Inst{7-2} = 0b001000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000111111;
@@ -4495,7 +4495,7 @@ def F2_sffixupd : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = sffixupd($Rs32,$Rt32)",
-tc_6792d5ff, TypeM>, Enc_5ab2be, Requires<[HasV5T]> {
+tc_6792d5ff, TypeM>, Enc_5ab2be, Requires<[HasV5]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101011110;
@@ -4507,7 +4507,7 @@ def F2_sffixupn : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = sffixupn($Rs32,$Rt32)",
-tc_6792d5ff, TypeM>, Enc_5ab2be, Requires<[HasV5T]> {
+tc_6792d5ff, TypeM>, Enc_5ab2be, Requires<[HasV5]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101011110;
@@ -4519,7 +4519,7 @@ def F2_sffixupr : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32),
"$Rd32 = sffixupr($Rs32)",
-tc_f3eaa14b, TypeS_2op>, Enc_5e2823, Requires<[HasV5T]> {
+tc_f3eaa14b, TypeS_2op>, Enc_5e2823, Requires<[HasV5]> {
let Inst{13-5} = 0b000000000;
let Inst{31-21} = 0b10001011101;
let hasNewValue = 1;
@@ -4530,7 +4530,7 @@ def F2_sffma : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += sfmpy($Rs32,$Rt32)",
-tc_d580173f, TypeM>, Enc_2ae154, Requires<[HasV5T]> {
+tc_d580173f, TypeM>, Enc_2ae154, Requires<[HasV5]> {
let Inst{7-5} = 0b100;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111000;
@@ -4544,7 +4544,7 @@ def F2_sffma_lib : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 += sfmpy($Rs32,$Rt32):lib",
-tc_d580173f, TypeM>, Enc_2ae154, Requires<[HasV5T]> {
+tc_d580173f, TypeM>, Enc_2ae154, Requires<[HasV5]> {
let Inst{7-5} = 0b110;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111000;
@@ -4558,7 +4558,7 @@ def F2_sffma_sc : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32, PredRegs:$Pu4),
"$Rx32 += sfmpy($Rs32,$Rt32,$Pu4):scale",
-tc_038a1342, TypeM>, Enc_437f33, Requires<[HasV5T]> {
+tc_038a1342, TypeM>, Enc_437f33, Requires<[HasV5]> {
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111011;
@@ -4572,7 +4572,7 @@ def F2_sffms : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= sfmpy($Rs32,$Rt32)",
-tc_d580173f, TypeM>, Enc_2ae154, Requires<[HasV5T]> {
+tc_d580173f, TypeM>, Enc_2ae154, Requires<[HasV5]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111000;
@@ -4586,7 +4586,7 @@ def F2_sffms_lib : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32),
"$Rx32 -= sfmpy($Rs32,$Rt32):lib",
-tc_d580173f, TypeM>, Enc_2ae154, Requires<[HasV5T]> {
+tc_d580173f, TypeM>, Enc_2ae154, Requires<[HasV5]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101111000;
@@ -4600,7 +4600,7 @@ def F2_sfimm_n : HInst<
(outs IntRegs:$Rd32),
(ins u10_0Imm:$Ii),
"$Rd32 = sfmake(#$Ii):neg",
-tc_234a11a5, TypeALU64>, Enc_6c9440, Requires<[HasV5T]> {
+tc_234a11a5, TypeALU64>, Enc_6c9440, Requires<[HasV5]> {
let Inst{20-16} = 0b00000;
let Inst{31-22} = 0b1101011001;
let hasNewValue = 1;
@@ -4611,7 +4611,7 @@ def F2_sfimm_p : HInst<
(outs IntRegs:$Rd32),
(ins u10_0Imm:$Ii),
"$Rd32 = sfmake(#$Ii):pos",
-tc_234a11a5, TypeALU64>, Enc_6c9440, Requires<[HasV5T]> {
+tc_234a11a5, TypeALU64>, Enc_6c9440, Requires<[HasV5]> {
let Inst{20-16} = 0b00000;
let Inst{31-22} = 0b1101011000;
let hasNewValue = 1;
@@ -4622,7 +4622,7 @@ def F2_sfinvsqrta : HInst<
(outs IntRegs:$Rd32, PredRegs:$Pe4),
(ins IntRegs:$Rs32),
"$Rd32,$Pe4 = sfinvsqrta($Rs32)",
-tc_4d99bca9, TypeS_2op>, Enc_890909, Requires<[HasV5T]> {
+tc_4d99bca9, TypeS_2op>, Enc_890909, Requires<[HasV5]> {
let Inst{13-7} = 0b0000000;
let Inst{31-21} = 0b10001011111;
let hasNewValue = 1;
@@ -4634,7 +4634,7 @@ def F2_sfmax : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = sfmax($Rs32,$Rt32)",
-tc_976ddc4f, TypeM>, Enc_5ab2be, Requires<[HasV5T]> {
+tc_976ddc4f, TypeM>, Enc_5ab2be, Requires<[HasV5]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101011100;
@@ -4648,7 +4648,7 @@ def F2_sfmin : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = sfmin($Rs32,$Rt32)",
-tc_976ddc4f, TypeM>, Enc_5ab2be, Requires<[HasV5T]> {
+tc_976ddc4f, TypeM>, Enc_5ab2be, Requires<[HasV5]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101011100;
@@ -4662,7 +4662,7 @@ def F2_sfmpy : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = sfmpy($Rs32,$Rt32)",
-tc_6792d5ff, TypeM>, Enc_5ab2be, Requires<[HasV5T]> {
+tc_6792d5ff, TypeM>, Enc_5ab2be, Requires<[HasV5]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101011010;
@@ -4676,7 +4676,7 @@ def F2_sfrecipa : HInst<
(outs IntRegs:$Rd32, PredRegs:$Pe4),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32,$Pe4 = sfrecipa($Rs32,$Rt32)",
-tc_9c00ce8d, TypeM>, Enc_a94f3b, Requires<[HasV5T]> {
+tc_9c00ce8d, TypeM>, Enc_a94f3b, Requires<[HasV5]> {
let Inst{7-7} = 0b1;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101011111;
@@ -4689,7 +4689,7 @@ def F2_sfsub : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, IntRegs:$Rt32),
"$Rd32 = sfsub($Rs32,$Rt32)",
-tc_6792d5ff, TypeM>, Enc_5ab2be, Requires<[HasV5T]> {
+tc_6792d5ff, TypeM>, Enc_5ab2be, Requires<[HasV5]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101011000;
@@ -4698,6 +4698,44 @@ let opNewValue = 0;
let isFP = 1;
let Uses = [USR];
}
+def G4_tfrgcpp : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins GuestRegs64:$Gss32),
+"$Rdd32 = $Gss32",
+tc_6fa4db47, TypeCR>, Enc_0aa344 {
+let Inst{13-5} = 0b000000000;
+let Inst{31-21} = 0b01101000001;
+}
+def G4_tfrgcrr : HInst<
+(outs IntRegs:$Rd32),
+(ins GuestRegs:$Gs32),
+"$Rd32 = $Gs32",
+tc_6fa4db47, TypeCR>, Enc_44271f {
+let Inst{13-5} = 0b000000000;
+let Inst{31-21} = 0b01101010001;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def G4_tfrgpcp : HInst<
+(outs GuestRegs64:$Gdd32),
+(ins DoubleRegs:$Rss32),
+"$Gdd32 = $Rss32",
+tc_994333cd, TypeCR>, Enc_ed5027 {
+let Inst{13-5} = 0b000000000;
+let Inst{31-21} = 0b01100011000;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def G4_tfrgrcr : HInst<
+(outs GuestRegs:$Gd32),
+(ins IntRegs:$Rs32),
+"$Gd32 = $Rs32",
+tc_994333cd, TypeCR>, Enc_621fba {
+let Inst{13-5} = 0b000000000;
+let Inst{31-21} = 0b01100010000;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
def J2_call : HInst<
(outs),
(ins a30_2Imm:$Ii),
@@ -4905,7 +4943,7 @@ def J2_jumpf_nopred_map : HInst<
(outs),
(ins PredRegs:$Pu4, b15_2Imm:$Ii),
"if (!$Pu4) jump $Ii",
-tc_e9fae2d6, TypeMAPPING>, Requires<[HasV60T]> {
+tc_e9fae2d6, TypeMAPPING>, Requires<[HasV60]> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -4967,7 +5005,7 @@ def J2_jumpfpt : HInst<
(outs),
(ins PredRegs:$Pu4, b30_2Imm:$Ii),
"if (!$Pu4) jump:t $Ii",
-tc_e1e99bfa, TypeJ>, Enc_daea09, Requires<[HasV60T]>, PredNewRel {
+tc_e1e99bfa, TypeJ>, Enc_daea09, Requires<[HasV60]>, PredNewRel {
let Inst{0-0} = 0b0;
let Inst{12-10} = 0b100;
let Inst{21-21} = 0b1;
@@ -5029,7 +5067,7 @@ def J2_jumprf_nopred_map : HInst<
(outs),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if (!$Pu4) jumpr $Rs32",
-tc_e0739b8c, TypeMAPPING>, Requires<[HasV60T]> {
+tc_e0739b8c, TypeMAPPING>, Requires<[HasV60]> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -5077,7 +5115,7 @@ def J2_jumprfpt : HInst<
(outs),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if (!$Pu4) jumpr:t $Rs32",
-tc_97743097, TypeJ>, Enc_88d4d9, Requires<[HasV60T]>, PredNewRel {
+tc_97743097, TypeJ>, Enc_88d4d9, Requires<[HasV60]>, PredNewRel {
let Inst{7-0} = 0b00000000;
let Inst{13-10} = 0b0100;
let Inst{31-21} = 0b01010011011;
@@ -5222,7 +5260,7 @@ def J2_jumprt_nopred_map : HInst<
(outs),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if ($Pu4) jumpr $Rs32",
-tc_e0739b8c, TypeMAPPING>, Requires<[HasV60T]> {
+tc_e0739b8c, TypeMAPPING>, Requires<[HasV60]> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -5268,7 +5306,7 @@ def J2_jumprtpt : HInst<
(outs),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
"if ($Pu4) jumpr:t $Rs32",
-tc_97743097, TypeJ>, Enc_88d4d9, Requires<[HasV60T]>, PredNewRel {
+tc_97743097, TypeJ>, Enc_88d4d9, Requires<[HasV60]>, PredNewRel {
let Inst{7-0} = 0b00000000;
let Inst{13-10} = 0b0100;
let Inst{31-21} = 0b01010011010;
@@ -5347,7 +5385,7 @@ def J2_jumpt_nopred_map : HInst<
(outs),
(ins PredRegs:$Pu4, b15_2Imm:$Ii),
"if ($Pu4) jump $Ii",
-tc_e9fae2d6, TypeMAPPING>, Requires<[HasV60T]> {
+tc_e9fae2d6, TypeMAPPING>, Requires<[HasV60]> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -5407,7 +5445,7 @@ def J2_jumptpt : HInst<
(outs),
(ins PredRegs:$Pu4, b30_2Imm:$Ii),
"if ($Pu4) jump:t $Ii",
-tc_e1e99bfa, TypeJ>, Enc_daea09, Requires<[HasV60T]>, PredNewRel {
+tc_e1e99bfa, TypeJ>, Enc_daea09, Requires<[HasV60]>, PredNewRel {
let Inst{0-0} = 0b0;
let Inst{12-10} = 0b100;
let Inst{21-21} = 0b0;
@@ -5631,6 +5669,30 @@ let Inst{13-13} = 0b0;
let Inst{31-16} = 0b0101010000000000;
let isSolo = 1;
}
+def J2_trap1 : HInst<
+(outs IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, u8_0Imm:$Ii),
+"trap1($Rx32,#$Ii)",
+tc_59a01ead, TypeJ>, Enc_33f8ba {
+let Inst{1-0} = 0b00;
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b01010100100;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isSolo = 1;
+let Uses = [GOSP];
+let Defs = [GOSP, PC];
+let Constraints = "$Rx32 = $Rx32in";
+}
+def J2_trap1_noregmap : HInst<
+(outs),
+(ins u8_0Imm:$Ii),
+"trap1(#$Ii)",
+tc_59a01ead, TypeMAPPING> {
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+}
def J4_cmpeq_f_jumpnv_nt : HInst<
(outs),
(ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii),
@@ -13334,7 +13396,7 @@ def L4_return_map_to_raw_f : HInst<
(outs),
(ins PredRegs:$Pv4),
"if (!$Pv4) dealloc_return",
-tc_513bef45, TypeMAPPING>, Requires<[HasV65T]> {
+tc_513bef45, TypeMAPPING>, Requires<[HasV65]> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -13342,7 +13404,7 @@ def L4_return_map_to_raw_fnew_pnt : HInst<
(outs),
(ins PredRegs:$Pv4),
"if (!$Pv4.new) dealloc_return:nt",
-tc_395dc00f, TypeMAPPING>, Requires<[HasV65T]> {
+tc_395dc00f, TypeMAPPING>, Requires<[HasV65]> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -13350,7 +13412,7 @@ def L4_return_map_to_raw_fnew_pt : HInst<
(outs),
(ins PredRegs:$Pv4),
"if (!$Pv4.new) dealloc_return:t",
-tc_395dc00f, TypeMAPPING>, Requires<[HasV65T]> {
+tc_395dc00f, TypeMAPPING>, Requires<[HasV65]> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -13358,7 +13420,7 @@ def L4_return_map_to_raw_t : HInst<
(outs),
(ins PredRegs:$Pv4),
"if ($Pv4) dealloc_return",
-tc_3bc2c5d3, TypeMAPPING>, Requires<[HasV65T]> {
+tc_3bc2c5d3, TypeMAPPING>, Requires<[HasV65]> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -13366,7 +13428,7 @@ def L4_return_map_to_raw_tnew_pnt : HInst<
(outs),
(ins PredRegs:$Pv4),
"if ($Pv4.new) dealloc_return:nt",
-tc_e7624c08, TypeMAPPING>, Requires<[HasV65T]> {
+tc_e7624c08, TypeMAPPING>, Requires<[HasV65]> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -13374,7 +13436,7 @@ def L4_return_map_to_raw_tnew_pt : HInst<
(outs),
(ins PredRegs:$Pv4),
"if ($Pv4.new) dealloc_return:t",
-tc_e7624c08, TypeMAPPING>, Requires<[HasV65T]> {
+tc_e7624c08, TypeMAPPING>, Requires<[HasV65]> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -13528,7 +13590,7 @@ def L6_deallocframe_map_to_raw : HInst<
(outs),
(ins),
"deallocframe",
-tc_d1090e34, TypeMAPPING>, Requires<[HasV65T]> {
+tc_d1090e34, TypeMAPPING>, Requires<[HasV65]> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -13536,7 +13598,7 @@ def L6_return_map_to_raw : HInst<
(outs),
(ins),
"dealloc_return",
-tc_3d04548d, TypeMAPPING>, Requires<[HasV65T]> {
+tc_3d04548d, TypeMAPPING>, Requires<[HasV65]> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -16916,7 +16978,7 @@ def M4_cmpyi_whc : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rd32 = cmpyiwh($Rss32,$Rt32*):<<1:rnd:sat",
-tc_8fd5f294, TypeS_3op>, Enc_3d5b28, Requires<[HasV5T]> {
+tc_8fd5f294, TypeS_3op>, Enc_3d5b28, Requires<[HasV5]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000101000;
@@ -16942,7 +17004,7 @@ def M4_cmpyr_whc : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32, IntRegs:$Rt32),
"$Rd32 = cmpyrwh($Rss32,$Rt32*):<<1:rnd:sat",
-tc_8fd5f294, TypeS_3op>, Enc_3d5b28, Requires<[HasV5T]> {
+tc_8fd5f294, TypeS_3op>, Enc_3d5b28, Requires<[HasV5]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000101000;
@@ -17295,7 +17357,7 @@ def M5_vdmacbsu : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rxx32 += vdmpybsu($Rss32,$Rtt32):sat",
-tc_e913dc32, TypeM>, Enc_88c16c, Requires<[HasV5T]> {
+tc_e913dc32, TypeM>, Enc_88c16c, Requires<[HasV5]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101010001;
@@ -17307,7 +17369,7 @@ def M5_vdmpybsu : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vdmpybsu($Rss32,$Rtt32):sat",
-tc_8fd5f294, TypeM>, Enc_a56825, Requires<[HasV5T]> {
+tc_8fd5f294, TypeM>, Enc_a56825, Requires<[HasV5]> {
let Inst{7-5} = 0b001;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000101;
@@ -17402,7 +17464,7 @@ def M6_vabsdiffb : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vabsdiffb($Rtt32,$Rss32)",
-tc_f49e76f4, TypeM>, Enc_ea23e4, Requires<[HasV62T]> {
+tc_f49e76f4, TypeM>, Enc_ea23e4, Requires<[HasV62]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000111;
@@ -17412,7 +17474,7 @@ def M6_vabsdiffub : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32),
"$Rdd32 = vabsdiffub($Rtt32,$Rss32)",
-tc_f49e76f4, TypeM>, Enc_ea23e4, Requires<[HasV62T]> {
+tc_f49e76f4, TypeM>, Enc_ea23e4, Requires<[HasV62]> {
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11101000101;
@@ -18142,7 +18204,7 @@ def S2_asr_i_p_rnd : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rdd32 = asr($Rss32,#$Ii):rnd",
-tc_2b6f77c6, TypeS_2op>, Enc_5eac98, Requires<[HasV5T]> {
+tc_2b6f77c6, TypeS_2op>, Enc_5eac98, Requires<[HasV5]> {
let Inst{7-5} = 0b111;
let Inst{31-21} = 0b10000000110;
let prefersSlot3 = 1;
@@ -18151,7 +18213,7 @@ def S2_asr_i_p_rnd_goodsyntax : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rdd32 = asrrnd($Rss32,#$Ii)",
-tc_2b6f77c6, TypeS_2op>, Requires<[HasV5T]> {
+tc_2b6f77c6, TypeS_2op>, Requires<[HasV5]> {
let isPseudo = 1;
}
def S2_asr_i_r : HInst<
@@ -25086,7 +25148,7 @@ def S5_asrhub_rnd_sat : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32, u4_0Imm:$Ii),
"$Rd32 = vasrhub($Rss32,#$Ii):raw",
-tc_2b6f77c6, TypeS_2op>, Enc_11a146, Requires<[HasV5T]> {
+tc_2b6f77c6, TypeS_2op>, Enc_11a146, Requires<[HasV5]> {
let Inst{7-5} = 0b100;
let Inst{13-12} = 0b00;
let Inst{31-21} = 0b10001000011;
@@ -25099,7 +25161,7 @@ def S5_asrhub_rnd_sat_goodsyntax : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32, u4_0Imm:$Ii),
"$Rd32 = vasrhub($Rss32,#$Ii):rnd:sat",
-tc_2b6f77c6, TypeS_2op>, Requires<[HasV5T]> {
+tc_2b6f77c6, TypeS_2op>, Requires<[HasV5]> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -25108,7 +25170,7 @@ def S5_asrhub_sat : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32, u4_0Imm:$Ii),
"$Rd32 = vasrhub($Rss32,#$Ii):sat",
-tc_2b6f77c6, TypeS_2op>, Enc_11a146, Requires<[HasV5T]> {
+tc_2b6f77c6, TypeS_2op>, Enc_11a146, Requires<[HasV5]> {
let Inst{7-5} = 0b101;
let Inst{13-12} = 0b00;
let Inst{31-21} = 0b10001000011;
@@ -25121,7 +25183,7 @@ def S5_popcountp : HInst<
(outs IntRegs:$Rd32),
(ins DoubleRegs:$Rss32),
"$Rd32 = popcount($Rss32)",
-tc_00afc57e, TypeS_2op>, Enc_90cd8b, Requires<[HasV5T]> {
+tc_00afc57e, TypeS_2op>, Enc_90cd8b, Requires<[HasV5]> {
let Inst{13-5} = 0b000000011;
let Inst{31-21} = 0b10001000011;
let hasNewValue = 1;
@@ -25132,7 +25194,7 @@ def S5_vasrhrnd : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, u4_0Imm:$Ii),
"$Rdd32 = vasrh($Rss32,#$Ii):raw",
-tc_2b6f77c6, TypeS_2op>, Enc_12b6e9, Requires<[HasV5T]> {
+tc_2b6f77c6, TypeS_2op>, Enc_12b6e9, Requires<[HasV5]> {
let Inst{7-5} = 0b000;
let Inst{13-12} = 0b00;
let Inst{31-21} = 0b10000000001;
@@ -25142,14 +25204,14 @@ def S5_vasrhrnd_goodsyntax : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, u4_0Imm:$Ii),
"$Rdd32 = vasrh($Rss32,#$Ii):rnd",
-tc_2b6f77c6, TypeS_2op>, Requires<[HasV5T]> {
+tc_2b6f77c6, TypeS_2op>, Requires<[HasV5]> {
let isPseudo = 1;
}
def S6_allocframe_to_raw : HInst<
(outs),
(ins u11_3Imm:$Ii),
"allocframe(#$Ii)",
-tc_e216a5db, TypeMAPPING>, Requires<[HasV65T]> {
+tc_e216a5db, TypeMAPPING>, Requires<[HasV65]> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -25157,7 +25219,7 @@ def S6_rol_i_p : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rdd32 = rol($Rss32,#$Ii)",
-tc_55050d58, TypeS_2op>, Enc_5eac98, Requires<[HasV60T]> {
+tc_55050d58, TypeS_2op>, Enc_5eac98, Requires<[HasV60]> {
let Inst{7-5} = 0b011;
let Inst{31-21} = 0b10000000000;
}
@@ -25165,7 +25227,7 @@ def S6_rol_i_p_acc : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rxx32 += rol($Rss32,#$Ii)",
-tc_41d5298e, TypeS_2op>, Enc_70fb07, Requires<[HasV60T]> {
+tc_41d5298e, TypeS_2op>, Enc_70fb07, Requires<[HasV60]> {
let Inst{7-5} = 0b111;
let Inst{31-21} = 0b10000010000;
let prefersSlot3 = 1;
@@ -25175,7 +25237,7 @@ def S6_rol_i_p_and : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rxx32 &= rol($Rss32,#$Ii)",
-tc_41d5298e, TypeS_2op>, Enc_70fb07, Requires<[HasV60T]> {
+tc_41d5298e, TypeS_2op>, Enc_70fb07, Requires<[HasV60]> {
let Inst{7-5} = 0b011;
let Inst{31-21} = 0b10000010010;
let prefersSlot3 = 1;
@@ -25185,7 +25247,7 @@ def S6_rol_i_p_nac : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rxx32 -= rol($Rss32,#$Ii)",
-tc_41d5298e, TypeS_2op>, Enc_70fb07, Requires<[HasV60T]> {
+tc_41d5298e, TypeS_2op>, Enc_70fb07, Requires<[HasV60]> {
let Inst{7-5} = 0b011;
let Inst{31-21} = 0b10000010000;
let prefersSlot3 = 1;
@@ -25195,7 +25257,7 @@ def S6_rol_i_p_or : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rxx32 |= rol($Rss32,#$Ii)",
-tc_41d5298e, TypeS_2op>, Enc_70fb07, Requires<[HasV60T]> {
+tc_41d5298e, TypeS_2op>, Enc_70fb07, Requires<[HasV60]> {
let Inst{7-5} = 0b111;
let Inst{31-21} = 0b10000010010;
let prefersSlot3 = 1;
@@ -25205,7 +25267,7 @@ def S6_rol_i_p_xacc : HInst<
(outs DoubleRegs:$Rxx32),
(ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii),
"$Rxx32 ^= rol($Rss32,#$Ii)",
-tc_41d5298e, TypeS_2op>, Enc_70fb07, Requires<[HasV60T]> {
+tc_41d5298e, TypeS_2op>, Enc_70fb07, Requires<[HasV60]> {
let Inst{7-5} = 0b011;
let Inst{31-21} = 0b10000010100;
let prefersSlot3 = 1;
@@ -25215,7 +25277,7 @@ def S6_rol_i_r : HInst<
(outs IntRegs:$Rd32),
(ins IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rd32 = rol($Rs32,#$Ii)",
-tc_55050d58, TypeS_2op>, Enc_a05677, Requires<[HasV60T]> {
+tc_55050d58, TypeS_2op>, Enc_a05677, Requires<[HasV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001100000;
@@ -25226,7 +25288,7 @@ def S6_rol_i_r_acc : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rx32 += rol($Rs32,#$Ii)",
-tc_41d5298e, TypeS_2op>, Enc_28a2dc, Requires<[HasV60T]> {
+tc_41d5298e, TypeS_2op>, Enc_28a2dc, Requires<[HasV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001110000;
@@ -25239,7 +25301,7 @@ def S6_rol_i_r_and : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rx32 &= rol($Rs32,#$Ii)",
-tc_41d5298e, TypeS_2op>, Enc_28a2dc, Requires<[HasV60T]> {
+tc_41d5298e, TypeS_2op>, Enc_28a2dc, Requires<[HasV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001110010;
@@ -25252,7 +25314,7 @@ def S6_rol_i_r_nac : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rx32 -= rol($Rs32,#$Ii)",
-tc_41d5298e, TypeS_2op>, Enc_28a2dc, Requires<[HasV60T]> {
+tc_41d5298e, TypeS_2op>, Enc_28a2dc, Requires<[HasV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001110000;
@@ -25265,7 +25327,7 @@ def S6_rol_i_r_or : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rx32 |= rol($Rs32,#$Ii)",
-tc_41d5298e, TypeS_2op>, Enc_28a2dc, Requires<[HasV60T]> {
+tc_41d5298e, TypeS_2op>, Enc_28a2dc, Requires<[HasV60]> {
let Inst{7-5} = 0b111;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001110010;
@@ -25278,7 +25340,7 @@ def S6_rol_i_r_xacc : HInst<
(outs IntRegs:$Rx32),
(ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii),
"$Rx32 ^= rol($Rs32,#$Ii)",
-tc_41d5298e, TypeS_2op>, Enc_28a2dc, Requires<[HasV60T]> {
+tc_41d5298e, TypeS_2op>, Enc_28a2dc, Requires<[HasV60]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10001110100;
@@ -25291,7 +25353,7 @@ def S6_vsplatrbp : HInst<
(outs DoubleRegs:$Rdd32),
(ins IntRegs:$Rs32),
"$Rdd32 = vsplatb($Rs32)",
-tc_be706f30, TypeS_2op>, Enc_3a3d62, Requires<[HasV62T]> {
+tc_be706f30, TypeS_2op>, Enc_3a3d62, Requires<[HasV62]> {
let Inst{13-5} = 0b000000100;
let Inst{31-21} = 0b10000100010;
}
@@ -25299,7 +25361,7 @@ def S6_vtrunehb_ppp : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vtrunehb($Rss32,$Rtt32)",
-tc_55050d58, TypeS_3op>, Enc_a56825, Requires<[HasV62T]> {
+tc_55050d58, TypeS_3op>, Enc_a56825, Requires<[HasV62]> {
let Inst{7-5} = 0b011;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000001100;
@@ -25308,7 +25370,7 @@ def S6_vtrunohb_ppp : HInst<
(outs DoubleRegs:$Rdd32),
(ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32),
"$Rdd32 = vtrunohb($Rss32,$Rtt32)",
-tc_55050d58, TypeS_3op>, Enc_a56825, Requires<[HasV62T]> {
+tc_55050d58, TypeS_3op>, Enc_a56825, Requires<[HasV62]> {
let Inst{7-5} = 0b101;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b11000001100;
@@ -26288,7 +26350,7 @@ def V6_ldntnt0 : HInst<
(outs HvxVR:$Vd32),
(ins IntRegs:$Rt32),
"$Vd32 = vmem($Rt32):nt",
-PSEUDO, TypeMAPPING>, Requires<[HasV62T]> {
+PSEUDO, TypeMAPPING>, Requires<[HasV62]> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -30301,7 +30363,7 @@ def V6_vasrhbrndsat_alt : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
"$Vd32 = vasrhb($Vu32,$Vv32,$Rt8):rnd:sat",
-tc_7fa8b40f, TypeMAPPING>, Requires<[HasV60T]> {
+tc_7fa8b40f, TypeMAPPING>, Requires<[HasV60]> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -30335,7 +30397,7 @@ def V6_vasrhubrndsat_alt : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
"$Vd32 = vasrhub($Vu32,$Vv32,$Rt8):rnd:sat",
-tc_7fa8b40f, TypeMAPPING>, Requires<[HasV60T]> {
+tc_7fa8b40f, TypeMAPPING>, Requires<[HasV60]> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -30357,7 +30419,7 @@ def V6_vasrhubsat_alt : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
"$Vd32 = vasrhub($Vu32,$Vv32,$Rt8):sat",
-tc_7fa8b40f, TypeMAPPING>, Requires<[HasV60T]> {
+tc_7fa8b40f, TypeMAPPING>, Requires<[HasV60]> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -30500,7 +30562,7 @@ def V6_vasrwh_alt : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
"$Vd32 = vasrwh($Vu32,$Vv32,$Rt8)",
-tc_7fa8b40f, TypeMAPPING>, Requires<[HasV60T]> {
+tc_7fa8b40f, TypeMAPPING>, Requires<[HasV60]> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -30522,7 +30584,7 @@ def V6_vasrwhrndsat_alt : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
"$Vd32 = vasrwh($Vu32,$Vv32,$Rt8):rnd:sat",
-tc_7fa8b40f, TypeMAPPING>, Requires<[HasV60T]> {
+tc_7fa8b40f, TypeMAPPING>, Requires<[HasV60]> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -30544,7 +30606,7 @@ def V6_vasrwhsat_alt : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
"$Vd32 = vasrwh($Vu32,$Vv32,$Rt8):sat",
-tc_7fa8b40f, TypeMAPPING>, Requires<[HasV60T]> {
+tc_7fa8b40f, TypeMAPPING>, Requires<[HasV60]> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -30578,7 +30640,7 @@ def V6_vasrwuhsat_alt : HInst<
(outs HvxVR:$Vd32),
(ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8),
"$Vd32 = vasrwuh($Vu32,$Vv32,$Rt8):sat",
-tc_7fa8b40f, TypeMAPPING>, Requires<[HasV60T]> {
+tc_7fa8b40f, TypeMAPPING>, Requires<[HasV60]> {
let hasNewValue = 1;
let opNewValue = 0;
let isPseudo = 1;
@@ -36942,7 +37004,7 @@ def Y5_l2fetch : HInst<
(outs),
(ins IntRegs:$Rs32, DoubleRegs:$Rtt32),
"l2fetch($Rs32,$Rtt32)",
-tc_daa058fa, TypeST>, Enc_e6abcf, Requires<[HasV5T]> {
+tc_daa058fa, TypeST>, Enc_e6abcf, Requires<[HasV5]> {
let Inst{7-0} = 0b00000000;
let Inst{13-13} = 0b0;
let Inst{31-21} = 0b10100110100;
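
The HInst definitions above pin fixed encoding bits with `let Inst{hi-lo} = 0b...` assignments; for example, the new J2_trap1 places its instruction-class bits in Inst{31-21}. Below is a minimal, self-contained C++ sketch of that field-packing idiom. The setField/getField helpers are illustrative only, not part of LLVM's MC emitter.

    #include <cassert>
    #include <cstdint>

    // Model of a TableGen "let Inst{Hi-Lo} = Value" assignment: pack a
    // bit-field into [Lo, Hi] of a 32-bit instruction word. Assumes
    // Hi < 31 so the mask shift below cannot overflow.
    constexpr uint32_t setField(uint32_t Word, unsigned Hi, unsigned Lo,
                                uint32_t Value) {
      uint32_t Mask = ((1u << (Hi - Lo + 1)) - 1) << Lo;
      return (Word & ~Mask) | ((Value << Lo) & Mask);
    }

    constexpr uint32_t getField(uint32_t Word, unsigned Hi, unsigned Lo) {
      return (Word >> Lo) & ((1u << (Hi - Lo + 1)) - 1);
    }

    int main() {
      // The J2_trap1 class bits from this diff: Inst{31-21} = 0b01010100100.
      uint32_t Inst = setField(0, 31, 21, 0b01010100100);
      assert(getField(Inst, 31, 21) == 0b01010100100);
    }
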
diff --git a/lib/Target/Hexagon/HexagonDepMappings.td b/lib/Target/Hexagon/HexagonDepMappings.td
index 7a156c39da9c..03c504ff0b08 100644
--- a/lib/Target/Hexagon/HexagonDepMappings.td
+++ b/lib/Target/Hexagon/HexagonDepMappings.td
@@ -26,6 +26,7 @@ def J2_jumpf_nopred_mapAlias : InstAlias<"if (!$Pu4) jump $Ii", (J2_jumpf PredRe
def J2_jumprf_nopred_mapAlias : InstAlias<"if (!$Pu4) jumpr $Rs32", (J2_jumprf PredRegs:$Pu4, IntRegs:$Rs32)>;
def J2_jumprt_nopred_mapAlias : InstAlias<"if ($Pu4) jumpr $Rs32", (J2_jumprt PredRegs:$Pu4, IntRegs:$Rs32)>;
def J2_jumpt_nopred_mapAlias : InstAlias<"if ($Pu4) jump $Ii", (J2_jumpt PredRegs:$Pu4, b30_2Imm:$Ii)>;
+def J2_trap1_noregmapAlias : InstAlias<"trap1(#$Ii)", (J2_trap1 R0, u8_0Imm:$Ii)>;
def L2_loadalignb_zomapAlias : InstAlias<"$Ryy32 = memb_fifo($Rs32)", (L2_loadalignb_io DoubleRegs:$Ryy32, IntRegs:$Rs32, 0)>;
def L2_loadalignh_zomapAlias : InstAlias<"$Ryy32 = memh_fifo($Rs32)", (L2_loadalignh_io DoubleRegs:$Ryy32, IntRegs:$Rs32, 0)>;
def L2_loadbsw2_zomapAlias : InstAlias<"$Rd32 = membh($Rs32)", (L2_loadbsw2_io IntRegs:$Rd32, IntRegs:$Rs32, 0)>;
diff --git a/lib/Target/Hexagon/HexagonEarlyIfConv.cpp b/lib/Target/Hexagon/HexagonEarlyIfConv.cpp
index 0f1b9a4733c5..557e6384be6a 100644
--- a/lib/Target/Hexagon/HexagonEarlyIfConv.cpp
+++ b/lib/Target/Hexagon/HexagonEarlyIfConv.cpp
@@ -100,7 +100,7 @@ namespace llvm {
} // end namespace llvm
static cl::opt<bool> EnableHexagonBP("enable-hexagon-br-prob", cl::Hidden,
- cl::init(false), cl::desc("Enable branch probability info"));
+ cl::init(true), cl::desc("Enable branch probability info"));
static cl::opt<unsigned> SizeLimit("eif-limit", cl::init(6), cl::Hidden,
cl::desc("Size limit in Hexagon early if-conversion"));
static cl::opt<bool> SkipExitBranches("eif-no-loop-exit", cl::init(false),
@@ -191,6 +191,7 @@ namespace {
bool isProfitable(const FlowPattern &FP) const;
bool isPredicableStore(const MachineInstr *MI) const;
bool isSafeToSpeculate(const MachineInstr *MI) const;
+ bool isPredicate(unsigned R) const;
unsigned getCondStoreOpcode(unsigned Opc, bool IfTrue) const;
void predicateInstr(MachineBasicBlock *ToB, MachineBasicBlock::iterator At,
@@ -207,7 +208,6 @@ namespace {
void removeBlock(MachineBasicBlock *B);
void eliminatePhis(MachineBasicBlock *B);
- void replacePhiEdges(MachineBasicBlock *OldB, MachineBasicBlock *NewB);
void mergeBlocks(MachineBasicBlock *PredB, MachineBasicBlock *SuccB);
void simplifyFlowGraph(const FlowPattern &FP);
@@ -238,11 +238,12 @@ bool HexagonEarlyIfConversion::isPreheader(const MachineBasicBlock *B) const {
bool HexagonEarlyIfConversion::matchFlowPattern(MachineBasicBlock *B,
MachineLoop *L, FlowPattern &FP) {
- DEBUG(dbgs() << "Checking flow pattern at " << printMBBReference(*B) << "\n");
+ LLVM_DEBUG(dbgs() << "Checking flow pattern at " << printMBBReference(*B)
+ << "\n");
// Interested only in conditional branches, no .new, no new-value, etc.
  // Check the terminators directly; it's easier than handling all responses
- // from AnalyzeBranch.
+ // from analyzeBranch.
MachineBasicBlock *TB = nullptr, *FB = nullptr;
MachineBasicBlock::const_iterator T1I = B->getFirstTerminator();
if (T1I == B->end())
@@ -325,17 +326,17 @@ bool HexagonEarlyIfConversion::matchFlowPattern(MachineBasicBlock *B,
}
// Don't try to predicate loop preheaders.
if ((TB && isPreheader(TB)) || (FB && isPreheader(FB))) {
- DEBUG(dbgs() << "One of blocks " << PrintMB(TB) << ", " << PrintMB(FB)
- << " is a loop preheader. Skipping.\n");
+ LLVM_DEBUG(dbgs() << "One of blocks " << PrintMB(TB) << ", " << PrintMB(FB)
+ << " is a loop preheader. Skipping.\n");
return false;
}
FP = FlowPattern(B, PredR, TB, FB, JB);
- DEBUG(dbgs() << "Detected " << PrintFP(FP, *TRI) << "\n");
+ LLVM_DEBUG(dbgs() << "Detected " << PrintFP(FP, *TRI) << "\n");
return true;
}
-// KLUDGE: HexagonInstrInfo::AnalyzeBranch won't work on a block that
+// KLUDGE: HexagonInstrInfo::analyzeBranch won't work on a block that
// contains EH_LABEL.
bool HexagonEarlyIfConversion::hasEHLabel(const MachineBasicBlock *B) const {
for (auto &I : *B)
@@ -344,7 +345,7 @@ bool HexagonEarlyIfConversion::hasEHLabel(const MachineBasicBlock *B) const {
return false;
}
-// KLUDGE: HexagonInstrInfo::AnalyzeBranch may be unable to recognize
+// KLUDGE: HexagonInstrInfo::analyzeBranch may be unable to recognize
// that a block can never fall-through.
bool HexagonEarlyIfConversion::hasUncondBranch(const MachineBasicBlock *B)
const {
@@ -367,7 +368,7 @@ bool HexagonEarlyIfConversion::isValidCandidate(const MachineBasicBlock *B)
return false;
for (auto &MI : *B) {
- if (MI.isDebugValue())
+ if (MI.isDebugInstr())
continue;
if (MI.isConditionalBranch())
return false;
@@ -387,13 +388,8 @@ bool HexagonEarlyIfConversion::isValidCandidate(const MachineBasicBlock *B)
unsigned R = MO.getReg();
if (!TargetRegisterInfo::isVirtualRegister(R))
continue;
- switch (MRI->getRegClass(R)->getID()) {
- case Hexagon::PredRegsRegClassID:
- case Hexagon::HvxQRRegClassID:
- break;
- default:
- continue;
- }
+ if (!isPredicate(R))
+ continue;
for (auto U = MRI->use_begin(R); U != MRI->use_end(); ++U)
if (U->getParent()->isPHI())
return false;
@@ -443,8 +439,7 @@ bool HexagonEarlyIfConversion::isValid(const FlowPattern &FP) const {
if (usesUndefVReg(&MI))
return false;
unsigned DefR = MI.getOperand(0).getReg();
- const TargetRegisterClass *RC = MRI->getRegClass(DefR);
- if (RC == &Hexagon::PredRegsRegClass)
+ if (isPredicate(DefR))
return false;
}
}
@@ -500,7 +495,7 @@ unsigned HexagonEarlyIfConversion::countPredicateDefs(
unsigned R = MO.getReg();
if (!TargetRegisterInfo::isVirtualRegister(R))
continue;
- if (MRI->getRegClass(R) == &Hexagon::PredRegsRegClass)
+ if (isPredicate(R))
PredDefs++;
}
}
@@ -508,10 +503,21 @@ unsigned HexagonEarlyIfConversion::countPredicateDefs(
}
bool HexagonEarlyIfConversion::isProfitable(const FlowPattern &FP) const {
+ BranchProbability JumpProb(1, 10);
+ BranchProbability Prob(9, 10);
+ if (MBPI && FP.TrueB && !FP.FalseB &&
+ (MBPI->getEdgeProbability(FP.SplitB, FP.TrueB) < JumpProb ||
+ MBPI->getEdgeProbability(FP.SplitB, FP.TrueB) > Prob))
+ return false;
+
+ if (MBPI && !FP.TrueB && FP.FalseB &&
+ (MBPI->getEdgeProbability(FP.SplitB, FP.FalseB) < JumpProb ||
+ MBPI->getEdgeProbability(FP.SplitB, FP.FalseB) > Prob))
+ return false;
+
if (FP.TrueB && FP.FalseB) {
    // Do not if-convert if the branch is one-sided.
if (MBPI) {
- BranchProbability Prob(9, 10);
if (MBPI->getEdgeProbability(FP.SplitB, FP.TrueB) > Prob)
return false;
if (MBPI->getEdgeProbability(FP.SplitB, FP.FalseB) > Prob)
@@ -546,8 +552,9 @@ bool HexagonEarlyIfConversion::isProfitable(const FlowPattern &FP) const {
};
unsigned Spare = 0;
unsigned TotalIn = TotalCount(FP.TrueB, Spare) + TotalCount(FP.FalseB, Spare);
- DEBUG(dbgs() << "Total number of instructions to be predicated/speculated: "
- << TotalIn << ", spare room: " << Spare << "\n");
+ LLVM_DEBUG(
+ dbgs() << "Total number of instructions to be predicated/speculated: "
+ << TotalIn << ", spare room: " << Spare << "\n");
if (TotalIn >= SizeLimit+Spare)
return false;
@@ -574,12 +581,13 @@ bool HexagonEarlyIfConversion::isProfitable(const FlowPattern &FP) const {
PredDefs += countPredicateDefs(SB);
}
}
- DEBUG(dbgs() << "Total number of extra muxes from converted phis: "
- << TotalPh << "\n");
+ LLVM_DEBUG(dbgs() << "Total number of extra muxes from converted phis: "
+ << TotalPh << "\n");
if (TotalIn+TotalPh >= SizeLimit+Spare)
return false;
- DEBUG(dbgs() << "Total number of predicate registers: " << PredDefs << "\n");
+ LLVM_DEBUG(dbgs() << "Total number of predicate registers: " << PredDefs
+ << "\n");
if (PredDefs > 4)
return false;
@@ -620,11 +628,11 @@ bool HexagonEarlyIfConversion::visitBlock(MachineBasicBlock *B,
return Changed;
if (!isValid(FP)) {
- DEBUG(dbgs() << "Conversion is not valid\n");
+ LLVM_DEBUG(dbgs() << "Conversion is not valid\n");
return Changed;
}
if (!isProfitable(FP)) {
- DEBUG(dbgs() << "Conversion is not profitable\n");
+ LLVM_DEBUG(dbgs() << "Conversion is not profitable\n");
return Changed;
}
@@ -635,8 +643,9 @@ bool HexagonEarlyIfConversion::visitBlock(MachineBasicBlock *B,
bool HexagonEarlyIfConversion::visitLoop(MachineLoop *L) {
MachineBasicBlock *HB = L ? L->getHeader() : nullptr;
- DEBUG((L ? dbgs() << "Visiting loop H:" << PrintMB(HB)
- : dbgs() << "Visiting function") << "\n");
+ LLVM_DEBUG((L ? dbgs() << "Visiting loop H:" << PrintMB(HB)
+ : dbgs() << "Visiting function")
+ << "\n");
bool Changed = false;
if (L) {
for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I)
@@ -680,10 +689,18 @@ bool HexagonEarlyIfConversion::isSafeToSpeculate(const MachineInstr *MI)
return false;
if (MI->hasUnmodeledSideEffects())
return false;
+ if (MI->getOpcode() == TargetOpcode::LIFETIME_END)
+ return false;
return true;
}
+bool HexagonEarlyIfConversion::isPredicate(unsigned R) const {
+ const TargetRegisterClass *RC = MRI->getRegClass(R);
+ return RC == &Hexagon::PredRegsRegClass ||
+ RC == &Hexagon::HvxQRRegClass;
+}
+
unsigned HexagonEarlyIfConversion::getCondStoreOpcode(unsigned Opc,
bool IfTrue) const {
return HII->getCondOpcode(Opc, !IfTrue);
@@ -745,7 +762,7 @@ void HexagonEarlyIfConversion::predicateInstr(MachineBasicBlock *ToB,
void HexagonEarlyIfConversion::predicateBlockNB(MachineBasicBlock *ToB,
MachineBasicBlock::iterator At, MachineBasicBlock *FromB,
unsigned PredR, bool IfTrue) {
- DEBUG(dbgs() << "Predicating block " << PrintMB(FromB) << "\n");
+ LLVM_DEBUG(dbgs() << "Predicating block " << PrintMB(FromB) << "\n");
MachineBasicBlock::iterator End = FromB->getFirstTerminator();
MachineBasicBlock::iterator I, NextI;
@@ -765,9 +782,11 @@ unsigned HexagonEarlyIfConversion::buildMux(MachineBasicBlock *B,
unsigned Opc = 0;
switch (DRC->getID()) {
case Hexagon::IntRegsRegClassID:
+ case Hexagon::IntRegsLow8RegClassID:
Opc = Hexagon::C2_mux;
break;
case Hexagon::DoubleRegsRegClassID:
+ case Hexagon::GeneralDoubleLow8RegsRegClassID:
Opc = Hexagon::PS_pselect;
break;
case Hexagon::HvxVRRegClassID:
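
buildMux() picks C2_mux for the 32-bit register classes and PS_pselect for the 64-bit ones; both implement a predicated select that replaces a two-input PHI once its blocks are if-converted. A minimal sketch of the scalar semantics only (encodings, HVX variants, and packet constraints are deliberately ignored):

    #include <cassert>
    #include <cstdint>

    // "Rd = mux(Pu, Rs, Rt)": the predicate picks between the two sources,
    // which is how a two-input PHI collapses after if-conversion.
    uint32_t mux(bool Pu, uint32_t Rs, uint32_t Rt) { return Pu ? Rs : Rt; }

    // 64-bit counterpart, corresponding to PS_pselect above.
    uint64_t pselect(bool Pu, uint64_t Rss, uint64_t Rtt) {
      return Pu ? Rss : Rtt;
    }

    int main() {
      assert(mux(true, 1, 2) == 1);
      assert(pselect(false, 1, 2) == 2);
    }
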
@@ -935,7 +954,7 @@ void HexagonEarlyIfConversion::convert(const FlowPattern &FP) {
}
void HexagonEarlyIfConversion::removeBlock(MachineBasicBlock *B) {
- DEBUG(dbgs() << "Removing block " << PrintMB(B) << "\n");
+ LLVM_DEBUG(dbgs() << "Removing block " << PrintMB(B) << "\n");
// Transfer the immediate dominator information from B to its descendants.
MachineDomTreeNode *N = MDT->getNode(B);
@@ -965,7 +984,7 @@ void HexagonEarlyIfConversion::removeBlock(MachineBasicBlock *B) {
}
void HexagonEarlyIfConversion::eliminatePhis(MachineBasicBlock *B) {
- DEBUG(dbgs() << "Removing phi nodes from block " << PrintMB(B) << "\n");
+ LLVM_DEBUG(dbgs() << "Removing phi nodes from block " << PrintMB(B) << "\n");
MachineBasicBlock::iterator I, NextI, NonPHI = B->getFirstNonPHI();
for (I = B->begin(); I != NonPHI; I = NextI) {
NextI = std::next(I);
@@ -990,34 +1009,16 @@ void HexagonEarlyIfConversion::eliminatePhis(MachineBasicBlock *B) {
}
}
-void HexagonEarlyIfConversion::replacePhiEdges(MachineBasicBlock *OldB,
- MachineBasicBlock *NewB) {
- for (auto I = OldB->succ_begin(), E = OldB->succ_end(); I != E; ++I) {
- MachineBasicBlock *SB = *I;
- MachineBasicBlock::iterator P, N = SB->getFirstNonPHI();
- for (P = SB->begin(); P != N; ++P) {
- MachineInstr &PN = *P;
- for (MachineOperand &MO : PN.operands())
- if (MO.isMBB() && MO.getMBB() == OldB)
- MO.setMBB(NewB);
- }
- }
-}
-
void HexagonEarlyIfConversion::mergeBlocks(MachineBasicBlock *PredB,
MachineBasicBlock *SuccB) {
- DEBUG(dbgs() << "Merging blocks " << PrintMB(PredB) << " and "
- << PrintMB(SuccB) << "\n");
+ LLVM_DEBUG(dbgs() << "Merging blocks " << PrintMB(PredB) << " and "
+ << PrintMB(SuccB) << "\n");
bool TermOk = hasUncondBranch(SuccB);
eliminatePhis(SuccB);
HII->removeBranch(*PredB);
PredB->removeSuccessor(SuccB);
PredB->splice(PredB->end(), SuccB, SuccB->begin(), SuccB->end());
- MachineBasicBlock::succ_iterator I, E = SuccB->succ_end();
- for (I = SuccB->succ_begin(); I != E; ++I)
- PredB->addSuccessor(*I);
- PredB->normalizeSuccProbs();
- replacePhiEdges(SuccB, PredB);
+ PredB->transferSuccessorsAndUpdatePHIs(SuccB);
removeBlock(SuccB);
if (!TermOk)
PredB->updateTerminator();
@@ -1039,7 +1040,7 @@ void HexagonEarlyIfConversion::simplifyFlowGraph(const FlowPattern &FP) {
// By now, the split block has only one successor (SB), and SB has only
// one predecessor. We can try to merge them. We will need to update ter-
- // minators in FP.Split+SB, and that requires working AnalyzeBranch, which
+ // minators in FP.Split+SB, and that requires working analyzeBranch, which
// fails on Hexagon for blocks that have EH_LABELs. However, if SB ends
// with an unconditional branch, we won't need to touch the terminators.
if (!hasEHLabel(SB) || hasUncondBranch(SB))
diff --git a/lib/Target/Hexagon/HexagonExpandCondsets.cpp b/lib/Target/Hexagon/HexagonExpandCondsets.cpp
index c2feaf5737b2..7e774674e0c0 100644
--- a/lib/Target/Hexagon/HexagonExpandCondsets.cpp
+++ b/lib/Target/Hexagon/HexagonExpandCondsets.cpp
@@ -316,8 +316,10 @@ void HexagonExpandCondsets::updateKillFlags(unsigned Reg) {
auto KillAt = [this,Reg] (SlotIndex K, LaneBitmask LM) -> void {
// Set the <kill> flag on a use of Reg whose lane mask is contained in LM.
MachineInstr *MI = LIS->getInstructionFromIndex(K);
- for (auto &Op : MI->operands()) {
- if (!Op.isReg() || !Op.isUse() || Op.getReg() != Reg)
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &Op = MI->getOperand(i);
+ if (!Op.isReg() || !Op.isUse() || Op.getReg() != Reg ||
+ MI->isRegTiedToDefOperand(i))
continue;
LaneBitmask SLM = getLaneMask(Reg, Op.getSubReg());
if ((SLM & LM) == SLM) {
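
The updateKillFlags() change above skips uses that are tied to a def — the "$Rx32 = $Rx32in" constraint seen on accumulators such as F2_sffma earlier in this diff — because the same instruction writes the register back, so the tied use is never a last use. A toy model of that rule (the Operand struct is illustrative, not MachineOperand; lane masks and slot indexes are omitted):

    #include <cassert>
    #include <vector>

    struct Operand {
      unsigned Reg;
      bool IsUse, TiedToDef, Kill = false;
    };

    void setKillOnLastUse(std::vector<Operand> &Ops, unsigned Reg) {
      for (Operand &Op : Ops) {
        if (!Op.IsUse || Op.Reg != Reg || Op.TiedToDef)
          continue; // tied uses are skipped, as in the diff
        Op.Kill = true;
      }
    }

    int main() {
      std::vector<Operand> Ops = {
          {5, /*IsUse=*/false, /*TiedToDef=*/false}, // def of r5
          {5, /*IsUse=*/true,  /*TiedToDef=*/true},  // tied use: not killed
          {7, /*IsUse=*/true,  /*TiedToDef=*/false}, // ordinary use of r7
      };
      setKillOnLastUse(Ops, 5);
      assert(!Ops[1].Kill);
    }
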
@@ -497,14 +499,18 @@ void HexagonExpandCondsets::updateDeadsInRange(unsigned Reg, LaneBitmask LM,
if (!Op.isReg() || !DefRegs.count(Op))
continue;
if (Op.isDef()) {
- ImpUses.insert({Op, i});
+ // Tied defs will always have corresponding uses, so no extra
+ // implicit uses are needed.
+ if (!Op.isTied())
+ ImpUses.insert({Op, i});
} else {
// This function can be called for the same register with different
// lane masks. If the def in this instruction was for the whole
// register, we can get here more than once. Avoid adding multiple
// implicit uses (or adding an implicit use when an explicit one is
// present).
- ImpUses.erase(Op);
+ if (Op.isTied())
+ ImpUses.erase(Op);
}
}
if (ImpUses.empty())
@@ -545,7 +551,14 @@ void HexagonExpandCondsets::removeInstr(MachineInstr &MI) {
void HexagonExpandCondsets::updateLiveness(std::set<unsigned> &RegSet,
bool Recalc, bool UpdateKills, bool UpdateDeads) {
UpdateKills |= UpdateDeads;
- for (auto R : RegSet) {
+ for (unsigned R : RegSet) {
+ if (!TargetRegisterInfo::isVirtualRegister(R)) {
+ assert(TargetRegisterInfo::isPhysicalRegister(R));
+ // There shouldn't be any physical registers as operands, except
+ // possibly reserved registers.
+ assert(MRI->isReserved(R));
+ continue;
+ }
if (Recalc)
recalculateLiveInterval(R);
if (UpdateKills)
@@ -641,7 +654,7 @@ MachineInstr *HexagonExpandCondsets::genCondTfrFor(MachineOperand &SrcOp,
.add(SrcOp);
}
- DEBUG(dbgs() << "created an initial copy: " << *MIB);
+ LLVM_DEBUG(dbgs() << "created an initial copy: " << *MIB);
return &*MIB;
}
@@ -654,8 +667,8 @@ bool HexagonExpandCondsets::split(MachineInstr &MI,
return false;
TfrCounter++;
}
- DEBUG(dbgs() << "\nsplitting " << printMBBReference(*MI.getParent()) << ": "
- << MI);
+ LLVM_DEBUG(dbgs() << "\nsplitting " << printMBBReference(*MI.getParent())
+ << ": " << MI);
MachineOperand &MD = MI.getOperand(0); // Definition
MachineOperand &MP = MI.getOperand(1); // Predicate register
assert(MD.isDef());
@@ -932,8 +945,8 @@ bool HexagonExpandCondsets::predicate(MachineInstr &TfrI, bool Cond,
unsigned Opc = TfrI.getOpcode();
(void)Opc;
assert(Opc == Hexagon::A2_tfrt || Opc == Hexagon::A2_tfrf);
- DEBUG(dbgs() << "\nattempt to predicate if-" << (Cond ? "true" : "false")
- << ": " << TfrI);
+ LLVM_DEBUG(dbgs() << "\nattempt to predicate if-" << (Cond ? "true" : "false")
+ << ": " << TfrI);
MachineOperand &MD = TfrI.getOperand(0);
MachineOperand &MP = TfrI.getOperand(1);
@@ -954,7 +967,7 @@ bool HexagonExpandCondsets::predicate(MachineInstr &TfrI, bool Cond,
if (!DefI || !isPredicable(DefI))
return false;
- DEBUG(dbgs() << "Source def: " << *DefI);
+ LLVM_DEBUG(dbgs() << "Source def: " << *DefI);
// Collect the information about registers defined and used between the
// DefI and the TfrI.
@@ -1039,8 +1052,8 @@ bool HexagonExpandCondsets::predicate(MachineInstr &TfrI, bool Cond,
if (!canMoveMemTo(*DefI, TfrI, true))
CanDown = false;
- DEBUG(dbgs() << "Can move up: " << (CanUp ? "yes" : "no")
- << ", can move down: " << (CanDown ? "yes\n" : "no\n"));
+ LLVM_DEBUG(dbgs() << "Can move up: " << (CanUp ? "yes" : "no")
+ << ", can move down: " << (CanDown ? "yes\n" : "no\n"));
MachineBasicBlock::iterator PastDefIt = std::next(DefIt);
if (CanUp)
predicateAt(MD, *DefI, PastDefIt, MP, Cond, UpdRegs);
@@ -1135,10 +1148,10 @@ bool HexagonExpandCondsets::coalesceRegisters(RegisterRef R1, RegisterRef R2) {
return false;
bool Overlap = L1.overlaps(L2);
- DEBUG(dbgs() << "compatible registers: ("
- << (Overlap ? "overlap" : "disjoint") << ")\n "
- << printReg(R1.Reg, TRI, R1.Sub) << " " << L1 << "\n "
- << printReg(R2.Reg, TRI, R2.Sub) << " " << L2 << "\n");
+ LLVM_DEBUG(dbgs() << "compatible registers: ("
+ << (Overlap ? "overlap" : "disjoint") << ")\n "
+ << printReg(R1.Reg, TRI, R1.Sub) << " " << L1 << "\n "
+ << printReg(R2.Reg, TRI, R2.Sub) << " " << L2 << "\n");
if (R1.Sub || R2.Sub)
return false;
if (Overlap)
@@ -1171,7 +1184,7 @@ bool HexagonExpandCondsets::coalesceRegisters(RegisterRef R1, RegisterRef R2) {
LIS->removeInterval(R2.Reg);
updateKillFlags(R1.Reg);
- DEBUG(dbgs() << "coalesced: " << L1 << "\n");
+ LLVM_DEBUG(dbgs() << "coalesced: " << L1 << "\n");
L1.verify();
return true;
@@ -1252,8 +1265,8 @@ bool HexagonExpandCondsets::runOnMachineFunction(MachineFunction &MF) {
LIS = &getAnalysis<LiveIntervals>();
MRI = &MF.getRegInfo();
- DEBUG(LIS->print(dbgs() << "Before expand-condsets\n",
- MF.getFunction().getParent()));
+ LLVM_DEBUG(LIS->print(dbgs() << "Before expand-condsets\n",
+ MF.getFunction().getParent()));
bool Changed = false;
std::set<unsigned> CoalUpd, PredUpd;
@@ -1280,8 +1293,8 @@ bool HexagonExpandCondsets::runOnMachineFunction(MachineFunction &MF) {
if (!CoalUpd.count(Op.getReg()))
KillUpd.insert(Op.getReg());
updateLiveness(KillUpd, false, true, false);
- DEBUG(LIS->print(dbgs() << "After coalescing\n",
- MF.getFunction().getParent()));
+ LLVM_DEBUG(
+ LIS->print(dbgs() << "After coalescing\n", MF.getFunction().getParent()));
// First, simply split all muxes into a pair of conditional transfers
// and update the live intervals to reflect the new arrangement. The
@@ -1297,8 +1310,8 @@ bool HexagonExpandCondsets::runOnMachineFunction(MachineFunction &MF) {
// predication, and after splitting they are difficult to recalculate
// (because of predicated defs), so make sure they are left untouched.
// Predication does not use live intervals.
- DEBUG(LIS->print(dbgs() << "After splitting\n",
- MF.getFunction().getParent()));
+ LLVM_DEBUG(
+ LIS->print(dbgs() << "After splitting\n", MF.getFunction().getParent()));
// Traverse all blocks and collapse predicable instructions feeding
// conditional transfers into predicated instructions.
@@ -1306,13 +1319,13 @@ bool HexagonExpandCondsets::runOnMachineFunction(MachineFunction &MF) {
// cases that were not created in the previous step.
for (auto &B : MF)
Changed |= predicateInBlock(B, PredUpd);
- DEBUG(LIS->print(dbgs() << "After predicating\n",
- MF.getFunction().getParent()));
+ LLVM_DEBUG(LIS->print(dbgs() << "After predicating\n",
+ MF.getFunction().getParent()));
PredUpd.insert(CoalUpd.begin(), CoalUpd.end());
updateLiveness(PredUpd, true, true, true);
- DEBUG({
+ LLVM_DEBUG({
if (Changed)
LIS->print(dbgs() << "After expand-condsets\n",
MF.getFunction().getParent());
@@ -1324,7 +1337,6 @@ bool HexagonExpandCondsets::runOnMachineFunction(MachineFunction &MF) {
//===----------------------------------------------------------------------===//
// Public Constructor Functions
//===----------------------------------------------------------------------===//
-
FunctionPass *llvm::createHexagonExpandCondsets() {
return new HexagonExpandCondsets();
}
diff --git a/lib/Target/Hexagon/HexagonFixupHwLoops.cpp b/lib/Target/Hexagon/HexagonFixupHwLoops.cpp
index a842b672736c..e9067e2285a8 100644
--- a/lib/Target/Hexagon/HexagonFixupHwLoops.cpp
+++ b/lib/Target/Hexagon/HexagonFixupHwLoops.cpp
@@ -20,6 +20,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/PassSupport.h"
using namespace llvm;
@@ -59,12 +60,12 @@ namespace {
}
private:
- /// \brief Check the offset between each loop instruction and
+ /// Check the offset between each loop instruction and
/// the loop basic block to determine if we can use the LOOP instruction
/// or if we need to set the LC/SA registers explicitly.
bool fixupLoopInstrs(MachineFunction &MF);
- /// \brief Replace loop instruction with the constant extended
+ /// Replace loop instruction with the constant extended
/// version if the loop label is too far from the loop instruction.
void useExtLoopInstr(MachineFunction &MF,
MachineBasicBlock::iterator &MII);
@@ -80,7 +81,7 @@ FunctionPass *llvm::createHexagonFixupHwLoops() {
return new HexagonFixupHwLoops();
}
-/// \brief Returns true if the instruction is a hardware loop instruction.
+/// Returns true if the instruction is a hardware loop instruction.
static bool isHardwareLoop(const MachineInstr &MI) {
return MI.getOpcode() == Hexagon::J2_loop0r ||
MI.getOpcode() == Hexagon::J2_loop0i ||
@@ -94,7 +95,7 @@ bool HexagonFixupHwLoops::runOnMachineFunction(MachineFunction &MF) {
return fixupLoopInstrs(MF);
}
-/// \brief For Hexagon, if the loop label is to far from the
+/// For Hexagon, if the loop label is too far from the
/// loop instruction then we need to set the LC0 and SA0 registers
/// explicitly instead of using LOOP(start,count). This function
/// checks the distance, and generates register assignments if needed.
@@ -137,7 +138,7 @@ bool HexagonFixupHwLoops::fixupLoopInstrs(MachineFunction &MF) {
MachineBasicBlock::iterator MII = MBB.begin();
MachineBasicBlock::iterator MIE = MBB.end();
while (MII != MIE) {
- InstOffset += HII->getSize(*MII);
+ unsigned InstSize = HII->getSize(*MII);
if (MII->isMetaInstruction()) {
++MII;
continue;
@@ -145,8 +146,10 @@ bool HexagonFixupHwLoops::fixupLoopInstrs(MachineFunction &MF) {
if (isHardwareLoop(*MII)) {
assert(MII->getOperand(0).isMBB() &&
"Expect a basic block as loop operand");
- int diff = InstOffset - BlockToInstOffset[MII->getOperand(0).getMBB()];
- if ((unsigned)abs(diff) > MaxLoopRange) {
+ MachineBasicBlock *TargetBB = MII->getOperand(0).getMBB();
+ unsigned Diff = AbsoluteDifference(InstOffset,
+ BlockToInstOffset[TargetBB]);
+ if (Diff > MaxLoopRange) {
useExtLoopInstr(MF, MII);
MII = MBB.erase(MII);
Changed = true;
@@ -156,13 +159,14 @@ bool HexagonFixupHwLoops::fixupLoopInstrs(MachineFunction &MF) {
} else {
++MII;
}
+ InstOffset += InstSize;
}
}
return Changed;
}
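
fixupLoopInstrs() now measures the loop-label distance with AbsoluteDifference on unsigned offsets rather than casting the subtraction to int and calling abs(), which misbehaves once the distance exceeds INT_MAX. A self-contained version of the idiom (lower-case name to mark it as a sketch of the llvm/Support/MathExtras.h helper, not the header itself):

    #include <algorithm>
    #include <cassert>

    // Unsigned-safe distance: never forms a negative intermediate,
    // unlike abs((int)(A - B)).
    unsigned absoluteDifference(unsigned A, unsigned B) {
      return std::max(A, B) - std::min(A, B);
    }

    int main() {
      assert(absoluteDifference(10, 3) == 7);
      assert(absoluteDifference(3, 10) == 7);
      // abs((int)(0u - 3000000000u)) would overflow int; this stays defined.
      assert(absoluteDifference(0u, 3000000000u) == 3000000000u);
    }
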
-/// \brief Replace loop instructions with the constant extended version.
+/// Replace loop instructions with the constant extended version.
void HexagonFixupHwLoops::useExtLoopInstr(MachineFunction &MF,
MachineBasicBlock::iterator &MII) {
const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
diff --git a/lib/Target/Hexagon/HexagonFrameLowering.cpp b/lib/Target/Hexagon/HexagonFrameLowering.cpp
index 65a2fc35b11b..97b02e2b34cb 100644
--- a/lib/Target/Hexagon/HexagonFrameLowering.cpp
+++ b/lib/Target/Hexagon/HexagonFrameLowering.cpp
@@ -442,7 +442,7 @@ void HexagonFrameLowering::findShrunkPrologEpilog(MachineFunction &MF,
if (needsStackFrame(I, CSR, HRI))
SFBlocks.push_back(&I);
- DEBUG({
+ LLVM_DEBUG({
dbgs() << "Blocks needing SF: {";
for (auto &B : SFBlocks)
dbgs() << " " << printMBBReference(*B);
@@ -465,7 +465,7 @@ void HexagonFrameLowering::findShrunkPrologEpilog(MachineFunction &MF,
if (!PDomB)
break;
}
- DEBUG({
+ LLVM_DEBUG({
dbgs() << "Computed dom block: ";
if (DomB)
dbgs() << printMBBReference(*DomB);
@@ -483,11 +483,11 @@ void HexagonFrameLowering::findShrunkPrologEpilog(MachineFunction &MF,
// Make sure that DomB dominates PDomB and PDomB post-dominates DomB.
if (!MDT.dominates(DomB, PDomB)) {
- DEBUG(dbgs() << "Dom block does not dominate pdom block\n");
+ LLVM_DEBUG(dbgs() << "Dom block does not dominate pdom block\n");
return;
}
if (!MPT.dominates(PDomB, DomB)) {
- DEBUG(dbgs() << "PDom block does not post-dominate dom block\n");
+ LLVM_DEBUG(dbgs() << "PDom block does not post-dominate dom block\n");
return;
}
@@ -1396,7 +1396,7 @@ static void dump_registers(BitVector &Regs, const TargetRegisterInfo &TRI) {
bool HexagonFrameLowering::assignCalleeSavedSpillSlots(MachineFunction &MF,
const TargetRegisterInfo *TRI, std::vector<CalleeSavedInfo> &CSI) const {
- DEBUG(dbgs() << __func__ << " on " << MF.getName() << '\n');
+ LLVM_DEBUG(dbgs() << __func__ << " on " << MF.getName() << '\n');
MachineFrameInfo &MFI = MF.getFrameInfo();
BitVector SRegs(Hexagon::NUM_TARGET_REGS);
@@ -1406,15 +1406,16 @@ bool HexagonFrameLowering::assignCalleeSavedSpillSlots(MachineFunction &MF,
// (1) For each callee-saved register, add that register and all of its
// sub-registers to SRegs.
- DEBUG(dbgs() << "Initial CS registers: {");
+ LLVM_DEBUG(dbgs() << "Initial CS registers: {");
for (unsigned i = 0, n = CSI.size(); i < n; ++i) {
unsigned R = CSI[i].getReg();
- DEBUG(dbgs() << ' ' << printReg(R, TRI));
+ LLVM_DEBUG(dbgs() << ' ' << printReg(R, TRI));
for (MCSubRegIterator SR(R, TRI, true); SR.isValid(); ++SR)
SRegs[*SR] = true;
}
- DEBUG(dbgs() << " }\n");
- DEBUG(dbgs() << "SRegs.1: "; dump_registers(SRegs, *TRI); dbgs() << "\n");
+ LLVM_DEBUG(dbgs() << " }\n");
+ LLVM_DEBUG(dbgs() << "SRegs.1: "; dump_registers(SRegs, *TRI);
+ dbgs() << "\n");
// (2) For each reserved register, remove that register and all of its
// sub- and super-registers from SRegs.
@@ -1424,8 +1425,10 @@ bool HexagonFrameLowering::assignCalleeSavedSpillSlots(MachineFunction &MF,
for (MCSuperRegIterator SR(R, TRI, true); SR.isValid(); ++SR)
SRegs[*SR] = false;
}
- DEBUG(dbgs() << "Res: "; dump_registers(Reserved, *TRI); dbgs() << "\n");
- DEBUG(dbgs() << "SRegs.2: "; dump_registers(SRegs, *TRI); dbgs() << "\n");
+ LLVM_DEBUG(dbgs() << "Res: "; dump_registers(Reserved, *TRI);
+ dbgs() << "\n");
+ LLVM_DEBUG(dbgs() << "SRegs.2: "; dump_registers(SRegs, *TRI);
+ dbgs() << "\n");
// (3) Collect all registers that have at least one sub-register in SRegs,
// and also have no sub-registers that are reserved. These will be the can-
@@ -1446,11 +1449,13 @@ bool HexagonFrameLowering::assignCalleeSavedSpillSlots(MachineFunction &MF,
break;
}
}
- DEBUG(dbgs() << "TmpSup: "; dump_registers(TmpSup, *TRI); dbgs() << "\n");
+ LLVM_DEBUG(dbgs() << "TmpSup: "; dump_registers(TmpSup, *TRI);
+ dbgs() << "\n");
// (4) Include all super-registers found in (3) into SRegs.
SRegs |= TmpSup;
- DEBUG(dbgs() << "SRegs.4: "; dump_registers(SRegs, *TRI); dbgs() << "\n");
+ LLVM_DEBUG(dbgs() << "SRegs.4: "; dump_registers(SRegs, *TRI);
+ dbgs() << "\n");
// (5) For each register R in SRegs, if any super-register of R is in SRegs,
// remove R from SRegs.
@@ -1463,7 +1468,8 @@ bool HexagonFrameLowering::assignCalleeSavedSpillSlots(MachineFunction &MF,
break;
}
}
- DEBUG(dbgs() << "SRegs.5: "; dump_registers(SRegs, *TRI); dbgs() << "\n");
+ LLVM_DEBUG(dbgs() << "SRegs.5: "; dump_registers(SRegs, *TRI);
+ dbgs() << "\n");
// Now, for each register that has a fixed stack slot, create the stack
// object for it.
@@ -1501,7 +1507,7 @@ bool HexagonFrameLowering::assignCalleeSavedSpillSlots(MachineFunction &MF,
SRegs[R] = false;
}
- DEBUG({
+ LLVM_DEBUG({
dbgs() << "CS information: {";
for (unsigned i = 0, n = CSI.size(); i < n; ++i) {
int FI = CSI[i].getFrameIdx();
@@ -1706,11 +1712,6 @@ bool HexagonFrameLowering::expandStoreVec2(MachineBasicBlock &B,
for (auto R = B.begin(); R != It; ++R) {
Clobbers.clear();
LPR.stepForward(*R, Clobbers);
- // Dead defs are recorded in Clobbers, but are not automatically removed
- // from the live set.
- for (auto &C : Clobbers)
- if (C.second->isReg() && C.second->isDead())
- LPR.removeReg(C.first);
}
DebugLoc DL = MI->getDebugLoc();
@@ -1867,11 +1868,11 @@ bool HexagonFrameLowering::expandSpillMacros(MachineFunction &MF,
Changed |= expandCopy(B, I, MRI, HII, NewRegs);
break;
case Hexagon::STriw_pred:
- case Hexagon::STriw_mod:
+ case Hexagon::STriw_ctr:
Changed |= expandStoreInt(B, I, MRI, HII, NewRegs);
break;
case Hexagon::LDriw_pred:
- case Hexagon::LDriw_mod:
+ case Hexagon::LDriw_ctr:
Changed |= expandLoadInt(B, I, MRI, HII, NewRegs);
break;
case Hexagon::PS_vstorerq_ai:
@@ -1914,7 +1915,7 @@ void HexagonFrameLowering::determineCalleeSaves(MachineFunction &MF,
if (OptimizeSpillSlots && !isOptNone(MF))
optimizeSpillSlots(MF, NewRegs);
- // We need to reserve a a spill slot if scavenging could potentially require
+ // We need to reserve a spill slot if scavenging could potentially require
// spilling a scavenged register.
if (!NewRegs.empty() || mayOverflowFrameOffset(MF)) {
MachineFrameInfo &MFI = MF.getFrameInfo();
@@ -2026,8 +2027,8 @@ void HexagonFrameLowering::optimizeSpillSlots(MachineFunction &MF,
auto P = BlockIndexes.insert(
std::make_pair(&B, HexagonBlockRanges::InstrIndexMap(B)));
auto &IndexMap = P.first->second;
- DEBUG(dbgs() << "Index map for " << printMBBReference(B) << "\n"
- << IndexMap << '\n');
+ LLVM_DEBUG(dbgs() << "Index map for " << printMBBReference(B) << "\n"
+ << IndexMap << '\n');
for (auto &In : B) {
int LFI, SFI;
@@ -2134,7 +2135,7 @@ void HexagonFrameLowering::optimizeSpillSlots(MachineFunction &MF,
}
}
- DEBUG({
+ LLVM_DEBUG({
for (auto &P : FIRangeMap) {
dbgs() << "fi#" << P.first;
if (BadFIs.count(P.first))
@@ -2173,7 +2174,7 @@ void HexagonFrameLowering::optimizeSpillSlots(MachineFunction &MF,
}
}
- DEBUG({
+ LLVM_DEBUG({
dbgs() << "Block-to-FI map (* -- live-on-exit):\n";
for (auto &P : BlockFIMap) {
auto &FIs = P.second;
@@ -2200,16 +2201,16 @@ void HexagonFrameLowering::optimizeSpillSlots(MachineFunction &MF,
HexagonBlockRanges::InstrIndexMap &IM = F->second;
HexagonBlockRanges::RegToRangeMap LM = HBR.computeLiveMap(IM);
HexagonBlockRanges::RegToRangeMap DM = HBR.computeDeadMap(IM, LM);
- DEBUG(dbgs() << printMBBReference(B) << " dead map\n"
- << HexagonBlockRanges::PrintRangeMap(DM, HRI));
+ LLVM_DEBUG(dbgs() << printMBBReference(B) << " dead map\n"
+ << HexagonBlockRanges::PrintRangeMap(DM, HRI));
for (auto FI : BlockFIMap[&B]) {
if (BadFIs.count(FI))
continue;
- DEBUG(dbgs() << "Working on fi#" << FI << '\n');
+ LLVM_DEBUG(dbgs() << "Working on fi#" << FI << '\n');
HexagonBlockRanges::RangeList &RL = FIRangeMap[FI].Map[&B];
for (auto &Range : RL) {
- DEBUG(dbgs() << "--Examining range:" << RL << '\n');
+ LLVM_DEBUG(dbgs() << "--Examining range:" << RL << '\n');
if (!IndexType::isInstr(Range.start()) ||
!IndexType::isInstr(Range.end()))
continue;
@@ -2224,7 +2225,8 @@ void HexagonFrameLowering::optimizeSpillSlots(MachineFunction &MF,
auto *RC = HII.getRegClass(SI.getDesc(), 2, &HRI, MF);
// The this-> is needed to unconfuse MSVC.
unsigned FoundR = this->findPhysReg(MF, Range, IM, DM, RC);
- DEBUG(dbgs() << "Replacement reg:" << printReg(FoundR, &HRI) << '\n');
+ LLVM_DEBUG(dbgs() << "Replacement reg:" << printReg(FoundR, &HRI)
+ << '\n');
if (FoundR == 0)
continue;
#ifndef NDEBUG
diff --git a/lib/Target/Hexagon/HexagonGatherPacketize.cpp b/lib/Target/Hexagon/HexagonGatherPacketize.cpp
index 253f09d12839..63ec9c3d3124 100644
--- a/lib/Target/Hexagon/HexagonGatherPacketize.cpp
+++ b/lib/Target/Hexagon/HexagonGatherPacketize.cpp
@@ -62,7 +62,7 @@ bool HexagonGatherPacketize::runOnMachineFunction(MachineFunction &Fn) {
if (!EnableGatherPacketize)
return false;
auto &ST = Fn.getSubtarget<HexagonSubtarget>();
- bool HasV65 = ST.hasV65TOps();
+ bool HasV65 = ST.hasV65Ops();
bool UseHVX = ST.useHVXOps();
if (!(HasV65 & UseHVX))
return false;
diff --git a/lib/Target/Hexagon/HexagonGenInsert.cpp b/lib/Target/Hexagon/HexagonGenInsert.cpp
index c1841d735b8c..2582a021e956 100644
--- a/lib/Target/Hexagon/HexagonGenInsert.cpp
+++ b/lib/Target/Hexagon/HexagonGenInsert.cpp
@@ -55,6 +55,12 @@ static cl::opt<unsigned> VRegDistCutoff("insert-dist-cutoff", cl::init(30U),
cl::Hidden, cl::ZeroOrMore, cl::desc("Vreg distance cutoff for insert "
"generation."));
+// Limit the container sizes for extreme cases where we would otherwise run out of memory.
+static cl::opt<unsigned> MaxORLSize("insert-max-orl", cl::init(4096),
+ cl::Hidden, cl::ZeroOrMore, cl::desc("Maximum size of OrderedRegisterList"));
+static cl::opt<unsigned> MaxIFMSize("insert-max-ifmap", cl::init(1024),
+ cl::Hidden, cl::ZeroOrMore, cl::desc("Maximum size of IFMap"));
+
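Both caps are ordinary hidden cl::opt flags, so they can be tuned from the llc command line without rebuilding, for example (hypothetical invocation; the option names come from the declarations above):

    llc -insert-max-orl=2048 -insert-max-ifmap=512 input.ll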
static cl::opt<bool> OptTiming("insert-timing", cl::init(false), cl::Hidden,
cl::ZeroOrMore, cl::desc("Enable timing of insert generation"));
static cl::opt<bool> OptTimingDetail("insert-timing-detail", cl::init(false),
@@ -86,6 +92,7 @@ namespace {
struct RegisterSet : private BitVector {
RegisterSet() = default;
explicit RegisterSet(unsigned s, bool t = false) : BitVector(s, t) {}
+ RegisterSet(const RegisterSet &RS) : BitVector(RS) {}
using BitVector::clear;
@@ -370,9 +377,11 @@ namespace {
class OrderedRegisterList {
using ListType = std::vector<unsigned>;
+ const unsigned MaxSize;
public:
- OrderedRegisterList(const RegisterOrdering &RO) : Ord(RO) {}
+ OrderedRegisterList(const RegisterOrdering &RO)
+ : MaxSize(MaxORLSize), Ord(RO) {}
void insert(unsigned VR);
void remove(unsigned VR);
@@ -433,12 +442,17 @@ void OrderedRegisterList::insert(unsigned VR) {
Seq.push_back(VR);
else
Seq.insert(L, VR);
+
+ unsigned S = Seq.size();
+ if (S > MaxSize)
+ Seq.resize(MaxSize);
+ assert(Seq.size() <= MaxSize);
}
void OrderedRegisterList::remove(unsigned VR) {
iterator L = std::lower_bound(Seq.begin(), Seq.end(), VR, Ord);
- assert(L != Seq.end());
- Seq.erase(L);
+ if (L != Seq.end())
+ Seq.erase(L);
}
namespace {
@@ -618,7 +632,7 @@ void HexagonGenInsert::buildOrderingBT(RegisterOrdering &RB,
SortableVectorType VRs;
for (RegisterOrdering::iterator I = RB.begin(), E = RB.end(); I != E; ++I)
VRs.push_back(I->first);
- std::sort(VRs.begin(), VRs.end(), LexCmp);
+ llvm::sort(VRs.begin(), VRs.end(), LexCmp);
// Transfer the results to the outgoing register ordering.
for (unsigned i = 0, n = VRs.size(); i < n; ++i)
RO.insert(std::make_pair(VRs[i], i));
@@ -950,6 +964,9 @@ void HexagonGenInsert::collectInBlock(MachineBasicBlock *B,
continue;
findRecordInsertForms(VR, AVs);
+ // Stop if the map size is too large.
+ if (IFMap.size() > MaxIFMSize)
+ return;
}
}
diff --git a/lib/Target/Hexagon/HexagonGenMux.cpp b/lib/Target/Hexagon/HexagonGenMux.cpp
index 5a001d6ed9c1..e5af96468af1 100644
--- a/lib/Target/Hexagon/HexagonGenMux.cpp
+++ b/lib/Target/Hexagon/HexagonGenMux.cpp
@@ -40,6 +40,7 @@
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/MathExtras.h"
#include <algorithm>
#include <cassert>
@@ -56,6 +57,11 @@ namespace llvm {
} // end namespace llvm
+// Initialize this to 0 to always prefer generating mux by default.
+static cl::opt<unsigned> MinPredDist("hexagon-gen-mux-threshold", cl::Hidden,
+ cl::init(0), cl::desc("Minimum distance between predicate definition and "
+ "farther of the two predicated uses"));
+
namespace {
class HexagonGenMux : public MachineFunctionPass {
@@ -269,11 +275,13 @@ bool HexagonGenMux::genMuxInBlock(MachineBasicBlock &B) {
// There is now a complete definition of DR, i.e. we have the predicate
// register, the definition if-true, and definition if-false.
- // First, check if both definitions are far enough from the definition
+ // First, check if the definitions are far enough from the definition
// of the predicate register.
unsigned MinX = std::min(CI.TrueX, CI.FalseX);
unsigned MaxX = std::max(CI.TrueX, CI.FalseX);
- unsigned SearchX = (MaxX > 4) ? MaxX-4 : 0;
+ // Specifically, check if the predicate definition is within a prescribed
+ // distance from the farther of the two predicated instructions.
+ unsigned SearchX = (MaxX >= MinPredDist) ? MaxX-MinPredDist : 0;
bool NearDef = false;
for (unsigned X = SearchX; X < MaxX; ++X) {
const DefUseInfo &DU = DUM.lookup(X);
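With the default threshold of 0, SearchX == MaxX, so the scan loop never executes, NearDef stays false, and mux generation is never suppressed. Passing -hexagon-gen-mux-threshold=4 reproduces the previous hard-coded window of four slots below the farther of the two predicated uses.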
@@ -348,7 +356,7 @@ bool HexagonGenMux::genMuxInBlock(MachineBasicBlock &B) {
return false;
};
for (auto I = B.rbegin(), E = B.rend(); I != E; ++I) {
- if (I->isDebugValue())
+ if (I->isDebugInstr())
continue;
// This isn't 100% accurate, but it's safe.
// It won't detect (as a kill) a case like this
diff --git a/lib/Target/Hexagon/HexagonGenPredicate.cpp b/lib/Target/Hexagon/HexagonGenPredicate.cpp
index 9288ed03d4d2..c0d2de90467a 100644
--- a/lib/Target/Hexagon/HexagonGenPredicate.cpp
+++ b/lib/Target/Hexagon/HexagonGenPredicate.cpp
@@ -222,13 +222,12 @@ void HexagonGenPredicate::collectPredicateGPR(MachineFunction &MF) {
}
void HexagonGenPredicate::processPredicateGPR(const Register &Reg) {
- DEBUG(dbgs() << __func__ << ": "
- << printReg(Reg.R, TRI, Reg.S) << "\n");
+ LLVM_DEBUG(dbgs() << __func__ << ": " << printReg(Reg.R, TRI, Reg.S) << "\n");
using use_iterator = MachineRegisterInfo::use_iterator;
use_iterator I = MRI->use_begin(Reg.R), E = MRI->use_end();
if (I == E) {
- DEBUG(dbgs() << "Dead reg: " << printReg(Reg.R, TRI, Reg.S) << '\n');
+ LLVM_DEBUG(dbgs() << "Dead reg: " << printReg(Reg.R, TRI, Reg.S) << '\n');
MachineInstr *DefI = MRI->getVRegDef(Reg.R);
DefI->eraseFromParent();
return;
@@ -250,7 +249,7 @@ Register HexagonGenPredicate::getPredRegFor(const Register &Reg) {
if (F != G2P.end())
return F->second;
- DEBUG(dbgs() << __func__ << ": " << PrintRegister(Reg, *TRI));
+ LLVM_DEBUG(dbgs() << __func__ << ": " << PrintRegister(Reg, *TRI));
MachineInstr *DefI = MRI->getVRegDef(Reg.R);
assert(DefI);
unsigned Opc = DefI->getOpcode();
@@ -258,7 +257,7 @@ Register HexagonGenPredicate::getPredRegFor(const Register &Reg) {
assert(DefI->getOperand(0).isDef() && DefI->getOperand(1).isUse());
Register PR = DefI->getOperand(1);
G2P.insert(std::make_pair(Reg, PR));
- DEBUG(dbgs() << " -> " << PrintRegister(PR, *TRI) << '\n');
+ LLVM_DEBUG(dbgs() << " -> " << PrintRegister(PR, *TRI) << '\n');
return PR;
}
@@ -274,7 +273,8 @@ Register HexagonGenPredicate::getPredRegFor(const Register &Reg) {
BuildMI(B, std::next(DefIt), DL, TII->get(TargetOpcode::COPY), NewPR)
.addReg(Reg.R, 0, Reg.S);
G2P.insert(std::make_pair(Reg, Register(NewPR)));
- DEBUG(dbgs() << " -> !" << PrintRegister(Register(NewPR), *TRI) << '\n');
+ LLVM_DEBUG(dbgs() << " -> !" << PrintRegister(Register(NewPR), *TRI)
+ << '\n');
return Register(NewPR);
}
@@ -364,7 +364,7 @@ bool HexagonGenPredicate::isScalarPred(Register PredReg) {
}
bool HexagonGenPredicate::convertToPredForm(MachineInstr *MI) {
- DEBUG(dbgs() << __func__ << ": " << MI << " " << *MI);
+ LLVM_DEBUG(dbgs() << __func__ << ": " << MI << " " << *MI);
unsigned Opc = MI->getOpcode();
assert(isConvertibleToPredForm(MI));
@@ -426,7 +426,7 @@ bool HexagonGenPredicate::convertToPredForm(MachineInstr *MI) {
Register Pred = getPredRegFor(GPR);
MIB.addReg(Pred.R, 0, Pred.S);
}
- DEBUG(dbgs() << "generated: " << *MIB);
+ LLVM_DEBUG(dbgs() << "generated: " << *MIB);
// Generate a copy-out: NewGPR = NewPR, and replace all uses of OutR
// with NewGPR.
@@ -449,7 +449,7 @@ bool HexagonGenPredicate::convertToPredForm(MachineInstr *MI) {
}
bool HexagonGenPredicate::eliminatePredCopies(MachineFunction &MF) {
- DEBUG(dbgs() << __func__ << "\n");
+ LLVM_DEBUG(dbgs() << __func__ << "\n");
const TargetRegisterClass *PredRC = &Hexagon::PredRegsRegClass;
bool Changed = false;
VectOfInst Erase;
diff --git a/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/lib/Target/Hexagon/HexagonHardwareLoops.cpp
index 715fd52f3acd..0e33976a58ac 100644
--- a/lib/Target/Hexagon/HexagonHardwareLoops.cpp
+++ b/lib/Target/Hexagon/HexagonHardwareLoops.cpp
@@ -168,7 +168,7 @@ namespace {
}
};
- /// \brief Find the register that contains the loop controlling
+ /// Find the register that contains the loop controlling
/// induction variable.
/// If successful, it will return true and set the \p Reg, \p IVBump
/// and \p IVOp arguments. Otherwise it will return false.
@@ -183,19 +183,19 @@ namespace {
bool findInductionRegister(MachineLoop *L, unsigned &Reg,
int64_t &IVBump, MachineInstr *&IVOp) const;
- /// \brief Return the comparison kind for the specified opcode.
+ /// Return the comparison kind for the specified opcode.
Comparison::Kind getComparisonKind(unsigned CondOpc,
MachineOperand *InitialValue,
const MachineOperand *Endvalue,
int64_t IVBump) const;
- /// \brief Analyze the statements in a loop to determine if the loop
+ /// Analyze the statements in a loop to determine if the loop
/// has a computable trip count and, if so, return a value that represents
/// the trip count expression.
CountValue *getLoopTripCount(MachineLoop *L,
SmallVectorImpl<MachineInstr *> &OldInsts);
- /// \brief Return the expression that represents the number of times
+ /// Return the expression that represents the number of times
/// a loop iterates. The function takes the operands that represent the
/// loop start value, loop end value, and induction value. Based upon
/// these operands, the function attempts to compute the trip count.
@@ -206,64 +206,64 @@ namespace {
const MachineOperand *End, unsigned IVReg,
int64_t IVBump, Comparison::Kind Cmp) const;
- /// \brief Return true if the instruction is not valid within a hardware
+ /// Return true if the instruction is not valid within a hardware
/// loop.
bool isInvalidLoopOperation(const MachineInstr *MI,
bool IsInnerHWLoop) const;
- /// \brief Return true if the loop contains an instruction that inhibits
+ /// Return true if the loop contains an instruction that inhibits
/// using the hardware loop.
bool containsInvalidInstruction(MachineLoop *L, bool IsInnerHWLoop) const;
- /// \brief Given a loop, check if we can convert it to a hardware loop.
+ /// Given a loop, check if we can convert it to a hardware loop.
/// If so, then perform the conversion and return true.
bool convertToHardwareLoop(MachineLoop *L, bool &L0used, bool &L1used);
- /// \brief Return true if the instruction is now dead.
+ /// Return true if the instruction is now dead.
bool isDead(const MachineInstr *MI,
SmallVectorImpl<MachineInstr *> &DeadPhis) const;
- /// \brief Remove the instruction if it is now dead.
+ /// Remove the instruction if it is now dead.
void removeIfDead(MachineInstr *MI);
- /// \brief Make sure that the "bump" instruction executes before the
+ /// Make sure that the "bump" instruction executes before the
/// compare. We need that for the IV fixup, so that the compare
/// instruction would not use a bumped value that has not yet been
/// defined. If the instructions are out of order, try to reorder them.
bool orderBumpCompare(MachineInstr *BumpI, MachineInstr *CmpI);
- /// \brief Return true if MO and MI pair is visited only once. If visited
+ /// Return true if MO and MI pair is visited only once. If visited
/// more than once, this indicates there is recursion. In such a case,
/// return false.
bool isLoopFeeder(MachineLoop *L, MachineBasicBlock *A, MachineInstr *MI,
const MachineOperand *MO,
LoopFeederMap &LoopFeederPhi) const;
- /// \brief Return true if the Phi may generate a value that may underflow,
+ /// Return true if the Phi may generate a value that may underflow,
/// or may wrap.
bool phiMayWrapOrUnderflow(MachineInstr *Phi, const MachineOperand *EndVal,
MachineBasicBlock *MBB, MachineLoop *L,
LoopFeederMap &LoopFeederPhi) const;
- /// \brief Return true if the induction variable may underflow an unsigned
+ /// Return true if the induction variable may underflow an unsigned
/// value in the first iteration.
bool loopCountMayWrapOrUnderFlow(const MachineOperand *InitVal,
const MachineOperand *EndVal,
MachineBasicBlock *MBB, MachineLoop *L,
LoopFeederMap &LoopFeederPhi) const;
- /// \brief Check if the given operand has a compile-time known constant
+ /// Check if the given operand has a compile-time known constant
/// value. Return true if yes, and false otherwise. When returning true, set
/// Val to the corresponding constant value.
bool checkForImmediate(const MachineOperand &MO, int64_t &Val) const;
- /// \brief Check if the operand has a compile-time known constant value.
+ /// Check if the operand has a compile-time known constant value.
bool isImmediate(const MachineOperand &MO) const {
int64_t V;
return checkForImmediate(MO, V);
}
- /// \brief Return the immediate for the specified operand.
+ /// Return the immediate for the specified operand.
int64_t getImmediate(const MachineOperand &MO) const {
int64_t V;
if (!checkForImmediate(MO, V))
@@ -271,12 +271,12 @@ namespace {
return V;
}
- /// \brief Reset the given machine operand to now refer to a new immediate
+ /// Reset the given machine operand to now refer to a new immediate
/// value. Assumes that the operand was already referencing an immediate
/// value, either directly, or via a register.
void setImmediate(MachineOperand &MO, int64_t Val);
- /// \brief Fix the data flow of the induction variable.
+ /// Fix the data flow of the induction variable.
/// The desired flow is: phi ---> bump -+-> comparison-in-latch.
///                                     |
///                                     +-> back to phi
@@ -297,7 +297,7 @@ namespace {
/// cannot be adjusted to reflect the post-bump value.
bool fixupInductionVariable(MachineLoop *L);
- /// \brief Given a loop, if it does not have a preheader, create one.
+ /// Given a loop, if it does not have a preheader, create one.
/// Return the block that is the preheader.
MachineBasicBlock *createPreheaderForLoop(MachineLoop *L);
};
@@ -307,7 +307,7 @@ namespace {
int HexagonHardwareLoops::Counter = 0;
#endif
- /// \brief Abstraction for a trip count of a loop. A smaller version
+ /// Abstraction for a trip count of a loop. A smaller version
/// of the MachineOperand class without the concerns of changing the
/// operand representation.
class CountValue {
@@ -376,7 +376,7 @@ FunctionPass *llvm::createHexagonHardwareLoops() {
}
bool HexagonHardwareLoops::runOnMachineFunction(MachineFunction &MF) {
- DEBUG(dbgs() << "********* Hexagon Hardware Loops *********\n");
+ LLVM_DEBUG(dbgs() << "********* Hexagon Hardware Loops *********\n");
if (skipFunction(MF.getFunction()))
return false;
@@ -556,7 +556,7 @@ HexagonHardwareLoops::getComparisonKind(unsigned CondOpc,
return Cmp;
}
-/// \brief Analyze the statements in a loop to determine if the loop has
+/// Analyze the statements in a loop to determine if the loop has
/// a computable trip count and, if so, return a value that represents
/// the trip count expression.
///
@@ -718,7 +718,7 @@ CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L,
return computeCount(L, InitialValue, EndValue, IVReg, IVBump, Cmp);
}
-/// \brief Helper function that returns the expression that represents the
+/// Helper function that returns the expression that represents the
/// number of times a loop iterates. The function takes the operands that
/// represent the loop start value, loop end value, and induction value.
/// Based upon these operands, the function attempts to compute the trip count.
@@ -928,6 +928,7 @@ CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop,
// 'Add' instruction.
const MachineInstr *EndValInstr = MRI->getVRegDef(End->getReg());
if (EndValInstr->getOpcode() == Hexagon::A2_addi &&
+ EndValInstr->getOperand(1).getSubReg() == 0 &&
EndValInstr->getOperand(2).getImm() == StartV) {
DistR = EndValInstr->getOperand(1).getReg();
} else {
@@ -984,7 +985,7 @@ CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop,
return new CountValue(CountValue::CV_Register, CountR, CountSR);
}
-/// \brief Return true if the operation is invalid within hardware loop.
+/// Return true if the operation is invalid within a hardware loop.
bool HexagonHardwareLoops::isInvalidLoopOperation(const MachineInstr *MI,
bool IsInnerHWLoop) const {
// Call is not allowed because the callee may use a hardware loop except for
@@ -1006,19 +1007,20 @@ bool HexagonHardwareLoops::isInvalidLoopOperation(const MachineInstr *MI,
return false;
}
-/// \brief Return true if the loop contains an instruction that inhibits
+/// Return true if the loop contains an instruction that inhibits
/// the use of the hardware loop instruction.
bool HexagonHardwareLoops::containsInvalidInstruction(MachineLoop *L,
bool IsInnerHWLoop) const {
const std::vector<MachineBasicBlock *> &Blocks = L->getBlocks();
- DEBUG(dbgs() << "\nhw_loop head, " << printMBBReference(*Blocks[0]));
+ LLVM_DEBUG(dbgs() << "\nhw_loop head, " << printMBBReference(*Blocks[0]));
for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
MachineBasicBlock *MBB = Blocks[i];
for (MachineBasicBlock::iterator
MII = MBB->begin(), E = MBB->end(); MII != E; ++MII) {
const MachineInstr *MI = &*MII;
if (isInvalidLoopOperation(MI, IsInnerHWLoop)) {
- DEBUG(dbgs()<< "\nCannot convert to hw_loop due to:"; MI->dump(););
+ LLVM_DEBUG(dbgs() << "\nCannot convert to hw_loop due to:";
+ MI->dump(););
return true;
}
}
@@ -1026,7 +1028,7 @@ bool HexagonHardwareLoops::containsInvalidInstruction(MachineLoop *L,
return false;
}
-/// \brief Returns true if the instruction is dead. This was essentially
+/// Returns true if the instruction is dead. This was essentially
/// copied from DeadMachineInstructionElim::isDead, but with special cases
/// for inline asm, physical registers and instructions with side effects
/// removed.
@@ -1083,7 +1085,7 @@ void HexagonHardwareLoops::removeIfDead(MachineInstr *MI) {
SmallVector<MachineInstr*, 1> DeadPhis;
if (isDead(MI, DeadPhis)) {
- DEBUG(dbgs() << "HW looping will remove: " << *MI);
+ LLVM_DEBUG(dbgs() << "HW looping will remove: " << *MI);
// It is possible that some DBG_VALUE instructions refer to this
// instruction. Examine each def operand for such references;
@@ -1112,7 +1114,7 @@ void HexagonHardwareLoops::removeIfDead(MachineInstr *MI) {
}
}
-/// \brief Check if the loop is a candidate for converting to a hardware
+/// Check if the loop is a candidate for converting to a hardware
/// loop. If so, then perform the transformation.
///
/// This function works on innermost loops first. A loop can be converted
@@ -1237,7 +1239,7 @@ bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L,
LoopStart = TopBlock;
// Convert the loop to a hardware loop.
- DEBUG(dbgs() << "Change to hardware loop at "; L->dump());
+ LLVM_DEBUG(dbgs() << "Change to hardware loop at "; L->dump());
DebugLoc DL;
if (InsertPos != Preheader->end())
DL = InsertPos->getDebugLoc();
@@ -1367,7 +1369,7 @@ bool HexagonHardwareLoops::isLoopFeeder(MachineLoop *L, MachineBasicBlock *A,
LoopFeederMap &LoopFeederPhi) const {
if (LoopFeederPhi.find(MO->getReg()) == LoopFeederPhi.end()) {
const std::vector<MachineBasicBlock *> &Blocks = L->getBlocks();
- DEBUG(dbgs() << "\nhw_loop head, " << printMBBReference(*Blocks[0]));
+ LLVM_DEBUG(dbgs() << "\nhw_loop head, " << printMBBReference(*Blocks[0]));
// Ignore all BBs that form Loop.
for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
MachineBasicBlock *MBB = Blocks[i];
@@ -1768,16 +1770,16 @@ bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) {
for (unsigned i = 1, n = PredDef->getNumOperands(); i < n; ++i) {
MachineOperand &MO = PredDef->getOperand(i);
if (MO.isReg() && MO.getReg() == RB.first) {
- DEBUG(dbgs() << "\n DefMI(" << i << ") = "
- << *(MRI->getVRegDef(I->first)));
+ LLVM_DEBUG(dbgs() << "\n DefMI(" << i
+ << ") = " << *(MRI->getVRegDef(I->first)));
if (IndI)
return false;
IndI = MRI->getVRegDef(I->first);
IndMO = &MO;
} else if (MO.isReg()) {
- DEBUG(dbgs() << "\n DefMI(" << i << ") = "
- << *(MRI->getVRegDef(MO.getReg())));
+ LLVM_DEBUG(dbgs() << "\n DefMI(" << i
+ << ") = " << *(MRI->getVRegDef(MO.getReg())));
if (nonIndI)
return false;
diff --git a/lib/Target/Hexagon/HexagonHazardRecognizer.cpp b/lib/Target/Hexagon/HexagonHazardRecognizer.cpp
index 036b18678709..44f1f554c662 100644
--- a/lib/Target/Hexagon/HexagonHazardRecognizer.cpp
+++ b/lib/Target/Hexagon/HexagonHazardRecognizer.cpp
@@ -26,11 +26,13 @@ using namespace llvm;
#define DEBUG_TYPE "post-RA-sched"
void HexagonHazardRecognizer::Reset() {
- DEBUG(dbgs() << "Reset hazard recognizer\n");
+ LLVM_DEBUG(dbgs() << "Reset hazard recognizer\n");
Resources->clearResources();
PacketNum = 0;
UsesDotCur = nullptr;
DotCurPNum = -1;
+ UsesLoad = false;
+ PrefVectorStoreNew = nullptr;
RegDefs.clear();
}
@@ -41,7 +43,7 @@ HexagonHazardRecognizer::getHazardType(SUnit *SU, int stalls) {
return NoHazard;
if (!Resources->canReserveResources(*MI)) {
- DEBUG(dbgs() << "*** Hazard in cycle " << PacketNum << ", " << *MI);
+ LLVM_DEBUG(dbgs() << "*** Hazard in cycle " << PacketNum << ", " << *MI);
HazardType RetVal = Hazard;
if (TII->mayBeNewStore(*MI)) {
// Make sure the register to be stored is defined by an instruction in the
@@ -57,14 +59,16 @@ HexagonHazardRecognizer::getHazardType(SUnit *SU, int stalls) {
MI->getDebugLoc());
if (Resources->canReserveResources(*NewMI))
RetVal = NoHazard;
- DEBUG(dbgs() << "*** Try .new version? " << (RetVal == NoHazard) << "\n");
+ LLVM_DEBUG(dbgs() << "*** Try .new version? " << (RetVal == NoHazard)
+ << "\n");
MF->DeleteMachineInstr(NewMI);
}
return RetVal;
}
if (SU == UsesDotCur && DotCurPNum != (int)PacketNum) {
- DEBUG(dbgs() << "*** .cur Hazard in cycle " << PacketNum << ", " << *MI);
+ LLVM_DEBUG(dbgs() << "*** .cur Hazard in cycle " << PacketNum << ", "
+ << *MI);
return Hazard;
}
@@ -72,21 +76,33 @@ HexagonHazardRecognizer::getHazardType(SUnit *SU, int stalls) {
}
void HexagonHazardRecognizer::AdvanceCycle() {
- DEBUG(dbgs() << "Advance cycle, clear state\n");
+ LLVM_DEBUG(dbgs() << "Advance cycle, clear state\n");
Resources->clearResources();
if (DotCurPNum != -1 && DotCurPNum != (int)PacketNum) {
UsesDotCur = nullptr;
DotCurPNum = -1;
}
+ UsesLoad = false;
+ PrefVectorStoreNew = nullptr;
PacketNum++;
RegDefs.clear();
}
-/// If a packet contains a dot cur instruction, then we may prefer the
-/// instruction that can use the dot cur result. Or, if the use
-/// isn't scheduled in the same packet, then prefer other instructions
-/// in the subsequent packet.
+/// Handle the cases when we prefer one instruction over another. Case 1 - we
+/// prefer not to generate multiple loads in the packet to avoid a potential
+/// bank conflict. Case 2 - if a packet contains a dot cur instruction, then we
+/// prefer the instruction that can use the dot cur result. However, if the use
+/// is not scheduled in the same packet, then prefer other instructions in the
+/// subsequent packet. Case 3 - we prefer a vector store that can be converted
+/// to a .new store. The packetizer will not generate the .new store if the
+/// store doesn't have resources to fit in the packet (but the .new store may
+/// have resources). We attempt to schedule the store as soon as possible to
+/// help packetize the two instructions together.
bool HexagonHazardRecognizer::ShouldPreferAnother(SUnit *SU) {
+ if (PrefVectorStoreNew != nullptr && PrefVectorStoreNew != SU)
+ return true;
+ if (UsesLoad && SU->isInstr() && SU->getInstr()->mayLoad())
+ return true;
return UsesDotCur && ((SU == UsesDotCur) ^ (DotCurPNum == (int)PacketNum));
}
@@ -118,17 +134,16 @@ void HexagonHazardRecognizer::EmitInstruction(SUnit *SU) {
}
else
Resources->reserveResources(*MI);
- DEBUG(dbgs() << " Add instruction " << *MI);
+ LLVM_DEBUG(dbgs() << " Add instruction " << *MI);
// When scheduling a dot cur instruction, check if there is an instruction
// that can use the dot cur in the same packet. If so, we'll attempt to
- // schedule it before other instructions. We only do this if the use has
- // the same height as the dot cur. Otherwise, we may miss scheduling an
- // instruction with a greater height, which is more important.
+ // schedule it before other instructions. We only do this if the load has a
+ // single zero-latency use.
if (TII->mayBeCurLoad(*MI))
for (auto &S : SU->Succs)
if (S.isAssignedRegDep() && S.getLatency() == 0 &&
- SU->getHeight() == S.getSUnit()->getHeight()) {
+ S.getSUnit()->NumPredsLeft == 1) {
UsesDotCur = S.getSUnit();
DotCurPNum = PacketNum;
break;
@@ -137,4 +152,15 @@ void HexagonHazardRecognizer::EmitInstruction(SUnit *SU) {
UsesDotCur = nullptr;
DotCurPNum = -1;
}
+
+ UsesLoad = MI->mayLoad();
+
+ if (TII->isHVXVec(*MI) && !MI->mayLoad() && !MI->mayStore())
+ for (auto &S : SU->Succs)
+ if (S.isAssignedRegDep() && S.getLatency() == 0 &&
+ TII->mayBeNewStore(*S.getSUnit()->getInstr()) &&
+ Resources->canReserveResources(*S.getSUnit()->getInstr())) {
+ PrefVectorStoreNew = S.getSUnit();
+ break;
+ }
}
diff --git a/lib/Target/Hexagon/HexagonHazardRecognizer.h b/lib/Target/Hexagon/HexagonHazardRecognizer.h
index 70efcb7a9f76..2874d73ce819 100644
--- a/lib/Target/Hexagon/HexagonHazardRecognizer.h
+++ b/lib/Target/Hexagon/HexagonHazardRecognizer.h
@@ -23,13 +23,21 @@ namespace llvm {
class HexagonHazardRecognizer : public ScheduleHazardRecognizer {
DFAPacketizer *Resources;
const HexagonInstrInfo *TII;
- unsigned PacketNum;
+ unsigned PacketNum = 0;
// Whether the packet contains a potential dot cur instruction. This is
// used for the scheduling priority function.
- SUnit *UsesDotCur;
+ SUnit *UsesDotCur = nullptr;
// The packet number when a dot cur is emitted. If its use is not generated
// in the same packet, then try to wait another cycle before emitting.
- int DotCurPNum;
+ int DotCurPNum = -1;
+ // Whether the packet contains a load. Used to avoid scheduling another load, if possible.
+ bool UsesLoad = false;
+ // Check if we should prefer a vector store that will become a .new version.
+ // The .new store uses different resources than a normal store, and the
+ // packetizer will not generate the .new if the regular store does not have
+ // resources available (even if the .new version does). To help, the scheduler
+ // attempts to schedule the .new as soon as possible in the packet.
+ SUnit *PrefVectorStoreNew = nullptr;
// The set of registers defined by instructions in the current packet.
SmallSet<unsigned, 8> RegDefs;
@@ -37,8 +45,7 @@ public:
HexagonHazardRecognizer(const InstrItineraryData *II,
const HexagonInstrInfo *HII,
const HexagonSubtarget &ST)
- : Resources(ST.createDFAPacketizer(II)), TII(HII), PacketNum(0),
- UsesDotCur(nullptr), DotCurPNum(-1) { }
+ : Resources(ST.createDFAPacketizer(II)), TII(HII) { }
~HexagonHazardRecognizer() override {
if (Resources)
diff --git a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
index a6ac4e3df745..efb4c2eb0fc3 100644
--- a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
+++ b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
@@ -64,51 +64,6 @@ FunctionPass *createHexagonISelDag(HexagonTargetMachine &TM,
}
}
-// Intrinsics that return a a predicate.
-static bool doesIntrinsicReturnPredicate(unsigned ID) {
- switch (ID) {
- default:
- return false;
- case Intrinsic::hexagon_C2_cmpeq:
- case Intrinsic::hexagon_C2_cmpgt:
- case Intrinsic::hexagon_C2_cmpgtu:
- case Intrinsic::hexagon_C2_cmpgtup:
- case Intrinsic::hexagon_C2_cmpgtp:
- case Intrinsic::hexagon_C2_cmpeqp:
- case Intrinsic::hexagon_C2_bitsset:
- case Intrinsic::hexagon_C2_bitsclr:
- case Intrinsic::hexagon_C2_cmpeqi:
- case Intrinsic::hexagon_C2_cmpgti:
- case Intrinsic::hexagon_C2_cmpgtui:
- case Intrinsic::hexagon_C2_cmpgei:
- case Intrinsic::hexagon_C2_cmpgeui:
- case Intrinsic::hexagon_C2_cmplt:
- case Intrinsic::hexagon_C2_cmpltu:
- case Intrinsic::hexagon_C2_bitsclri:
- case Intrinsic::hexagon_C2_and:
- case Intrinsic::hexagon_C2_or:
- case Intrinsic::hexagon_C2_xor:
- case Intrinsic::hexagon_C2_andn:
- case Intrinsic::hexagon_C2_not:
- case Intrinsic::hexagon_C2_orn:
- case Intrinsic::hexagon_C2_pxfer_map:
- case Intrinsic::hexagon_C2_any8:
- case Intrinsic::hexagon_C2_all8:
- case Intrinsic::hexagon_A2_vcmpbeq:
- case Intrinsic::hexagon_A2_vcmpbgtu:
- case Intrinsic::hexagon_A2_vcmpheq:
- case Intrinsic::hexagon_A2_vcmphgt:
- case Intrinsic::hexagon_A2_vcmphgtu:
- case Intrinsic::hexagon_A2_vcmpweq:
- case Intrinsic::hexagon_A2_vcmpwgt:
- case Intrinsic::hexagon_A2_vcmpwgtu:
- case Intrinsic::hexagon_C2_tfrrp:
- case Intrinsic::hexagon_S2_tstbit_i:
- case Intrinsic::hexagon_S2_tstbit_r:
- return true;
- }
-}
-
void HexagonDAGToDAGISel::SelectIndexedLoad(LoadSDNode *LD, const SDLoc &dl) {
SDValue Chain = LD->getChain();
SDValue Base = LD->getBasePtr();
@@ -138,12 +93,18 @@ void HexagonDAGToDAGISel::SelectIndexedLoad(LoadSDNode *LD, const SDLoc &dl) {
Opcode = IsValidInc ? Hexagon::L2_loadrh_pi : Hexagon::L2_loadrh_io;
break;
case MVT::i32:
+ case MVT::f32:
+ case MVT::v2i16:
+ case MVT::v4i8:
Opcode = IsValidInc ? Hexagon::L2_loadri_pi : Hexagon::L2_loadri_io;
break;
case MVT::i64:
+ case MVT::f64:
+ case MVT::v2i32:
+ case MVT::v4i16:
+ case MVT::v8i8:
Opcode = IsValidInc ? Hexagon::L2_loadrd_pi : Hexagon::L2_loadrd_io;
break;
- // 64B
case MVT::v64i8:
case MVT::v32i16:
case MVT::v16i32:
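All of the new cases simply reuse the existing scalar opcodes: every 32-bit-wide type (f32, v2i16, v4i8) maps to L2_loadri and every 64-bit-wide type (f64, v2i32, v4i16, v8i8) maps to L2_loadrd, since the load instruction only depends on the access width, not on how the bits are interpreted.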
@@ -223,7 +184,6 @@ void HexagonDAGToDAGISel::SelectIndexedLoad(LoadSDNode *LD, const SDLoc &dl) {
CurDAG->RemoveDeadNode(LD);
}
-
MachineSDNode *HexagonDAGToDAGISel::LoadInstrForLoadIntrinsic(SDNode *IntN) {
if (IntN->getOpcode() != ISD::INTRINSIC_W_CHAIN)
return nullptr;
@@ -241,35 +201,14 @@ MachineSDNode *HexagonDAGToDAGISel::LoadInstrForLoadIntrinsic(SDNode *IntN) {
};
auto FLC = LoadPciMap.find(IntNo);
if (FLC != LoadPciMap.end()) {
- SDNode *Mod = CurDAG->getMachineNode(Hexagon::A2_tfrrcr, dl, MVT::i32,
- IntN->getOperand(4));
EVT ValTy = (IntNo == Intrinsic::hexagon_circ_ldd) ? MVT::i64 : MVT::i32;
EVT RTys[] = { ValTy, MVT::i32, MVT::Other };
// Operands: { Base, Increment, Modifier, Chain }
auto Inc = cast<ConstantSDNode>(IntN->getOperand(5));
SDValue I = CurDAG->getTargetConstant(Inc->getSExtValue(), dl, MVT::i32);
MachineSDNode *Res = CurDAG->getMachineNode(FLC->second, dl, RTys,
- { IntN->getOperand(2), I, SDValue(Mod,0), IntN->getOperand(0) });
- return Res;
- }
-
- static std::map<unsigned,unsigned> LoadPbrMap = {
- { Intrinsic::hexagon_brev_ldb, Hexagon::L2_loadrb_pbr },
- { Intrinsic::hexagon_brev_ldub, Hexagon::L2_loadrub_pbr },
- { Intrinsic::hexagon_brev_ldh, Hexagon::L2_loadrh_pbr },
- { Intrinsic::hexagon_brev_lduh, Hexagon::L2_loadruh_pbr },
- { Intrinsic::hexagon_brev_ldw, Hexagon::L2_loadri_pbr },
- { Intrinsic::hexagon_brev_ldd, Hexagon::L2_loadrd_pbr },
- };
- auto FLB = LoadPbrMap.find(IntNo);
- if (FLB != LoadPbrMap.end()) {
- SDNode *Mod = CurDAG->getMachineNode(Hexagon::A2_tfrrcr, dl, MVT::i32,
- IntN->getOperand(4));
- EVT ValTy = (IntNo == Intrinsic::hexagon_brev_ldd) ? MVT::i64 : MVT::i32;
- EVT RTys[] = { ValTy, MVT::i32, MVT::Other };
- // Operands: { Base, Modifier, Chain }
- MachineSDNode *Res = CurDAG->getMachineNode(FLB->second, dl, RTys,
- { IntN->getOperand(2), SDValue(Mod,0), IntN->getOperand(0) });
+ { IntN->getOperand(2), I, IntN->getOperand(4),
+ IntN->getOperand(0) });
return Res;
}
@@ -343,14 +282,10 @@ bool HexagonDAGToDAGISel::tryLoadOfLoadIntrinsic(LoadSDNode *N) {
// a sign-extending intrinsic into (or the other way around).
ISD::LoadExtType IntExt;
switch (cast<ConstantSDNode>(C->getOperand(1))->getZExtValue()) {
- case Intrinsic::hexagon_brev_ldub:
- case Intrinsic::hexagon_brev_lduh:
case Intrinsic::hexagon_circ_ldub:
case Intrinsic::hexagon_circ_lduh:
IntExt = ISD::ZEXTLOAD;
break;
- case Intrinsic::hexagon_brev_ldw:
- case Intrinsic::hexagon_brev_ldd:
case Intrinsic::hexagon_circ_ldw:
case Intrinsic::hexagon_circ_ldd:
IntExt = ISD::NON_EXTLOAD;
@@ -378,6 +313,134 @@ bool HexagonDAGToDAGISel::tryLoadOfLoadIntrinsic(LoadSDNode *N) {
CurDAG->RemoveDeadNode(C);
return true;
}
+ return false;
+}
+
+// Convert the bit-reverse load intrinsic to the appropriate target instruction.
+bool HexagonDAGToDAGISel::SelectBrevLdIntrinsic(SDNode *IntN) {
+ if (IntN->getOpcode() != ISD::INTRINSIC_W_CHAIN)
+ return false;
+
+ const SDLoc &dl(IntN);
+ unsigned IntNo = cast<ConstantSDNode>(IntN->getOperand(1))->getZExtValue();
+
+ static const std::map<unsigned, unsigned> LoadBrevMap = {
+ { Intrinsic::hexagon_L2_loadrb_pbr, Hexagon::L2_loadrb_pbr },
+ { Intrinsic::hexagon_L2_loadrub_pbr, Hexagon::L2_loadrub_pbr },
+ { Intrinsic::hexagon_L2_loadrh_pbr, Hexagon::L2_loadrh_pbr },
+ { Intrinsic::hexagon_L2_loadruh_pbr, Hexagon::L2_loadruh_pbr },
+ { Intrinsic::hexagon_L2_loadri_pbr, Hexagon::L2_loadri_pbr },
+ { Intrinsic::hexagon_L2_loadrd_pbr, Hexagon::L2_loadrd_pbr }
+ };
+ auto FLI = LoadBrevMap.find(IntNo);
+ if (FLI != LoadBrevMap.end()) {
+ EVT ValTy =
+ (IntNo == Intrinsic::hexagon_L2_loadrd_pbr) ? MVT::i64 : MVT::i32;
+ EVT RTys[] = { ValTy, MVT::i32, MVT::Other };
+ // Operands of Intrinsic: {chain, enum ID of intrinsic, baseptr,
+ // modifier}.
+ // Operands of target instruction: { Base, Modifier, Chain }.
+ MachineSDNode *Res = CurDAG->getMachineNode(
+ FLI->second, dl, RTys,
+ {IntN->getOperand(2), IntN->getOperand(3), IntN->getOperand(0)});
+
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = cast<MemIntrinsicSDNode>(IntN)->getMemOperand();
+ Res->setMemRefs(MemOp, MemOp + 1);
+
+ ReplaceUses(SDValue(IntN, 0), SDValue(Res, 0));
+ ReplaceUses(SDValue(IntN, 1), SDValue(Res, 1));
+ ReplaceUses(SDValue(IntN, 2), SDValue(Res, 2));
+ CurDAG->RemoveDeadNode(IntN);
+ return true;
+ }
+ return false;
+}
+
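The pbr forms implement bit-reversed post-increment addressing, the access order used by FFT-style kernels. As a rough model (a standalone C++ sketch, not part of the patch), the element index advances with its low log2(N) bits mirrored:

    // Reverse the low LogN bits of I; the buffer is assumed to hold a
    // power-of-2 number of elements.
    static unsigned brevIndex(unsigned I, unsigned LogN) {
      unsigned R = 0;
      for (unsigned B = 0; B < LogN; ++B)
        R |= ((I >> B) & 1u) << (LogN - 1 - B);
      return R;
    }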
+/// Generate a machine instruction node for the new circular buffer intrinsics.
+/// The new versions use a CSx register instead of the K field.
+bool HexagonDAGToDAGISel::SelectNewCircIntrinsic(SDNode *IntN) {
+ if (IntN->getOpcode() != ISD::INTRINSIC_W_CHAIN)
+ return false;
+
+ SDLoc DL(IntN);
+ unsigned IntNo = cast<ConstantSDNode>(IntN->getOperand(1))->getZExtValue();
+ SmallVector<SDValue, 7> Ops;
+
+ static std::map<unsigned,unsigned> LoadNPcMap = {
+ { Intrinsic::hexagon_L2_loadrub_pci, Hexagon::PS_loadrub_pci },
+ { Intrinsic::hexagon_L2_loadrb_pci, Hexagon::PS_loadrb_pci },
+ { Intrinsic::hexagon_L2_loadruh_pci, Hexagon::PS_loadruh_pci },
+ { Intrinsic::hexagon_L2_loadrh_pci, Hexagon::PS_loadrh_pci },
+ { Intrinsic::hexagon_L2_loadri_pci, Hexagon::PS_loadri_pci },
+ { Intrinsic::hexagon_L2_loadrd_pci, Hexagon::PS_loadrd_pci },
+ { Intrinsic::hexagon_L2_loadrub_pcr, Hexagon::PS_loadrub_pcr },
+ { Intrinsic::hexagon_L2_loadrb_pcr, Hexagon::PS_loadrb_pcr },
+ { Intrinsic::hexagon_L2_loadruh_pcr, Hexagon::PS_loadruh_pcr },
+ { Intrinsic::hexagon_L2_loadrh_pcr, Hexagon::PS_loadrh_pcr },
+ { Intrinsic::hexagon_L2_loadri_pcr, Hexagon::PS_loadri_pcr },
+ { Intrinsic::hexagon_L2_loadrd_pcr, Hexagon::PS_loadrd_pcr }
+ };
+ auto FLI = LoadNPcMap.find (IntNo);
+ if (FLI != LoadNPcMap.end()) {
+ EVT ValTy = MVT::i32;
+ if (IntNo == Intrinsic::hexagon_L2_loadrd_pci ||
+ IntNo == Intrinsic::hexagon_L2_loadrd_pcr)
+ ValTy = MVT::i64;
+ EVT RTys[] = { ValTy, MVT::i32, MVT::Other };
+ // Handle load.*_pci case which has 6 operands.
+ if (IntN->getNumOperands() == 6) {
+ auto Inc = cast<ConstantSDNode>(IntN->getOperand(3));
+ SDValue I = CurDAG->getTargetConstant(Inc->getSExtValue(), DL, MVT::i32);
+ // Operands: { Base, Increment, Modifier, Start, Chain }.
+ Ops = { IntN->getOperand(2), I, IntN->getOperand(4), IntN->getOperand(5),
+ IntN->getOperand(0) };
+ } else
+ // Handle load.*_pcr case which has 5 operands.
+ // Operands: { Base, Modifier, Start, Chain }.
+ Ops = { IntN->getOperand(2), IntN->getOperand(3), IntN->getOperand(4),
+ IntN->getOperand(0) };
+ MachineSDNode *Res = CurDAG->getMachineNode(FLI->second, DL, RTys, Ops);
+ ReplaceUses(SDValue(IntN, 0), SDValue(Res, 0));
+ ReplaceUses(SDValue(IntN, 1), SDValue(Res, 1));
+ ReplaceUses(SDValue(IntN, 2), SDValue(Res, 2));
+ CurDAG->RemoveDeadNode(IntN);
+ return true;
+ }
+
+ static std::map<unsigned,unsigned> StoreNPcMap = {
+ { Intrinsic::hexagon_S2_storerb_pci, Hexagon::PS_storerb_pci },
+ { Intrinsic::hexagon_S2_storerh_pci, Hexagon::PS_storerh_pci },
+ { Intrinsic::hexagon_S2_storerf_pci, Hexagon::PS_storerf_pci },
+ { Intrinsic::hexagon_S2_storeri_pci, Hexagon::PS_storeri_pci },
+ { Intrinsic::hexagon_S2_storerd_pci, Hexagon::PS_storerd_pci },
+ { Intrinsic::hexagon_S2_storerb_pcr, Hexagon::PS_storerb_pcr },
+ { Intrinsic::hexagon_S2_storerh_pcr, Hexagon::PS_storerh_pcr },
+ { Intrinsic::hexagon_S2_storerf_pcr, Hexagon::PS_storerf_pcr },
+ { Intrinsic::hexagon_S2_storeri_pcr, Hexagon::PS_storeri_pcr },
+ { Intrinsic::hexagon_S2_storerd_pcr, Hexagon::PS_storerd_pcr }
+ };
+ auto FSI = StoreNPcMap.find (IntNo);
+ if (FSI != StoreNPcMap.end()) {
+ EVT RTys[] = { MVT::i32, MVT::Other };
+ // Handle store.*_pci case which has 7 operands.
+ if (IntN->getNumOperands() == 7) {
+ auto Inc = cast<ConstantSDNode>(IntN->getOperand(3));
+ SDValue I = CurDAG->getTargetConstant(Inc->getSExtValue(), DL, MVT::i32);
+ // Operands: { Base, Increment, Modifier, Value, Start, Chain }.
+ Ops = { IntN->getOperand(2), I, IntN->getOperand(4), IntN->getOperand(5),
+ IntN->getOperand(6), IntN->getOperand(0) };
+ } else
+ // Handle store.*_pcr case which has 6 operands.
+ // Operands: { Base, Modifier, Value, Start, Chain }.
+ Ops = { IntN->getOperand(2), IntN->getOperand(3), IntN->getOperand(4),
+ IntN->getOperand(5), IntN->getOperand(0) };
+ MachineSDNode *Res = CurDAG->getMachineNode(FSI->second, DL, RTys, Ops);
+ ReplaceUses(SDValue(IntN, 0), SDValue(Res, 0));
+ ReplaceUses(SDValue(IntN, 1), SDValue(Res, 1));
+ CurDAG->RemoveDeadNode(IntN);
+ return true;
+ }
return false;
}
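Roughly, the CSx register supplies the circular buffer's start address while the modifier register supplies its length, and the post-incremented pointer wraps within [Start, Start + Len). A plain C++ model of the pointer update (a sketch under those assumptions, not the emitted code):

    // Advance Ptr by Inc bytes, wrapping inside the circular buffer
    // [Start, Start + Len). Assumes |Inc| < Len.
    static unsigned circAdvance(unsigned Ptr, int Inc, unsigned Start,
                                unsigned Len) {
      int Off = int(Ptr - Start) + Inc;
      if (Off < 0)
        Off += int(Len);
      else if (Off >= int(Len))
        Off -= int(Len);
      return Start + unsigned(Off);
    }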
@@ -385,9 +448,9 @@ bool HexagonDAGToDAGISel::tryLoadOfLoadIntrinsic(LoadSDNode *N) {
void HexagonDAGToDAGISel::SelectLoad(SDNode *N) {
SDLoc dl(N);
LoadSDNode *LD = cast<LoadSDNode>(N);
- ISD::MemIndexedMode AM = LD->getAddressingMode();
// Handle indexed loads.
+ ISD::MemIndexedMode AM = LD->getAddressingMode();
if (AM != ISD::UNINDEXED) {
SelectIndexedLoad(LD, dl);
return;
@@ -422,9 +485,16 @@ void HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, const SDLoc &dl) {
Opcode = IsValidInc ? Hexagon::S2_storerh_pi : Hexagon::S2_storerh_io;
break;
case MVT::i32:
+ case MVT::f32:
+ case MVT::v2i16:
+ case MVT::v4i8:
Opcode = IsValidInc ? Hexagon::S2_storeri_pi : Hexagon::S2_storeri_io;
break;
case MVT::i64:
+ case MVT::f64:
+ case MVT::v2i32:
+ case MVT::v4i16:
+ case MVT::v8i8:
Opcode = IsValidInc ? Hexagon::S2_storerd_pi : Hexagon::S2_storerd_io;
break;
case MVT::v64i8:
@@ -488,9 +558,9 @@ void HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, const SDLoc &dl) {
void HexagonDAGToDAGISel::SelectStore(SDNode *N) {
SDLoc dl(N);
StoreSDNode *ST = cast<StoreSDNode>(N);
- ISD::MemIndexedMode AM = ST->getAddressingMode();
// Handle indexed stores.
+ ISD::MemIndexedMode AM = ST->getAddressingMode();
if (AM != ISD::UNINDEXED) {
SelectIndexedStore(ST, dl);
return;
@@ -553,85 +623,6 @@ void HexagonDAGToDAGISel::SelectSHL(SDNode *N) {
return Default();
}
-
-//
-// If there is an zero_extend followed an intrinsic in DAG (this means - the
-// result of the intrinsic is predicate); convert the zero_extend to
-// transfer instruction.
-//
-// Zero extend -> transfer is lowered here. Otherwise, zero_extend will be
-// converted into a MUX as predicate registers defined as 1 bit in the
-// compiler. Architecture defines them as 8-bit registers.
-// We want to preserve all the lower 8-bits and, not just 1 LSB bit.
-//
-void HexagonDAGToDAGISel::SelectZeroExtend(SDNode *N) {
- SDLoc dl(N);
-
- SDValue Op0 = N->getOperand(0);
- EVT OpVT = Op0.getValueType();
- unsigned OpBW = OpVT.getSizeInBits();
-
- // Special handling for zero-extending a vector of booleans.
- if (OpVT.isVector() && OpVT.getVectorElementType() == MVT::i1 && OpBW <= 64) {
- SDNode *Mask = CurDAG->getMachineNode(Hexagon::C2_mask, dl, MVT::i64, Op0);
- unsigned NE = OpVT.getVectorNumElements();
- EVT ExVT = N->getValueType(0);
- unsigned ES = ExVT.getScalarSizeInBits();
- uint64_t MV = 0, Bit = 1;
- for (unsigned i = 0; i < NE; ++i) {
- MV |= Bit;
- Bit <<= ES;
- }
- SDValue Ones = CurDAG->getTargetConstant(MV, dl, MVT::i64);
- SDNode *OnesReg = CurDAG->getMachineNode(Hexagon::CONST64, dl,
- MVT::i64, Ones);
- if (ExVT.getSizeInBits() == 32) {
- SDNode *And = CurDAG->getMachineNode(Hexagon::A2_andp, dl, MVT::i64,
- SDValue(Mask,0), SDValue(OnesReg,0));
- SDValue SubR = CurDAG->getTargetConstant(Hexagon::isub_lo, dl, MVT::i32);
- ReplaceNode(N, CurDAG->getMachineNode(Hexagon::EXTRACT_SUBREG, dl, ExVT,
- SDValue(And, 0), SubR));
- return;
- }
- ReplaceNode(N,
- CurDAG->getMachineNode(Hexagon::A2_andp, dl, ExVT,
- SDValue(Mask, 0), SDValue(OnesReg, 0)));
- return;
- }
-
- SDNode *Int = N->getOperand(0).getNode();
- if ((Int->getOpcode() == ISD::INTRINSIC_WO_CHAIN)) {
- unsigned ID = cast<ConstantSDNode>(Int->getOperand(0))->getZExtValue();
- if (doesIntrinsicReturnPredicate(ID)) {
- // Now we need to differentiate target data types.
- if (N->getValueType(0) == MVT::i64) {
- // Convert the zero_extend to Rs = Pd followed by A2_combinew(0,Rs).
- SDValue TargetConst0 = CurDAG->getTargetConstant(0, dl, MVT::i32);
- SDNode *Result_1 = CurDAG->getMachineNode(Hexagon::C2_tfrpr, dl,
- MVT::i32, SDValue(Int, 0));
- SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::A2_tfrsi, dl,
- MVT::i32, TargetConst0);
- SDNode *Result_3 = CurDAG->getMachineNode(Hexagon::A2_combinew, dl,
- MVT::i64, MVT::Other,
- SDValue(Result_2, 0),
- SDValue(Result_1, 0));
- ReplaceNode(N, Result_3);
- return;
- }
- if (N->getValueType(0) == MVT::i32) {
- // Convert the zero_extend to Rs = Pd
- SDNode* RsPd = CurDAG->getMachineNode(Hexagon::C2_tfrpr, dl,
- MVT::i32, SDValue(Int, 0));
- ReplaceNode(N, RsPd);
- return;
- }
- llvm_unreachable("Unexpected value type");
- }
- }
- SelectCode(N);
-}
-
-
//
// Handling intrinsics for circular load and bitreverse load.
//
@@ -642,6 +633,13 @@ void HexagonDAGToDAGISel::SelectIntrinsicWChain(SDNode *N) {
return;
}
+ // Handle bit-reverse load intrinsics.
+ if (SelectBrevLdIntrinsic(N))
+ return;
+
+ if (SelectNewCircIntrinsic(N))
+ return;
+
unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
if (IntNo == Intrinsic::hexagon_V6_vgathermw ||
IntNo == Intrinsic::hexagon_V6_vgathermw_128B ||
@@ -735,7 +733,6 @@ void HexagonDAGToDAGISel::SelectConstant(SDNode *N) {
SelectCode(N);
}
-
void HexagonDAGToDAGISel::SelectFrameIndex(SDNode *N) {
MachineFrameInfo &MFI = MF->getFrameInfo();
const HexagonFrameLowering *HFI = HST->getFrameLowering();
@@ -765,20 +762,113 @@ void HexagonDAGToDAGISel::SelectFrameIndex(SDNode *N) {
ReplaceNode(N, R);
}
+void HexagonDAGToDAGISel::SelectAddSubCarry(SDNode *N) {
+ unsigned OpcCarry = N->getOpcode() == HexagonISD::ADDC ? Hexagon::A4_addp_c
+ : Hexagon::A4_subp_c;
+ SDNode *C = CurDAG->getMachineNode(OpcCarry, SDLoc(N), N->getVTList(),
+ { N->getOperand(0), N->getOperand(1),
+ N->getOperand(2) });
+ ReplaceNode(N, C);
+}
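HexagonISD::ADDC and SUBC keep the carry in a predicate register, which matches A4_addp_c / A4_subp_c (Rdd = add/sub(Rss, Rtt, Px):carry) directly, so selection is a straight one-to-one node replacement.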
-void HexagonDAGToDAGISel::SelectBitcast(SDNode *N) {
- EVT SVT = N->getOperand(0).getValueType();
- EVT DVT = N->getValueType(0);
- if (!SVT.isVector() || !DVT.isVector() ||
- SVT.getVectorElementType() == MVT::i1 ||
- DVT.getVectorElementType() == MVT::i1 ||
- SVT.getSizeInBits() != DVT.getSizeInBits()) {
- SelectCode(N);
- return;
+void HexagonDAGToDAGISel::SelectVAlign(SDNode *N) {
+ MVT ResTy = N->getValueType(0).getSimpleVT();
+ if (HST->isHVXVectorType(ResTy, true))
+ return SelectHvxVAlign(N);
+
+ const SDLoc &dl(N);
+ unsigned VecLen = ResTy.getSizeInBits();
+ if (VecLen == 32) {
+ SDValue Ops[] = {
+ CurDAG->getTargetConstant(Hexagon::DoubleRegsRegClassID, dl, MVT::i32),
+ N->getOperand(0),
+ CurDAG->getTargetConstant(Hexagon::isub_hi, dl, MVT::i32),
+ N->getOperand(1),
+ CurDAG->getTargetConstant(Hexagon::isub_lo, dl, MVT::i32)
+ };
+ SDNode *R = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl,
+ MVT::i64, Ops);
+
+ // Shift right by "(Addr & 0x3) * 8" bits.
+ SDValue M0 = CurDAG->getTargetConstant(0x18, dl, MVT::i32);
+ SDValue M1 = CurDAG->getTargetConstant(0x03, dl, MVT::i32);
+ SDNode *C = CurDAG->getMachineNode(Hexagon::S4_andi_asl_ri, dl, MVT::i32,
+ M0, N->getOperand(2), M1);
+ SDNode *S = CurDAG->getMachineNode(Hexagon::S2_lsr_r_p, dl, MVT::i64,
+ SDValue(R, 0), SDValue(C, 0));
+ SDValue E = CurDAG->getTargetExtractSubreg(Hexagon::isub_lo, dl, ResTy,
+ SDValue(S, 0));
+ ReplaceNode(N, E.getNode());
+ } else {
+ assert(VecLen == 64);
+ SDNode *Pu = CurDAG->getMachineNode(Hexagon::C2_tfrrp, dl, MVT::v8i1,
+ N->getOperand(2));
+ SDNode *VA = CurDAG->getMachineNode(Hexagon::S2_valignrb, dl, ResTy,
+ N->getOperand(0), N->getOperand(1),
+ SDValue(Pu,0));
+ ReplaceNode(N, VA);
}
+}
+
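For the 32-bit case, the emitted sequence computes, in plain C++ terms (a sketch of the semantics, not generated code):

    #include <cstdint>

    // Concatenate the two words, shift right by the misalignment in bits,
    // and keep the low word -- REG_SEQUENCE, S4_andi_asl_ri + S2_lsr_r_p,
    // and the isub_lo extract, respectively.
    static uint32_t valign32(uint32_t Hi, uint32_t Lo, uint32_t Addr) {
      uint64_t Pair = (uint64_t(Hi) << 32) | Lo;
      unsigned ShiftBits = (Addr & 0x3) * 8;
      return uint32_t(Pair >> ShiftBits);
    }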
+void HexagonDAGToDAGISel::SelectVAlignAddr(SDNode *N) {
+ const SDLoc &dl(N);
+ SDValue A = N->getOperand(1);
+ int Mask = -cast<ConstantSDNode>(A.getNode())->getSExtValue();
+ assert(isPowerOf2_32(-Mask));
+
+ SDValue M = CurDAG->getTargetConstant(Mask, dl, MVT::i32);
+ SDNode *AA = CurDAG->getMachineNode(Hexagon::A2_andir, dl, MVT::i32,
+ N->getOperand(0), M);
+ ReplaceNode(N, AA);
+}
+
+// Handle these nodes here to avoid having to write patterns for all
+// combinations of input/output types. In all cases, the resulting
+// instruction is the same.
+void HexagonDAGToDAGISel::SelectTypecast(SDNode *N) {
+ SDValue Op = N->getOperand(0);
+ MVT OpTy = Op.getValueType().getSimpleVT();
+ SDNode *T = CurDAG->MorphNodeTo(N, N->getOpcode(),
+ CurDAG->getVTList(OpTy), {Op});
+ ReplaceNode(T, Op.getNode());
+}
+
+void HexagonDAGToDAGISel::SelectP2D(SDNode *N) {
+ MVT ResTy = N->getValueType(0).getSimpleVT();
+ SDNode *T = CurDAG->getMachineNode(Hexagon::C2_mask, SDLoc(N), ResTy,
+ N->getOperand(0));
+ ReplaceNode(N, T);
+}
+
+void HexagonDAGToDAGISel::SelectD2P(SDNode *N) {
+ const SDLoc &dl(N);
+ MVT ResTy = N->getValueType(0).getSimpleVT();
+ SDValue Zero = CurDAG->getTargetConstant(0, dl, MVT::i32);
+ SDNode *T = CurDAG->getMachineNode(Hexagon::A4_vcmpbgtui, dl, ResTy,
+ N->getOperand(0), Zero);
+ ReplaceNode(N, T);
+}
+
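The A4_vcmpbgtui-with-zero trick in SelectD2P works because "unsigned byte > 0" is exactly "byte != 0": it sets a predicate lane precisely when the corresponding byte of the double register is nonzero, which is the vector-to-bool conversion D2P needs.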
+void HexagonDAGToDAGISel::SelectV2Q(SDNode *N) {
+ const SDLoc &dl(N);
+ MVT ResTy = N->getValueType(0).getSimpleVT();
- CurDAG->ReplaceAllUsesOfValueWith(SDValue(N,0), N->getOperand(0));
- CurDAG->RemoveDeadNode(N);
+ SDValue C = CurDAG->getTargetConstant(-1, dl, MVT::i32);
+ SDNode *R = CurDAG->getMachineNode(Hexagon::A2_tfrsi, dl, MVT::i32, C);
+ SDNode *T = CurDAG->getMachineNode(Hexagon::V6_vandvrt, dl, ResTy,
+ N->getOperand(0), SDValue(R,0));
+ ReplaceNode(N, T);
+}
+
+void HexagonDAGToDAGISel::SelectQ2V(SDNode *N) {
+ const SDLoc &dl(N);
+ MVT ResTy = N->getValueType(0).getSimpleVT();
+
+ SDValue C = CurDAG->getTargetConstant(-1, dl, MVT::i32);
+ SDNode *R = CurDAG->getMachineNode(Hexagon::A2_tfrsi, dl, MVT::i32, C);
+ SDNode *T = CurDAG->getMachineNode(Hexagon::V6_vandqrt, dl, ResTy,
+ N->getOperand(0), SDValue(R,0));
+ ReplaceNode(N, T);
}
void HexagonDAGToDAGISel::Select(SDNode *N) {
@@ -789,13 +879,21 @@ void HexagonDAGToDAGISel::Select(SDNode *N) {
case ISD::Constant: return SelectConstant(N);
case ISD::ConstantFP: return SelectConstantFP(N);
case ISD::FrameIndex: return SelectFrameIndex(N);
- case ISD::BITCAST: return SelectBitcast(N);
case ISD::SHL: return SelectSHL(N);
case ISD::LOAD: return SelectLoad(N);
case ISD::STORE: return SelectStore(N);
- case ISD::ZERO_EXTEND: return SelectZeroExtend(N);
case ISD::INTRINSIC_W_CHAIN: return SelectIntrinsicWChain(N);
case ISD::INTRINSIC_WO_CHAIN: return SelectIntrinsicWOChain(N);
+
+ case HexagonISD::ADDC:
+ case HexagonISD::SUBC: return SelectAddSubCarry(N);
+ case HexagonISD::VALIGN: return SelectVAlign(N);
+ case HexagonISD::VALIGNADDR: return SelectVAlignAddr(N);
+ case HexagonISD::TYPECAST: return SelectTypecast(N);
+ case HexagonISD::P2D: return SelectP2D(N);
+ case HexagonISD::D2P: return SelectD2P(N);
+ case HexagonISD::Q2V: return SelectQ2V(N);
+ case HexagonISD::V2Q: return SelectV2Q(N);
}
if (HST->useHVXOps()) {
@@ -1240,7 +1338,7 @@ bool HexagonDAGToDAGISel::SelectAnyImmediate(SDValue &N, SDValue &R,
}
case HexagonISD::JT:
case HexagonISD::CP:
- // These are assumed to always be aligned at at least 8-byte boundary.
+ // These are assumed to always be aligned to at least an 8-byte boundary.
if (LogAlign > 3)
return false;
R = N.getOperand(0);
@@ -1252,7 +1350,7 @@ bool HexagonDAGToDAGISel::SelectAnyImmediate(SDValue &N, SDValue &R,
R = N;
return true;
case ISD::BlockAddress:
- // Block address is always aligned at at least 4-byte boundary.
+ // Block address is always aligned to at least a 4-byte boundary.
if (LogAlign > 2 || !IsAligned(cast<BlockAddressSDNode>(N)->getOffset()))
return false;
R = N;
@@ -1345,9 +1443,13 @@ bool HexagonDAGToDAGISel::DetectUseSxtw(SDValue &N, SDValue &R) {
EVT T = Opc == ISD::SIGN_EXTEND
? N.getOperand(0).getValueType()
: cast<VTSDNode>(N.getOperand(1))->getVT();
- if (T.getSizeInBits() != 32)
+ unsigned SW = T.getSizeInBits();
+ if (SW == 32)
+ R = N.getOperand(0);
+ else if (SW < 32)
+ R = N;
+ else
return false;
- R = N.getOperand(0);
break;
}
case ISD::LOAD: {
@@ -1361,6 +1463,13 @@ bool HexagonDAGToDAGISel::DetectUseSxtw(SDValue &N, SDValue &R) {
R = N;
break;
}
+ case ISD::SRA: {
+ auto *S = dyn_cast<ConstantSDNode>(N.getOperand(1));
+ if (!S || S->getZExtValue() != 32)
+ return false;
+ R = N;
+ break;
+ }
default:
return false;
}
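The new ISD::SRA case accepts an arithmetic shift right by exactly 32: for a 64-bit value, (sra x, 32) equals the sign-extension of x's upper word, so the whole node is already a valid sign-extended-word operand.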
@@ -1500,7 +1609,7 @@ static bool isOpcodeHandled(const SDNode *N) {
}
}
-/// \brief Return the weight of an SDNode
+/// Return the weight of an SDNode
int HexagonDAGToDAGISel::getWeight(SDNode *N) {
if (!isOpcodeHandled(N))
return 1;
@@ -1799,15 +1908,15 @@ SDValue HexagonDAGToDAGISel::balanceSubTree(SDNode *N, bool TopLevel) {
RootHeights[N] = std::max(getHeight(N->getOperand(0).getNode()),
getHeight(N->getOperand(1).getNode())) + 1;
- DEBUG(dbgs() << "--> No need to balance root (Weight=" << Weight
- << " Height=" << RootHeights[N] << "): ");
- DEBUG(N->dump());
+ LLVM_DEBUG(dbgs() << "--> No need to balance root (Weight=" << Weight
+ << " Height=" << RootHeights[N] << "): ");
+ LLVM_DEBUG(N->dump(CurDAG));
return SDValue(N, 0);
}
- DEBUG(dbgs() << "** Balancing root node: ");
- DEBUG(N->dump());
+ LLVM_DEBUG(dbgs() << "** Balancing root node: ");
+ LLVM_DEBUG(N->dump(CurDAG));
unsigned NOpcode = N->getOpcode();
@@ -1855,7 +1964,7 @@ SDValue HexagonDAGToDAGISel::balanceSubTree(SDNode *N, bool TopLevel) {
// Whoops, this node was RAUWd by one of the balanceSubTree calls we
// made. Our worklist isn't up to date anymore.
// Restart the whole process.
- DEBUG(dbgs() << "--> Subtree was RAUWd. Restarting...\n");
+ LLVM_DEBUG(dbgs() << "--> Subtree was RAUWd. Restarting...\n");
return balanceSubTree(N, TopLevel);
}
@@ -1926,15 +2035,15 @@ SDValue HexagonDAGToDAGISel::balanceSubTree(SDNode *N, bool TopLevel) {
}
}
- DEBUG(dbgs() << "--> Current height=" << NodeHeights[SDValue(N, 0)]
- << " weight=" << CurrentWeight << " imbalanced="
- << Imbalanced << "\n");
+ LLVM_DEBUG(dbgs() << "--> Current height=" << NodeHeights[SDValue(N, 0)]
+ << " weight=" << CurrentWeight
+ << " imbalanced=" << Imbalanced << "\n");
// Transform MUL(x, C * 2^Y) + SHL(z, Y) -> SHL(ADD(MUL(x, C), z), Y)
// This factors out a shift in order to match memw(a<<Y+b).
if (CanFactorize && (willShiftRightEliminate(Mul1.Value, MaxPowerOf2) ||
willShiftRightEliminate(Mul2.Value, MaxPowerOf2))) {
- DEBUG(dbgs() << "--> Found common factor for two MUL children!\n");
+ LLVM_DEBUG(dbgs() << "--> Found common factor for two MUL children!\n");
int Weight = Mul1.Weight + Mul2.Weight;
int Height = std::max(NodeHeights[Mul1.Value], NodeHeights[Mul2.Value]) + 1;
SDValue Mul1Factored = factorOutPowerOf2(Mul1.Value, MaxPowerOf2);
@@ -1968,9 +2077,9 @@ SDValue HexagonDAGToDAGISel::balanceSubTree(SDNode *N, bool TopLevel) {
if (getUsesInFunction(GANode->getGlobal()) == 1 && Offset->hasOneUse() &&
getTargetLowering()->isOffsetFoldingLegal(GANode)) {
- DEBUG(dbgs() << "--> Combining GA and offset (" << Offset->getSExtValue()
- << "): ");
- DEBUG(GANode->dump());
+ LLVM_DEBUG(dbgs() << "--> Combining GA and offset ("
+ << Offset->getSExtValue() << "): ");
+ LLVM_DEBUG(GANode->dump(CurDAG));
SDValue NewTGA =
CurDAG->getTargetGlobalAddress(GANode->getGlobal(), SDLoc(GA.Value),
@@ -2014,7 +2123,7 @@ SDValue HexagonDAGToDAGISel::balanceSubTree(SDNode *N, bool TopLevel) {
// If this is the top level and we haven't factored out a shift, we should try
// to move a constant to the bottom to match addressing modes like memw(rX+C)
if (TopLevel && !CanFactorize && Leaves.hasConst()) {
- DEBUG(dbgs() << "--> Pushing constant to tip of tree.");
+ LLVM_DEBUG(dbgs() << "--> Pushing constant to tip of tree.");
Leaves.pushToBottom(Leaves.pop());
}
@@ -2041,7 +2150,7 @@ SDValue HexagonDAGToDAGISel::balanceSubTree(SDNode *N, bool TopLevel) {
// Make sure that none of these nodes have been RAUW'd
if ((RootWeights.count(V0.getNode()) && RootWeights[V0.getNode()] == -2) ||
(RootWeights.count(V1.getNode()) && RootWeights[V1.getNode()] == -2)) {
- DEBUG(dbgs() << "--> Subtree was RAUWd. Restarting...\n");
+ LLVM_DEBUG(dbgs() << "--> Subtree was RAUWd. Restarting...\n");
return balanceSubTree(N, TopLevel);
}
@@ -2075,9 +2184,9 @@ SDValue HexagonDAGToDAGISel::balanceSubTree(SDNode *N, bool TopLevel) {
int Weight = V0Weight + V1Weight;
Leaves.push(WeightedLeaf(NewNode, Weight, L0.InsertionOrder));
- DEBUG(dbgs() << "--> Built new node (Weight=" << Weight << ",Height="
- << Height << "):\n");
- DEBUG(NewNode.dump());
+ LLVM_DEBUG(dbgs() << "--> Built new node (Weight=" << Weight
+ << ",Height=" << Height << "):\n");
+ LLVM_DEBUG(NewNode.dump());
}
assert(Leaves.size() == 1);
@@ -2101,15 +2210,15 @@ SDValue HexagonDAGToDAGISel::balanceSubTree(SDNode *N, bool TopLevel) {
}
if (N != NewRoot.getNode()) {
- DEBUG(dbgs() << "--> Root is now: ");
- DEBUG(NewRoot.dump());
+ LLVM_DEBUG(dbgs() << "--> Root is now: ");
+ LLVM_DEBUG(NewRoot.dump());
// Replace all uses of old root by new root
CurDAG->ReplaceAllUsesWith(N, NewRoot.getNode());
// Mark that we have RAUW'd N
RootWeights[N] = -2;
} else {
- DEBUG(dbgs() << "--> Root unchanged.\n");
+ LLVM_DEBUG(dbgs() << "--> Root unchanged.\n");
}
RootWeights[NewRoot.getNode()] = Leaves.top().Weight;
@@ -2132,8 +2241,8 @@ void HexagonDAGToDAGISel::rebalanceAddressTrees() {
if (RootWeights.count(BasePtr.getNode()))
continue;
- DEBUG(dbgs() << "** Rebalancing address calculation in node: ");
- DEBUG(N->dump());
+ LLVM_DEBUG(dbgs() << "** Rebalancing address calculation in node: ");
+ LLVM_DEBUG(N->dump(CurDAG));
// FindRoots
SmallVector<SDNode *, 4> Worklist;
@@ -2173,8 +2282,8 @@ void HexagonDAGToDAGISel::rebalanceAddressTrees() {
N = CurDAG->UpdateNodeOperands(N, N->getOperand(0), N->getOperand(1),
NewBasePtr, N->getOperand(3));
- DEBUG(dbgs() << "--> Final node: ");
- DEBUG(N->dump());
+ LLVM_DEBUG(dbgs() << "--> Final node: ");
+ LLVM_DEBUG(N->dump(CurDAG));
}
CurDAG->RemoveDeadNodes();
@@ -2182,4 +2291,3 @@ void HexagonDAGToDAGISel::rebalanceAddressTrees() {
RootHeights.clear();
RootWeights.clear();
}
-
diff --git a/lib/Target/Hexagon/HexagonISelDAGToDAG.h b/lib/Target/Hexagon/HexagonISelDAGToDAG.h
index fc66940ee52d..f4f09dd4e758 100644
--- a/lib/Target/Hexagon/HexagonISelDAGToDAG.h
+++ b/lib/Target/Hexagon/HexagonISelDAGToDAG.h
@@ -90,6 +90,8 @@ public:
unsigned ConstraintID,
std::vector<SDValue> &OutOps) override;
bool tryLoadOfLoadIntrinsic(LoadSDNode *N);
+ bool SelectBrevLdIntrinsic(SDNode *IntN);
+ bool SelectNewCircIntrinsic(SDNode *IntN);
void SelectLoad(SDNode *N);
void SelectIndexedLoad(LoadSDNode *LD, const SDLoc &dl);
void SelectIndexedStore(StoreSDNode *ST, const SDLoc &dl);
@@ -100,10 +102,17 @@ public:
void SelectIntrinsicWOChain(SDNode *N);
void SelectConstant(SDNode *N);
void SelectConstantFP(SDNode *N);
- void SelectBitcast(SDNode *N);
void SelectV65Gather(SDNode *N);
void SelectV65GatherPred(SDNode *N);
void SelectHVXDualOutput(SDNode *N);
+ void SelectAddSubCarry(SDNode *N);
+ void SelectVAlign(SDNode *N);
+ void SelectVAlignAddr(SDNode *N);
+ void SelectTypecast(SDNode *N);
+ void SelectP2D(SDNode *N);
+ void SelectD2P(SDNode *N);
+ void SelectQ2V(SDNode *N);
+ void SelectV2Q(SDNode *N);
// Include the declarations autogenerated from the selection patterns.
#define GET_DAGISEL_DECL
@@ -122,6 +131,7 @@ private:
void SelectHvxShuffle(SDNode *N);
void SelectHvxRor(SDNode *N);
+ void SelectHvxVAlign(SDNode *N);
bool keepsLowBits(const SDValue &Val, unsigned NumBits, SDValue &Src);
bool isAlignedMemNode(const MemSDNode *N) const;
diff --git a/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp b/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
index 740861851185..8aef9b4560d5 100644
--- a/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
+++ b/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
@@ -11,6 +11,7 @@
#include "HexagonISelDAGToDAG.h"
#include "HexagonISelLowering.h"
#include "HexagonTargetMachine.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/IR/Intrinsics.h"
@@ -94,18 +95,13 @@ namespace {
// Benes network is a forward delta network immediately followed by
// a reverse delta network.
+enum class ColorKind { None, Red, Black };
// Graph coloring utility used to partition nodes into two groups:
// they will correspond to nodes routed to the upper and lower networks.
struct Coloring {
- enum : uint8_t {
- None = 0,
- Red,
- Black
- };
-
using Node = int;
- using MapType = std::map<Node,uint8_t>;
+ using MapType = std::map<Node, ColorKind>;
static constexpr Node Ignore = Node(-1);
Coloring(ArrayRef<Node> Ord) : Order(Ord) {
@@ -118,10 +114,10 @@ struct Coloring {
return Colors;
}
- uint8_t other(uint8_t Color) {
- if (Color == None)
- return Red;
- return Color == Red ? Black : Red;
+ ColorKind other(ColorKind Color) {
+ if (Color == ColorKind::None)
+ return ColorKind::Red;
+ return Color == ColorKind::Red ? ColorKind::Black : ColorKind::Red;
}
void dump() const;
@@ -139,28 +135,28 @@ private:
return (Pos < Num/2) ? Pos + Num/2 : Pos - Num/2;
}
- uint8_t getColor(Node N) {
+ ColorKind getColor(Node N) {
auto F = Colors.find(N);
- return F != Colors.end() ? F->second : (uint8_t)None;
+ return F != Colors.end() ? F->second : ColorKind::None;
}
- std::pair<bool,uint8_t> getUniqueColor(const NodeSet &Nodes);
+ std::pair<bool, ColorKind> getUniqueColor(const NodeSet &Nodes);
void build();
bool color();
};
} // namespace
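The move from a plain uint8_t enum to a scoped enum class above is a pure type-safety change. A small standalone sketch of what the scoped form buys (not part of the patch):

enum class ColorKind { None, Red, Black };

const char *name(ColorKind C) {
  // Scoped enumerators do not convert to integers implicitly, so the old
  // table-indexing idiom (Names[Color]) and accidental color/node-ID mixups
  // no longer compile; an exhaustive switch is the natural replacement.
  switch (C) {
  case ColorKind::None:  return "None";
  case ColorKind::Red:   return "Red";
  case ColorKind::Black: return "Black";
  }
  return "?"; // unreachable for valid inputs; keeps -Wreturn-type quiet
}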
-std::pair<bool,uint8_t> Coloring::getUniqueColor(const NodeSet &Nodes) {
- uint8_t Color = None;
+std::pair<bool, ColorKind> Coloring::getUniqueColor(const NodeSet &Nodes) {
+ auto Color = ColorKind::None;
for (Node N : Nodes) {
- uint8_t ColorN = getColor(N);
- if (ColorN == None)
+ ColorKind ColorN = getColor(N);
+ if (ColorN == ColorKind::None)
continue;
- if (Color == None)
+ if (Color == ColorKind::None)
Color = ColorN;
- else if (Color != None && Color != ColorN)
- return { false, None };
+ else if (Color != ColorKind::None && Color != ColorN)
+ return { false, ColorKind::None };
}
return { true, Color };
}
@@ -245,12 +241,12 @@ bool Coloring::color() {
// Coloring failed. Split this node.
Node C = conj(N);
- uint8_t ColorN = other(None);
- uint8_t ColorC = other(ColorN);
+ ColorKind ColorN = other(ColorKind::None);
+ ColorKind ColorC = other(ColorN);
NodeSet &Cs = Edges[C];
NodeSet CopyNs = Ns;
for (Node M : CopyNs) {
- uint8_t ColorM = getColor(M);
+ ColorKind ColorM = getColor(M);
if (ColorM == ColorC) {
// Connect M with C, disconnect M from N.
Cs.insert(M);
@@ -263,10 +259,10 @@ bool Coloring::color() {
Colors[C] = ColorC;
}
- // Explicitly assign "None" all all uncolored nodes.
+ // Explicitly assign "None" to all uncolored nodes.
for (unsigned I = 0; I != Order.size(); ++I)
if (Colors.count(I) == 0)
- Colors[I] = None;
+ Colors[I] = ColorKind::None;
return true;
}
@@ -296,10 +292,21 @@ void Coloring::dump() const {
}
dbgs() << " }\n";
- static const char *const Names[] = { "None", "Red", "Black" };
+ auto ColorKindToName = [](ColorKind C) {
+ switch (C) {
+ case ColorKind::None:
+ return "None";
+ case ColorKind::Red:
+ return "Red";
+ case ColorKind::Black:
+ return "Black";
+ }
+ llvm_unreachable("all ColorKinds should be handled by the switch above");
+ };
+
dbgs() << " Colors: {\n";
for (auto C : Colors)
- dbgs() << " " << C.first << " -> " << Names[C.second] << "\n";
+ dbgs() << " " << C.first << " -> " << ColorKindToName(C.second) << "\n";
dbgs() << " }\n}\n";
}
@@ -471,21 +478,21 @@ bool ReverseDeltaNetwork::route(ElemType *P, RowType *T, unsigned Size,
if (M.empty())
return false;
- uint8_t ColorUp = Coloring::None;
+ ColorKind ColorUp = ColorKind::None;
for (ElemType J = 0; J != Num; ++J) {
ElemType I = P[J];
// I is the position in the input,
// J is the position in the output.
if (I == Ignore)
continue;
- uint8_t C = M.at(I);
- if (C == Coloring::None)
+ ColorKind C = M.at(I);
+ if (C == ColorKind::None)
continue;
// During "Step", inputs cannot switch halves, so if the "up" color
// is still unknown, make sure that it is selected in such a way that
// "I" will stay in the same half.
bool InpUp = I < Num/2;
- if (ColorUp == Coloring::None)
+ if (ColorUp == ColorKind::None)
ColorUp = InpUp ? C : G.other(C);
if ((C == ColorUp) != InpUp) {
// If I should go to a different half than where it is now, give up.
@@ -545,16 +552,16 @@ bool BenesNetwork::route(ElemType *P, RowType *T, unsigned Size,
// Both assignments, i.e. Red->Up and Red->Down are valid, but they will
// result in different controls. Let's pick the one where the first
// control will be "Pass".
- uint8_t ColorUp = Coloring::None;
+ ColorKind ColorUp = ColorKind::None;
for (ElemType J = 0; J != Num; ++J) {
ElemType I = P[J];
if (I == Ignore)
continue;
- uint8_t C = M.at(I);
- if (C == Coloring::None)
+ ColorKind C = M.at(I);
+ if (C == ColorKind::None)
continue;
- if (ColorUp == Coloring::None) {
- ColorUp = (I < Num/2) ? Coloring::Red : Coloring::Black;
+ if (ColorUp == ColorKind::None) {
+ ColorUp = (I < Num / 2) ? ColorKind::Red : ColorKind::Black;
}
unsigned CI = (I < Num/2) ? I+Num/2 : I-Num/2;
if (C == ColorUp) {
@@ -769,6 +776,13 @@ struct ShuffleMask {
size_t H = Mask.size()/2;
return ShuffleMask(Mask.take_back(H));
}
+
+ void print(raw_ostream &OS) const {
+ OS << "MinSrc:" << MinSrc << ", MaxSrc:" << MaxSrc << " {";
+ for (int M : Mask)
+ OS << ' ' << M;
+ OS << " }";
+ }
};
} // namespace
@@ -806,6 +820,7 @@ namespace llvm {
void selectShuffle(SDNode *N);
void selectRor(SDNode *N);
+ void selectVAlign(SDNode *N);
private:
void materialize(const ResultStack &Results);
@@ -821,7 +836,6 @@ namespace llvm {
MutableArrayRef<int> NewMask, unsigned Options = None);
OpRef packp(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &Results,
MutableArrayRef<int> NewMask);
- OpRef zerous(ShuffleMask SM, OpRef Va, ResultStack &Results);
OpRef vmuxs(ArrayRef<uint8_t> Bytes, OpRef Va, OpRef Vb,
ResultStack &Results);
OpRef vmuxp(ArrayRef<uint8_t> Bytes, OpRef Va, OpRef Vb,
@@ -905,42 +919,55 @@ static bool isPermutation(ArrayRef<int> Mask) {
}
bool HvxSelector::selectVectorConstants(SDNode *N) {
- // Constant vectors are generated as loads from constant pools.
- // Since they are generated during the selection process, the main
- // selection algorithm is not aware of them. Select them directly
- // here.
- SmallVector<SDNode*,4> Loads;
- SmallVector<SDNode*,16> WorkQ;
+ // Constant vectors are generated as loads from constant pools or as
+ // splats of a constant value. Since they are generated during the
+ // selection process, the main selection algorithm is not aware of them.
+ // Select them directly here.
+ SmallVector<SDNode*,4> Nodes;
+ SetVector<SDNode*> WorkQ;
+
+ // The one-use test for VSPLATW's operand may fail due to dead nodes
+ // left over in the DAG.
+ DAG.RemoveDeadNodes();
// The DAG can change (due to CSE) during selection, so cache all the
// unselected nodes first to avoid traversing a mutating DAG.
- auto IsLoadToSelect = [] (SDNode *N) {
- if (!N->isMachineOpcode() && N->getOpcode() == ISD::LOAD) {
- SDValue Addr = cast<LoadSDNode>(N)->getBasePtr();
- unsigned AddrOpc = Addr.getOpcode();
- if (AddrOpc == HexagonISD::AT_PCREL || AddrOpc == HexagonISD::CP)
- if (Addr.getOperand(0).getOpcode() == ISD::TargetConstantPool)
- return true;
+ auto IsNodeToSelect = [] (SDNode *N) {
+ if (N->isMachineOpcode())
+ return false;
+ switch (N->getOpcode()) {
+ case HexagonISD::VZERO:
+ case HexagonISD::VSPLATW:
+ return true;
+ case ISD::LOAD: {
+ SDValue Addr = cast<LoadSDNode>(N)->getBasePtr();
+ unsigned AddrOpc = Addr.getOpcode();
+ if (AddrOpc == HexagonISD::AT_PCREL || AddrOpc == HexagonISD::CP)
+ if (Addr.getOperand(0).getOpcode() == ISD::TargetConstantPool)
+ return true;
+ }
+ break;
}
- return false;
+ // Make sure to select the operand of VSPLATW.
+ bool IsSplatOp = N->hasOneUse() &&
+ N->use_begin()->getOpcode() == HexagonISD::VSPLATW;
+ return IsSplatOp;
};
- WorkQ.push_back(N);
+ WorkQ.insert(N);
for (unsigned i = 0; i != WorkQ.size(); ++i) {
SDNode *W = WorkQ[i];
- if (IsLoadToSelect(W)) {
- Loads.push_back(W);
- continue;
- }
+ if (IsNodeToSelect(W))
+ Nodes.push_back(W);
for (unsigned j = 0, f = W->getNumOperands(); j != f; ++j)
- WorkQ.push_back(W->getOperand(j).getNode());
+ WorkQ.insert(W->getOperand(j).getNode());
}
- for (SDNode *L : Loads)
+ for (SDNode *L : Nodes)
ISel.Select(L);
- return !Loads.empty();
+ return !Nodes.empty();
}
void HvxSelector::materialize(const ResultStack &Results) {
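Switching WorkQ from a SmallVector to a SetVector above deduplicates the traversal: a node reachable through several operand paths is enqueued once, while iteration order stays stable. A toy model of the same worklist idiom (standalone; names are illustrative):

#include "llvm/ADT/SetVector.h"
#include <vector>

std::vector<int> reachable(const std::vector<std::vector<int>> &Succ,
                           int Root) {
  llvm::SetVector<int> WorkQ; // rejects duplicates, keeps insertion order
  WorkQ.insert(Root);
  // Indexing stays valid as the container grows, exactly as in
  // selectVectorConstants above.
  for (unsigned i = 0; i != WorkQ.size(); ++i)
    for (int S : Succ[WorkQ[i]])
      WorkQ.insert(S); // no-op if S was already queued
  return std::vector<int>(WorkQ.begin(), WorkQ.end());
}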
@@ -977,15 +1004,11 @@ void HvxSelector::materialize(const ResultStack &Results) {
MVT OpTy = Op.getValueType().getSimpleVT();
if (Part != OpRef::Whole) {
assert(Part == OpRef::LoHalf || Part == OpRef::HiHalf);
- if (Op.getOpcode() == HexagonISD::VCOMBINE) {
- Op = (Part == OpRef::HiHalf) ? Op.getOperand(0) : Op.getOperand(1);
- } else {
- MVT HalfTy = MVT::getVectorVT(OpTy.getVectorElementType(),
- OpTy.getVectorNumElements()/2);
- unsigned Sub = (Part == OpRef::LoHalf) ? Hexagon::vsub_lo
- : Hexagon::vsub_hi;
- Op = DAG.getTargetExtractSubreg(Sub, dl, HalfTy, Op);
- }
+ MVT HalfTy = MVT::getVectorVT(OpTy.getVectorElementType(),
+ OpTy.getVectorNumElements()/2);
+ unsigned Sub = (Part == OpRef::LoHalf) ? Hexagon::vsub_lo
+ : Hexagon::vsub_hi;
+ Op = DAG.getTargetExtractSubreg(Sub, dl, HalfTy, Op);
}
Ops.push_back(Op);
} // for (Node : Results)
@@ -1031,25 +1054,53 @@ OpRef HvxSelector::packs(ShuffleMask SM, OpRef Va, OpRef Vb,
int VecLen = SM.Mask.size();
MVT Ty = getSingleVT(MVT::i8);
- if (SM.MaxSrc - SM.MinSrc < int(HwLen)) {
- if (SM.MaxSrc < int(HwLen)) {
- memcpy(NewMask.data(), SM.Mask.data(), sizeof(int)*VecLen);
- return Va;
+ auto IsExtSubvector = [] (ShuffleMask M) {
+ assert(M.MinSrc >= 0 && M.MaxSrc >= 0);
+ for (int I = 0, E = M.Mask.size(); I != E; ++I) {
+ if (M.Mask[I] >= 0 && M.Mask[I]-I != M.MinSrc)
+ return false;
}
- if (SM.MinSrc >= int(HwLen)) {
- for (int I = 0; I != VecLen; ++I) {
- int M = SM.Mask[I];
- if (M != -1)
- M -= HwLen;
- NewMask[I] = M;
+ return true;
+ };
+
+ if (SM.MaxSrc - SM.MinSrc < int(HwLen)) {
+ if (SM.MinSrc == 0 || SM.MinSrc == int(HwLen) || !IsExtSubvector(SM)) {
+ // If the mask picks elements from only one of the operands, return
+ // that operand, and update the mask to use index 0 to refer to the
+ // first element of that operand.
+ // If the mask extracts a subvector, it will be handled below, so
+ // skip it here.
+ if (SM.MaxSrc < int(HwLen)) {
+ memcpy(NewMask.data(), SM.Mask.data(), sizeof(int)*VecLen);
+ return Va;
+ }
+ if (SM.MinSrc >= int(HwLen)) {
+ for (int I = 0; I != VecLen; ++I) {
+ int M = SM.Mask[I];
+ if (M != -1)
+ M -= HwLen;
+ NewMask[I] = M;
+ }
+ return Vb;
}
- return Vb;
+ }
+ int MinSrc = SM.MinSrc;
+ if (SM.MaxSrc < int(HwLen)) {
+ Vb = Va;
+ } else if (SM.MinSrc > int(HwLen)) {
+ Va = Vb;
+ MinSrc = SM.MinSrc - HwLen;
}
const SDLoc &dl(Results.InpNode);
- SDValue S = DAG.getTargetConstant(SM.MinSrc, dl, MVT::i32);
- if (isUInt<3>(SM.MinSrc)) {
- Results.push(Hexagon::V6_valignbi, Ty, {Vb, Va, S});
+ if (isUInt<3>(MinSrc) || isUInt<3>(HwLen-MinSrc)) {
+ bool IsRight = isUInt<3>(MinSrc); // Right align.
+ SDValue S = DAG.getTargetConstant(IsRight ? MinSrc : HwLen-MinSrc,
+ dl, MVT::i32);
+ unsigned Opc = IsRight ? Hexagon::V6_valignbi
+ : Hexagon::V6_vlalignbi;
+ Results.push(Opc, Ty, {Vb, Va, S});
} else {
+ SDValue S = DAG.getTargetConstant(MinSrc, dl, MVT::i32);
Results.push(Hexagon::A2_tfrsi, MVT::i32, {S});
unsigned Top = Results.top();
Results.push(Hexagon::V6_valignb, Ty, {Vb, Va, OpRef::res(Top)});
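The new logic above exploits a symmetry: extracting HwLen bytes starting at byte MinSrc of the concatenated pair is a right-alignment by MinSrc or, equivalently, a left-alignment by HwLen-MinSrc, so either 3-bit immediate form (valignbi/vlalignbi) can encode it whenever its amount fits. A hypothetical standalone helper restating the choice:

#include <cassert>

enum class AlignForm { RightImm, LeftImm, Register };

AlignForm chooseAlignForm(unsigned MinSrc, unsigned HwLen, unsigned &Amt) {
  assert(MinSrc < HwLen && "offset must lie inside the vector pair");
  if (MinSrc < 8) {             // isUInt<3>(MinSrc): V6_valignbi
    Amt = MinSrc;
    return AlignForm::RightImm;
  }
  if (HwLen - MinSrc < 8) {     // isUInt<3>(HwLen-MinSrc): V6_vlalignbi
    Amt = HwLen - MinSrc;
    return AlignForm::LeftImm;
  }
  Amt = MinSrc;                 // amount goes through A2_tfrsi + V6_valignb
  return AlignForm::Register;
}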
@@ -1139,25 +1190,6 @@ OpRef HvxSelector::packp(ShuffleMask SM, OpRef Va, OpRef Vb,
return concat(Out[0], Out[1], Results);
}
-OpRef HvxSelector::zerous(ShuffleMask SM, OpRef Va, ResultStack &Results) {
- DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
-
- int VecLen = SM.Mask.size();
- SmallVector<uint8_t,128> UsedBytes(VecLen);
- bool HasUnused = false;
- for (int I = 0; I != VecLen; ++I) {
- if (SM.Mask[I] != -1)
- UsedBytes[I] = 0xFF;
- else
- HasUnused = true;
- }
- if (!HasUnused)
- return Va;
- SDValue B = getVectorConstant(UsedBytes, SDLoc(Results.InpNode));
- Results.push(Hexagon::V6_vand, getSingleVT(MVT::i8), {Va, OpRef(B)});
- return OpRef::res(Results.top());
-}
-
OpRef HvxSelector::vmuxs(ArrayRef<uint8_t> Bytes, OpRef Va, OpRef Vb,
ResultStack &Results) {
DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
@@ -1279,6 +1311,8 @@ OpRef HvxSelector::shuffp2(ShuffleMask SM, OpRef Va, OpRef Vb,
return shuffp1(ShuffleMask(PackedMask), P, Results);
SmallVector<int,256> MaskL(VecLen), MaskR(VecLen);
+ splitMask(SM.Mask, MaskL, MaskR);
+
OpRef L = shuffp1(ShuffleMask(MaskL), Va, Results);
OpRef R = shuffp1(ShuffleMask(MaskR), Vb, Results);
if (!L.isValid() || !R.isValid())
@@ -1934,7 +1968,6 @@ void HvxSelector::selectShuffle(SDNode *N) {
// If the mask is all -1's, generate "undef".
if (!UseLeft && !UseRight) {
ISel.ReplaceNode(N, ISel.selectUndef(SDLoc(SN), ResTy).getNode());
- DAG.RemoveDeadNode(N);
return;
}
@@ -1976,8 +2009,8 @@ void HvxSelector::selectRor(SDNode *N) {
SDNode *NewN = nullptr;
if (auto *CN = dyn_cast<ConstantSDNode>(RotV.getNode())) {
- unsigned S = CN->getZExtValue();
- if (S % HST.getVectorLength() == 0) {
+ unsigned S = CN->getZExtValue() % HST.getVectorLength();
+ if (S == 0) {
NewN = VecV.getNode();
} else if (isUInt<3>(S)) {
SDValue C = DAG.getTargetConstant(S, dl, MVT::i32);
@@ -1990,6 +2023,15 @@ void HvxSelector::selectRor(SDNode *N) {
NewN = DAG.getMachineNode(Hexagon::V6_vror, dl, Ty, {VecV, RotV});
ISel.ReplaceNode(N, NewN);
+}
+
+void HvxSelector::selectVAlign(SDNode *N) {
+ SDValue Vv = N->getOperand(0);
+ SDValue Vu = N->getOperand(1);
+ SDValue Rt = N->getOperand(2);
+ SDNode *NewN = DAG.getMachineNode(Hexagon::V6_valignb, SDLoc(N),
+ N->getValueType(0), {Vv, Vu, Rt});
+ ISel.ReplaceNode(N, NewN);
DAG.RemoveDeadNode(N);
}
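selectVAlign maps the HexagonISD::VALIGN node straight onto V6_valignb. A scalar model of the assumed byte-align semantics (treat the two vectors as one double-wide value and take the N bytes starting at byte Rt):

#include <array>
#include <cstddef>
#include <cstdint>

template <std::size_t N>
std::array<uint8_t, N> valignModel(const std::array<uint8_t, N> &Hi,
                                   const std::array<uint8_t, N> &Lo,
                                   unsigned Rt) {
  std::array<uint8_t, N> Out{};
  unsigned S = Rt % N; // only the low bits of Rt matter (assumed)
  for (std::size_t I = 0; I != N; ++I)
    Out[I] = (I + S < N) ? Lo[I + S] : Hi[I + S - N];
  return Out;
}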
@@ -2001,7 +2043,15 @@ void HexagonDAGToDAGISel::SelectHvxRor(SDNode *N) {
HvxSelector(*this, *CurDAG).selectRor(N);
}
+void HexagonDAGToDAGISel::SelectHvxVAlign(SDNode *N) {
+ HvxSelector(*this, *CurDAG).selectVAlign(N);
+}
+
void HexagonDAGToDAGISel::SelectV65GatherPred(SDNode *N) {
+ if (!HST->usePackets()) {
+ report_fatal_error("Support for gather requires packets, "
+ "which are disabled");
+ }
const SDLoc &dl(N);
SDValue Chain = N->getOperand(0);
SDValue Address = N->getOperand(2);
@@ -2037,11 +2087,14 @@ void HexagonDAGToDAGISel::SelectV65GatherPred(SDNode *N) {
MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
- ReplaceUses(N, Result);
- CurDAG->RemoveDeadNode(N);
+ ReplaceNode(N, Result);
}
void HexagonDAGToDAGISel::SelectV65Gather(SDNode *N) {
+ if (!HST->usePackets()) {
+ report_fatal_error("Support for gather requires packets, "
+ "which are disabled");
+ }
const SDLoc &dl(N);
SDValue Chain = N->getOperand(0);
SDValue Address = N->getOperand(2);
@@ -2076,8 +2129,7 @@ void HexagonDAGToDAGISel::SelectV65Gather(SDNode *N) {
MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
- ReplaceUses(N, Result);
- CurDAG->RemoveDeadNode(N);
+ ReplaceNode(N, Result);
}
void HexagonDAGToDAGISel::SelectHVXDualOutput(SDNode *N) {
@@ -2120,5 +2172,3 @@ void HexagonDAGToDAGISel::SelectHVXDualOutput(SDNode *N) {
ReplaceUses(SDValue(N, 1), SDValue(Result, 1));
CurDAG->RemoveDeadNode(N);
}
-
-
diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp
index 0e0da2ddc400..604d84994b6c 100644
--- a/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -40,6 +40,7 @@
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
@@ -103,427 +104,52 @@ static cl::opt<int> MaxStoresPerMemsetOptSizeCL("max-store-memset-Os",
cl::Hidden, cl::ZeroOrMore, cl::init(4),
cl::desc("Max #stores to inline memset"));
+static cl::opt<bool> AlignLoads("hexagon-align-loads",
+ cl::Hidden, cl::init(false),
+ cl::desc("Rewrite unaligned loads as a pair of aligned loads"));
+
namespace {
class HexagonCCState : public CCState {
- unsigned NumNamedVarArgParams;
+ unsigned NumNamedVarArgParams = 0;
public:
- HexagonCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF,
+ HexagonCCState(CallingConv::ID CC, bool IsVarArg, MachineFunction &MF,
SmallVectorImpl<CCValAssign> &locs, LLVMContext &C,
- int NumNamedVarArgParams)
- : CCState(CC, isVarArg, MF, locs, C),
- NumNamedVarArgParams(NumNamedVarArgParams) {}
-
+ unsigned NumNamedArgs)
+ : CCState(CC, IsVarArg, MF, locs, C),
+ NumNamedVarArgParams(NumNamedArgs) {}
unsigned getNumNamedVarArgParams() const { return NumNamedVarArgParams; }
};
- enum StridedLoadKind {
- Even = 0,
- Odd,
- NoPattern
- };
-
} // end anonymous namespace
-// Implement calling convention for Hexagon.
-
-static const MVT LegalV64[] = { MVT::v64i8, MVT::v32i16, MVT::v16i32 };
-static const MVT LegalW64[] = { MVT::v128i8, MVT::v64i16, MVT::v32i32 };
-static const MVT LegalV128[] = { MVT::v128i8, MVT::v64i16, MVT::v32i32 };
-static const MVT LegalW128[] = { MVT::v256i8, MVT::v128i16, MVT::v64i32 };
-
-static bool
-CC_Hexagon(unsigned ValNo, MVT ValVT,
- MVT LocVT, CCValAssign::LocInfo LocInfo,
- ISD::ArgFlagsTy ArgFlags, CCState &State);
-
-static bool
-CC_Hexagon32(unsigned ValNo, MVT ValVT,
- MVT LocVT, CCValAssign::LocInfo LocInfo,
- ISD::ArgFlagsTy ArgFlags, CCState &State);
-
-static bool
-CC_Hexagon64(unsigned ValNo, MVT ValVT,
- MVT LocVT, CCValAssign::LocInfo LocInfo,
- ISD::ArgFlagsTy ArgFlags, CCState &State);
-
-static bool
-CC_HexagonVector(unsigned ValNo, MVT ValVT,
- MVT LocVT, CCValAssign::LocInfo LocInfo,
- ISD::ArgFlagsTy ArgFlags, CCState &State);
-
-static bool
-RetCC_Hexagon(unsigned ValNo, MVT ValVT,
- MVT LocVT, CCValAssign::LocInfo LocInfo,
- ISD::ArgFlagsTy ArgFlags, CCState &State);
-
-static bool
-RetCC_Hexagon32(unsigned ValNo, MVT ValVT,
- MVT LocVT, CCValAssign::LocInfo LocInfo,
- ISD::ArgFlagsTy ArgFlags, CCState &State);
-
-static bool
-RetCC_Hexagon64(unsigned ValNo, MVT ValVT,
- MVT LocVT, CCValAssign::LocInfo LocInfo,
- ISD::ArgFlagsTy ArgFlags, CCState &State);
-
-static bool
-RetCC_HexagonVector(unsigned ValNo, MVT ValVT,
- MVT LocVT, CCValAssign::LocInfo LocInfo,
- ISD::ArgFlagsTy ArgFlags, CCState &State);
-
-static bool
-CC_Hexagon_VarArg (unsigned ValNo, MVT ValVT,
- MVT LocVT, CCValAssign::LocInfo LocInfo,
- ISD::ArgFlagsTy ArgFlags, CCState &State) {
- HexagonCCState &HState = static_cast<HexagonCCState &>(State);
-
- if (ValNo < HState.getNumNamedVarArgParams()) {
- // Deal with named arguments.
- return CC_Hexagon(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State);
- }
-
- // Deal with un-named arguments.
- unsigned Offset;
- if (ArgFlags.isByVal()) {
- // If pass-by-value, the size allocated on stack is decided
- // by ArgFlags.getByValSize(), not by the size of LocVT.
- Offset = State.AllocateStack(ArgFlags.getByValSize(),
- ArgFlags.getByValAlign());
- State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
- return false;
- }
- if (LocVT == MVT::i1 || LocVT == MVT::i8 || LocVT == MVT::i16) {
- LocVT = MVT::i32;
- ValVT = MVT::i32;
- if (ArgFlags.isSExt())
- LocInfo = CCValAssign::SExt;
- else if (ArgFlags.isZExt())
- LocInfo = CCValAssign::ZExt;
- else
- LocInfo = CCValAssign::AExt;
- }
- if (LocVT == MVT::i32 || LocVT == MVT::f32) {
- Offset = State.AllocateStack(4, 4);
- State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
- return false;
- }
- if (LocVT == MVT::i64 || LocVT == MVT::f64) {
- Offset = State.AllocateStack(8, 8);
- State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
- return false;
- }
- if (LocVT == MVT::v2i64 || LocVT == MVT::v4i32 || LocVT == MVT::v8i16 ||
- LocVT == MVT::v16i8) {
- Offset = State.AllocateStack(16, 16);
- State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
- return false;
- }
- if (LocVT == MVT::v4i64 || LocVT == MVT::v8i32 || LocVT == MVT::v16i16 ||
- LocVT == MVT::v32i8) {
- Offset = State.AllocateStack(32, 32);
- State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
- return false;
- }
- if (LocVT == MVT::v16i32 || LocVT == MVT::v32i16 ||
- LocVT == MVT::v64i8 || LocVT == MVT::v512i1) {
- Offset = State.AllocateStack(64, 64);
- State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
- return false;
- }
- if (LocVT == MVT::v32i32 || LocVT == MVT::v64i16 ||
- LocVT == MVT::v128i8 || LocVT == MVT::v1024i1) {
- Offset = State.AllocateStack(128, 128);
- State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
- return false;
- }
- if (LocVT == MVT::v64i32 || LocVT == MVT::v128i16 ||
- LocVT == MVT::v256i8) {
- Offset = State.AllocateStack(256, 256);
- State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
- return false;
- }
-
- llvm_unreachable(nullptr);
-}
-
-static bool CC_Hexagon (unsigned ValNo, MVT ValVT, MVT LocVT,
- CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State) {
- if (ArgFlags.isByVal()) {
- // Passed on stack.
- unsigned Offset = State.AllocateStack(ArgFlags.getByValSize(),
- ArgFlags.getByValAlign());
- State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
- return false;
- }
-
- if (LocVT == MVT::i1) {
- LocVT = MVT::i32;
- } else if (LocVT == MVT::i8 || LocVT == MVT::i16) {
- LocVT = MVT::i32;
- ValVT = MVT::i32;
- if (ArgFlags.isSExt())
- LocInfo = CCValAssign::SExt;
- else if (ArgFlags.isZExt())
- LocInfo = CCValAssign::ZExt;
- else
- LocInfo = CCValAssign::AExt;
- } else if (LocVT == MVT::v4i8 || LocVT == MVT::v2i16) {
- LocVT = MVT::i32;
- LocInfo = CCValAssign::BCvt;
- } else if (LocVT == MVT::v8i8 || LocVT == MVT::v4i16 || LocVT == MVT::v2i32) {
- LocVT = MVT::i64;
- LocInfo = CCValAssign::BCvt;
- }
-
- if (LocVT == MVT::i32 || LocVT == MVT::f32) {
- if (!CC_Hexagon32(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State))
- return false;
- }
-
- if (LocVT == MVT::i64 || LocVT == MVT::f64) {
- if (!CC_Hexagon64(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State))
- return false;
- }
-
- if (LocVT == MVT::v8i32 || LocVT == MVT::v16i16 || LocVT == MVT::v32i8) {
- unsigned Offset = State.AllocateStack(ArgFlags.getByValSize(), 32);
- State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
- return false;
- }
-
- auto &HST = State.getMachineFunction().getSubtarget<HexagonSubtarget>();
- if (HST.isHVXVectorType(LocVT)) {
- if (!CC_HexagonVector(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State))
- return false;
- }
-
- return true; // CC didn't match.
-}
+// Implement calling convention for Hexagon.
-static bool CC_Hexagon32(unsigned ValNo, MVT ValVT,
- MVT LocVT, CCValAssign::LocInfo LocInfo,
- ISD::ArgFlagsTy ArgFlags, CCState &State) {
- static const MCPhysReg RegList[] = {
- Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3, Hexagon::R4,
- Hexagon::R5
+static bool CC_SkipOdd(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags, CCState &State) {
+ static const MCPhysReg ArgRegs[] = {
+ Hexagon::R0, Hexagon::R1, Hexagon::R2,
+ Hexagon::R3, Hexagon::R4, Hexagon::R5
};
- if (unsigned Reg = State.AllocateReg(RegList)) {
- State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
- return false;
- }
+ const unsigned NumArgRegs = array_lengthof(ArgRegs);
+ unsigned RegNum = State.getFirstUnallocated(ArgRegs);
- unsigned Offset = State.AllocateStack(4, 4);
- State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
- return false;
-}
-
-static bool CC_Hexagon64(unsigned ValNo, MVT ValVT,
- MVT LocVT, CCValAssign::LocInfo LocInfo,
- ISD::ArgFlagsTy ArgFlags, CCState &State) {
- if (unsigned Reg = State.AllocateReg(Hexagon::D0)) {
- State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
- return false;
- }
-
- static const MCPhysReg RegList1[] = {
- Hexagon::D1, Hexagon::D2
- };
- static const MCPhysReg RegList2[] = {
- Hexagon::R1, Hexagon::R3
- };
- if (unsigned Reg = State.AllocateReg(RegList1, RegList2)) {
- State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
- return false;
- }
+ // RegNum is an index into ArgRegs: skip a register if RegNum is odd.
+ if (RegNum != NumArgRegs && RegNum % 2 == 1)
+ State.AllocateReg(ArgRegs[RegNum]);
- unsigned Offset = State.AllocateStack(8, 8, Hexagon::D2);
- State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+ // Always return false here, as this function only makes sure that the first
+ // unallocated register has an even register number and does not actually
+ // allocate a register for the current argument.
return false;
}
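CC_SkipOdd is a custom hook for the TableGen-generated convention included below: it only realigns the register file so that a 64-bit value starts at an even register, and it always returns false so the generated code proceeds to the normal allocation rules for the argument itself. A toy model of the alignment effect (illustrative, not the hook):

unsigned skipOdd(unsigned FirstFree, unsigned NumArgRegs) {
  // FirstFree indexes {R0..R5}; burning one odd register makes the next
  // allocation land on a pair boundary (R1:R0, R3:R2, R5:R4).
  if (FirstFree != NumArgRegs && FirstFree % 2 == 1)
    ++FirstFree;
  return FirstFree;
}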
-static bool CC_HexagonVector(unsigned ValNo, MVT ValVT,
- MVT LocVT, CCValAssign::LocInfo LocInfo,
- ISD::ArgFlagsTy ArgFlags, CCState &State) {
- static const MCPhysReg VecLstS[] = {
- Hexagon::V0, Hexagon::V1, Hexagon::V2, Hexagon::V3, Hexagon::V4,
- Hexagon::V5, Hexagon::V6, Hexagon::V7, Hexagon::V8, Hexagon::V9,
- Hexagon::V10, Hexagon::V11, Hexagon::V12, Hexagon::V13, Hexagon::V14,
- Hexagon::V15
- };
- static const MCPhysReg VecLstD[] = {
- Hexagon::W0, Hexagon::W1, Hexagon::W2, Hexagon::W3, Hexagon::W4,
- Hexagon::W5, Hexagon::W6, Hexagon::W7
- };
- auto &MF = State.getMachineFunction();
- auto &HST = MF.getSubtarget<HexagonSubtarget>();
-
- if (HST.useHVX64BOps() &&
- (LocVT == MVT::v16i32 || LocVT == MVT::v32i16 ||
- LocVT == MVT::v64i8 || LocVT == MVT::v512i1)) {
- if (unsigned Reg = State.AllocateReg(VecLstS)) {
- State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
- return false;
- }
- unsigned Offset = State.AllocateStack(64, 64);
- State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
- return false;
- }
- if (HST.useHVX64BOps() && (LocVT == MVT::v32i32 ||
- LocVT == MVT::v64i16 || LocVT == MVT::v128i8)) {
- if (unsigned Reg = State.AllocateReg(VecLstD)) {
- State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
- return false;
- }
- unsigned Offset = State.AllocateStack(128, 128);
- State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
- return false;
- }
- // 128B Mode
- if (HST.useHVX128BOps() && (LocVT == MVT::v64i32 ||
- LocVT == MVT::v128i16 || LocVT == MVT::v256i8)) {
- if (unsigned Reg = State.AllocateReg(VecLstD)) {
- State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
- return false;
- }
- unsigned Offset = State.AllocateStack(256, 256);
- State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
- return false;
- }
- if (HST.useHVX128BOps() &&
- (LocVT == MVT::v32i32 || LocVT == MVT::v64i16 ||
- LocVT == MVT::v128i8 || LocVT == MVT::v1024i1)) {
- if (unsigned Reg = State.AllocateReg(VecLstS)) {
- State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
- return false;
- }
- unsigned Offset = State.AllocateStack(128, 128);
- State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
- return false;
- }
- return true;
-}
-
-static bool RetCC_Hexagon(unsigned ValNo, MVT ValVT,
- MVT LocVT, CCValAssign::LocInfo LocInfo,
- ISD::ArgFlagsTy ArgFlags, CCState &State) {
- auto &MF = State.getMachineFunction();
- auto &HST = MF.getSubtarget<HexagonSubtarget>();
-
- if (LocVT == MVT::i1) {
- // Return values of type MVT::i1 still need to be assigned to R0, but
- // the value type needs to remain i1. LowerCallResult will deal with it,
- // but it needs to recognize i1 as the value type.
- LocVT = MVT::i32;
- } else if (LocVT == MVT::i8 || LocVT == MVT::i16) {
- LocVT = MVT::i32;
- ValVT = MVT::i32;
- if (ArgFlags.isSExt())
- LocInfo = CCValAssign::SExt;
- else if (ArgFlags.isZExt())
- LocInfo = CCValAssign::ZExt;
- else
- LocInfo = CCValAssign::AExt;
- } else if (LocVT == MVT::v4i8 || LocVT == MVT::v2i16) {
- LocVT = MVT::i32;
- LocInfo = CCValAssign::BCvt;
- } else if (LocVT == MVT::v8i8 || LocVT == MVT::v4i16 || LocVT == MVT::v2i32) {
- LocVT = MVT::i64;
- LocInfo = CCValAssign::BCvt;
- } else if (LocVT == MVT::v64i8 || LocVT == MVT::v32i16 ||
- LocVT == MVT::v16i32 || LocVT == MVT::v512i1) {
- LocVT = MVT::v16i32;
- ValVT = MVT::v16i32;
- LocInfo = CCValAssign::Full;
- } else if (LocVT == MVT::v128i8 || LocVT == MVT::v64i16 ||
- LocVT == MVT::v32i32 ||
- (LocVT == MVT::v1024i1 && HST.useHVX128BOps())) {
- LocVT = MVT::v32i32;
- ValVT = MVT::v32i32;
- LocInfo = CCValAssign::Full;
- } else if (LocVT == MVT::v256i8 || LocVT == MVT::v128i16 ||
- LocVT == MVT::v64i32) {
- LocVT = MVT::v64i32;
- ValVT = MVT::v64i32;
- LocInfo = CCValAssign::Full;
- }
- if (LocVT == MVT::i32 || LocVT == MVT::f32) {
- if (!RetCC_Hexagon32(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State))
- return false;
- }
-
- if (LocVT == MVT::i64 || LocVT == MVT::f64) {
- if (!RetCC_Hexagon64(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State))
- return false;
- }
- if (LocVT == MVT::v16i32 || LocVT == MVT::v32i32 || LocVT == MVT::v64i32) {
- if (!RetCC_HexagonVector(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State))
- return false;
- }
- return true; // CC didn't match.
-}
-
-static bool RetCC_Hexagon32(unsigned ValNo, MVT ValVT,
- MVT LocVT, CCValAssign::LocInfo LocInfo,
- ISD::ArgFlagsTy ArgFlags, CCState &State) {
- if (LocVT == MVT::i32 || LocVT == MVT::f32) {
- // Note that use of registers beyond R1 is not ABI compliant. However there
- // are (experimental) IR passes which generate internal functions that
- // return structs using these additional registers.
- static const uint16_t RegList[] = { Hexagon::R0, Hexagon::R1,
- Hexagon::R2, Hexagon::R3,
- Hexagon::R4, Hexagon::R5 };
- if (unsigned Reg = State.AllocateReg(RegList)) {
- State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
- return false;
- }
- }
-
- return true;
-}
-
-static bool RetCC_Hexagon64(unsigned ValNo, MVT ValVT,
- MVT LocVT, CCValAssign::LocInfo LocInfo,
- ISD::ArgFlagsTy ArgFlags, CCState &State) {
- if (LocVT == MVT::i64 || LocVT == MVT::f64) {
- if (unsigned Reg = State.AllocateReg(Hexagon::D0)) {
- State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
- return false;
- }
- }
-
- return true;
-}
+#include "HexagonGenCallingConv.inc"
-static bool RetCC_HexagonVector(unsigned ValNo, MVT ValVT,
- MVT LocVT, CCValAssign::LocInfo LocInfo,
- ISD::ArgFlagsTy ArgFlags, CCState &State) {
- auto &MF = State.getMachineFunction();
- auto &HST = MF.getSubtarget<HexagonSubtarget>();
-
- if (LocVT == MVT::v16i32) {
- if (unsigned Reg = State.AllocateReg(Hexagon::V0)) {
- State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
- return false;
- }
- } else if (LocVT == MVT::v32i32) {
- unsigned Req = HST.useHVX128BOps() ? Hexagon::V0 : Hexagon::W0;
- if (unsigned Reg = State.AllocateReg(Req)) {
- State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
- return false;
- }
- } else if (LocVT == MVT::v64i32) {
- if (unsigned Reg = State.AllocateReg(Hexagon::W0)) {
- State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
- return false;
- }
- }
-
- return true;
-}
void HexagonTargetLowering::promoteLdStType(MVT VT, MVT PromotedLdStVT) {
if (VT != PromotedLdStVT) {
@@ -558,11 +184,14 @@ static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
bool
HexagonTargetLowering::CanLowerReturn(
- CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
+ CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
LLVMContext &Context) const {
SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
+ CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
+
+ if (MF.getSubtarget<HexagonSubtarget>().useHVXOps())
+ return CCInfo.CheckReturn(Outs, RetCC_Hexagon_HVX);
return CCInfo.CheckReturn(Outs, RetCC_Hexagon);
}
@@ -571,7 +200,7 @@ HexagonTargetLowering::CanLowerReturn(
// the value is stored in memory pointed by a pointer passed by caller.
SDValue
HexagonTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
- bool isVarArg,
+ bool IsVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SDLoc &dl, SelectionDAG &DAG) const {
@@ -579,11 +208,14 @@ HexagonTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
SmallVector<CCValAssign, 16> RVLocs;
// CCState - Info about the registers and stack slot.
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
+ CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
*DAG.getContext());
// Analyze return values of ISD::RET
- CCInfo.AnalyzeReturn(Outs, RetCC_Hexagon);
+ if (Subtarget.useHVXOps())
+ CCInfo.AnalyzeReturn(Outs, RetCC_Hexagon_HVX);
+ else
+ CCInfo.AnalyzeReturn(Outs, RetCC_Hexagon);
SDValue Flag;
SmallVector<SDValue, 4> RetOps(1, Chain);
@@ -624,17 +256,20 @@ bool HexagonTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
/// being lowered. Returns a SDNode with the same number of values as the
/// ISD::CALL.
SDValue HexagonTargetLowering::LowerCallResult(
- SDValue Chain, SDValue Glue, CallingConv::ID CallConv, bool isVarArg,
+ SDValue Chain, SDValue Glue, CallingConv::ID CallConv, bool IsVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
const SmallVectorImpl<SDValue> &OutVals, SDValue Callee) const {
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
+ CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
*DAG.getContext());
- CCInfo.AnalyzeCallResult(Ins, RetCC_Hexagon);
+ if (Subtarget.useHVXOps())
+ CCInfo.AnalyzeCallResult(Ins, RetCC_Hexagon_HVX);
+ else
+ CCInfo.AnalyzeCallResult(Ins, RetCC_Hexagon);
// Copy all of the result registers out of their specified physreg.
for (unsigned i = 0; i != RVLocs.size(); ++i) {
@@ -683,67 +318,57 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
SDValue Chain = CLI.Chain;
SDValue Callee = CLI.Callee;
- bool &IsTailCall = CLI.IsTailCall;
CallingConv::ID CallConv = CLI.CallConv;
bool IsVarArg = CLI.IsVarArg;
bool DoesNotReturn = CLI.DoesNotReturn;
- bool IsStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
+ bool IsStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
auto PtrVT = getPointerTy(MF.getDataLayout());
- // Check for varargs.
- unsigned NumNamedVarArgParams = -1U;
- if (GlobalAddressSDNode *GAN = dyn_cast<GlobalAddressSDNode>(Callee)) {
- const GlobalValue *GV = GAN->getGlobal();
- Callee = DAG.getTargetGlobalAddress(GV, dl, MVT::i32);
- if (const Function* F = dyn_cast<Function>(GV)) {
- // If a function has zero args and is a vararg function, that's
- // disallowed so it must be an undeclared function. Do not assume
- // varargs if the callee is undefined.
- if (F->isVarArg() && F->getFunctionType()->getNumParams() != 0)
- NumNamedVarArgParams = F->getFunctionType()->getNumParams();
- }
- }
+ unsigned NumParams = CLI.CS.getInstruction()
+ ? CLI.CS.getFunctionType()->getNumParams()
+ : 0;
+ if (GlobalAddressSDNode *GAN = dyn_cast<GlobalAddressSDNode>(Callee))
+ Callee = DAG.getTargetGlobalAddress(GAN->getGlobal(), dl, MVT::i32);
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
- HexagonCCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs,
- *DAG.getContext(), NumNamedVarArgParams);
+ HexagonCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext(),
+ NumParams);
- if (IsVarArg)
- CCInfo.AnalyzeCallOperands(Outs, CC_Hexagon_VarArg);
+ if (Subtarget.useHVXOps())
+ CCInfo.AnalyzeCallOperands(Outs, CC_Hexagon_HVX);
else
CCInfo.AnalyzeCallOperands(Outs, CC_Hexagon);
auto Attr = MF.getFunction().getFnAttribute("disable-tail-calls");
if (Attr.getValueAsString() == "true")
- IsTailCall = false;
+ CLI.IsTailCall = false;
- if (IsTailCall) {
+ if (CLI.IsTailCall) {
bool StructAttrFlag = MF.getFunction().hasStructRetAttr();
- IsTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
- IsVarArg, IsStructRet,
- StructAttrFlag,
- Outs, OutVals, Ins, DAG);
+ CLI.IsTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
+ IsVarArg, IsStructRet, StructAttrFlag, Outs,
+ OutVals, Ins, DAG);
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
if (VA.isMemLoc()) {
- IsTailCall = false;
+ CLI.IsTailCall = false;
break;
}
}
- DEBUG(dbgs() << (IsTailCall ? "Eligible for Tail Call\n"
- : "Argument must be passed on stack. "
- "Not eligible for Tail Call\n"));
+ LLVM_DEBUG(dbgs() << (CLI.IsTailCall ? "Eligible for Tail Call\n"
+ : "Argument must be passed on stack. "
+ "Not eligible for Tail Call\n"));
}
// Get a count of how many bytes are to be pushed on the stack.
unsigned NumBytes = CCInfo.getNextStackOffset();
SmallVector<std::pair<unsigned, SDValue>, 16> RegsToPass;
SmallVector<SDValue, 8> MemOpChains;
- auto &HRI = *Subtarget.getRegisterInfo();
+ const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo();
SDValue StackPtr =
DAG.getCopyFromReg(Chain, dl, HRI.getStackRegister(), PtrVT);
@@ -789,7 +414,7 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
VA.getLocVT().getStoreSizeInBits() >> 3);
if (Flags.isByVal()) {
// The argument is a struct passed by value. According to LLVM, "Arg"
- // is is pointer.
+ // is a pointer.
MemOpChains.push_back(CreateCopyOfByValArgument(Arg, MemAddr, Chain,
Flags, DAG, dl));
} else {
@@ -807,14 +432,10 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
}
- if (NeedsArgAlign && Subtarget.hasV60TOps()) {
- DEBUG(dbgs() << "Function needs byte stack align due to call args\n");
- // V6 vectors passed by value have 64 or 128 byte alignment depending
- // on whether we are 64 byte vector mode or 128 byte.
- bool UseHVX128B = Subtarget.useHVX128BOps();
- assert(Subtarget.useHVXOps());
- const unsigned ObjAlign = UseHVX128B ? 128 : 64;
- LargestAlignSeen = std::max(LargestAlignSeen, ObjAlign);
+ if (NeedsArgAlign && Subtarget.hasV60Ops()) {
+ LLVM_DEBUG(dbgs() << "Function needs byte stack align due to call args\n");
+ unsigned VecAlign = HRI.getSpillAlignment(Hexagon::HvxVRRegClass);
+ LargestAlignSeen = std::max(LargestAlignSeen, VecAlign);
MFI.ensureMaxAlignment(LargestAlignSeen);
}
// Transform all store nodes into one single node because all store
@@ -823,7 +444,7 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
SDValue Glue;
- if (!IsTailCall) {
+ if (!CLI.IsTailCall) {
Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
Glue = Chain.getValue(1);
}
@@ -832,7 +453,7 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// chain and flag operands which copy the outgoing args into registers.
// The Glue is necessary since all emitted instructions must be
// stuck together.
- if (!IsTailCall) {
+ if (!CLI.IsTailCall) {
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
RegsToPass[i].second, Glue);
@@ -891,7 +512,7 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
if (Glue.getNode())
Ops.push_back(Glue);
- if (IsTailCall) {
+ if (CLI.IsTailCall) {
MFI.setHasTailCall();
return DAG.getNode(HexagonISD::TC_RETURN, dl, NodeTys, Ops);
}
@@ -916,66 +537,36 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
InVals, OutVals, Callee);
}
-static bool getIndexedAddressParts(SDNode *Ptr, EVT VT,
- SDValue &Base, SDValue &Offset,
- bool &IsInc, SelectionDAG &DAG) {
- if (Ptr->getOpcode() != ISD::ADD)
- return false;
-
- auto &HST = static_cast<const HexagonSubtarget&>(DAG.getSubtarget());
-
- bool ValidHVX128BType =
- HST.useHVX128BOps() && (VT == MVT::v32i32 ||
- VT == MVT::v64i16 || VT == MVT::v128i8);
- bool ValidHVXType =
- HST.useHVX64BOps() && (VT == MVT::v16i32 ||
- VT == MVT::v32i16 || VT == MVT::v64i8);
-
- if (ValidHVX128BType || ValidHVXType || VT == MVT::i64 || VT == MVT::i32 ||
- VT == MVT::i16 || VT == MVT::i8) {
- IsInc = (Ptr->getOpcode() == ISD::ADD);
- Base = Ptr->getOperand(0);
- Offset = Ptr->getOperand(1);
- // Ensure that Offset is a constant.
- return isa<ConstantSDNode>(Offset);
- }
-
- return false;
-}
-
-/// getPostIndexedAddressParts - returns true by value, base pointer and
-/// offset pointer and addressing mode by reference if this node can be
-/// combined with a load / store to form a post-indexed load / store.
+/// Returns true by value, base pointer and offset pointer and addressing
+/// mode by reference if this node can be combined with a load / store to
+/// form a post-indexed load / store.
bool HexagonTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
- SDValue &Base,
- SDValue &Offset,
- ISD::MemIndexedMode &AM,
- SelectionDAG &DAG) const
-{
- EVT VT;
-
- if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
- VT = LD->getMemoryVT();
- } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
- VT = ST->getMemoryVT();
- if (ST->getValue().getValueType() == MVT::i64 && ST->isTruncatingStore())
- return false;
- } else {
+ SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG) const {
+ LSBaseSDNode *LSN = dyn_cast<LSBaseSDNode>(N);
+ if (!LSN)
+ return false;
+ EVT VT = LSN->getMemoryVT();
+ if (!VT.isSimple())
+ return false;
+ bool IsLegalType = VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
+ VT == MVT::i64 || VT == MVT::f32 || VT == MVT::f64 ||
+ VT == MVT::v2i16 || VT == MVT::v2i32 || VT == MVT::v4i8 ||
+ VT == MVT::v4i16 || VT == MVT::v8i8 ||
+ Subtarget.isHVXVectorType(VT.getSimpleVT());
+ if (!IsLegalType)
return false;
- }
- bool IsInc = false;
- bool isLegal = getIndexedAddressParts(Op, VT, Base, Offset, IsInc, DAG);
- if (isLegal) {
- auto &HII = *Subtarget.getInstrInfo();
- int32_t OffsetVal = cast<ConstantSDNode>(Offset.getNode())->getSExtValue();
- if (HII.isValidAutoIncImm(VT, OffsetVal)) {
- AM = IsInc ? ISD::POST_INC : ISD::POST_DEC;
- return true;
- }
- }
+ if (Op->getOpcode() != ISD::ADD)
+ return false;
+ Base = Op->getOperand(0);
+ Offset = Op->getOperand(1);
+ if (!isa<ConstantSDNode>(Offset.getNode()))
+ return false;
+ AM = ISD::POST_INC;
- return false;
+ int32_t V = cast<ConstantSDNode>(Offset.getNode())->getSExtValue();
+ return Subtarget.getInstrInfo()->isValidAutoIncImm(VT, V);
}
SDValue
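The rewrite above collapses the two-helper scheme into one check: the node must be a load or store of a simple legal type whose address is (add base, constant), with the constant in the legal auto-increment range for that type. Only POST_INC is reported, which matches the old code as well, since it bailed out on anything but ISD::ADD and so could never set POST_DEC; negative constants cover decrements. A minimal sketch of the new candidate test (assumes LLVM headers; the target-specific range check is elided):

#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;

static bool looksPostIndexed(const SDNode *Addr) {
  // Shape test only: base plus constant offset, e.g. the address of a
  // Hexagon post-increment access such as r1 = memw(r0++#4).
  return Addr->getOpcode() == ISD::ADD &&
         isa<ConstantSDNode>(Addr->getOperand(1).getNode());
}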
@@ -1080,7 +671,7 @@ HexagonTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
if (A == 0)
A = HFI.getStackAlignment();
- DEBUG({
+ LLVM_DEBUG({
dbgs () << __func__ << " Align: " << A << " Size: ";
Size.getNode()->dump(&DAG);
dbgs() << "\n";
@@ -1095,20 +686,22 @@ HexagonTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
}
SDValue HexagonTargetLowering::LowerFormalArguments(
- SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
+ SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
- MachineRegisterInfo &RegInfo = MF.getRegInfo();
- auto &FuncInfo = *MF.getInfo<HexagonMachineFunctionInfo>();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
- *DAG.getContext());
+ HexagonCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext(),
+ MF.getFunction().getFunctionType()->getNumParams());
- CCInfo.AnalyzeFormalArguments(Ins, CC_Hexagon);
+ if (Subtarget.useHVXOps())
+ CCInfo.AnalyzeFormalArguments(Ins, CC_Hexagon_HVX);
+ else
+ CCInfo.AnalyzeFormalArguments(Ins, CC_Hexagon);
// For LLVM, in the case when returning a struct by value (>8byte),
// the first argument is a pointer that points to the location on caller's
@@ -1117,110 +710,62 @@ SDValue HexagonTargetLowering::LowerFormalArguments(
// equal to) 8 bytes. If not, no address will be passed into the callee and
// the callee returns the result directly through R0/R1.
- SmallVector<SDValue, 8> MemOps;
+ auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>();
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
ISD::ArgFlagsTy Flags = Ins[i].Flags;
- unsigned ObjSize;
- unsigned StackLocation;
- int FI;
-
- if ( (VA.isRegLoc() && !Flags.isByVal())
- || (VA.isRegLoc() && Flags.isByVal() && Flags.getByValSize() > 8)) {
- // Arguments passed in registers
- // 1. int, long long, ptr args that get allocated in register.
- // 2. Large struct that gets an register to put its address in.
- EVT RegVT = VA.getLocVT();
- if (RegVT == MVT::i8 || RegVT == MVT::i16 ||
- RegVT == MVT::i32 || RegVT == MVT::f32) {
- unsigned VReg =
- RegInfo.createVirtualRegister(&Hexagon::IntRegsRegClass);
- RegInfo.addLiveIn(VA.getLocReg(), VReg);
- if (VA.getLocInfo() == CCValAssign::BCvt)
- RegVT = VA.getValVT();
- SDValue Copy = DAG.getCopyFromReg(Chain, dl, VReg, RegVT);
- // Treat values of type MVT::i1 specially: they are passed in
- // registers of type i32, but they need to remain as values of
- // type i1 for consistency of the argument lowering.
- if (VA.getValVT() == MVT::i1) {
- // Generate a copy into a predicate register and use the value
- // of the register as the "InVal".
- unsigned PReg =
- RegInfo.createVirtualRegister(&Hexagon::PredRegsRegClass);
- SDNode *T = DAG.getMachineNode(Hexagon::C2_tfrrp, dl, MVT::i1,
- Copy.getValue(0));
- Copy = DAG.getCopyToReg(Copy.getValue(1), dl, PReg, SDValue(T, 0));
- Copy = DAG.getCopyFromReg(Copy, dl, PReg, MVT::i1);
- }
- InVals.push_back(Copy);
- Chain = Copy.getValue(1);
- } else if (RegVT == MVT::i64 || RegVT == MVT::f64) {
- unsigned VReg =
- RegInfo.createVirtualRegister(&Hexagon::DoubleRegsRegClass);
- RegInfo.addLiveIn(VA.getLocReg(), VReg);
- if (VA.getLocInfo() == CCValAssign::BCvt)
- RegVT = VA.getValVT();
- InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT));
-
- // Single Vector
- } else if ((RegVT == MVT::v16i32 ||
- RegVT == MVT::v32i16 || RegVT == MVT::v64i8)) {
- unsigned VReg =
- RegInfo.createVirtualRegister(&Hexagon::HvxVRRegClass);
- RegInfo.addLiveIn(VA.getLocReg(), VReg);
- InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT));
- } else if (Subtarget.useHVX128BOps() &&
- ((RegVT == MVT::v32i32 ||
- RegVT == MVT::v64i16 || RegVT == MVT::v128i8))) {
- unsigned VReg =
- RegInfo.createVirtualRegister(&Hexagon::HvxVRRegClass);
- RegInfo.addLiveIn(VA.getLocReg(), VReg);
- InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT));
-
- // Double Vector
- } else if ((RegVT == MVT::v32i32 ||
- RegVT == MVT::v64i16 || RegVT == MVT::v128i8)) {
- unsigned VReg =
- RegInfo.createVirtualRegister(&Hexagon::HvxWRRegClass);
- RegInfo.addLiveIn(VA.getLocReg(), VReg);
- InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT));
- } else if (Subtarget.useHVX128BOps() &&
- ((RegVT == MVT::v64i32 ||
- RegVT == MVT::v128i16 || RegVT == MVT::v256i8))) {
- unsigned VReg =
- RegInfo.createVirtualRegister(&Hexagon::HvxWRRegClass);
- RegInfo.addLiveIn(VA.getLocReg(), VReg);
- InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT));
- } else if (RegVT == MVT::v512i1 || RegVT == MVT::v1024i1) {
- assert(0 && "need to support VecPred regs");
- unsigned VReg =
- RegInfo.createVirtualRegister(&Hexagon::HvxQRRegClass);
- RegInfo.addLiveIn(VA.getLocReg(), VReg);
- InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT));
+ bool ByVal = Flags.isByVal();
+
+ // Arguments passed in registers:
+ // 1. 32- and 64-bit values and HVX vectors are passed directly,
+ // 2. Large structs are passed via an address, and the address is
+ // passed in a register.
+ if (VA.isRegLoc() && ByVal && Flags.getByValSize() <= 8)
+ llvm_unreachable("ByValSize must be bigger than 8 bytes");
+
+ bool InReg = VA.isRegLoc() &&
+ (!ByVal || (ByVal && Flags.getByValSize() > 8));
+
+ if (InReg) {
+ MVT RegVT = VA.getLocVT();
+ if (VA.getLocInfo() == CCValAssign::BCvt)
+ RegVT = VA.getValVT();
+
+ const TargetRegisterClass *RC = getRegClassFor(RegVT);
+ unsigned VReg = MRI.createVirtualRegister(RC);
+ SDValue Copy = DAG.getCopyFromReg(Chain, dl, VReg, RegVT);
+
+ // Treat values of type MVT::i1 specially: they are passed in
+ // registers of type i32, but they need to remain as values of
+ // type i1 for consistency of the argument lowering.
+ if (VA.getValVT() == MVT::i1) {
+ assert(RegVT.getSizeInBits() <= 32);
+ SDValue T = DAG.getNode(ISD::AND, dl, RegVT,
+ Copy, DAG.getConstant(1, dl, RegVT));
+ Copy = DAG.getSetCC(dl, MVT::i1, T, DAG.getConstant(0, dl, RegVT),
+ ISD::SETNE);
} else {
- assert (0);
+#ifndef NDEBUG
+ unsigned RegSize = RegVT.getSizeInBits();
+ assert(RegSize == 32 || RegSize == 64 ||
+ Subtarget.isHVXVectorType(RegVT));
+#endif
}
- } else if (VA.isRegLoc() && Flags.isByVal() && Flags.getByValSize() <= 8) {
- assert (0 && "ByValSize must be bigger than 8 bytes");
+ InVals.push_back(Copy);
+ MRI.addLiveIn(VA.getLocReg(), VReg);
} else {
- // Sanity check.
- assert(VA.isMemLoc());
-
- if (Flags.isByVal()) {
- // If it's a byval parameter, then we need to compute the
- // "real" size, not the size of the pointer.
- ObjSize = Flags.getByValSize();
- } else {
- ObjSize = VA.getLocVT().getStoreSizeInBits() >> 3;
- }
+ assert(VA.isMemLoc() && "Argument should be passed in memory");
- StackLocation = HEXAGON_LRFP_SIZE + VA.getLocMemOffset();
- // Create the frame index object for this incoming parameter...
- FI = MFI.CreateFixedObject(ObjSize, StackLocation, true);
+ // If it's a byval parameter, then we need to compute the
+ // "real" size, not the size of the pointer.
+ unsigned ObjSize = Flags.isByVal()
+ ? Flags.getByValSize()
+ : VA.getLocVT().getStoreSizeInBits() / 8;
- // Create the SelectionDAG nodes cordl, responding to a load
- // from this parameter.
+ // Create the frame index object for this incoming parameter.
+ int Offset = HEXAGON_LRFP_SIZE + VA.getLocMemOffset();
+ int FI = MFI.CreateFixedObject(ObjSize, Offset, true);
SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
if (Flags.isByVal()) {
@@ -1229,22 +774,19 @@ SDValue HexagonTargetLowering::LowerFormalArguments(
// location.
InVals.push_back(FIN);
} else {
- InVals.push_back(
- DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo()));
+ SDValue L = DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
+ MachinePointerInfo::getFixedStack(MF, FI, 0));
+ InVals.push_back(L);
}
}
}
- if (!MemOps.empty())
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
- if (isVarArg) {
+ if (IsVarArg) {
// This will point to the next argument passed via stack.
- int FrameIndex = MFI.CreateFixedObject(Hexagon_PointerSize,
- HEXAGON_LRFP_SIZE +
- CCInfo.getNextStackOffset(),
- true);
- FuncInfo.setVarArgsFrameIndex(FrameIndex);
+ int Offset = HEXAGON_LRFP_SIZE + CCInfo.getNextStackOffset();
+ int FI = MFI.CreateFixedObject(Hexagon_PointerSize, Offset, true);
+ HMFI.setVarArgsFrameIndex(FI);
}
return Chain;
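Two simplifications in the rewritten argument loop are worth noting: getRegClassFor now picks the register class for any legal RegVT instead of the long per-type cascade, and incoming i1 values are materialized with an AND/SETCC pair rather than a round trip through a predicate register. A toy model of the new i1 step (illustrative only):

#include <cstdint>

bool incomingI1(uint32_t RegVal) {
  // The ABI passes i1 in a 32-bit register; mask bit 0 and compare,
  // mirroring the ISD::AND + ISD::SETNE sequence in the patch.
  return (RegVal & 1) != 0;
}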
@@ -1262,66 +804,62 @@ HexagonTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
MachinePointerInfo(SV));
}
-static bool isSExtFree(SDValue N) {
- // A sign-extend of a truncate of a sign-extend is free.
- if (N.getOpcode() == ISD::TRUNCATE &&
- N.getOperand(0).getOpcode() == ISD::AssertSext)
- return true;
- // We have sign-extended loads.
- if (N.getOpcode() == ISD::LOAD)
- return true;
- return false;
-}
-
SDValue HexagonTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
- SDLoc dl(Op);
-
+ const SDLoc &dl(Op);
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
- if (Subtarget.useHVXOps() && Subtarget.isHVXVectorType(ty(LHS)))
- return LowerHvxSetCC(Op, DAG);
-
- SDValue Cmp = Op.getOperand(2);
- ISD::CondCode CC = cast<CondCodeSDNode>(Cmp)->get();
-
- EVT VT = Op.getValueType();
- EVT LHSVT = LHS.getValueType();
- EVT RHSVT = RHS.getValueType();
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+ MVT ResTy = ty(Op);
+ MVT OpTy = ty(LHS);
- if (LHSVT == MVT::v2i16) {
- assert(ISD::isSignedIntSetCC(CC) || ISD::isUnsignedIntSetCC(CC));
- unsigned ExtOpc = ISD::isSignedIntSetCC(CC) ? ISD::SIGN_EXTEND
- : ISD::ZERO_EXTEND;
- SDValue LX = DAG.getNode(ExtOpc, dl, MVT::v2i32, LHS);
- SDValue RX = DAG.getNode(ExtOpc, dl, MVT::v2i32, RHS);
- SDValue SC = DAG.getNode(ISD::SETCC, dl, MVT::v2i1, LX, RX, Cmp);
- return SC;
+ if (OpTy == MVT::v2i16 || OpTy == MVT::v4i8) {
+ MVT ElemTy = OpTy.getVectorElementType();
+ assert(ElemTy.isScalarInteger());
+ MVT WideTy = MVT::getVectorVT(MVT::getIntegerVT(2*ElemTy.getSizeInBits()),
+ OpTy.getVectorNumElements());
+ return DAG.getSetCC(dl, ResTy,
+ DAG.getSExtOrTrunc(LHS, SDLoc(LHS), WideTy),
+ DAG.getSExtOrTrunc(RHS, SDLoc(RHS), WideTy), CC);
}
// Treat all other vector types as legal.
- if (VT.isVector())
+ if (ResTy.isVector())
return Op;
- // Equals and not equals should use sign-extend, not zero-extend, since
- // we can represent small negative values in the compare instructions.
+ // Comparisons of short integers should use sign-extend, not zero-extend,
+ // since we can represent small negative values in the compare instructions.
// The LLVM default is to use zero-extend arbitrarily in these cases.
- if ((CC == ISD::SETEQ || CC == ISD::SETNE) &&
- (RHSVT == MVT::i8 || RHSVT == MVT::i16) &&
- (LHSVT == MVT::i8 || LHSVT == MVT::i16)) {
- ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS);
- if (C && C->getAPIntValue().isNegative()) {
- LHS = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, LHS);
- RHS = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, RHS);
- return DAG.getNode(ISD::SETCC, dl, Op.getValueType(),
- LHS, RHS, Op.getOperand(2));
- }
- if (isSExtFree(LHS) || isSExtFree(RHS)) {
- LHS = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, LHS);
- RHS = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, RHS);
- return DAG.getNode(ISD::SETCC, dl, Op.getValueType(),
- LHS, RHS, Op.getOperand(2));
+ auto isSExtFree = [this](SDValue N) {
+ switch (N.getOpcode()) {
+ case ISD::TRUNCATE: {
+ // A sign-extend of a truncate of a sign-extend is free.
+ SDValue Op = N.getOperand(0);
+ if (Op.getOpcode() != ISD::AssertSext)
+ return false;
+ EVT OrigTy = cast<VTSDNode>(Op.getOperand(1))->getVT();
+ unsigned ThisBW = ty(N).getSizeInBits();
+ unsigned OrigBW = OrigTy.getSizeInBits();
+ // The type that was sign-extended to get the AssertSext must be
+ // narrower than the type of N (so that N still has the same value
+ // as the original).
+ return ThisBW >= OrigBW;
+ }
+ case ISD::LOAD:
+ // We have sign-extended loads.
+ return true;
}
+ return false;
+ };
+
+ if (OpTy == MVT::i8 || OpTy == MVT::i16) {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS);
+ bool IsNegative = C && C->getAPIntValue().isNegative();
+ if (IsNegative || isSExtFree(LHS) || isSExtFree(RHS))
+ return DAG.getSetCC(dl, ResTy,
+ DAG.getSExtOrTrunc(LHS, SDLoc(LHS), MVT::i32),
+ DAG.getSExtOrTrunc(RHS, SDLoc(RHS), MVT::i32), CC);
}
+
return SDValue();
}
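The restructured LowerSETCC keeps the same policy for small integer compares: widen with sign-extension whenever the right-hand side is a small negative constant or a sign-extension is free, because the compare instructions can encode small negative immediates while zero-extension would destroy them. A standalone illustration of why zero-extension loses (hypothetical values):

#include <cstdint>

bool viaSext(int8_t A) {
  return int32_t(A) == -1;          // -1 stays -1: the compare matches
}
bool viaZext(int8_t A) {
  return int32_t(uint8_t(A)) == -1; // -1 widens to 255: never true
}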
@@ -1393,8 +931,7 @@ HexagonTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const {
else if (isVTi1Type)
T = DAG.getTargetConstantPool(CVal, ValTy, Align, Offset, TF);
else
- T = DAG.getTargetConstantPool(CPN->getConstVal(), ValTy, Align, Offset,
- TF);
+ T = DAG.getTargetConstantPool(CPN->getConstVal(), ValTy, Align, Offset, TF);
assert(cast<ConstantPoolSDNode>(T)->getTargetFlags() == TF &&
"Inconsistent target flag encountered");
@@ -1480,7 +1017,7 @@ HexagonTargetLowering::LowerGLOBALADDRESS(SDValue Op, SelectionDAG &DAG) const {
if (RM == Reloc::Static) {
SDValue GA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, Offset);
const GlobalObject *GO = GV->getBaseObject();
- if (GO && HLOF.isGlobalInSmallSection(GO, HTM))
+ if (GO && Subtarget.useSmallData() && HLOF.isGlobalInSmallSection(GO, HTM))
return DAG.getNode(HexagonISD::CONST32_GP, dl, PtrVT, GA);
return DAG.getNode(HexagonISD::CONST32, dl, PtrVT, GA);
}
@@ -1688,13 +1225,15 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
const HexagonSubtarget &ST)
: TargetLowering(TM), HTM(static_cast<const HexagonTargetMachine&>(TM)),
Subtarget(ST) {
- bool IsV4 = !Subtarget.hasV5TOps();
+ bool IsV4 = !Subtarget.hasV5Ops();
auto &HRI = *Subtarget.getRegisterInfo();
setPrefLoopAlignment(4);
setPrefFunctionAlignment(4);
setMinFunctionAlignment(2);
setStackPointerRegisterToSaveRestore(HRI.getStackRegister());
+ setBooleanContents(TargetLoweringBase::UndefinedBooleanContent);
+ setBooleanVectorContents(TargetLoweringBase::UndefinedBooleanContent);
setMaxAtomicSizeInBitsSupported(64);
setMinCmpXchgSizeInBits(32);
@@ -1728,45 +1267,11 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
addRegisterClass(MVT::v4i16, &Hexagon::DoubleRegsRegClass);
addRegisterClass(MVT::v2i32, &Hexagon::DoubleRegsRegClass);
- if (Subtarget.hasV5TOps()) {
+ if (Subtarget.hasV5Ops()) {
addRegisterClass(MVT::f32, &Hexagon::IntRegsRegClass);
addRegisterClass(MVT::f64, &Hexagon::DoubleRegsRegClass);
}
- if (Subtarget.hasV60TOps()) {
- if (Subtarget.useHVX64BOps()) {
- addRegisterClass(MVT::v64i8, &Hexagon::HvxVRRegClass);
- addRegisterClass(MVT::v32i16, &Hexagon::HvxVRRegClass);
- addRegisterClass(MVT::v16i32, &Hexagon::HvxVRRegClass);
- addRegisterClass(MVT::v128i8, &Hexagon::HvxWRRegClass);
- addRegisterClass(MVT::v64i16, &Hexagon::HvxWRRegClass);
- addRegisterClass(MVT::v32i32, &Hexagon::HvxWRRegClass);
- // These "short" boolean vector types should be legal because
- // they will appear as results of vector compares. If they were
- // not legal, type legalization would try to make them legal
- // and that would require using operations that do not use or
- // produce such types. That, in turn, would imply using custom
- // nodes, which would be unoptimizable by the DAG combiner.
- // The idea is to rely on target-independent operations as much
- // as possible.
- addRegisterClass(MVT::v16i1, &Hexagon::HvxQRRegClass);
- addRegisterClass(MVT::v32i1, &Hexagon::HvxQRRegClass);
- addRegisterClass(MVT::v64i1, &Hexagon::HvxQRRegClass);
- addRegisterClass(MVT::v512i1, &Hexagon::HvxQRRegClass);
- } else if (Subtarget.useHVX128BOps()) {
- addRegisterClass(MVT::v128i8, &Hexagon::HvxVRRegClass);
- addRegisterClass(MVT::v64i16, &Hexagon::HvxVRRegClass);
- addRegisterClass(MVT::v32i32, &Hexagon::HvxVRRegClass);
- addRegisterClass(MVT::v256i8, &Hexagon::HvxWRRegClass);
- addRegisterClass(MVT::v128i16, &Hexagon::HvxWRRegClass);
- addRegisterClass(MVT::v64i32, &Hexagon::HvxWRRegClass);
- addRegisterClass(MVT::v32i1, &Hexagon::HvxQRRegClass);
- addRegisterClass(MVT::v64i1, &Hexagon::HvxQRRegClass);
- addRegisterClass(MVT::v128i1, &Hexagon::HvxQRRegClass);
- addRegisterClass(MVT::v1024i1, &Hexagon::HvxQRRegClass);
- }
- }
-
//
// Handling of scalar operations.
//
@@ -1801,13 +1306,16 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
// Hexagon needs to optimize cases with negative constants.
- setOperationAction(ISD::SETCC, MVT::i8, Custom);
- setOperationAction(ISD::SETCC, MVT::i16, Custom);
+ setOperationAction(ISD::SETCC, MVT::i8, Custom);
+ setOperationAction(ISD::SETCC, MVT::i16, Custom);
+ setOperationAction(ISD::SETCC, MVT::v4i8, Custom);
+ setOperationAction(ISD::SETCC, MVT::v2i16, Custom);
// VASTART needs to be custom lowered to use the VarArgsFrameIndex.
setOperationAction(ISD::VASTART, MVT::Other, Custom);
setOperationAction(ISD::VAEND, MVT::Other, Expand);
setOperationAction(ISD::VAARG, MVT::Other, Expand);
+ setOperationAction(ISD::VACOPY, MVT::Other, Expand);
setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
@@ -1819,35 +1327,21 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
setMinimumJumpTableEntries(std::numeric_limits<int>::max());
setOperationAction(ISD::BR_JT, MVT::Other, Expand);
- // Hexagon has instructions for add/sub with carry. The problem with
- // modeling these instructions is that they produce 2 results: Rdd and Px.
- // To model the update of Px, we will have to use Defs[p0..p3] which will
-  // cause any predicate live range to spill. So, we pretend we don't have
- // these instructions.
- setOperationAction(ISD::ADDE, MVT::i8, Expand);
- setOperationAction(ISD::ADDE, MVT::i16, Expand);
- setOperationAction(ISD::ADDE, MVT::i32, Expand);
- setOperationAction(ISD::ADDE, MVT::i64, Expand);
- setOperationAction(ISD::SUBE, MVT::i8, Expand);
- setOperationAction(ISD::SUBE, MVT::i16, Expand);
- setOperationAction(ISD::SUBE, MVT::i32, Expand);
- setOperationAction(ISD::SUBE, MVT::i64, Expand);
- setOperationAction(ISD::ADDC, MVT::i8, Expand);
- setOperationAction(ISD::ADDC, MVT::i16, Expand);
- setOperationAction(ISD::ADDC, MVT::i32, Expand);
- setOperationAction(ISD::ADDC, MVT::i64, Expand);
- setOperationAction(ISD::SUBC, MVT::i8, Expand);
- setOperationAction(ISD::SUBC, MVT::i16, Expand);
- setOperationAction(ISD::SUBC, MVT::i32, Expand);
- setOperationAction(ISD::SUBC, MVT::i64, Expand);
-
- // Only add and sub that detect overflow are the saturating ones.
+ setOperationAction(ISD::ABS, MVT::i32, Legal);
+ setOperationAction(ISD::ABS, MVT::i64, Legal);
+
+ // Hexagon has A4_addp_c and A4_subp_c that take and generate a carry bit,
+ // but they only operate on i64.
for (MVT VT : MVT::integer_valuetypes()) {
- setOperationAction(ISD::UADDO, VT, Expand);
- setOperationAction(ISD::SADDO, VT, Expand);
- setOperationAction(ISD::USUBO, VT, Expand);
- setOperationAction(ISD::SSUBO, VT, Expand);
+ setOperationAction(ISD::UADDO, VT, Expand);
+ setOperationAction(ISD::USUBO, VT, Expand);
+ setOperationAction(ISD::SADDO, VT, Expand);
+ setOperationAction(ISD::SSUBO, VT, Expand);
+ setOperationAction(ISD::ADDCARRY, VT, Expand);
+ setOperationAction(ISD::SUBCARRY, VT, Expand);
}
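+  // i64 add/sub with carry are custom-lowered (see LowerAddSubCarry) to
+  // HexagonISD::ADDC/SUBC, which correspond to the A4_addp_c/A4_subp_c
+  // instructions mentioned above.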
+ setOperationAction(ISD::ADDCARRY, MVT::i64, Custom);
+ setOperationAction(ISD::SUBCARRY, MVT::i64, Custom);
setOperationAction(ISD::CTLZ, MVT::i8, Promote);
setOperationAction(ISD::CTLZ, MVT::i16, Promote);
@@ -1865,22 +1359,21 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
setOperationAction(ISD::BSWAP, MVT::i32, Legal);
setOperationAction(ISD::BSWAP, MVT::i64, Legal);
- setOperationAction(ISD::MUL, MVT::i64, Legal);
for (unsigned IntExpOp :
- { ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM,
- ISD::SDIVREM, ISD::UDIVREM, ISD::ROTL, ISD::ROTR,
- ISD::SHL_PARTS, ISD::SRA_PARTS, ISD::SRL_PARTS,
- ISD::SMUL_LOHI, ISD::UMUL_LOHI }) {
- setOperationAction(IntExpOp, MVT::i32, Expand);
- setOperationAction(IntExpOp, MVT::i64, Expand);
+ {ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM,
+ ISD::SDIVREM, ISD::UDIVREM, ISD::ROTL, ISD::ROTR,
+ ISD::SHL_PARTS, ISD::SRA_PARTS, ISD::SRL_PARTS,
+ ISD::SMUL_LOHI, ISD::UMUL_LOHI}) {
+ for (MVT VT : MVT::integer_valuetypes())
+ setOperationAction(IntExpOp, VT, Expand);
}
for (unsigned FPExpOp :
{ISD::FDIV, ISD::FREM, ISD::FSQRT, ISD::FSIN, ISD::FCOS, ISD::FSINCOS,
ISD::FPOW, ISD::FCOPYSIGN}) {
- setOperationAction(FPExpOp, MVT::f32, Expand);
- setOperationAction(FPExpOp, MVT::f64, Expand);
+ for (MVT VT : MVT::fp_valuetypes())
+ setOperationAction(FPExpOp, VT, Expand);
}
// No extending loads from i32.
@@ -1920,10 +1413,9 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
// either "custom" or "legal" for specific cases.
static const unsigned VectExpOps[] = {
// Integer arithmetic:
- ISD::ADD, ISD::SUB, ISD::MUL, ISD::SDIV, ISD::UDIV,
- ISD::SREM, ISD::UREM, ISD::SDIVREM, ISD::UDIVREM, ISD::ADDC,
- ISD::SUBC, ISD::SADDO, ISD::UADDO, ISD::SSUBO, ISD::USUBO,
- ISD::SMUL_LOHI, ISD::UMUL_LOHI,
+ ISD::ADD, ISD::SUB, ISD::MUL, ISD::SDIV, ISD::UDIV,
+ ISD::SREM, ISD::UREM, ISD::SDIVREM, ISD::UDIVREM, ISD::SADDO,
+ ISD::UADDO, ISD::SSUBO, ISD::USUBO, ISD::SMUL_LOHI, ISD::UMUL_LOHI,
// Logical/bit:
ISD::AND, ISD::OR, ISD::XOR, ISD::ROTL, ISD::ROTR,
ISD::CTPOP, ISD::CTLZ, ISD::CTTZ,
@@ -1970,16 +1462,16 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
// Extending loads from (native) vectors of i8 into (native) vectors of i16
// are legal.
- setLoadExtAction(ISD::EXTLOAD, MVT::v2i16, MVT::v2i8, Legal);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v2i16, MVT::v2i8, Legal);
setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i16, MVT::v2i8, Legal);
setLoadExtAction(ISD::SEXTLOAD, MVT::v2i16, MVT::v2i8, Legal);
- setLoadExtAction(ISD::EXTLOAD, MVT::v4i16, MVT::v4i8, Legal);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v4i16, MVT::v4i8, Legal);
setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i16, MVT::v4i8, Legal);
setLoadExtAction(ISD::SEXTLOAD, MVT::v4i16, MVT::v4i8, Legal);
// Types natively supported:
- for (MVT NativeVT : {MVT::v32i1, MVT::v64i1, MVT::v4i8, MVT::v8i8, MVT::v2i16,
- MVT::v4i16, MVT::v1i32, MVT::v2i32, MVT::v1i64}) {
+ for (MVT NativeVT : {MVT::v8i1, MVT::v4i1, MVT::v2i1, MVT::v4i8,
+ MVT::v8i8, MVT::v2i16, MVT::v4i16, MVT::v2i32}) {
setOperationAction(ISD::BUILD_VECTOR, NativeVT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, NativeVT, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, NativeVT, Custom);
@@ -1995,19 +1487,34 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::XOR, NativeVT, Legal);
}
+ // Custom lower unaligned loads.
+ for (MVT VecVT : {MVT::i32, MVT::v4i8, MVT::i64, MVT::v8i8,
+ MVT::v2i16, MVT::v4i16, MVT::v2i32}) {
+ setOperationAction(ISD::LOAD, VecVT, Custom);
+ }
+
+  for (MVT VT : {MVT::v2i16, MVT::v4i8, MVT::v2i32, MVT::v4i16}) {
+ setCondCodeAction(ISD::SETLT, VT, Expand);
+ setCondCodeAction(ISD::SETLE, VT, Expand);
+ setCondCodeAction(ISD::SETULT, VT, Expand);
+ setCondCodeAction(ISD::SETULE, VT, Expand);
+ }
+
+ // Custom-lower bitcasts from i8 to v8i1.
+ setOperationAction(ISD::BITCAST, MVT::i8, Custom);
setOperationAction(ISD::SETCC, MVT::v2i16, Custom);
setOperationAction(ISD::VSELECT, MVT::v2i16, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i8, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8, Custom);
- auto setPromoteTo = [this] (unsigned Opc, MVT FromTy, MVT ToTy) {
- setOperationAction(Opc, FromTy, Promote);
- AddPromotedToType(Opc, FromTy, ToTy);
- };
-
// Subtarget-specific operation actions.
//
- if (Subtarget.hasV5TOps()) {
+ if (Subtarget.hasV60Ops()) {
+ setOperationAction(ISD::ROTL, MVT::i32, Custom);
+ setOperationAction(ISD::ROTL, MVT::i64, Custom);
+ }
+ if (Subtarget.hasV5Ops()) {
setOperationAction(ISD::FMA, MVT::f64, Expand);
setOperationAction(ISD::FADD, MVT::f64, Expand);
setOperationAction(ISD::FSUB, MVT::f64, Expand);
@@ -2061,71 +1568,14 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
// Handling of indexed loads/stores: default is "expand".
//
- for (MVT VT : {MVT::i8, MVT::i16, MVT::i32, MVT::i64}) {
+ for (MVT VT : {MVT::i8, MVT::i16, MVT::i32, MVT::i64, MVT::f32, MVT::f64,
+ MVT::v2i16, MVT::v2i32, MVT::v4i8, MVT::v4i16, MVT::v8i8}) {
setIndexedLoadAction(ISD::POST_INC, VT, Legal);
setIndexedStoreAction(ISD::POST_INC, VT, Legal);
}
- if (Subtarget.useHVXOps()) {
- bool Use64b = Subtarget.useHVX64BOps();
- ArrayRef<MVT> LegalV = Use64b ? LegalV64 : LegalV128;
- ArrayRef<MVT> LegalW = Use64b ? LegalW64 : LegalW128;
- MVT ByteV = Use64b ? MVT::v64i8 : MVT::v128i8;
- MVT ByteW = Use64b ? MVT::v128i8 : MVT::v256i8;
-
- setOperationAction(ISD::VECTOR_SHUFFLE, ByteV, Legal);
- setOperationAction(ISD::VECTOR_SHUFFLE, ByteW, Legal);
- setOperationAction(ISD::CONCAT_VECTORS, ByteW, Legal);
- setOperationAction(ISD::AND, ByteV, Legal);
- setOperationAction(ISD::OR, ByteV, Legal);
- setOperationAction(ISD::XOR, ByteV, Legal);
-
- for (MVT T : LegalV) {
- setIndexedLoadAction(ISD::POST_INC, T, Legal);
- setIndexedStoreAction(ISD::POST_INC, T, Legal);
-
- setOperationAction(ISD::ADD, T, Legal);
- setOperationAction(ISD::SUB, T, Legal);
- if (T != ByteV) {
- setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, T, Legal);
- setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, T, Legal);
- }
-
- setOperationAction(ISD::MUL, T, Custom);
- setOperationAction(ISD::SETCC, T, Custom);
- setOperationAction(ISD::BUILD_VECTOR, T, Custom);
- setOperationAction(ISD::INSERT_SUBVECTOR, T, Custom);
- setOperationAction(ISD::INSERT_VECTOR_ELT, T, Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, T, Custom);
- setOperationAction(ISD::EXTRACT_VECTOR_ELT, T, Custom);
- if (T != ByteV)
- setOperationAction(ISD::ANY_EXTEND_VECTOR_INREG, T, Custom);
- }
-
- for (MVT T : LegalV) {
- if (T == ByteV)
- continue;
- // Promote all shuffles and concats to operate on vectors of bytes.
- setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteV);
- setPromoteTo(ISD::CONCAT_VECTORS, T, ByteV);
- setPromoteTo(ISD::AND, T, ByteV);
- setPromoteTo(ISD::OR, T, ByteV);
- setPromoteTo(ISD::XOR, T, ByteV);
- }
-
- for (MVT T : LegalW) {
- // Custom-lower BUILD_VECTOR for vector pairs. The standard (target-
- // independent) handling of it would convert it to a load, which is
- // not always the optimal choice.
- setOperationAction(ISD::BUILD_VECTOR, T, Custom);
-
- if (T == ByteW)
- continue;
- // Promote all shuffles and concats to operate on vectors of bytes.
- setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteW);
- setPromoteTo(ISD::CONCAT_VECTORS, T, ByteW);
- }
- }
+ if (Subtarget.useHVXOps())
+ initializeHVXLowering();
computeRegisterProperties(&HRI);
@@ -2195,7 +1645,7 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
setLibcallName(RTLIB::DIV_F32, "__hexagon_divsf3");
}
- if (Subtarget.hasV5TOps()) {
+ if (Subtarget.hasV5Ops()) {
if (FastMath)
setLibcallName(RTLIB::SQRT_F32, "__hexagon_fast2_sqrtf");
else
@@ -2242,6 +1692,8 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const {
switch ((HexagonISD::NodeType)Opcode) {
+ case HexagonISD::ADDC: return "HexagonISD::ADDC";
+ case HexagonISD::SUBC: return "HexagonISD::SUBC";
case HexagonISD::ALLOCA: return "HexagonISD::ALLOCA";
case HexagonISD::AT_GOT: return "HexagonISD::AT_GOT";
case HexagonISD::AT_PCREL: return "HexagonISD::AT_PCREL";
@@ -2255,16 +1707,12 @@ const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const {
case HexagonISD::CP: return "HexagonISD::CP";
case HexagonISD::DCFETCH: return "HexagonISD::DCFETCH";
case HexagonISD::EH_RETURN: return "HexagonISD::EH_RETURN";
+ case HexagonISD::TSTBIT: return "HexagonISD::TSTBIT";
case HexagonISD::EXTRACTU: return "HexagonISD::EXTRACTU";
- case HexagonISD::EXTRACTURP: return "HexagonISD::EXTRACTURP";
case HexagonISD::INSERT: return "HexagonISD::INSERT";
- case HexagonISD::INSERTRP: return "HexagonISD::INSERTRP";
case HexagonISD::JT: return "HexagonISD::JT";
case HexagonISD::RET_FLAG: return "HexagonISD::RET_FLAG";
case HexagonISD::TC_RETURN: return "HexagonISD::TC_RETURN";
- case HexagonISD::VCOMBINE: return "HexagonISD::VCOMBINE";
- case HexagonISD::VPACKE: return "HexagonISD::VPACKE";
- case HexagonISD::VPACKO: return "HexagonISD::VPACKO";
case HexagonISD::VASL: return "HexagonISD::VASL";
case HexagonISD::VASR: return "HexagonISD::VASR";
case HexagonISD::VLSR: return "HexagonISD::VLSR";
@@ -2274,11 +1722,97 @@ const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const {
case HexagonISD::VROR: return "HexagonISD::VROR";
case HexagonISD::READCYCLE: return "HexagonISD::READCYCLE";
case HexagonISD::VZERO: return "HexagonISD::VZERO";
+ case HexagonISD::VSPLATW: return "HexagonISD::VSPLATW";
+ case HexagonISD::D2P: return "HexagonISD::D2P";
+ case HexagonISD::P2D: return "HexagonISD::P2D";
+ case HexagonISD::V2Q: return "HexagonISD::V2Q";
+ case HexagonISD::Q2V: return "HexagonISD::Q2V";
+ case HexagonISD::QCAT: return "HexagonISD::QCAT";
+ case HexagonISD::QTRUE: return "HexagonISD::QTRUE";
+ case HexagonISD::QFALSE: return "HexagonISD::QFALSE";
+ case HexagonISD::TYPECAST: return "HexagonISD::TYPECAST";
+ case HexagonISD::VALIGN: return "HexagonISD::VALIGN";
+ case HexagonISD::VALIGNADDR: return "HexagonISD::VALIGNADDR";
case HexagonISD::OP_END: break;
}
return nullptr;
}
+// Bit-reverse Load Intrinsic: Check if the instruction is a bit-reverse load
+// intrinsic.
+static bool isBrevLdIntrinsic(const Value *Inst) {
+ unsigned ID = cast<IntrinsicInst>(Inst)->getIntrinsicID();
+ return (ID == Intrinsic::hexagon_L2_loadrd_pbr ||
+ ID == Intrinsic::hexagon_L2_loadri_pbr ||
+ ID == Intrinsic::hexagon_L2_loadrh_pbr ||
+ ID == Intrinsic::hexagon_L2_loadruh_pbr ||
+ ID == Intrinsic::hexagon_L2_loadrb_pbr ||
+ ID == Intrinsic::hexagon_L2_loadrub_pbr);
+}
+
+// Bit-reverse Load Intrinsic: Crawl up and figure out the object from the
+// previous instruction. So far we only handle bitcast, extract-value and
+// bit-reverse load intrinsic instructions. Should we handle CGEP?
+static Value *getBrevLdObject(Value *V) {
+ if (Operator::getOpcode(V) == Instruction::ExtractValue ||
+ Operator::getOpcode(V) == Instruction::BitCast)
+ V = cast<Operator>(V)->getOperand(0);
+ else if (isa<IntrinsicInst>(V) && isBrevLdIntrinsic(V))
+ V = cast<Instruction>(V)->getOperand(0);
+ return V;
+}
+
+// Bit-reverse Load Intrinsic: For a PHI Node return either an incoming edge or
+// a back edge. If the back edge comes from the intrinsic itself, the incoming
+// edge is returned.
+static Value *returnEdge(const PHINode *PN, Value *IntrBaseVal) {
+ const BasicBlock *Parent = PN->getParent();
+ int Idx = -1;
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i < e; ++i) {
+ BasicBlock *Blk = PN->getIncomingBlock(i);
+    // Determine if the back edge originates from the intrinsic.
+ if (Blk == Parent) {
+ Value *BackEdgeVal = PN->getIncomingValue(i);
+ Value *BaseVal;
+      // Loop until we return the same Value or hit IntrBaseVal.
+ do {
+ BaseVal = BackEdgeVal;
+ BackEdgeVal = getBrevLdObject(BackEdgeVal);
+ } while ((BaseVal != BackEdgeVal) && (IntrBaseVal != BackEdgeVal));
+      // If getBrevLdObject returns IntrBaseVal, return the incoming edge.
+ if (IntrBaseVal == BackEdgeVal)
+ continue;
+ Idx = i;
+ break;
+    } else // Remember this incoming edge.
+ Idx = i;
+ }
+ assert(Idx >= 0 && "Unexpected index to incoming argument in PHI");
+ return PN->getIncomingValue(Idx);
+}
+
+// Bit-reverse Load Intrinsic: Figure out the underlying object the base
+// pointer points to, for the bit-reverse load intrinsic. Recording this in
+// the memory operand may help alias analysis figure out the dependencies.
+static Value *getUnderLyingObjectForBrevLdIntr(Value *V) {
+ Value *IntrBaseVal = V;
+ Value *BaseVal;
+  // Loop until we return the same Value: at that point we have either
+  // identified the object or hit a PHI.
+ do {
+ BaseVal = V;
+ V = getBrevLdObject(V);
+ } while (BaseVal != V);
+
+ // Identify the object from PHINode.
+ if (const PHINode *PN = dyn_cast<PHINode>(V))
+ return returnEdge(PN, IntrBaseVal);
+  // For non-PHI nodes, the object is the last value returned by
+  // getBrevLdObject.
+ else
+ return V;
+}
+
/// Given an intrinsic, checks if on the target the intrinsic will need to map
/// to a MemIntrinsicNode (touches memory). If this is the case, it returns
/// true and store the intrinsic information into the IntrinsicInfo that was
@@ -2288,6 +1822,32 @@ bool HexagonTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
MachineFunction &MF,
unsigned Intrinsic) const {
switch (Intrinsic) {
+ case Intrinsic::hexagon_L2_loadrd_pbr:
+ case Intrinsic::hexagon_L2_loadri_pbr:
+ case Intrinsic::hexagon_L2_loadrh_pbr:
+ case Intrinsic::hexagon_L2_loadruh_pbr:
+ case Intrinsic::hexagon_L2_loadrb_pbr:
+ case Intrinsic::hexagon_L2_loadrub_pbr: {
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
+ auto &Cont = I.getCalledFunction()->getParent()->getContext();
+ // The intrinsic function call is of the form { ElTy, i8* }
+ // @llvm.hexagon.L2.loadXX.pbr(i8*, i32). The pointer and memory access type
+ // should be derived from ElTy.
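+    // E.g. @llvm.hexagon.L2.loadri.pbr returns { i32, i8* }, so memVT is
+    // i32 for that intrinsic.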
+ PointerType *PtrTy = I.getCalledFunction()
+ ->getReturnType()
+ ->getContainedType(0)
+ ->getPointerTo();
+ Info.memVT = MVT::getVT(PtrTy->getElementType());
+ llvm::Value *BasePtrVal = I.getOperand(0);
+ Info.ptrVal = getUnderLyingObjectForBrevLdIntr(BasePtrVal);
+ // The offset value comes through Modifier register. For now, assume the
+ // offset is 0.
+ Info.offset = 0;
+ Info.align = DL.getABITypeAlignment(Info.memVT.getTypeForEVT(Cont));
+ Info.flags = MachineMemOperand::MOLoad;
+ return true;
+ }
case Intrinsic::hexagon_V6_vgathermw:
case Intrinsic::hexagon_V6_vgathermw_128B:
case Intrinsic::hexagon_V6_vgathermh:
@@ -2319,17 +1879,13 @@ bool HexagonTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
}
bool HexagonTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
- EVT MTy1 = EVT::getEVT(Ty1);
- EVT MTy2 = EVT::getEVT(Ty2);
- if (!MTy1.isSimple() || !MTy2.isSimple())
- return false;
- return (MTy1.getSimpleVT() == MVT::i64) && (MTy2.getSimpleVT() == MVT::i32);
+ return isTruncateFree(EVT::getEVT(Ty1), EVT::getEVT(Ty2));
}
bool HexagonTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
if (!VT1.isSimple() || !VT2.isSimple())
return false;
- return (VT1.getSimpleVT() == MVT::i64) && (VT2.getSimpleVT() == MVT::i32);
+ return VT1.getSimpleVT() == MVT::i64 && VT2.getSimpleVT() == MVT::i32;
}
bool HexagonTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
@@ -2372,126 +1928,199 @@ HexagonTargetLowering::getPreferredVectorAction(EVT VT) const {
return TargetLoweringBase::TypeSplitVector;
}
+std::pair<SDValue, int>
+HexagonTargetLowering::getBaseAndOffset(SDValue Addr) const {
+ if (Addr.getOpcode() == ISD::ADD) {
+ SDValue Op1 = Addr.getOperand(1);
+ if (auto *CN = dyn_cast<const ConstantSDNode>(Op1.getNode()))
+ return { Addr.getOperand(0), CN->getSExtValue() };
+ }
+ return { Addr, 0 };
+}
+
// Lower a vector shuffle (V1, V2, V3). V1 and V2 are the two vectors
// to select data from, V3 is the permutation.
SDValue
HexagonTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG)
const {
- const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op);
- SDValue V1 = Op.getOperand(0);
- SDValue V2 = Op.getOperand(1);
- SDLoc dl(Op);
- EVT VT = Op.getValueType();
+ const auto *SVN = cast<ShuffleVectorSDNode>(Op);
+ ArrayRef<int> AM = SVN->getMask();
+ assert(AM.size() <= 8 && "Unexpected shuffle mask");
+ unsigned VecLen = AM.size();
- if (V2.isUndef())
- V2 = V1;
-
- if (SVN->isSplat()) {
- int Lane = SVN->getSplatIndex();
- if (Lane == -1) Lane = 0;
-
- // Test if V1 is a SCALAR_TO_VECTOR.
- if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR)
- return DAG.getNode(HexagonISD::VSPLAT, dl, VT, V1.getOperand(0));
-
- // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR
- // (and probably will turn into a SCALAR_TO_VECTOR once legalization
- // reaches it).
- if (Lane == 0 && V1.getOpcode() == ISD::BUILD_VECTOR &&
- !isa<ConstantSDNode>(V1.getOperand(0))) {
- bool IsScalarToVector = true;
- for (unsigned i = 1, e = V1.getNumOperands(); i != e; ++i) {
- if (!V1.getOperand(i).isUndef()) {
- IsScalarToVector = false;
- break;
- }
- }
- if (IsScalarToVector)
- return DAG.getNode(HexagonISD::VSPLAT, dl, VT, V1.getOperand(0));
+ MVT VecTy = ty(Op);
+ assert(!Subtarget.isHVXVectorType(VecTy, true) &&
+ "HVX shuffles should be legal");
+ assert(VecTy.getSizeInBits() <= 64 && "Unexpected vector length");
+
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ const SDLoc &dl(Op);
+
+  // If the types of the inputs differ from the type of the output, bail.
+  // This is not an error situation, but it complicates the handling, and
+  // the default expansion (into BUILD_VECTOR) should be adequate.
+ if (ty(Op0) != VecTy || ty(Op1) != VecTy)
+ return SDValue();
+
+ // Normalize the mask so that the first non-negative index comes from
+ // the first operand.
+ SmallVector<int,8> Mask(AM.begin(), AM.end());
+ unsigned F = llvm::find_if(AM, [](int M) { return M >= 0; }) - AM.data();
+ if (F == AM.size())
+ return DAG.getUNDEF(VecTy);
+ if (AM[F] >= int(VecLen)) {
+ ShuffleVectorSDNode::commuteMask(Mask);
+ std::swap(Op0, Op1);
+ }
+
+ // Express the shuffle mask in terms of bytes.
+ SmallVector<int,8> ByteMask;
+ unsigned ElemBytes = VecTy.getVectorElementType().getSizeInBits() / 8;
+ for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
+ int M = Mask[i];
+ if (M < 0) {
+ for (unsigned j = 0; j != ElemBytes; ++j)
+ ByteMask.push_back(-1);
+ } else {
+ for (unsigned j = 0; j != ElemBytes; ++j)
+ ByteMask.push_back(M*ElemBytes + j);
}
- return DAG.getNode(HexagonISD::VSPLAT, dl, VT,
- DAG.getConstant(Lane, dl, MVT::i32));
}
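+  // For example, the v4i16 mask {1,0,3,2} expands to the byte mask
+  // {2,3,0,1,6,7,4,5}.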
+ assert(ByteMask.size() <= 8);
+
+ // All non-undef (non-negative) indexes are well within [0..127], so they
+ // fit in a single byte. Build two 64-bit words:
+ // - MaskIdx where each byte is the corresponding index (for non-negative
+ // indexes), and 0xFF for negative indexes, and
+ // - MaskUnd that has 0xFF for each negative index.
+ uint64_t MaskIdx = 0;
+ uint64_t MaskUnd = 0;
+ for (unsigned i = 0, e = ByteMask.size(); i != e; ++i) {
+ unsigned S = 8*i;
+ uint64_t M = ByteMask[i] & 0xFF;
+ if (M == 0xFF)
+ MaskUnd |= M << S;
+ MaskIdx |= M << S;
+ }
+
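+  // E.g. the identity v4i8 mask {0,1,2,3} produces MaskIdx == 0x03020100
+  // and MaskUnd == 0, matching the identity check in the 4-byte case below.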
+ if (ByteMask.size() == 4) {
+ // Identity.
+ if (MaskIdx == (0x03020100 | MaskUnd))
+ return Op0;
+ // Byte swap.
+ if (MaskIdx == (0x00010203 | MaskUnd)) {
+ SDValue T0 = DAG.getBitcast(MVT::i32, Op0);
+ SDValue T1 = DAG.getNode(ISD::BSWAP, dl, MVT::i32, T0);
+ return DAG.getBitcast(VecTy, T1);
+ }
- // FIXME: We need to support more general vector shuffles. See
- // below the comment from the ARM backend that deals in the general
- // case with the vector shuffles. For now, let expand handle these.
- return SDValue();
+ // Byte packs.
+ SDValue Concat10 = DAG.getNode(HexagonISD::COMBINE, dl,
+ typeJoin({ty(Op1), ty(Op0)}), {Op1, Op0});
+ if (MaskIdx == (0x06040200 | MaskUnd))
+ return getInstr(Hexagon::S2_vtrunehb, dl, VecTy, {Concat10}, DAG);
+ if (MaskIdx == (0x07050301 | MaskUnd))
+ return getInstr(Hexagon::S2_vtrunohb, dl, VecTy, {Concat10}, DAG);
+
+ SDValue Concat01 = DAG.getNode(HexagonISD::COMBINE, dl,
+ typeJoin({ty(Op0), ty(Op1)}), {Op0, Op1});
+ if (MaskIdx == (0x02000604 | MaskUnd))
+ return getInstr(Hexagon::S2_vtrunehb, dl, VecTy, {Concat01}, DAG);
+ if (MaskIdx == (0x03010705 | MaskUnd))
+ return getInstr(Hexagon::S2_vtrunohb, dl, VecTy, {Concat01}, DAG);
+ }
+
+ if (ByteMask.size() == 8) {
+ // Identity.
+ if (MaskIdx == (0x0706050403020100ull | MaskUnd))
+ return Op0;
+ // Byte swap.
+ if (MaskIdx == (0x0001020304050607ull | MaskUnd)) {
+ SDValue T0 = DAG.getBitcast(MVT::i64, Op0);
+ SDValue T1 = DAG.getNode(ISD::BSWAP, dl, MVT::i64, T0);
+ return DAG.getBitcast(VecTy, T1);
+ }
- // If the shuffle is not directly supported and it has 4 elements, use
- // the PerfectShuffle-generated table to synthesize it from other shuffles.
-}
+ // Halfword picks.
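+    // Bytes 0-7 index Op0 and bytes 8-15 index Op1. Read LSB-first,
+    // 0x0d0c050409080100 selects {Op0.h0, Op1.h0, Op0.h2, Op1.h2}, i.e.
+    // the even halfwords interleaved, which is what S2_shuffeh computes.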
+ if (MaskIdx == (0x0d0c050409080100ull | MaskUnd))
+ return getInstr(Hexagon::S2_shuffeh, dl, VecTy, {Op1, Op0}, DAG);
+ if (MaskIdx == (0x0f0e07060b0a0302ull | MaskUnd))
+ return getInstr(Hexagon::S2_shuffoh, dl, VecTy, {Op1, Op0}, DAG);
+ if (MaskIdx == (0x0d0c090805040100ull | MaskUnd))
+ return getInstr(Hexagon::S2_vtrunewh, dl, VecTy, {Op1, Op0}, DAG);
+ if (MaskIdx == (0x0f0e0b0a07060302ull | MaskUnd))
+ return getInstr(Hexagon::S2_vtrunowh, dl, VecTy, {Op1, Op0}, DAG);
+ if (MaskIdx == (0x0706030205040100ull | MaskUnd)) {
+ VectorPair P = opSplit(Op0, dl, DAG);
+ return getInstr(Hexagon::S2_packhl, dl, VecTy, {P.second, P.first}, DAG);
+ }
-// If BUILD_VECTOR has same base element repeated several times,
-// report true.
-static bool isCommonSplatElement(BuildVectorSDNode *BVN) {
- unsigned NElts = BVN->getNumOperands();
- SDValue V0 = BVN->getOperand(0);
+ // Byte packs.
+ if (MaskIdx == (0x0e060c040a020800ull | MaskUnd))
+ return getInstr(Hexagon::S2_shuffeb, dl, VecTy, {Op1, Op0}, DAG);
+ if (MaskIdx == (0x0f070d050b030901ull | MaskUnd))
+ return getInstr(Hexagon::S2_shuffob, dl, VecTy, {Op1, Op0}, DAG);
+ }
- for (unsigned i = 1, e = NElts; i != e; ++i) {
- if (BVN->getOperand(i) != V0)
- return false;
+ return SDValue();
+}
+
+// Create a Hexagon-specific node for shifting a vector by an integer.
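+// For example, (shl v4i16:$x, (build_vector 3,3,3,3)) becomes a
+// HexagonISD::VASL of $x by the splatted amount 3.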
+SDValue
+HexagonTargetLowering::getVectorShiftByInt(SDValue Op, SelectionDAG &DAG)
+ const {
+ if (auto *BVN = dyn_cast<BuildVectorSDNode>(Op.getOperand(1).getNode())) {
+ if (SDValue S = BVN->getSplatValue()) {
+ unsigned NewOpc;
+ switch (Op.getOpcode()) {
+ case ISD::SHL:
+ NewOpc = HexagonISD::VASL;
+ break;
+ case ISD::SRA:
+ NewOpc = HexagonISD::VASR;
+ break;
+ case ISD::SRL:
+ NewOpc = HexagonISD::VLSR;
+ break;
+ default:
+ llvm_unreachable("Unexpected shift opcode");
+ }
+ return DAG.getNode(NewOpc, SDLoc(Op), ty(Op), Op.getOperand(0), S);
+ }
}
- return true;
+
+ return SDValue();
}
-// Lower a vector shift. Try to convert
-// <VT> = SHL/SRA/SRL <VT> by <VT> to Hexagon specific
-// <VT> = SHL/SRA/SRL <VT> by <IT/i32>.
SDValue
HexagonTargetLowering::LowerVECTOR_SHIFT(SDValue Op, SelectionDAG &DAG) const {
- BuildVectorSDNode *BVN = nullptr;
- SDValue V1 = Op.getOperand(0);
- SDValue V2 = Op.getOperand(1);
- SDValue V3;
- SDLoc dl(Op);
- EVT VT = Op.getValueType();
+ return getVectorShiftByInt(Op, DAG);
+}
- if ((BVN = dyn_cast<BuildVectorSDNode>(V1.getNode())) &&
- isCommonSplatElement(BVN))
- V3 = V2;
- else if ((BVN = dyn_cast<BuildVectorSDNode>(V2.getNode())) &&
- isCommonSplatElement(BVN))
- V3 = V1;
- else
- return SDValue();
+SDValue
+HexagonTargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
+ if (isa<ConstantSDNode>(Op.getOperand(1).getNode()))
+ return Op;
+ return SDValue();
+}
- SDValue CommonSplat = BVN->getOperand(0);
- SDValue Result;
+SDValue
+HexagonTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
+ MVT ResTy = ty(Op);
+ SDValue InpV = Op.getOperand(0);
+ MVT InpTy = ty(InpV);
+ assert(ResTy.getSizeInBits() == InpTy.getSizeInBits());
+ const SDLoc &dl(Op);
- if (VT.getSimpleVT() == MVT::v4i16) {
- switch (Op.getOpcode()) {
- case ISD::SRA:
- Result = DAG.getNode(HexagonISD::VASR, dl, VT, V3, CommonSplat);
- break;
- case ISD::SHL:
- Result = DAG.getNode(HexagonISD::VASL, dl, VT, V3, CommonSplat);
- break;
- case ISD::SRL:
- Result = DAG.getNode(HexagonISD::VLSR, dl, VT, V3, CommonSplat);
- break;
- default:
- return SDValue();
- }
- } else if (VT.getSimpleVT() == MVT::v2i32) {
- switch (Op.getOpcode()) {
- case ISD::SRA:
- Result = DAG.getNode(HexagonISD::VASR, dl, VT, V3, CommonSplat);
- break;
- case ISD::SHL:
- Result = DAG.getNode(HexagonISD::VASL, dl, VT, V3, CommonSplat);
- break;
- case ISD::SRL:
- Result = DAG.getNode(HexagonISD::VLSR, dl, VT, V3, CommonSplat);
- break;
- default:
- return SDValue();
- }
- } else {
- return SDValue();
+ // Handle conversion from i8 to v8i1.
+ if (ResTy == MVT::v8i1) {
+ SDValue Sc = DAG.getBitcast(tyScalar(InpTy), InpV);
+ SDValue Ext = DAG.getZExtOrTrunc(Sc, dl, MVT::i32);
+ return getInstr(Hexagon::C2_tfrrp, dl, ResTy, Ext, DAG);
}
- return DAG.getNode(ISD::BITCAST, dl, VT, Result);
+ return SDValue();
}
bool
@@ -2509,9 +2138,10 @@ HexagonTargetLowering::getBuildVectorConstInts(ArrayRef<SDValue> Values,
Consts[i] = ConstantInt::get(IntTy, 0);
continue;
}
+ // Make sure to always cast to IntTy.
if (auto *CN = dyn_cast<ConstantSDNode>(V.getNode())) {
const ConstantInt *CI = CN->getConstantIntValue();
- Consts[i] = const_cast<ConstantInt*>(CI);
+ Consts[i] = ConstantInt::get(IntTy, CI->getValue().getSExtValue());
} else if (auto *CN = dyn_cast<ConstantFPSDNode>(V.getNode())) {
const ConstantFP *CF = CN->getConstantFPValue();
APInt A = CF->getValueAPF().bitcastToAPInt();
@@ -2550,8 +2180,8 @@ HexagonTargetLowering::buildVector32(ArrayRef<SDValue> Elem, const SDLoc &dl,
Consts[1]->getZExtValue() << 16;
return DAG.getBitcast(MVT::v2i16, DAG.getConstant(V, dl, MVT::i32));
}
- SDValue N = getNode(Hexagon::A2_combine_ll, dl, MVT::i32,
- {Elem[1], Elem[0]}, DAG);
+ SDValue N = getInstr(Hexagon::A2_combine_ll, dl, MVT::i32,
+ {Elem[1], Elem[0]}, DAG);
return DAG.getBitcast(MVT::v2i16, N);
}
@@ -2596,7 +2226,7 @@ HexagonTargetLowering::buildVector32(ArrayRef<SDValue> Elem, const SDLoc &dl,
SDValue B0 = DAG.getNode(ISD::OR, dl, MVT::i32, {Vs[0], T0});
SDValue B1 = DAG.getNode(ISD::OR, dl, MVT::i32, {Vs[2], T1});
- SDValue R = getNode(Hexagon::A2_combine_ll, dl, MVT::i32, {B1, B0}, DAG);
+ SDValue R = getInstr(Hexagon::A2_combine_ll, dl, MVT::i32, {B1, B0}, DAG);
return DAG.getBitcast(MVT::v4i8, R);
}
@@ -2651,7 +2281,7 @@ HexagonTargetLowering::buildVector64(ArrayRef<SDValue> Elem, const SDLoc &dl,
uint64_t Mask = (ElemTy == MVT::i8) ? 0xFFull
: (ElemTy == MVT::i16) ? 0xFFFFull : 0xFFFFFFFFull;
for (unsigned i = 0; i != Num; ++i)
- Val = (Val << W) | (Consts[i]->getZExtValue() & Mask);
+ Val = (Val << W) | (Consts[Num-1-i]->getZExtValue() & Mask);
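+  // Element 0 ends up in the lowest W bits (little-endian element order),
+  // which is why the constants are read back-to-front.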
SDValue V0 = DAG.getConstant(Val, dl, MVT::i64);
return DAG.getBitcast(VecTy, V0);
}
@@ -2677,8 +2307,56 @@ HexagonTargetLowering::extractVector(SDValue VecV, SDValue IdxV,
unsigned VecWidth = VecTy.getSizeInBits();
unsigned ValWidth = ValTy.getSizeInBits();
unsigned ElemWidth = VecTy.getVectorElementType().getSizeInBits();
- assert(VecWidth == 32 || VecWidth == 64);
assert((VecWidth % ElemWidth) == 0);
+ auto *IdxN = dyn_cast<ConstantSDNode>(IdxV);
+
+ // Special case for v{8,4,2}i1 (the only boolean vectors legal in Hexagon
+ // without any coprocessors).
+ if (ElemWidth == 1) {
+ assert(VecWidth == VecTy.getVectorNumElements() && "Sanity failure");
+ assert(VecWidth == 8 || VecWidth == 4 || VecWidth == 2);
+ // Check if this is an extract of the lowest bit.
+ if (IdxN) {
+ // Extracting the lowest bit is a no-op, but it changes the type,
+ // so it must be kept as an operation to avoid errors related to
+ // type mismatches.
+ if (IdxN->isNullValue() && ValTy.getSizeInBits() == 1)
+ return DAG.getNode(HexagonISD::TYPECAST, dl, MVT::i1, VecV);
+ }
+
+ // If the value extracted is a single bit, use tstbit.
+ if (ValWidth == 1) {
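+    // E.g. in a v4i1 each element occupies 8/4 = 2 predicate bits, so
+    // extracting element 2 tests bit 4.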
+ SDValue A0 = getInstr(Hexagon::C2_tfrpr, dl, MVT::i32, {VecV}, DAG);
+ SDValue M0 = DAG.getConstant(8 / VecWidth, dl, MVT::i32);
+ SDValue I0 = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, M0);
+ return DAG.getNode(HexagonISD::TSTBIT, dl, MVT::i1, A0, I0);
+ }
+
+ // Each bool vector (v2i1, v4i1, v8i1) always occupies 8 bits in
+ // a predicate register. The elements of the vector are repeated
+ // in the register (if necessary) so that the total number is 8.
+      // The extracted subvector needs to be expanded in the same way.
+ unsigned Scale = VecWidth / ValWidth;
+
+ // Generate (p2d VecV) >> 8*Idx to move the interesting bytes to
+ // position 0.
+ assert(ty(IdxV) == MVT::i32);
+ SDValue S0 = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV,
+ DAG.getConstant(8*Scale, dl, MVT::i32));
+ SDValue T0 = DAG.getNode(HexagonISD::P2D, dl, MVT::i64, VecV);
+ SDValue T1 = DAG.getNode(ISD::SRL, dl, MVT::i64, T0, S0);
+ while (Scale > 1) {
+ // The longest possible subvector is at most 32 bits, so it is always
+ // contained in the low subregister.
+ T1 = DAG.getTargetExtractSubreg(Hexagon::isub_lo, dl, MVT::i32, T1);
+ T1 = expandPredicate(T1, dl, DAG);
+ Scale /= 2;
+ }
+
+ return DAG.getNode(HexagonISD::D2P, dl, ResTy, T1);
+ }
+
+ assert(VecWidth == 32 || VecWidth == 64);
// Cast everything to scalar integer types.
MVT ScalarTy = tyScalar(VecTy);
@@ -2687,8 +2365,8 @@ HexagonTargetLowering::extractVector(SDValue VecV, SDValue IdxV,
SDValue WidthV = DAG.getConstant(ValWidth, dl, MVT::i32);
SDValue ExtV;
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(IdxV)) {
- unsigned Off = C->getZExtValue() * ElemWidth;
+ if (IdxN) {
+ unsigned Off = IdxN->getZExtValue() * ElemWidth;
if (VecWidth == 64 && ValWidth == 32) {
assert(Off == 0 || Off == 32);
unsigned SubIdx = Off == 0 ? Hexagon::isub_lo : Hexagon::isub_hi;
@@ -2707,11 +2385,8 @@ HexagonTargetLowering::extractVector(SDValue VecV, SDValue IdxV,
IdxV = DAG.getZExtOrTrunc(IdxV, dl, MVT::i32);
SDValue OffV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV,
DAG.getConstant(ElemWidth, dl, MVT::i32));
- // EXTRACTURP takes width/offset in a 64-bit pair.
- SDValue CombV = DAG.getNode(HexagonISD::COMBINE, dl, MVT::i64,
- {WidthV, OffV});
- ExtV = DAG.getNode(HexagonISD::EXTRACTURP, dl, ScalarTy,
- {VecV, CombV});
+ ExtV = DAG.getNode(HexagonISD::EXTRACTU, dl, ScalarTy,
+ {VecV, WidthV, OffV});
}
// Cast ExtV to the requested result type.
@@ -2725,6 +2400,33 @@ HexagonTargetLowering::insertVector(SDValue VecV, SDValue ValV, SDValue IdxV,
const SDLoc &dl, MVT ValTy,
SelectionDAG &DAG) const {
MVT VecTy = ty(VecV);
+ if (VecTy.getVectorElementType() == MVT::i1) {
+ MVT ValTy = ty(ValV);
+ assert(ValTy.getVectorElementType() == MVT::i1);
+ SDValue ValR = DAG.getNode(HexagonISD::P2D, dl, MVT::i64, ValV);
+ unsigned VecLen = VecTy.getVectorNumElements();
+ unsigned Scale = VecLen / ValTy.getVectorNumElements();
+ assert(Scale > 1);
+
+ for (unsigned R = Scale; R > 1; R /= 2) {
+ ValR = contractPredicate(ValR, dl, DAG);
+ ValR = DAG.getNode(HexagonISD::COMBINE, dl, MVT::i64,
+ DAG.getUNDEF(MVT::i32), ValR);
+ }
+ // The longest possible subvector is at most 32 bits, so it is always
+ // contained in the low subregister.
+ ValR = DAG.getTargetExtractSubreg(Hexagon::isub_lo, dl, MVT::i32, ValR);
+
+ unsigned ValBytes = 64 / Scale;
+ SDValue Width = DAG.getConstant(ValBytes*8, dl, MVT::i32);
+ SDValue Idx = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV,
+ DAG.getConstant(8, dl, MVT::i32));
+ SDValue VecR = DAG.getNode(HexagonISD::P2D, dl, MVT::i64, VecV);
+ SDValue Ins = DAG.getNode(HexagonISD::INSERT, dl, MVT::i32,
+ {VecR, ValR, Width, Idx});
+ return DAG.getNode(HexagonISD::D2P, dl, VecTy, Ins);
+ }
+
unsigned VecWidth = VecTy.getSizeInBits();
unsigned ValWidth = ValTy.getSizeInBits();
assert(VecWidth == 32 || VecWidth == 64);
@@ -2752,17 +2454,32 @@ HexagonTargetLowering::insertVector(SDValue VecV, SDValue ValV, SDValue IdxV,
if (ty(IdxV) != MVT::i32)
IdxV = DAG.getZExtOrTrunc(IdxV, dl, MVT::i32);
SDValue OffV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, WidthV);
- // INSERTRP takes width/offset in a 64-bit pair.
- SDValue CombV = DAG.getNode(HexagonISD::COMBINE, dl, MVT::i64,
- {WidthV, OffV});
- InsV = DAG.getNode(HexagonISD::INSERTRP, dl, ScalarTy,
- {VecV, ValV, CombV});
+ InsV = DAG.getNode(HexagonISD::INSERT, dl, ScalarTy,
+ {VecV, ValV, WidthV, OffV});
}
return DAG.getNode(ISD::BITCAST, dl, VecTy, InsV);
}
SDValue
+HexagonTargetLowering::expandPredicate(SDValue Vec32, const SDLoc &dl,
+ SelectionDAG &DAG) const {
+ assert(ty(Vec32).getSizeInBits() == 32);
+ if (isUndef(Vec32))
+ return DAG.getUNDEF(MVT::i64);
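+  // Predicate bytes are 0x00 or 0xFF, so sign-extending each byte to a
+  // halfword (S2_vsxtbh) duplicates it; contractPredicate below undoes
+  // this with S2_vtrunehb, which keeps the even bytes.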
+ return getInstr(Hexagon::S2_vsxtbh, dl, MVT::i64, {Vec32}, DAG);
+}
+
+SDValue
+HexagonTargetLowering::contractPredicate(SDValue Vec64, const SDLoc &dl,
+ SelectionDAG &DAG) const {
+ assert(ty(Vec64).getSizeInBits() == 64);
+ if (isUndef(Vec64))
+ return DAG.getUNDEF(MVT::i32);
+ return getInstr(Hexagon::S2_vtrunehb, dl, MVT::i32, {Vec64}, DAG);
+}
+
+SDValue
HexagonTargetLowering::getZero(const SDLoc &dl, MVT Ty, SelectionDAG &DAG)
const {
if (Ty.isVector()) {
@@ -2784,18 +2501,34 @@ SDValue
HexagonTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
MVT VecTy = ty(Op);
unsigned BW = VecTy.getSizeInBits();
-
- if (Subtarget.useHVXOps() && Subtarget.isHVXVectorType(VecTy, true))
- return LowerHvxBuildVector(Op, DAG);
-
- if (BW == 32 || BW == 64) {
- const SDLoc &dl(Op);
- SmallVector<SDValue,8> Ops;
- for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i)
- Ops.push_back(Op.getOperand(i));
- if (BW == 32)
- return buildVector32(Ops, dl, VecTy, DAG);
+ const SDLoc &dl(Op);
+ SmallVector<SDValue,8> Ops;
+ for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i)
+ Ops.push_back(Op.getOperand(i));
+
+ if (BW == 32)
+ return buildVector32(Ops, dl, VecTy, DAG);
+ if (BW == 64)
return buildVector64(Ops, dl, VecTy, DAG);
+
+ if (VecTy == MVT::v8i1 || VecTy == MVT::v4i1 || VecTy == MVT::v2i1) {
+ // For each i1 element in the resulting predicate register, put 1
+ // shifted by the index of the element into a general-purpose register,
+ // then or them together and transfer it back into a predicate register.
+ SDValue Rs[8];
+ SDValue Z = getZero(dl, MVT::i32, DAG);
+ // Always produce 8 bits, repeat inputs if necessary.
+ unsigned Rep = 8 / VecTy.getVectorNumElements();
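+    // E.g. for v4i1, Rep == 2: input element i drives bits 2i and 2i+1,
+    // so <a,b,c,d> produces the bit pattern ddccbbaa (MSB to LSB).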
+ for (unsigned i = 0; i != 8; ++i) {
+ SDValue S = DAG.getConstant(1ull << i, dl, MVT::i32);
+ Rs[i] = DAG.getSelect(dl, MVT::i32, Ops[i/Rep], S, Z);
+ }
+ for (ArrayRef<SDValue> A(Rs); A.size() != 1; A = A.drop_back(A.size()/2)) {
+ for (unsigned i = 0, e = A.size()/2; i != e; ++i)
+ Rs[i] = DAG.getNode(ISD::OR, dl, MVT::i32, Rs[2*i], Rs[2*i+1]);
+ }
+ // Move the value directly to a predicate register.
+ return getInstr(Hexagon::C2_tfrrp, dl, VecTy, {Rs[0]}, DAG);
}
return SDValue();
@@ -2805,14 +2538,64 @@ SDValue
HexagonTargetLowering::LowerCONCAT_VECTORS(SDValue Op,
SelectionDAG &DAG) const {
MVT VecTy = ty(Op);
- assert(!Subtarget.useHVXOps() || !Subtarget.isHVXVectorType(VecTy));
-
+ const SDLoc &dl(Op);
if (VecTy.getSizeInBits() == 64) {
assert(Op.getNumOperands() == 2);
- return DAG.getNode(HexagonISD::COMBINE, SDLoc(Op), VecTy, Op.getOperand(1),
+ return DAG.getNode(HexagonISD::COMBINE, dl, VecTy, Op.getOperand(1),
Op.getOperand(0));
}
+ MVT ElemTy = VecTy.getVectorElementType();
+ if (ElemTy == MVT::i1) {
+ assert(VecTy == MVT::v2i1 || VecTy == MVT::v4i1 || VecTy == MVT::v8i1);
+ MVT OpTy = ty(Op.getOperand(0));
+ // Scale is how many times the operands need to be contracted to match
+ // the representation in the target register.
+ unsigned Scale = VecTy.getVectorNumElements() / OpTy.getVectorNumElements();
+ assert(Scale == Op.getNumOperands() && Scale > 1);
+
+ // First, convert all bool vectors to integers, then generate pairwise
+ // inserts to form values of doubled length. Up until there are only
+ // two values left to concatenate, all of these values will fit in a
+ // 32-bit integer, so keep them as i32 to use 32-bit inserts.
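+    // E.g. for four v2i1 operands: Scale == 4, so each operand contracts
+    // twice, down to one byte per element (16 significant bits). One
+    // insert round pairs those into 32-bit words, and the final combine
+    // plus D2P forms the v8i1 result.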
+ SmallVector<SDValue,4> Words[2];
+ unsigned IdxW = 0;
+
+ for (SDValue P : Op.getNode()->op_values()) {
+ SDValue W = DAG.getNode(HexagonISD::P2D, dl, MVT::i64, P);
+ for (unsigned R = Scale; R > 1; R /= 2) {
+ W = contractPredicate(W, dl, DAG);
+ W = DAG.getNode(HexagonISD::COMBINE, dl, MVT::i64,
+ DAG.getUNDEF(MVT::i32), W);
+ }
+ W = DAG.getTargetExtractSubreg(Hexagon::isub_lo, dl, MVT::i32, W);
+ Words[IdxW].push_back(W);
+ }
+
+ while (Scale > 2) {
+ SDValue WidthV = DAG.getConstant(64 / Scale, dl, MVT::i32);
+ Words[IdxW ^ 1].clear();
+
+ for (unsigned i = 0, e = Words[IdxW].size(); i != e; i += 2) {
+ SDValue W0 = Words[IdxW][i], W1 = Words[IdxW][i+1];
+ // Insert W1 into W0 right next to the significant bits of W0.
+ SDValue T = DAG.getNode(HexagonISD::INSERT, dl, MVT::i32,
+ {W0, W1, WidthV, WidthV});
+ Words[IdxW ^ 1].push_back(T);
+ }
+ IdxW ^= 1;
+ Scale /= 2;
+ }
+
+ // Another sanity check. At this point there should only be two words
+ // left, and Scale should be 2.
+ assert(Scale == 2 && Words[IdxW].size() == 2);
+
+ SDValue WW = DAG.getNode(HexagonISD::COMBINE, dl, MVT::i64,
+ Words[IdxW][1], Words[IdxW][0]);
+ return DAG.getNode(HexagonISD::D2P, dl, VecTy, WW);
+ }
+
return SDValue();
}
@@ -2820,10 +2603,6 @@ SDValue
HexagonTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
SelectionDAG &DAG) const {
SDValue Vec = Op.getOperand(0);
- MVT VecTy = ty(Vec);
- if (Subtarget.useHVXOps() && Subtarget.isHVXVectorType(VecTy))
- return LowerHvxExtractElement(Op, DAG);
-
MVT ElemTy = ty(Vec).getVectorElementType();
return extractVector(Vec, Op.getOperand(1), SDLoc(Op), ElemTy, ty(Op), DAG);
}
@@ -2831,31 +2610,20 @@ HexagonTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
SDValue
HexagonTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
SelectionDAG &DAG) const {
- SDValue Vec = Op.getOperand(0);
- MVT VecTy = ty(Vec);
- if (Subtarget.useHVXOps() && Subtarget.isHVXVectorType(VecTy))
- return LowerHvxExtractSubvector(Op, DAG);
-
- return extractVector(Vec, Op.getOperand(1), SDLoc(Op), ty(Op), ty(Op), DAG);
+ return extractVector(Op.getOperand(0), Op.getOperand(1), SDLoc(Op),
+ ty(Op), ty(Op), DAG);
}
SDValue
HexagonTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
SelectionDAG &DAG) const {
- MVT VecTy = ty(Op);
- if (Subtarget.useHVXOps() && Subtarget.isHVXVectorType(VecTy))
- return LowerHvxInsertElement(Op, DAG);
-
return insertVector(Op.getOperand(0), Op.getOperand(1), Op.getOperand(2),
- SDLoc(Op), VecTy.getVectorElementType(), DAG);
+ SDLoc(Op), ty(Op).getVectorElementType(), DAG);
}
SDValue
HexagonTargetLowering::LowerINSERT_SUBVECTOR(SDValue Op,
SelectionDAG &DAG) const {
- if (Subtarget.useHVXOps() && Subtarget.isHVXVectorType(ty(Op)))
- return LowerHvxInsertSubvector(Op, DAG);
-
SDValue ValV = Op.getOperand(1);
return insertVector(Op.getOperand(0), ValV, Op.getOperand(2),
SDLoc(Op), ty(ValV), DAG);
@@ -2875,6 +2643,109 @@ HexagonTargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const {
}
SDValue
+HexagonTargetLowering::LowerUnalignedLoad(SDValue Op, SelectionDAG &DAG)
+ const {
+ LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
+ unsigned HaveAlign = LN->getAlignment();
+ MVT LoadTy = ty(Op);
+ unsigned NeedAlign = Subtarget.getTypeAlignment(LoadTy);
+ if (HaveAlign >= NeedAlign)
+ return Op;
+
+ const SDLoc &dl(Op);
+ const DataLayout &DL = DAG.getDataLayout();
+ LLVMContext &Ctx = *DAG.getContext();
+ unsigned AS = LN->getAddressSpace();
+
+ // If the load aligning is disabled or the load can be broken up into two
+ // smaller legal loads, do the default (target-independent) expansion.
+ bool DoDefault = false;
+ // Handle it in the default way if this is an indexed load.
+ if (!LN->isUnindexed())
+ DoDefault = true;
+
+ if (!AlignLoads) {
+ if (allowsMemoryAccess(Ctx, DL, LN->getMemoryVT(), AS, HaveAlign))
+ return Op;
+ DoDefault = true;
+ }
+ if (!DoDefault && 2*HaveAlign == NeedAlign) {
+ // The PartTy is the equivalent of "getLoadableTypeOfSize(HaveAlign)".
+ MVT PartTy = HaveAlign <= 8 ? MVT::getIntegerVT(8*HaveAlign)
+ : MVT::getVectorVT(MVT::i8, HaveAlign);
+ DoDefault = allowsMemoryAccess(Ctx, DL, PartTy, AS, HaveAlign);
+ }
+ if (DoDefault) {
+ std::pair<SDValue, SDValue> P = expandUnalignedLoad(LN, DAG);
+ return DAG.getMergeValues({P.first, P.second}, dl);
+ }
+
+ // The code below generates two loads, both aligned as NeedAlign, and
+ // with the distance of NeedAlign between them. For that to cover the
+ // bits that need to be loaded (and without overlapping), the size of
+ // the loads should be equal to NeedAlign. This is true for all loadable
+ // types, but add an assertion in case something changes in the future.
+ assert(LoadTy.getSizeInBits() == 8*NeedAlign);
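+  // E.g. a v8i8 load with alignment 1 becomes two 8-byte loads from the
+  // enclosing 8-byte-aligned addresses (via VALIGNADDR), with VALIGN
+  // picking the requested bytes out of the pair.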
+
+ unsigned LoadLen = NeedAlign;
+ SDValue Base = LN->getBasePtr();
+ SDValue Chain = LN->getChain();
+ auto BO = getBaseAndOffset(Base);
+ unsigned BaseOpc = BO.first.getOpcode();
+ if (BaseOpc == HexagonISD::VALIGNADDR && BO.second % LoadLen == 0)
+ return Op;
+
+ if (BO.second % LoadLen != 0) {
+ BO.first = DAG.getNode(ISD::ADD, dl, MVT::i32, BO.first,
+ DAG.getConstant(BO.second % LoadLen, dl, MVT::i32));
+ BO.second -= BO.second % LoadLen;
+ }
+ SDValue BaseNoOff = (BaseOpc != HexagonISD::VALIGNADDR)
+ ? DAG.getNode(HexagonISD::VALIGNADDR, dl, MVT::i32, BO.first,
+ DAG.getConstant(NeedAlign, dl, MVT::i32))
+ : BO.first;
+ SDValue Base0 = DAG.getMemBasePlusOffset(BaseNoOff, BO.second, dl);
+ SDValue Base1 = DAG.getMemBasePlusOffset(BaseNoOff, BO.second+LoadLen, dl);
+
+ MachineMemOperand *WideMMO = nullptr;
+ if (MachineMemOperand *MMO = LN->getMemOperand()) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ WideMMO = MF.getMachineMemOperand(MMO->getPointerInfo(), MMO->getFlags(),
+ 2*LoadLen, LoadLen, MMO->getAAInfo(), MMO->getRanges(),
+ MMO->getSyncScopeID(), MMO->getOrdering(),
+ MMO->getFailureOrdering());
+ }
+
+ SDValue Load0 = DAG.getLoad(LoadTy, dl, Chain, Base0, WideMMO);
+ SDValue Load1 = DAG.getLoad(LoadTy, dl, Chain, Base1, WideMMO);
+
+ SDValue Aligned = DAG.getNode(HexagonISD::VALIGN, dl, LoadTy,
+ {Load1, Load0, BaseNoOff.getOperand(0)});
+ SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ Load0.getValue(1), Load1.getValue(1));
+ SDValue M = DAG.getMergeValues({Aligned, NewChain}, dl);
+ return M;
+}
+
+SDValue
+HexagonTargetLowering::LowerAddSubCarry(SDValue Op, SelectionDAG &DAG) const {
+ const SDLoc &dl(Op);
+ unsigned Opc = Op.getOpcode();
+ SDValue X = Op.getOperand(0), Y = Op.getOperand(1), C = Op.getOperand(2);
+
+ if (Opc == ISD::ADDCARRY)
+ return DAG.getNode(HexagonISD::ADDC, dl, Op.getNode()->getVTList(),
+ { X, Y, C });
+
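+  // SUBCARRY computes X - Y - Bin. Since X - Y - Bin == X + ~Y + !Bin,
+  // feed SUBC the complemented borrow-in and complement the carry-out it
+  // produces to recover the borrow-out.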
+ EVT CarryTy = C.getValueType();
+ SDValue SubC = DAG.getNode(HexagonISD::SUBC, dl, Op.getNode()->getVTList(),
+ { X, Y, DAG.getLogicalNOT(dl, C, CarryTy) });
+ SDValue Out[] = { SubC.getValue(0),
+ DAG.getLogicalNOT(dl, SubC.getValue(1), CarryTy) };
+ return DAG.getMergeValues(Out, dl);
+}
+
+SDValue
HexagonTargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
SDValue Chain = Op.getOperand(0);
SDValue Offset = Op.getOperand(1);
@@ -2904,6 +2775,17 @@ HexagonTargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
SDValue
HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
unsigned Opc = Op.getOpcode();
+
+ // Handle INLINEASM first.
+ if (Opc == ISD::INLINEASM)
+ return LowerINLINEASM(Op, DAG);
+
+ if (isHvxOperation(Op)) {
+ // If HVX lowering returns nothing, try the default lowering.
+ if (SDValue V = LowerHvxOperation(Op, DAG))
+ return V;
+ }
+
switch (Opc) {
default:
#ifndef NDEBUG
@@ -2919,13 +2801,17 @@ HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
+ case ISD::BITCAST: return LowerBITCAST(Op, DAG);
+ case ISD::LOAD: return LowerUnalignedLoad(Op, DAG);
+ case ISD::ADDCARRY:
+ case ISD::SUBCARRY: return LowerAddSubCarry(Op, DAG);
case ISD::SRA:
case ISD::SHL:
case ISD::SRL: return LowerVECTOR_SHIFT(Op, DAG);
+ case ISD::ROTL: return LowerROTL(Op, DAG);
case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
case ISD::JumpTable: return LowerJumpTable(Op, DAG);
case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG);
- // Frame & Return address. Currently unimplemented.
case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
@@ -2939,17 +2825,35 @@ HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::VSELECT: return LowerVSELECT(Op, DAG);
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
case ISD::INTRINSIC_VOID: return LowerINTRINSIC_VOID(Op, DAG);
- case ISD::INLINEASM: return LowerINLINEASM(Op, DAG);
case ISD::PREFETCH: return LowerPREFETCH(Op, DAG);
case ISD::READCYCLECOUNTER: return LowerREADCYCLECOUNTER(Op, DAG);
- case ISD::MUL:
- if (Subtarget.useHVXOps())
- return LowerHvxMul(Op, DAG);
break;
}
+
return SDValue();
}
+void
+HexagonTargetLowering::ReplaceNodeResults(SDNode *N,
+ SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) const {
+ const SDLoc &dl(N);
+ switch (N->getOpcode()) {
+ case ISD::SRL:
+ case ISD::SRA:
+ case ISD::SHL:
+ return;
+ case ISD::BITCAST:
+ // Handle a bitcast from v8i1 to i8.
+ if (N->getValueType(0) == MVT::i8) {
+ SDValue P = getInstr(Hexagon::C2_tfrpr, dl, MVT::i32,
+ N->getOperand(0), DAG);
+ Results.push_back(P);
+ }
+ break;
+ }
+}
+
/// Returns relocation base for the given PIC jumptable.
SDValue
HexagonTargetLowering::getPICJumpTableRelocBase(SDValue Table,
@@ -3023,7 +2927,7 @@ HexagonTargetLowering::getRegForInlineAsmConstraint(
case 512:
return {0u, &Hexagon::HvxVRRegClass};
case 1024:
- if (Subtarget.hasV60TOps() && Subtarget.useHVX128BOps())
+ if (Subtarget.hasV60Ops() && Subtarget.useHVX128BOps())
return {0u, &Hexagon::HvxVRRegClass};
return {0u, &Hexagon::HvxWRRegClass};
case 2048:
@@ -3042,7 +2946,7 @@ HexagonTargetLowering::getRegForInlineAsmConstraint(
/// specified FP immediate natively. If false, the legalizer will
/// materialize the FP immediate as a load from a constant pool.
bool HexagonTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
- return Subtarget.hasV5TOps();
+ return Subtarget.hasV5Ops();
}
/// isLegalAddressingMode - Return true if the addressing mode represented by
@@ -3104,9 +3008,9 @@ bool HexagonTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
bool HexagonTargetLowering::IsEligibleForTailCallOptimization(
SDValue Callee,
CallingConv::ID CalleeCC,
- bool isVarArg,
- bool isCalleeStructRet,
- bool isCallerStructRet,
+ bool IsVarArg,
+ bool IsCalleeStructRet,
+ bool IsCallerStructRet,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
@@ -3137,12 +3041,12 @@ bool HexagonTargetLowering::IsEligibleForTailCallOptimization(
}
// Do not tail call optimize vararg calls.
- if (isVarArg)
+ if (IsVarArg)
return false;
// Also avoid tail call optimization if either caller or callee uses struct
// return semantics.
- if (isCalleeStructRet || isCallerStructRet)
+ if (IsCalleeStructRet || IsCallerStructRet)
return false;
// In addition to the cases above, we also disable Tail Call Optimization if
@@ -3185,54 +3089,25 @@ bool HexagonTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
unsigned AS, unsigned Align, bool *Fast) const {
if (Fast)
*Fast = false;
-
- switch (VT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::v64i8:
- case MVT::v128i8:
- case MVT::v256i8:
- case MVT::v32i16:
- case MVT::v64i16:
- case MVT::v128i16:
- case MVT::v16i32:
- case MVT::v32i32:
- case MVT::v64i32:
- return true;
- }
- return false;
+ return Subtarget.isHVXVectorType(VT.getSimpleVT());
}
std::pair<const TargetRegisterClass*, uint8_t>
HexagonTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
MVT VT) const {
- const TargetRegisterClass *RRC = nullptr;
+ if (Subtarget.isHVXVectorType(VT, true)) {
+ unsigned BitWidth = VT.getSizeInBits();
+ unsigned VecWidth = Subtarget.getVectorLength() * 8;
- uint8_t Cost = 1;
- switch (VT.SimpleTy) {
- default:
- return TargetLowering::findRepresentativeClass(TRI, VT);
- case MVT::v64i8:
- case MVT::v32i16:
- case MVT::v16i32:
- RRC = &Hexagon::HvxVRRegClass;
- break;
- case MVT::v128i8:
- case MVT::v64i16:
- case MVT::v32i32:
- if (Subtarget.hasV60TOps() && Subtarget.useHVXOps() &&
- Subtarget.useHVX128BOps())
- RRC = &Hexagon::HvxVRRegClass;
- else
- RRC = &Hexagon::HvxWRRegClass;
- break;
- case MVT::v256i8:
- case MVT::v128i16:
- case MVT::v64i32:
- RRC = &Hexagon::HvxWRRegClass;
- break;
+ if (VT.getVectorElementType() == MVT::i1)
+ return std::make_pair(&Hexagon::HvxQRRegClass, 1);
+ if (BitWidth == VecWidth)
+ return std::make_pair(&Hexagon::HvxVRRegClass, 1);
+ assert(BitWidth == 2 * VecWidth);
+ return std::make_pair(&Hexagon::HvxWRRegClass, 1);
}
- return std::make_pair(RRC, Cost);
+
+ return TargetLowering::findRepresentativeClass(TRI, VT);
}
Value *HexagonTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
diff --git a/lib/Target/Hexagon/HexagonISelLowering.h b/lib/Target/Hexagon/HexagonISelLowering.h
index 732834b464b4..3d94bd1ff6ed 100644
--- a/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/lib/Target/Hexagon/HexagonISelLowering.h
@@ -18,12 +18,12 @@
#include "Hexagon.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/ISDOpcodes.h"
-#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/InlineAsm.h"
+#include "llvm/Support/MachineValueType.h"
#include <cstdint>
#include <utility>
@@ -36,6 +36,8 @@ namespace HexagonISD {
CONST32 = OP_BEGIN,
CONST32_GP, // For marking data present in GP.
+    ADDC,        // Add with carry: (X, Y, Cin) -> (X+Y+Cin, Cout).
+ SUBC, // Sub with carry: (X, Y, Cin) -> (X+~Y+Cin, Cout).
ALLOCA,
AT_GOT, // Index in GOT.
@@ -51,18 +53,15 @@ namespace HexagonISD {
CP, // Constant pool.
COMBINE,
- VSPLAT,
+ VSPLAT, // Generic splat, selection depends on argument/return
+ // types.
VASL,
VASR,
VLSR,
+ TSTBIT,
INSERT,
- INSERTRP,
EXTRACTU,
- EXTRACTURP,
- VCOMBINE,
- VPACKE,
- VPACKO,
VEXTRACTW,
VINSERTW0,
VROR,
@@ -70,8 +69,24 @@ namespace HexagonISD {
EH_RETURN,
DCFETCH,
READCYCLE,
+ D2P, // Convert 8-byte value to 8-bit predicate register. [*]
+ P2D, // Convert 8-bit predicate register to 8-byte value. [*]
+ V2Q, // Convert HVX vector to a vector predicate reg. [*]
+ Q2V, // Convert vector predicate to an HVX vector. [*]
+ // [*] The equivalence is defined as "Q <=> (V != 0)",
+ // where the != operation compares bytes.
+ // Note: V != 0 is implemented as V >u 0.
+ QCAT,
+ QTRUE,
+ QFALSE,
VZERO,
-
+ VSPLATW, // HVX splat of a 32-bit word with an arbitrary result type.
+ TYPECAST, // No-op that's used to convert between different legal
+ // types in a register.
+ VALIGN, // Align two vectors (in Op0, Op1) to one that would have
+ // been loaded from address in Op2.
+ VALIGNADDR, // Align vector address: Op0 & -Op1, except when it is
+ // an address in a vector load, then it's a no-op.
OP_END
};
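
// The [*] note above can be spot-checked in scalar code: for bytes, the
// unsigned comparison V >u 0 computes exactly the predicate V != 0, since no
// unsigned value is less than zero. A self-contained check over all bytes:
#include <cassert>
#include <cstdint>

int main() {
  for (unsigned v = 0; v != 256; ++v) {
    bool Ne = (uint8_t)v != 0;
    bool Ugt = (uint8_t)v > 0;   // V >u 0, as used by V2Q/Q2V.
    assert(Ne == Ugt);
  }
}
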
@@ -110,6 +125,10 @@ namespace HexagonISD {
bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
bool isTruncateFree(EVT VT1, EVT VT2) const override;
+ bool isCheapToSpeculateCttz() const override { return true; }
+ bool isCheapToSpeculateCtlz() const override { return true; }
+ bool isCtlzFast() const override { return true; }
+
bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;
/// Return true if an FMA operation is faster than a pair of mul and add
@@ -127,6 +146,9 @@ namespace HexagonISD {
const override;
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
+ void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) const override;
+
const char *getTargetNodeName(unsigned Opcode) const override;
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
@@ -137,6 +159,13 @@ namespace HexagonISD {
SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVECTOR_SHIFT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerROTL(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerANY_EXTEND(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerUnalignedLoad(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerAddSubCarry(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const;
@@ -284,6 +313,9 @@ namespace HexagonISD {
}
private:
+ void initializeHVXLowering();
+ std::pair<SDValue,int> getBaseAndOffset(SDValue Addr) const;
+
bool getBuildVectorConstInts(ArrayRef<SDValue> Values, MVT VecTy,
SelectionDAG &DAG,
MutableArrayRef<ConstantInt*> Consts) const;
@@ -295,13 +327,19 @@ namespace HexagonISD {
MVT ValTy, MVT ResTy, SelectionDAG &DAG) const;
SDValue insertVector(SDValue VecV, SDValue ValV, SDValue IdxV,
const SDLoc &dl, MVT ValTy, SelectionDAG &DAG) const;
+ SDValue expandPredicate(SDValue Vec32, const SDLoc &dl,
+ SelectionDAG &DAG) const;
+ SDValue contractPredicate(SDValue Vec64, const SDLoc &dl,
+ SelectionDAG &DAG) const;
+ SDValue getVectorShiftByInt(SDValue Op, SelectionDAG &DAG) const;
+
bool isUndef(SDValue Op) const {
if (Op.isMachineOpcode())
return Op.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF;
return Op.getOpcode() == ISD::UNDEF;
}
- SDValue getNode(unsigned MachineOpc, const SDLoc &dl, MVT Ty,
- ArrayRef<SDValue> Ops, SelectionDAG &DAG) const {
+ SDValue getInstr(unsigned MachineOpc, const SDLoc &dl, MVT Ty,
+ ArrayRef<SDValue> Ops, SelectionDAG &DAG) const {
SDNode *N = DAG.getMachineNode(MachineOpc, dl, Ty, Ops);
return SDValue(N, 0);
}
@@ -328,7 +366,8 @@ namespace HexagonISD {
MVT tyVector(MVT Ty, MVT ElemTy) const {
if (Ty.isVector() && Ty.getVectorElementType() == ElemTy)
return Ty;
- unsigned TyWidth = Ty.getSizeInBits(), ElemWidth = ElemTy.getSizeInBits();
+ unsigned TyWidth = Ty.getSizeInBits();
+ unsigned ElemWidth = ElemTy.getSizeInBits();
assert((TyWidth % ElemWidth) == 0);
return MVT::getVectorVT(ElemTy, TyWidth/ElemWidth);
}
@@ -343,31 +382,66 @@ namespace HexagonISD {
VectorPair opSplit(SDValue Vec, const SDLoc &dl, SelectionDAG &DAG) const;
SDValue opCastElem(SDValue Vec, MVT ElemTy, SelectionDAG &DAG) const;
+ bool isHvxSingleTy(MVT Ty) const;
+ bool isHvxPairTy(MVT Ty) const;
SDValue convertToByteIndex(SDValue ElemIdx, MVT ElemTy,
SelectionDAG &DAG) const;
SDValue getIndexInWord32(SDValue Idx, MVT ElemTy, SelectionDAG &DAG) const;
SDValue getByteShuffle(const SDLoc &dl, SDValue Op0, SDValue Op1,
ArrayRef<int> Mask, SelectionDAG &DAG) const;
- MVT getVecBoolVT() const;
-
- SDValue buildHvxVectorSingle(ArrayRef<SDValue> Values, const SDLoc &dl,
- MVT VecTy, SelectionDAG &DAG) const;
+ SDValue buildHvxVectorReg(ArrayRef<SDValue> Values, const SDLoc &dl,
+ MVT VecTy, SelectionDAG &DAG) const;
SDValue buildHvxVectorPred(ArrayRef<SDValue> Values, const SDLoc &dl,
MVT VecTy, SelectionDAG &DAG) const;
+ SDValue createHvxPrefixPred(SDValue PredV, const SDLoc &dl,
+ unsigned BitBytes, bool ZeroFill,
+ SelectionDAG &DAG) const;
+ SDValue extractHvxElementReg(SDValue VecV, SDValue IdxV, const SDLoc &dl,
+ MVT ResTy, SelectionDAG &DAG) const;
+ SDValue extractHvxElementPred(SDValue VecV, SDValue IdxV, const SDLoc &dl,
+ MVT ResTy, SelectionDAG &DAG) const;
+ SDValue insertHvxElementReg(SDValue VecV, SDValue IdxV, SDValue ValV,
+ const SDLoc &dl, SelectionDAG &DAG) const;
+ SDValue insertHvxElementPred(SDValue VecV, SDValue IdxV, SDValue ValV,
+ const SDLoc &dl, SelectionDAG &DAG) const;
+ SDValue extractHvxSubvectorReg(SDValue VecV, SDValue IdxV, const SDLoc &dl,
+ MVT ResTy, SelectionDAG &DAG) const;
+ SDValue extractHvxSubvectorPred(SDValue VecV, SDValue IdxV, const SDLoc &dl,
+ MVT ResTy, SelectionDAG &DAG) const;
+ SDValue insertHvxSubvectorReg(SDValue VecV, SDValue SubV, SDValue IdxV,
+ const SDLoc &dl, SelectionDAG &DAG) const;
+ SDValue insertHvxSubvectorPred(SDValue VecV, SDValue SubV, SDValue IdxV,
+ const SDLoc &dl, SelectionDAG &DAG) const;
+ SDValue extendHvxVectorPred(SDValue VecV, const SDLoc &dl, MVT ResTy,
+ bool ZeroExt, SelectionDAG &DAG) const;
SDValue LowerHvxBuildVector(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerHvxConcatVectors(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerHvxExtractElement(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerHvxInsertElement(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerHvxExtractSubvector(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerHvxInsertSubvector(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue LowerHvxAnyExt(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerHvxSignExt(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerHvxZeroExt(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerHvxCttz(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerHvxMul(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerHvxMulh(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerHvxSetCC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerHvxExtend(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerHvxShift(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue SplitHvxPairOp(SDValue Op, SelectionDAG &DAG) const;
+ SDValue SplitHvxMemOp(SDValue Op, SelectionDAG &DAG) const;
std::pair<const TargetRegisterClass*, uint8_t>
findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT)
const override;
+
+ bool isHvxOperation(SDValue Op) const;
+ SDValue LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const;
};
} // end namespace llvm
diff --git a/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
index 51480d09d734..2566194ca9c6 100644
--- a/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
+++ b/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
@@ -10,9 +10,192 @@
#include "HexagonISelLowering.h"
#include "HexagonRegisterInfo.h"
#include "HexagonSubtarget.h"
+#include "llvm/Support/CommandLine.h"
using namespace llvm;
+static const MVT LegalV64[] = { MVT::v64i8, MVT::v32i16, MVT::v16i32 };
+static const MVT LegalW64[] = { MVT::v128i8, MVT::v64i16, MVT::v32i32 };
+static const MVT LegalV128[] = { MVT::v128i8, MVT::v64i16, MVT::v32i32 };
+static const MVT LegalW128[] = { MVT::v256i8, MVT::v128i16, MVT::v64i32 };
+
+
+void
+HexagonTargetLowering::initializeHVXLowering() {
+ if (Subtarget.useHVX64BOps()) {
+ addRegisterClass(MVT::v64i8, &Hexagon::HvxVRRegClass);
+ addRegisterClass(MVT::v32i16, &Hexagon::HvxVRRegClass);
+ addRegisterClass(MVT::v16i32, &Hexagon::HvxVRRegClass);
+ addRegisterClass(MVT::v128i8, &Hexagon::HvxWRRegClass);
+ addRegisterClass(MVT::v64i16, &Hexagon::HvxWRRegClass);
+ addRegisterClass(MVT::v32i32, &Hexagon::HvxWRRegClass);
+ // These "short" boolean vector types should be legal because
+ // they will appear as results of vector compares. If they were
+ // not legal, type legalization would try to make them legal
+ // and that would require using operations that do not use or
+ // produce such types. That, in turn, would imply using custom
+ // nodes, which would be unoptimizable by the DAG combiner.
+ // The idea is to rely on target-independent operations as much
+ // as possible.
+ addRegisterClass(MVT::v16i1, &Hexagon::HvxQRRegClass);
+ addRegisterClass(MVT::v32i1, &Hexagon::HvxQRRegClass);
+ addRegisterClass(MVT::v64i1, &Hexagon::HvxQRRegClass);
+ addRegisterClass(MVT::v512i1, &Hexagon::HvxQRRegClass);
+ } else if (Subtarget.useHVX128BOps()) {
+ addRegisterClass(MVT::v128i8, &Hexagon::HvxVRRegClass);
+ addRegisterClass(MVT::v64i16, &Hexagon::HvxVRRegClass);
+ addRegisterClass(MVT::v32i32, &Hexagon::HvxVRRegClass);
+ addRegisterClass(MVT::v256i8, &Hexagon::HvxWRRegClass);
+ addRegisterClass(MVT::v128i16, &Hexagon::HvxWRRegClass);
+ addRegisterClass(MVT::v64i32, &Hexagon::HvxWRRegClass);
+ addRegisterClass(MVT::v32i1, &Hexagon::HvxQRRegClass);
+ addRegisterClass(MVT::v64i1, &Hexagon::HvxQRRegClass);
+ addRegisterClass(MVT::v128i1, &Hexagon::HvxQRRegClass);
+ addRegisterClass(MVT::v1024i1, &Hexagon::HvxQRRegClass);
+ }
+
+ // Set up operation actions.
+
+ bool Use64b = Subtarget.useHVX64BOps();
+ ArrayRef<MVT> LegalV = Use64b ? LegalV64 : LegalV128;
+ ArrayRef<MVT> LegalW = Use64b ? LegalW64 : LegalW128;
+ MVT ByteV = Use64b ? MVT::v64i8 : MVT::v128i8;
+ MVT ByteW = Use64b ? MVT::v128i8 : MVT::v256i8;
+
+ auto setPromoteTo = [this] (unsigned Opc, MVT FromTy, MVT ToTy) {
+ setOperationAction(Opc, FromTy, Promote);
+ AddPromotedToType(Opc, FromTy, ToTy);
+ };
+
+ setOperationAction(ISD::VECTOR_SHUFFLE, ByteV, Legal);
+ setOperationAction(ISD::VECTOR_SHUFFLE, ByteW, Legal);
+
+ for (MVT T : LegalV) {
+ setIndexedLoadAction(ISD::POST_INC, T, Legal);
+ setIndexedStoreAction(ISD::POST_INC, T, Legal);
+
+ setOperationAction(ISD::AND, T, Legal);
+ setOperationAction(ISD::OR, T, Legal);
+ setOperationAction(ISD::XOR, T, Legal);
+ setOperationAction(ISD::ADD, T, Legal);
+ setOperationAction(ISD::SUB, T, Legal);
+ setOperationAction(ISD::CTPOP, T, Legal);
+ setOperationAction(ISD::CTLZ, T, Legal);
+ if (T != ByteV) {
+ setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, T, Legal);
+ setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, T, Legal);
+ setOperationAction(ISD::BSWAP, T, Legal);
+ }
+
+ setOperationAction(ISD::CTTZ, T, Custom);
+ setOperationAction(ISD::LOAD, T, Custom);
+ setOperationAction(ISD::MUL, T, Custom);
+ setOperationAction(ISD::MULHS, T, Custom);
+ setOperationAction(ISD::MULHU, T, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, T, Custom);
+ // Make concat-vectors custom to handle concats of more than 2 vectors.
+ setOperationAction(ISD::CONCAT_VECTORS, T, Custom);
+ setOperationAction(ISD::INSERT_SUBVECTOR, T, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, T, Custom);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, T, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, T, Custom);
+ setOperationAction(ISD::ANY_EXTEND, T, Custom);
+ setOperationAction(ISD::SIGN_EXTEND, T, Custom);
+ setOperationAction(ISD::ZERO_EXTEND, T, Custom);
+ if (T != ByteV) {
+ setOperationAction(ISD::ANY_EXTEND_VECTOR_INREG, T, Custom);
+ // HVX only has shifts of words and halfwords.
+ setOperationAction(ISD::SRA, T, Custom);
+ setOperationAction(ISD::SHL, T, Custom);
+ setOperationAction(ISD::SRL, T, Custom);
+
+ // Promote all shuffles to operate on vectors of bytes.
+ setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteV);
+ }
+
+ setCondCodeAction(ISD::SETNE, T, Expand);
+ setCondCodeAction(ISD::SETLE, T, Expand);
+ setCondCodeAction(ISD::SETGE, T, Expand);
+ setCondCodeAction(ISD::SETLT, T, Expand);
+ setCondCodeAction(ISD::SETULE, T, Expand);
+ setCondCodeAction(ISD::SETUGE, T, Expand);
+ setCondCodeAction(ISD::SETULT, T, Expand);
+ }
+
+ for (MVT T : LegalW) {
+ // Custom-lower BUILD_VECTOR for vector pairs. The standard (target-
+ // independent) handling of it would convert it to a load, which is
+ // not always the optimal choice.
+ setOperationAction(ISD::BUILD_VECTOR, T, Custom);
+ // Make concat-vectors custom to handle concats of more than 2 vectors.
+ setOperationAction(ISD::CONCAT_VECTORS, T, Custom);
+
+ // Custom-lower these operations for pairs. Expand them into a concat
+ // of the corresponding operations on individual vectors.
+ setOperationAction(ISD::ANY_EXTEND, T, Custom);
+ setOperationAction(ISD::SIGN_EXTEND, T, Custom);
+ setOperationAction(ISD::ZERO_EXTEND, T, Custom);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, T, Custom);
+ setOperationAction(ISD::ANY_EXTEND_VECTOR_INREG, T, Custom);
+ setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, T, Legal);
+ setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, T, Legal);
+
+ setOperationAction(ISD::LOAD, T, Custom);
+ setOperationAction(ISD::STORE, T, Custom);
+ setOperationAction(ISD::CTLZ, T, Custom);
+ setOperationAction(ISD::CTTZ, T, Custom);
+ setOperationAction(ISD::CTPOP, T, Custom);
+
+ setOperationAction(ISD::ADD, T, Legal);
+ setOperationAction(ISD::SUB, T, Legal);
+ setOperationAction(ISD::MUL, T, Custom);
+ setOperationAction(ISD::MULHS, T, Custom);
+ setOperationAction(ISD::MULHU, T, Custom);
+ setOperationAction(ISD::AND, T, Custom);
+ setOperationAction(ISD::OR, T, Custom);
+ setOperationAction(ISD::XOR, T, Custom);
+ setOperationAction(ISD::SETCC, T, Custom);
+ setOperationAction(ISD::VSELECT, T, Custom);
+ if (T != ByteW) {
+ setOperationAction(ISD::SRA, T, Custom);
+ setOperationAction(ISD::SHL, T, Custom);
+ setOperationAction(ISD::SRL, T, Custom);
+
+ // Promote all shuffles to operate on vectors of bytes.
+ setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteW);
+ }
+ }
+
+ // Boolean vectors.
+
+ for (MVT T : LegalW) {
+ // Boolean types for vector pairs will overlap with the boolean
+ // types for single vectors, e.g.
+ // v64i8 -> v64i1 (single)
+ // v64i16 -> v64i1 (pair)
+ // Set these actions first, and allow the single actions to overwrite
+ // any duplicates.
+ MVT BoolW = MVT::getVectorVT(MVT::i1, T.getVectorNumElements());
+ setOperationAction(ISD::SETCC, BoolW, Custom);
+ setOperationAction(ISD::AND, BoolW, Custom);
+ setOperationAction(ISD::OR, BoolW, Custom);
+ setOperationAction(ISD::XOR, BoolW, Custom);
+ }
+
+ for (MVT T : LegalV) {
+ MVT BoolV = MVT::getVectorVT(MVT::i1, T.getVectorNumElements());
+ setOperationAction(ISD::BUILD_VECTOR, BoolV, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, BoolV, Custom);
+ setOperationAction(ISD::INSERT_SUBVECTOR, BoolV, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, BoolV, Custom);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, BoolV, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, BoolV, Custom);
+ setOperationAction(ISD::AND, BoolV, Legal);
+ setOperationAction(ISD::OR, BoolV, Legal);
+ setOperationAction(ISD::XOR, BoolV, Legal);
+ }
+}
+
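
// Why the pair loop must run first: in 64-byte mode the bool types derived
// from pairs (v128i8 -> v128i1, v64i16 -> v64i1, v32i32 -> v32i1) and from
// single vectors (v64i8 -> v64i1, v32i16 -> v32i1, v16i32 -> v16i1) share
// v64i1 and v32i1, so the single-vector actions must be the ones that stick.
// A sketch over element counts only (not real MVTs):
#include <cassert>
#include <set>

int main() {
  std::set<unsigned> PairBools{128, 64, 32};    // From LegalW64.
  std::set<unsigned> SingleBools{64, 32, 16};   // From LegalV64.
  assert(PairBools.count(64) && SingleBools.count(64));   // v64i1 overlaps.
  assert(PairBools.count(32) && SingleBools.count(32));   // v32i1 overlaps.
}
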
SDValue
HexagonTargetLowering::getInt(unsigned IntId, MVT ResTy, ArrayRef<SDValue> Ops,
const SDLoc &dl, SelectionDAG &DAG) const {
@@ -75,9 +258,23 @@ HexagonTargetLowering::VectorPair
HexagonTargetLowering::opSplit(SDValue Vec, const SDLoc &dl,
SelectionDAG &DAG) const {
TypePair Tys = typeSplit(ty(Vec));
+ if (Vec.getOpcode() == HexagonISD::QCAT)
+ return VectorPair(Vec.getOperand(0), Vec.getOperand(1));
return DAG.SplitVector(Vec, dl, Tys.first, Tys.second);
}
+bool
+HexagonTargetLowering::isHvxSingleTy(MVT Ty) const {
+ return Subtarget.isHVXVectorType(Ty) &&
+ Ty.getSizeInBits() == 8 * Subtarget.getVectorLength();
+}
+
+bool
+HexagonTargetLowering::isHvxPairTy(MVT Ty) const {
+ return Subtarget.isHVXVectorType(Ty) &&
+ Ty.getSizeInBits() == 16 * Subtarget.getVectorLength();
+}
+
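
// Concrete widths behind these two predicates, assuming the 64-byte subtarget
// where getVectorLength() returns 64: a "single" type spans 8*64 = 512 bits
// (e.g. v16i32) and a "pair" spans 16*64 = 1024 bits (e.g. v32i32).
#include <cassert>

static bool isSingle(unsigned Bits, unsigned HwLen) { return Bits == 8 * HwLen; }
static bool isPair(unsigned Bits, unsigned HwLen) { return Bits == 16 * HwLen; }

int main() {
  assert(isSingle(512, 64) && !isPair(512, 64));    // v16i32
  assert(isPair(1024, 64) && !isSingle(1024, 64));  // v32i32
}
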
SDValue
HexagonTargetLowering::convertToByteIndex(SDValue ElemIdx, MVT ElemTy,
SelectionDAG &DAG) const {
@@ -141,36 +338,16 @@ HexagonTargetLowering::getByteShuffle(const SDLoc &dl, SDValue Op0,
opCastElem(Op1, MVT::i8, DAG), ByteMask);
}
-MVT
-HexagonTargetLowering::getVecBoolVT() const {
- return MVT::getVectorVT(MVT::i1, 8*Subtarget.getVectorLength());
-}
-
SDValue
-HexagonTargetLowering::buildHvxVectorSingle(ArrayRef<SDValue> Values,
- const SDLoc &dl, MVT VecTy,
- SelectionDAG &DAG) const {
+HexagonTargetLowering::buildHvxVectorReg(ArrayRef<SDValue> Values,
+ const SDLoc &dl, MVT VecTy,
+ SelectionDAG &DAG) const {
unsigned VecLen = Values.size();
MachineFunction &MF = DAG.getMachineFunction();
MVT ElemTy = VecTy.getVectorElementType();
unsigned ElemWidth = ElemTy.getSizeInBits();
unsigned HwLen = Subtarget.getVectorLength();
- SmallVector<ConstantInt*, 128> Consts(VecLen);
- bool AllConst = getBuildVectorConstInts(Values, VecTy, DAG, Consts);
- if (AllConst) {
- if (llvm::all_of(Consts, [](ConstantInt *CI) { return CI->isZero(); }))
- return getZero(dl, VecTy, DAG);
-
- ArrayRef<Constant*> Tmp((Constant**)Consts.begin(),
- (Constant**)Consts.end());
- Constant *CV = ConstantVector::get(Tmp);
- unsigned Align = HwLen;
- SDValue CP = LowerConstantPool(DAG.getConstantPool(CV, VecTy, Align), DAG);
- return DAG.getLoad(VecTy, dl, DAG.getEntryNode(), CP,
- MachinePointerInfo::getConstantPool(MF), Align);
- }
-
unsigned ElemSize = ElemWidth / 8;
assert(ElemSize*VecLen == HwLen);
SmallVector<SDValue,32> Words;
@@ -187,12 +364,47 @@ HexagonTargetLowering::buildHvxVectorSingle(ArrayRef<SDValue> Values,
Words.assign(Values.begin(), Values.end());
}
+ unsigned NumWords = Words.size();
+ bool IsSplat = true, IsUndef = true;
+ SDValue SplatV;
+ for (unsigned i = 0; i != NumWords && IsSplat; ++i) {
+ if (isUndef(Words[i]))
+ continue;
+ IsUndef = false;
+ if (!SplatV.getNode())
+ SplatV = Words[i];
+ else if (SplatV != Words[i])
+ IsSplat = false;
+ }
+ if (IsUndef)
+ return DAG.getUNDEF(VecTy);
+ if (IsSplat) {
+ assert(SplatV.getNode());
+ auto *IdxN = dyn_cast<ConstantSDNode>(SplatV.getNode());
+ if (IdxN && IdxN->isNullValue())
+ return getZero(dl, VecTy, DAG);
+ return DAG.getNode(HexagonISD::VSPLATW, dl, VecTy, SplatV);
+ }
+
+ // Delay recognizing constant vectors until here, so that we can generate
+ // a vsplat.
+ SmallVector<ConstantInt*, 128> Consts(VecLen);
+ bool AllConst = getBuildVectorConstInts(Values, VecTy, DAG, Consts);
+ if (AllConst) {
+ ArrayRef<Constant*> Tmp((Constant**)Consts.begin(),
+ (Constant**)Consts.end());
+ Constant *CV = ConstantVector::get(Tmp);
+ unsigned Align = HwLen;
+ SDValue CP = LowerConstantPool(DAG.getConstantPool(CV, VecTy, Align), DAG);
+ return DAG.getLoad(VecTy, dl, DAG.getEntryNode(), CP,
+ MachinePointerInfo::getConstantPool(MF), Align);
+ }
+
// Construct two halves in parallel, then or them together.
assert(4*Words.size() == Subtarget.getVectorLength());
- SDValue HalfV0 = getNode(Hexagon::V6_vd0, dl, VecTy, {}, DAG);
- SDValue HalfV1 = getNode(Hexagon::V6_vd0, dl, VecTy, {}, DAG);
+ SDValue HalfV0 = getInstr(Hexagon::V6_vd0, dl, VecTy, {}, DAG);
+ SDValue HalfV1 = getInstr(Hexagon::V6_vd0, dl, VecTy, {}, DAG);
SDValue S = DAG.getConstant(4, dl, MVT::i32);
- unsigned NumWords = Words.size();
for (unsigned i = 0; i != NumWords/2; ++i) {
SDValue N = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy,
{HalfV0, Words[i]});
@@ -209,6 +421,95 @@ HexagonTargetLowering::buildHvxVectorSingle(ArrayRef<SDValue> Values,
}
SDValue
+HexagonTargetLowering::createHvxPrefixPred(SDValue PredV, const SDLoc &dl,
+ unsigned BitBytes, bool ZeroFill, SelectionDAG &DAG) const {
+ MVT PredTy = ty(PredV);
+ unsigned HwLen = Subtarget.getVectorLength();
+ MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
+
+ if (Subtarget.isHVXVectorType(PredTy, true)) {
+ // Move the vector predicate SubV to a vector register, and scale it
+ // down to match the representation (bytes per type element) that VecV
+ // uses. The scaling down will pick every 2nd or 4th (every Scale-th
+ // in general) element and put them at the front of the resulting
+ // vector. This subvector will then be inserted into the Q2V of VecV.
+ // To avoid having an operation that generates an illegal type (short
+ // vector), generate a full size vector.
+ //
+ SDValue T = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, PredV);
+ SmallVector<int,128> Mask(HwLen);
+ // Scale = BitBytes(PredV) / Given BitBytes.
+ unsigned Scale = HwLen / (PredTy.getVectorNumElements() * BitBytes);
+ unsigned BlockLen = PredTy.getVectorNumElements() * BitBytes;
+
+ for (unsigned i = 0; i != HwLen; ++i) {
+ unsigned Num = i % Scale;
+ unsigned Off = i / Scale;
+ Mask[BlockLen*Num + Off] = i;
+ }
+ SDValue S = DAG.getVectorShuffle(ByteTy, dl, T, DAG.getUNDEF(ByteTy), Mask);
+ if (!ZeroFill)
+ return S;
+ // Fill the bytes beyond BlockLen with 0s.
+ MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
+ SDValue Q = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
+ {DAG.getConstant(BlockLen, dl, MVT::i32)}, DAG);
+ SDValue M = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, Q);
+ return DAG.getNode(ISD::AND, dl, ByteTy, S, M);
+ }
+
+ // Make sure that this is a valid scalar predicate.
+ assert(PredTy == MVT::v2i1 || PredTy == MVT::v4i1 || PredTy == MVT::v8i1);
+
+ unsigned Bytes = 8 / PredTy.getVectorNumElements();
+ SmallVector<SDValue,4> Words[2];
+ unsigned IdxW = 0;
+
+ auto Lo32 = [&DAG, &dl] (SDValue P) {
+ return DAG.getTargetExtractSubreg(Hexagon::isub_lo, dl, MVT::i32, P);
+ };
+ auto Hi32 = [&DAG, &dl] (SDValue P) {
+ return DAG.getTargetExtractSubreg(Hexagon::isub_hi, dl, MVT::i32, P);
+ };
+
+ SDValue W0 = isUndef(PredV)
+ ? DAG.getUNDEF(MVT::i64)
+ : DAG.getNode(HexagonISD::P2D, dl, MVT::i64, PredV);
+ Words[IdxW].push_back(Hi32(W0));
+ Words[IdxW].push_back(Lo32(W0));
+
+ while (Bytes < BitBytes) {
+ IdxW ^= 1;
+ Words[IdxW].clear();
+
+ if (Bytes < 4) {
+ for (const SDValue &W : Words[IdxW ^ 1]) {
+ SDValue T = expandPredicate(W, dl, DAG);
+ Words[IdxW].push_back(Hi32(T));
+ Words[IdxW].push_back(Lo32(T));
+ }
+ } else {
+ for (const SDValue &W : Words[IdxW ^ 1]) {
+ Words[IdxW].push_back(W);
+ Words[IdxW].push_back(W);
+ }
+ }
+ Bytes *= 2;
+ }
+
+ assert(Bytes == BitBytes);
+
+ SDValue Vec = ZeroFill ? getZero(dl, ByteTy, DAG) : DAG.getUNDEF(ByteTy);
+ SDValue S4 = DAG.getConstant(HwLen-4, dl, MVT::i32);
+ for (const SDValue &W : Words[IdxW]) {
+ Vec = DAG.getNode(HexagonISD::VROR, dl, ByteTy, Vec, S4);
+ Vec = DAG.getNode(HexagonISD::VINSERTW0, dl, ByteTy, Vec, W);
+ }
+
+ return Vec;
+}
+
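
// Worked example of the compaction shuffle above, assuming HwLen = 64,
// PredTy = v16i1 and BitBytes = 1: then Scale = 64/(16*1) = 4 and
// BlockLen = 16, so the first 16 mask entries pick every 4th byte, moving
// the bytes that carry predicate bits to the front of the vector.
#include <cassert>
#include <vector>

int main() {
  unsigned HwLen = 64, NumElems = 16, BitBytes = 1;
  unsigned Scale = HwLen / (NumElems * BitBytes);   // 4
  unsigned BlockLen = NumElems * BitBytes;          // 16
  std::vector<int> Mask(HwLen);
  for (unsigned i = 0; i != HwLen; ++i)
    Mask[BlockLen * (i % Scale) + i / Scale] = i;   // Same formula as above.
  for (unsigned j = 0; j != BlockLen; ++j)
    assert(Mask[j] == int(j * Scale));              // 0, 4, 8, ..., 60.
}
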
+SDValue
HexagonTargetLowering::buildHvxVectorPred(ArrayRef<SDValue> Values,
const SDLoc &dl, MVT VecTy,
SelectionDAG &DAG) const {
@@ -218,6 +519,18 @@ HexagonTargetLowering::buildHvxVectorPred(ArrayRef<SDValue> Values,
unsigned HwLen = Subtarget.getVectorLength();
assert(VecLen <= HwLen || VecLen == 8*HwLen);
SmallVector<SDValue,128> Bytes;
+ bool AllT = true, AllF = true;
+
+ auto IsTrue = [] (SDValue V) {
+ if (const auto *N = dyn_cast<ConstantSDNode>(V.getNode()))
+ return !N->isNullValue();
+ return false;
+ };
+ auto IsFalse = [] (SDValue V) {
+ if (const auto *N = dyn_cast<ConstantSDNode>(V.getNode()))
+ return N->isNullValue();
+ return false;
+ };
if (VecLen <= HwLen) {
// In the hardware, each bit of a vector predicate corresponds to a byte
@@ -226,8 +539,11 @@ HexagonTargetLowering::buildHvxVectorPred(ArrayRef<SDValue> Values,
assert(HwLen % VecLen == 0);
unsigned BitBytes = HwLen / VecLen;
for (SDValue V : Values) {
+ AllT &= IsTrue(V);
+ AllF &= IsFalse(V);
+
SDValue Ext = !V.isUndef() ? DAG.getZExtOrTrunc(V, dl, MVT::i8)
- : DAG.getConstant(0, dl, MVT::i8);
+ : DAG.getUNDEF(MVT::i8);
for (unsigned B = 0; B != BitBytes; ++B)
Bytes.push_back(Ext);
}
@@ -243,8 +559,11 @@ HexagonTargetLowering::buildHvxVectorPred(ArrayRef<SDValue> Values,
break;
}
SDValue F = Values[I+B];
+ AllT &= IsTrue(F);
+ AllF &= IsFalse(F);
+
SDValue Ext = (B < 8) ? DAG.getZExtOrTrunc(F, dl, MVT::i8)
- : DAG.getConstant(0, dl, MVT::i8);
+ : DAG.getUNDEF(MVT::i8);
Bytes.push_back(Ext);
// Verify that the rest of the values in the group are the same as the
// first.
@@ -253,53 +572,25 @@ HexagonTargetLowering::buildHvxVectorPred(ArrayRef<SDValue> Values,
}
}
- MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
- SDValue ByteVec = buildHvxVectorSingle(Bytes, dl, ByteTy, DAG);
- SDValue Cmp = DAG.getSetCC(dl, VecTy, ByteVec, getZero(dl, ByteTy, DAG),
- ISD::SETUGT);
- return Cmp;
-}
-
-SDValue
-HexagonTargetLowering::LowerHvxBuildVector(SDValue Op, SelectionDAG &DAG)
- const {
- const SDLoc &dl(Op);
- MVT VecTy = ty(Op);
-
- unsigned Size = Op.getNumOperands();
- SmallVector<SDValue,128> Ops;
- for (unsigned i = 0; i != Size; ++i)
- Ops.push_back(Op.getOperand(i));
-
- if (VecTy.getVectorElementType() == MVT::i1)
- return buildHvxVectorPred(Ops, dl, VecTy, DAG);
+ if (AllT)
+ return DAG.getNode(HexagonISD::QTRUE, dl, VecTy);
+ if (AllF)
+ return DAG.getNode(HexagonISD::QFALSE, dl, VecTy);
- if (VecTy.getSizeInBits() == 16*Subtarget.getVectorLength()) {
- ArrayRef<SDValue> A(Ops);
- MVT SingleTy = typeSplit(VecTy).first;
- SDValue V0 = buildHvxVectorSingle(A.take_front(Size/2), dl, SingleTy, DAG);
- SDValue V1 = buildHvxVectorSingle(A.drop_front(Size/2), dl, SingleTy, DAG);
- return DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, V0, V1);
- }
-
- return buildHvxVectorSingle(Ops, dl, VecTy, DAG);
+ MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
+ SDValue ByteVec = buildHvxVectorReg(Bytes, dl, ByteTy, DAG);
+ return DAG.getNode(HexagonISD::V2Q, dl, VecTy, ByteVec);
}
SDValue
-HexagonTargetLowering::LowerHvxExtractElement(SDValue Op, SelectionDAG &DAG)
- const {
- // Change the type of the extracted element to i32.
- SDValue VecV = Op.getOperand(0);
+HexagonTargetLowering::extractHvxElementReg(SDValue VecV, SDValue IdxV,
+ const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
MVT ElemTy = ty(VecV).getVectorElementType();
+
unsigned ElemWidth = ElemTy.getSizeInBits();
assert(ElemWidth >= 8 && ElemWidth <= 32);
(void)ElemWidth;
- const SDLoc &dl(Op);
- SDValue IdxV = Op.getOperand(1);
- if (ty(IdxV) != MVT::i32)
- IdxV = DAG.getBitcast(MVT::i32, IdxV);
-
SDValue ByteIdx = convertToByteIndex(IdxV, ElemTy, DAG);
SDValue ExWord = DAG.getNode(HexagonISD::VEXTRACTW, dl, MVT::i32,
{VecV, ByteIdx});
@@ -316,13 +607,29 @@ HexagonTargetLowering::LowerHvxExtractElement(SDValue Op, SelectionDAG &DAG)
}
SDValue
-HexagonTargetLowering::LowerHvxInsertElement(SDValue Op, SelectionDAG &DAG)
- const {
- const SDLoc &dl(Op);
- SDValue VecV = Op.getOperand(0);
- SDValue ValV = Op.getOperand(1);
- SDValue IdxV = Op.getOperand(2);
+HexagonTargetLowering::extractHvxElementPred(SDValue VecV, SDValue IdxV,
+ const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
+ // Implement other return types if necessary.
+ assert(ResTy == MVT::i1);
+
+ unsigned HwLen = Subtarget.getVectorLength();
+ MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
+ SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV);
+
+ unsigned Scale = HwLen / ty(VecV).getVectorNumElements();
+ SDValue ScV = DAG.getConstant(Scale, dl, MVT::i32);
+ IdxV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, ScV);
+
+ SDValue ExtB = extractHvxElementReg(ByteVec, IdxV, dl, MVT::i32, DAG);
+ SDValue Zero = DAG.getTargetConstant(0, dl, MVT::i32);
+ return getInstr(Hexagon::C2_cmpgtui, dl, MVT::i1, {ExtB, Zero}, DAG);
+}
+
+SDValue
+HexagonTargetLowering::insertHvxElementReg(SDValue VecV, SDValue IdxV,
+ SDValue ValV, const SDLoc &dl, SelectionDAG &DAG) const {
MVT ElemTy = ty(VecV).getVectorElementType();
+
unsigned ElemWidth = ElemTy.getSizeInBits();
assert(ElemWidth >= 8 && ElemWidth <= 32);
(void)ElemWidth;
@@ -336,7 +643,7 @@ HexagonTargetLowering::LowerHvxInsertElement(SDValue Op, SelectionDAG &DAG)
SDValue RotV = DAG.getNode(HexagonISD::VROR, dl, VecTy, {VecV, MaskV});
SDValue InsV = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy, {RotV, ValV});
SDValue SubV = DAG.getNode(ISD::SUB, dl, MVT::i32,
- {DAG.getConstant(HwLen/4, dl, MVT::i32), MaskV});
+ {DAG.getConstant(HwLen, dl, MVT::i32), MaskV});
SDValue TorV = DAG.getNode(HexagonISD::VROR, dl, VecTy, {InsV, SubV});
return TorV;
};
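
// The one-line change in this hunk fixes the rotate-back amount: VROR counts
// bytes, so after rotating the vector by MaskV bytes the inverse rotation is
// HwLen - MaskV bytes; HwLen/4 - MaskV mixed a word count into byte
// arithmetic. A scalar model of the cancellation, assuming VROR behaves as a
// plain byte rotation:
#include <cassert>
#include <numeric>
#include <vector>

static std::vector<int> ror(const std::vector<int> &V, unsigned N) {
  unsigned L = V.size();
  std::vector<int> R(L);
  for (unsigned i = 0; i != L; ++i)
    R[i] = V[(i + N) % L];
  return R;
}

int main() {
  unsigned HwLen = 64, MaskV = 20;   // A 4-byte-aligned byte offset.
  std::vector<int> V(HwLen);
  std::iota(V.begin(), V.end(), 0);
  assert(ror(ror(V, MaskV), HwLen - MaskV) == V);   // The rotations cancel.
}
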
@@ -349,9 +656,8 @@ HexagonTargetLowering::LowerHvxInsertElement(SDValue Op, SelectionDAG &DAG)
// 1. Extract the existing word from the target vector.
SDValue WordIdx = DAG.getNode(ISD::SRL, dl, MVT::i32,
{ByteIdx, DAG.getConstant(2, dl, MVT::i32)});
- SDValue Ex0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
- {opCastElem(VecV, MVT::i32, DAG), WordIdx});
- SDValue Ext = LowerHvxExtractElement(Ex0, DAG);
+ SDValue Ext = extractHvxElementReg(opCastElem(VecV, MVT::i32, DAG), WordIdx,
+ dl, MVT::i32, DAG);
// 2. Treating the extracted word as a 32-bit vector, insert the given
// value into it.
@@ -365,55 +671,531 @@ HexagonTargetLowering::LowerHvxInsertElement(SDValue Op, SelectionDAG &DAG)
}
SDValue
+HexagonTargetLowering::insertHvxElementPred(SDValue VecV, SDValue IdxV,
+ SDValue ValV, const SDLoc &dl, SelectionDAG &DAG) const {
+ unsigned HwLen = Subtarget.getVectorLength();
+ MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
+ SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV);
+
+ unsigned Scale = HwLen / ty(VecV).getVectorNumElements();
+ SDValue ScV = DAG.getConstant(Scale, dl, MVT::i32);
+ IdxV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, ScV);
+ ValV = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, ValV);
+
+ SDValue InsV = insertHvxElementReg(ByteVec, IdxV, ValV, dl, DAG);
+ return DAG.getNode(HexagonISD::V2Q, dl, ty(VecV), InsV);
+}
+
+SDValue
+HexagonTargetLowering::extractHvxSubvectorReg(SDValue VecV, SDValue IdxV,
+ const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
+ MVT VecTy = ty(VecV);
+ unsigned HwLen = Subtarget.getVectorLength();
+ unsigned Idx = cast<ConstantSDNode>(IdxV.getNode())->getZExtValue();
+ MVT ElemTy = VecTy.getVectorElementType();
+ unsigned ElemWidth = ElemTy.getSizeInBits();
+
+ // If the source vector is a vector pair, get the single vector containing
+ // the subvector of interest. The subvector will never overlap two single
+ // vectors.
+ if (isHvxPairTy(VecTy)) {
+ unsigned SubIdx;
+ if (Idx * ElemWidth >= 8*HwLen) {
+ SubIdx = Hexagon::vsub_hi;
+ Idx -= VecTy.getVectorNumElements() / 2;
+ } else {
+ SubIdx = Hexagon::vsub_lo;
+ }
+ VecTy = typeSplit(VecTy).first;
+ VecV = DAG.getTargetExtractSubreg(SubIdx, dl, VecTy, VecV);
+ if (VecTy == ResTy)
+ return VecV;
+ }
+
+ // The only meaningful subvectors of a single HVX vector are those that
+ // fit in a scalar register.
+ assert(ResTy.getSizeInBits() == 32 || ResTy.getSizeInBits() == 64);
+
+ MVT WordTy = tyVector(VecTy, MVT::i32);
+ SDValue WordVec = DAG.getBitcast(WordTy, VecV);
+ unsigned WordIdx = (Idx*ElemWidth) / 32;
+
+ SDValue W0Idx = DAG.getConstant(WordIdx, dl, MVT::i32);
+ SDValue W0 = extractHvxElementReg(WordVec, W0Idx, dl, MVT::i32, DAG);
+ if (ResTy.getSizeInBits() == 32)
+ return DAG.getBitcast(ResTy, W0);
+
+ SDValue W1Idx = DAG.getConstant(WordIdx+1, dl, MVT::i32);
+ SDValue W1 = extractHvxElementReg(WordVec, W1Idx, dl, MVT::i32, DAG);
+ SDValue WW = DAG.getNode(HexagonISD::COMBINE, dl, MVT::i64, {W1, W0});
+ return DAG.getBitcast(ResTy, WW);
+}
+
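
// The word-index arithmetic used above, assuming a v16i32 source and a
// 64-bit (v2i32) result at Idx = 6: the subvector starts at 32-bit word
// WordIdx = Idx*ElemWidth/32 and spans words WordIdx and WordIdx+1, which
// COMBINE then pairs up with W1 in the high half. A scalar model:
#include <cassert>
#include <cstdint>

int main() {
  unsigned ElemWidth = 32, Idx = 6;
  unsigned WordIdx = (Idx * ElemWidth) / 32;       // First word: 6.
  uint32_t W0 = 0x11111111, W1 = 0x22222222;       // The two extracted words.
  uint64_t WW = (uint64_t(W1) << 32) | W0;         // COMBINE(W1, W0).
  assert(WordIdx == 6 && uint32_t(WW) == W0 && (WW >> 32) == W1);
}
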
+SDValue
+HexagonTargetLowering::extractHvxSubvectorPred(SDValue VecV, SDValue IdxV,
+ const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
+ MVT VecTy = ty(VecV);
+ unsigned HwLen = Subtarget.getVectorLength();
+ MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
+ SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV);
+ // IdxV is required to be a constant.
+ unsigned Idx = cast<ConstantSDNode>(IdxV.getNode())->getZExtValue();
+
+ unsigned ResLen = ResTy.getVectorNumElements();
+ unsigned BitBytes = HwLen / VecTy.getVectorNumElements();
+ unsigned Offset = Idx * BitBytes;
+ SDValue Undef = DAG.getUNDEF(ByteTy);
+ SmallVector<int,128> Mask;
+
+ if (Subtarget.isHVXVectorType(ResTy, true)) {
+ // Converting between two vector predicates. Since the result is shorter
+ // than the source, it will correspond to a vector predicate with the
+ // relevant bits replicated. The replication count is the ratio of the
+ // source and target vector lengths.
+ unsigned Rep = VecTy.getVectorNumElements() / ResLen;
+ assert(isPowerOf2_32(Rep) && HwLen % Rep == 0);
+ for (unsigned i = 0; i != HwLen/Rep; ++i) {
+ for (unsigned j = 0; j != Rep; ++j)
+ Mask.push_back(i + Offset);
+ }
+ SDValue ShuffV = DAG.getVectorShuffle(ByteTy, dl, ByteVec, Undef, Mask);
+ return DAG.getNode(HexagonISD::V2Q, dl, ResTy, ShuffV);
+ }
+
+ // Converting between a vector predicate and a scalar predicate. In the
+ // vector predicate, a group of BitBytes bits will correspond to a single
+ // i1 element of the source vector type. Those bits will all have the same
+ // value. The same will be true for ByteVec, where each byte corresponds
+ // to a bit in the vector predicate.
+ // The algorithm is to traverse the ByteVec, going over the i1 values from
+ // the source vector, and generate the corresponding representation in an
+ // 8-byte vector. To avoid repeated extracts from ByteVec, shuffle the
+ // elements so that the interesting 8 bytes will be in the low end of the
+ // vector.
+ unsigned Rep = 8 / ResLen;
+ // Make sure the output fills the entire vector register, so repeat the
+ // 8-byte groups as many times as necessary.
+ for (unsigned r = 0; r != HwLen/ResLen; ++r) {
+ // This will generate the indexes of the 8 interesting bytes.
+ for (unsigned i = 0; i != ResLen; ++i) {
+ for (unsigned j = 0; j != Rep; ++j)
+ Mask.push_back(Offset + i*BitBytes);
+ }
+ }
+
+ SDValue Zero = getZero(dl, MVT::i32, DAG);
+ SDValue ShuffV = DAG.getVectorShuffle(ByteTy, dl, ByteVec, Undef, Mask);
+ // Combine the two low words from ShuffV into a v8i8, and byte-compare
+ // them against 0.
+ SDValue W0 = DAG.getNode(HexagonISD::VEXTRACTW, dl, MVT::i32, {ShuffV, Zero});
+ SDValue W1 = DAG.getNode(HexagonISD::VEXTRACTW, dl, MVT::i32,
+ {ShuffV, DAG.getConstant(4, dl, MVT::i32)});
+ SDValue Vec64 = DAG.getNode(HexagonISD::COMBINE, dl, MVT::v8i8, {W1, W0});
+ return getInstr(Hexagon::A4_vcmpbgtui, dl, ResTy,
+ {Vec64, DAG.getTargetConstant(0, dl, MVT::i32)}, DAG);
+}
+
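
// Worked example of the predicate-to-predicate path above, assuming HwLen =
// 64, a v64i1 source (BitBytes = 1) and a v16i1 result extracted at Idx = 16:
// Rep = 64/16 = 4 and Offset = 16, so each of the 16 interesting bytes is
// replicated 4 times to fill the whole register.
#include <cassert>
#include <vector>

int main() {
  unsigned HwLen = 64, SrcElems = 64, ResElems = 16, Idx = 16, BitBytes = 1;
  unsigned Rep = SrcElems / ResElems, Offset = Idx * BitBytes;
  std::vector<int> Mask;
  for (unsigned i = 0; i != HwLen / Rep; ++i)
    for (unsigned j = 0; j != Rep; ++j)
      Mask.push_back(i + Offset);                  // Same loop as above.
  assert(Mask.size() == HwLen);
  assert(Mask[0] == 16 && Mask[3] == 16 && Mask[4] == 17);  // 4x replication.
}
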
+SDValue
+HexagonTargetLowering::insertHvxSubvectorReg(SDValue VecV, SDValue SubV,
+ SDValue IdxV, const SDLoc &dl, SelectionDAG &DAG) const {
+ MVT VecTy = ty(VecV);
+ MVT SubTy = ty(SubV);
+ unsigned HwLen = Subtarget.getVectorLength();
+ MVT ElemTy = VecTy.getVectorElementType();
+ unsigned ElemWidth = ElemTy.getSizeInBits();
+
+ bool IsPair = isHvxPairTy(VecTy);
+ MVT SingleTy = MVT::getVectorVT(ElemTy, (8*HwLen)/ElemWidth);
+ // The two single vectors that VecV consists of, if it's a pair.
+ SDValue V0, V1;
+ SDValue SingleV = VecV;
+ SDValue PickHi;
+
+ if (IsPair) {
+ V0 = DAG.getTargetExtractSubreg(Hexagon::vsub_lo, dl, SingleTy, VecV);
+ V1 = DAG.getTargetExtractSubreg(Hexagon::vsub_hi, dl, SingleTy, VecV);
+
+ SDValue HalfV = DAG.getConstant(SingleTy.getVectorNumElements(),
+ dl, MVT::i32);
+ PickHi = DAG.getSetCC(dl, MVT::i1, IdxV, HalfV, ISD::SETUGT);
+ if (isHvxSingleTy(SubTy)) {
+ if (const auto *CN = dyn_cast<const ConstantSDNode>(IdxV.getNode())) {
+ unsigned Idx = CN->getZExtValue();
+ assert(Idx == 0 || Idx == VecTy.getVectorNumElements()/2);
+ unsigned SubIdx = (Idx == 0) ? Hexagon::vsub_lo : Hexagon::vsub_hi;
+ return DAG.getTargetInsertSubreg(SubIdx, dl, VecTy, VecV, SubV);
+ }
+ // If IdxV is not a constant, generate the two variants: with the
+ // SubV as the high and as the low subregister, and select the right
+ // pair based on the IdxV.
+ SDValue InLo = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {SubV, V1});
+ SDValue InHi = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {V0, SubV});
+ return DAG.getNode(ISD::SELECT, dl, VecTy, PickHi, InHi, InLo);
+ }
+ // The subvector being inserted must be entirely contained in one of
+ // the vectors V0 or V1. Set SingleV to the correct one, and update
+ // IdxV to be the index relative to the beginning of that vector.
+ SDValue S = DAG.getNode(ISD::SUB, dl, MVT::i32, IdxV, HalfV);
+ IdxV = DAG.getNode(ISD::SELECT, dl, MVT::i32, PickHi, S, IdxV);
+ SingleV = DAG.getNode(ISD::SELECT, dl, SingleTy, PickHi, V1, V0);
+ }
+
+ // The only meaningful subvectors of a single HVX vector are those that
+ // fit in a scalar register.
+ assert(SubTy.getSizeInBits() == 32 || SubTy.getSizeInBits() == 64);
+ // Convert IdxV to a byte index.
+ auto *IdxN = dyn_cast<ConstantSDNode>(IdxV.getNode());
+ if (!IdxN || !IdxN->isNullValue()) {
+ IdxV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV,
+ DAG.getConstant(ElemWidth/8, dl, MVT::i32));
+ SingleV = DAG.getNode(HexagonISD::VROR, dl, SingleTy, SingleV, IdxV);
+ }
+ // When inserting a single word, the rotation back to the original position
+ // would be by HwLen-Idx, but if two words are inserted, it will need to be
+ // by (HwLen-4)-Idx.
+ unsigned RolBase = HwLen;
+ if (VecTy.getSizeInBits() == 32) {
+ SDValue V = DAG.getBitcast(MVT::i32, SubV);
+ SingleV = DAG.getNode(HexagonISD::VINSERTW0, dl, SingleTy, V);
+ } else {
+ SDValue V = DAG.getBitcast(MVT::i64, SubV);
+ SDValue R0 = DAG.getTargetExtractSubreg(Hexagon::isub_lo, dl, MVT::i32, V);
+ SDValue R1 = DAG.getTargetExtractSubreg(Hexagon::isub_hi, dl, MVT::i32, V);
+ SingleV = DAG.getNode(HexagonISD::VINSERTW0, dl, SingleTy, SingleV, R0);
+ SingleV = DAG.getNode(HexagonISD::VROR, dl, SingleTy, SingleV,
+ DAG.getConstant(4, dl, MVT::i32));
+ SingleV = DAG.getNode(HexagonISD::VINSERTW0, dl, SingleTy, SingleV, R1);
+ RolBase = HwLen-4;
+ }
+ // If the vector wasn't ror'ed, don't ror it back.
+ if (RolBase != 4 || !IdxN || !IdxN->isNullValue()) {
+ SDValue RolV = DAG.getNode(ISD::SUB, dl, MVT::i32,
+ DAG.getConstant(RolBase, dl, MVT::i32), IdxV);
+ SingleV = DAG.getNode(HexagonISD::VROR, dl, SingleTy, SingleV, RolV);
+ }
+
+ if (IsPair) {
+ SDValue InLo = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {SingleV, V1});
+ SDValue InHi = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {V0, SingleV});
+ return DAG.getNode(ISD::SELECT, dl, VecTy, PickHi, InHi, InLo);
+ }
+ return SingleV;
+}
+
+SDValue
+HexagonTargetLowering::insertHvxSubvectorPred(SDValue VecV, SDValue SubV,
+ SDValue IdxV, const SDLoc &dl, SelectionDAG &DAG) const {
+ MVT VecTy = ty(VecV);
+ MVT SubTy = ty(SubV);
+ assert(Subtarget.isHVXVectorType(VecTy, true));
+ // VecV is an HVX vector predicate. SubV may be either an HVX vector
+ // predicate as well, or it can be a scalar predicate.
+
+ unsigned VecLen = VecTy.getVectorNumElements();
+ unsigned HwLen = Subtarget.getVectorLength();
+ assert(HwLen % VecLen == 0 && "Unexpected vector type");
+
+ unsigned Scale = VecLen / SubTy.getVectorNumElements();
+ unsigned BitBytes = HwLen / VecLen;
+ unsigned BlockLen = HwLen / Scale;
+
+ MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
+ SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV);
+ SDValue ByteSub = createHvxPrefixPred(SubV, dl, BitBytes, false, DAG);
+ SDValue ByteIdx;
+
+ auto *IdxN = dyn_cast<ConstantSDNode>(IdxV.getNode());
+ if (!IdxN || !IdxN->isNullValue()) {
+ ByteIdx = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV,
+ DAG.getConstant(BitBytes, dl, MVT::i32));
+ ByteVec = DAG.getNode(HexagonISD::VROR, dl, ByteTy, ByteVec, ByteIdx);
+ }
+
+ // ByteVec is the target vector VecV rotated in such a way that the
+ // subvector should be inserted at index 0. Generate a predicate mask
+ // and use vmux to do the insertion.
+ MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
+ SDValue Q = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
+ {DAG.getConstant(BlockLen, dl, MVT::i32)}, DAG);
+ ByteVec = getInstr(Hexagon::V6_vmux, dl, ByteTy, {Q, ByteSub, ByteVec}, DAG);
+ // Rotate ByteVec back, and convert to a vector predicate.
+ if (!IdxN || !IdxN->isNullValue()) {
+ SDValue HwLenV = DAG.getConstant(HwLen, dl, MVT::i32);
+ SDValue ByteXdi = DAG.getNode(ISD::SUB, dl, MVT::i32, HwLenV, ByteIdx);
+ ByteVec = DAG.getNode(HexagonISD::VROR, dl, ByteTy, ByteVec, ByteXdi);
+ }
+ return DAG.getNode(HexagonISD::V2Q, dl, VecTy, ByteVec);
+}
+
+SDValue
+HexagonTargetLowering::extendHvxVectorPred(SDValue VecV, const SDLoc &dl,
+ MVT ResTy, bool ZeroExt, SelectionDAG &DAG) const {
+ // Sign- and any-extending of a vector predicate to a vector register is
+ // equivalent to Q2V. For zero-extensions, generate a vmux between 0 and
+ // a vector of 1s (where the 1s are of type matching the vector type).
+ assert(Subtarget.isHVXVectorType(ResTy));
+ if (!ZeroExt)
+ return DAG.getNode(HexagonISD::Q2V, dl, ResTy, VecV);
+
+ assert(ty(VecV).getVectorNumElements() == ResTy.getVectorNumElements());
+ SDValue True = DAG.getNode(HexagonISD::VSPLAT, dl, ResTy,
+ DAG.getConstant(1, dl, MVT::i32));
+ SDValue False = getZero(dl, ResTy, DAG);
+ return DAG.getSelect(dl, ResTy, VecV, True, False);
+}
+
+SDValue
+HexagonTargetLowering::LowerHvxBuildVector(SDValue Op, SelectionDAG &DAG)
+ const {
+ const SDLoc &dl(Op);
+ MVT VecTy = ty(Op);
+
+ unsigned Size = Op.getNumOperands();
+ SmallVector<SDValue,128> Ops;
+ for (unsigned i = 0; i != Size; ++i)
+ Ops.push_back(Op.getOperand(i));
+
+ if (VecTy.getVectorElementType() == MVT::i1)
+ return buildHvxVectorPred(Ops, dl, VecTy, DAG);
+
+ if (VecTy.getSizeInBits() == 16*Subtarget.getVectorLength()) {
+ ArrayRef<SDValue> A(Ops);
+ MVT SingleTy = typeSplit(VecTy).first;
+ SDValue V0 = buildHvxVectorReg(A.take_front(Size/2), dl, SingleTy, DAG);
+ SDValue V1 = buildHvxVectorReg(A.drop_front(Size/2), dl, SingleTy, DAG);
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, V0, V1);
+ }
+
+ return buildHvxVectorReg(Ops, dl, VecTy, DAG);
+}
+
+SDValue
+HexagonTargetLowering::LowerHvxConcatVectors(SDValue Op, SelectionDAG &DAG)
+ const {
+ // Vector concatenation of two integer (non-bool) vectors does not need
+ // special lowering. Custom-lower concats of bool vectors and expand
+ // concats of more than 2 vectors.
+ MVT VecTy = ty(Op);
+ const SDLoc &dl(Op);
+ unsigned NumOp = Op.getNumOperands();
+ if (VecTy.getVectorElementType() != MVT::i1) {
+ if (NumOp == 2)
+ return Op;
+ // Expand the other cases into a build-vector.
+ SmallVector<SDValue,8> Elems;
+ for (SDValue V : Op.getNode()->ops())
+ DAG.ExtractVectorElements(V, Elems);
+ // A vector of i16 will be broken up into a build_vector of i16's.
+ // This is a problem, since at the time of operation legalization,
+ // all operations are expected to be type-legalized, and i16 is not
+ // a legal type. If any of the extracted elements is not of a valid
+ // type, sign-extend it to a valid one.
+ for (unsigned i = 0, e = Elems.size(); i != e; ++i) {
+ SDValue V = Elems[i];
+ MVT Ty = ty(V);
+ if (!isTypeLegal(Ty)) {
+ EVT NTy = getTypeToTransformTo(*DAG.getContext(), Ty);
+ if (V.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
+ Elems[i] = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NTy,
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NTy,
+ V.getOperand(0), V.getOperand(1)),
+ DAG.getValueType(Ty));
+ continue;
+ }
+ // A few less complicated cases.
+ if (V.getOpcode() == ISD::Constant)
+ Elems[i] = DAG.getSExtOrTrunc(V, dl, NTy);
+ else if (V.isUndef())
+ Elems[i] = DAG.getUNDEF(NTy);
+ else
+ llvm_unreachable("Unexpected vector element");
+ }
+ }
+ return DAG.getBuildVector(VecTy, dl, Elems);
+ }
+
+ assert(VecTy.getVectorElementType() == MVT::i1);
+ unsigned HwLen = Subtarget.getVectorLength();
+ assert(isPowerOf2_32(NumOp) && HwLen % NumOp == 0);
+
+ SDValue Op0 = Op.getOperand(0);
+
+ // If the operands are HVX types (i.e. not scalar predicates), then
+ // defer the concatenation, and create QCAT instead.
+ if (Subtarget.isHVXVectorType(ty(Op0), true)) {
+ if (NumOp == 2)
+ return DAG.getNode(HexagonISD::QCAT, dl, VecTy, Op0, Op.getOperand(1));
+
+ ArrayRef<SDUse> U(Op.getNode()->ops());
+ SmallVector<SDValue,4> SV(U.begin(), U.end());
+ ArrayRef<SDValue> Ops(SV);
+
+ MVT HalfTy = typeSplit(VecTy).first;
+ SDValue V0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfTy,
+ Ops.take_front(NumOp/2));
+ SDValue V1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfTy,
+ Ops.take_back(NumOp/2));
+ return DAG.getNode(HexagonISD::QCAT, dl, VecTy, V0, V1);
+ }
+
+ // Count how many bytes (in a vector register) each bit in VecTy
+ // corresponds to.
+ unsigned BitBytes = HwLen / VecTy.getVectorNumElements();
+
+ SmallVector<SDValue,8> Prefixes;
+ for (SDValue V : Op.getNode()->op_values()) {
+ SDValue P = createHvxPrefixPred(V, dl, BitBytes, true, DAG);
+ Prefixes.push_back(P);
+ }
+
+ unsigned InpLen = ty(Op.getOperand(0)).getVectorNumElements();
+ MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
+ SDValue S = DAG.getConstant(InpLen*BitBytes, dl, MVT::i32);
+ SDValue Res = getZero(dl, ByteTy, DAG);
+ for (unsigned i = 0, e = Prefixes.size(); i != e; ++i) {
+ Res = DAG.getNode(HexagonISD::VROR, dl, ByteTy, Res, S);
+ Res = DAG.getNode(ISD::OR, dl, ByteTy, Res, Prefixes[e-i-1]);
+ }
+ return DAG.getNode(HexagonISD::V2Q, dl, VecTy, Res);
+}
+
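
// Scalar model of the rotate-and-OR accumulation above for the common
// two-operand case: each prefix carries its payload in the low block, and the
// rotation by S = InpLen*BitBytes bytes between the ORs moves the later
// operand into the high block. Modeled on a 32-bit "register" with two
// 16-bit blocks:
#include <cassert>
#include <cstdint>

static uint32_t ror32(uint32_t V, unsigned N) {
  return (V >> N) | (V << (32 - N));
}

int main() {
  uint32_t P0 = 0x1111, P1 = 0x2222;   // Prefix payloads in the low block.
  unsigned S = 16;                     // Block size, in bits here.
  uint32_t Res = 0;
  Res = ror32(Res, S); Res |= P1;      // i = 0: place the last prefix.
  Res = ror32(Res, S); Res |= P0;      // i = 1: rotate, place the first.
  assert(Res == 0x22221111);           // Operand 0 low, operand 1 high.
}
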
+SDValue
+HexagonTargetLowering::LowerHvxExtractElement(SDValue Op, SelectionDAG &DAG)
+ const {
+ // Change the type of the extracted element to i32.
+ SDValue VecV = Op.getOperand(0);
+ MVT ElemTy = ty(VecV).getVectorElementType();
+ const SDLoc &dl(Op);
+ SDValue IdxV = Op.getOperand(1);
+ if (ElemTy == MVT::i1)
+ return extractHvxElementPred(VecV, IdxV, dl, ty(Op), DAG);
+
+ return extractHvxElementReg(VecV, IdxV, dl, ty(Op), DAG);
+}
+
+SDValue
+HexagonTargetLowering::LowerHvxInsertElement(SDValue Op, SelectionDAG &DAG)
+ const {
+ const SDLoc &dl(Op);
+ SDValue VecV = Op.getOperand(0);
+ SDValue ValV = Op.getOperand(1);
+ SDValue IdxV = Op.getOperand(2);
+ MVT ElemTy = ty(VecV).getVectorElementType();
+ if (ElemTy == MVT::i1)
+ return insertHvxElementPred(VecV, IdxV, ValV, dl, DAG);
+
+ return insertHvxElementReg(VecV, IdxV, ValV, dl, DAG);
+}
+
+SDValue
HexagonTargetLowering::LowerHvxExtractSubvector(SDValue Op, SelectionDAG &DAG)
const {
SDValue SrcV = Op.getOperand(0);
MVT SrcTy = ty(SrcV);
- unsigned SrcElems = SrcTy.getVectorNumElements();
+ MVT DstTy = ty(Op);
SDValue IdxV = Op.getOperand(1);
unsigned Idx = cast<ConstantSDNode>(IdxV.getNode())->getZExtValue();
- MVT DstTy = ty(Op);
- assert(Idx == 0 || DstTy.getVectorNumElements() % Idx == 0);
+ assert(Idx % DstTy.getVectorNumElements() == 0);
+ (void)Idx;
const SDLoc &dl(Op);
- if (Idx == 0)
- return DAG.getTargetExtractSubreg(Hexagon::vsub_lo, dl, DstTy, SrcV);
- if (Idx == SrcElems/2)
- return DAG.getTargetExtractSubreg(Hexagon::vsub_hi, dl, DstTy, SrcV);
- return SDValue();
+
+ MVT ElemTy = SrcTy.getVectorElementType();
+ if (ElemTy == MVT::i1)
+ return extractHvxSubvectorPred(SrcV, IdxV, dl, DstTy, DAG);
+
+ return extractHvxSubvectorReg(SrcV, IdxV, dl, DstTy, DAG);
}
SDValue
HexagonTargetLowering::LowerHvxInsertSubvector(SDValue Op, SelectionDAG &DAG)
const {
- // Idx may be variable.
+ // Idx does not need to be a constant.
+ SDValue VecV = Op.getOperand(0);
+ SDValue ValV = Op.getOperand(1);
SDValue IdxV = Op.getOperand(2);
- auto *IdxN = dyn_cast<ConstantSDNode>(IdxV.getNode());
- if (!IdxN)
- return SDValue();
- unsigned Idx = IdxN->getZExtValue();
- SDValue DstV = Op.getOperand(0);
- SDValue SrcV = Op.getOperand(1);
- MVT DstTy = ty(DstV);
- MVT SrcTy = ty(SrcV);
- unsigned DstElems = DstTy.getVectorNumElements();
- unsigned SrcElems = SrcTy.getVectorNumElements();
- if (2*SrcElems != DstElems)
- return SDValue();
+ const SDLoc &dl(Op);
+ MVT VecTy = ty(VecV);
+ MVT ElemTy = VecTy.getVectorElementType();
+ if (ElemTy == MVT::i1)
+ return insertHvxSubvectorPred(VecV, ValV, IdxV, dl, DAG);
+
+ return insertHvxSubvectorReg(VecV, ValV, IdxV, dl, DAG);
+}
+
+SDValue
+HexagonTargetLowering::LowerHvxAnyExt(SDValue Op, SelectionDAG &DAG) const {
+ // Lower any-extends of boolean vectors to sign-extends, since they
+ // translate directly to Q2V. Zero-extending could also be done equally
+ // fast, but Q2V is used/recognized in more places.
+ // For all other vectors, use zero-extend.
+ MVT ResTy = ty(Op);
+ SDValue InpV = Op.getOperand(0);
+ MVT ElemTy = ty(InpV).getVectorElementType();
+ if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(ResTy))
+ return LowerHvxSignExt(Op, DAG);
+ return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Op), ResTy, InpV);
+}
+
+SDValue
+HexagonTargetLowering::LowerHvxSignExt(SDValue Op, SelectionDAG &DAG) const {
+ MVT ResTy = ty(Op);
+ SDValue InpV = Op.getOperand(0);
+ MVT ElemTy = ty(InpV).getVectorElementType();
+ if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(ResTy))
+ return extendHvxVectorPred(InpV, SDLoc(Op), ty(Op), false, DAG);
+ return Op;
+}
+
+SDValue
+HexagonTargetLowering::LowerHvxZeroExt(SDValue Op, SelectionDAG &DAG) const {
+ MVT ResTy = ty(Op);
+ SDValue InpV = Op.getOperand(0);
+ MVT ElemTy = ty(InpV).getVectorElementType();
+ if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(ResTy))
+ return extendHvxVectorPred(InpV, SDLoc(Op), ty(Op), true, DAG);
+ return Op;
+}
+
+SDValue
+HexagonTargetLowering::LowerHvxCttz(SDValue Op, SelectionDAG &DAG) const {
+ // Lower vector CTTZ into a computation using CTLZ (Hacker's Delight):
+ // cttz(x) = bitwidth(x) - ctlz(~x & (x-1))
const SDLoc &dl(Op);
- if (Idx == 0)
- return DAG.getTargetInsertSubreg(Hexagon::vsub_lo, dl, DstTy, DstV, SrcV);
- if (Idx == SrcElems)
- return DAG.getTargetInsertSubreg(Hexagon::vsub_hi, dl, DstTy, DstV, SrcV);
- return SDValue();
+ MVT ResTy = ty(Op);
+ SDValue InpV = Op.getOperand(0);
+ assert(ResTy == ty(InpV));
+
+ // Calculate the vectors of 1 and bitwidth(x).
+ MVT ElemTy = ty(InpV).getVectorElementType();
+ unsigned ElemWidth = ElemTy.getSizeInBits();
+ // Using uint64_t because a shift by 32 can happen.
+ uint64_t Splat1 = 0, SplatW = 0;
+ assert(isPowerOf2_32(ElemWidth) && ElemWidth <= 32);
+ for (unsigned i = 0; i != 32/ElemWidth; ++i) {
+ Splat1 = (Splat1 << ElemWidth) | 1;
+ SplatW = (SplatW << ElemWidth) | ElemWidth;
+ }
+ SDValue Vec1 = DAG.getNode(HexagonISD::VSPLATW, dl, ResTy,
+ DAG.getConstant(uint32_t(Splat1), dl, MVT::i32));
+ SDValue VecW = DAG.getNode(HexagonISD::VSPLATW, dl, ResTy,
+ DAG.getConstant(uint32_t(SplatW), dl, MVT::i32));
+ SDValue VecN1 = DAG.getNode(HexagonISD::VSPLATW, dl, ResTy,
+ DAG.getConstant(-1, dl, MVT::i32));
+ // Do not use DAG.getNOT, because that would create BUILD_VECTOR with
+ // a BITCAST. Here we can skip the BITCAST (so we don't have to handle
+ // it separately in custom combine or selection).
+ SDValue A = DAG.getNode(ISD::AND, dl, ResTy,
+ {DAG.getNode(ISD::XOR, dl, ResTy, {InpV, VecN1}),
+ DAG.getNode(ISD::SUB, dl, ResTy, {InpV, Vec1})});
+ return DAG.getNode(ISD::SUB, dl, ResTy,
+ {VecW, DAG.getNode(ISD::CTLZ, dl, ResTy, A)});
}
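
// The Hacker's Delight identity used above can be verified exhaustively in
// scalar code: cttz(x) = bitwidth(x) - ctlz(~x & (x-1)), with the convention
// cttz(0) = ctlz(0) = bitwidth. A 16-bit check over all values:
#include <cassert>
#include <cstdint>

static unsigned ctlz16(uint16_t X) {
  unsigned N = 0;
  for (int B = 15; B >= 0 && !((X >> B) & 1); --B)
    ++N;
  return N;
}

static unsigned cttz16(uint16_t X) {
  unsigned N = 0;
  for (int B = 0; B <= 15 && !((X >> B) & 1); ++B)
    ++N;
  return N;
}

int main() {
  for (unsigned V = 0; V != 0x10000; ++V) {
    uint16_t X = V;
    uint16_t M = ~X & uint16_t(X - 1);   // Ones exactly below the lowest set bit.
    assert(cttz16(X) == 16u - ctlz16(M));
  }
}
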
SDValue
HexagonTargetLowering::LowerHvxMul(SDValue Op, SelectionDAG &DAG) const {
MVT ResTy = ty(Op);
- if (!ResTy.isVector())
- return SDValue();
+ assert(ResTy.isVector() && isHvxSingleTy(ResTy));
const SDLoc &dl(Op);
SmallVector<int,256> ShuffMask;
@@ -423,18 +1205,14 @@ HexagonTargetLowering::LowerHvxMul(SDValue Op, SelectionDAG &DAG) const {
SDValue Vt = Op.getOperand(1);
switch (ElemTy.SimpleTy) {
- case MVT::i8:
- case MVT::i16: {
+ case MVT::i8: {
// For i8 vectors Vs = (a0, a1, ...), Vt = (b0, b1, ...),
// V6_vmpybv Vs, Vt produces a pair of i16 vectors Hi:Lo,
// where Lo = (a0*b0, a2*b2, ...), Hi = (a1*b1, a3*b3, ...).
- // For i16, use V6_vmpyhv, which behaves in an analogous way to
- // V6_vmpybv: results Lo and Hi are products of even/odd elements
- // respectively.
MVT ExtTy = typeExtElem(ResTy, 2);
unsigned MpyOpc = ElemTy == MVT::i8 ? Hexagon::V6_vmpybv
: Hexagon::V6_vmpyhv;
- SDValue M = getNode(MpyOpc, dl, ExtTy, {Vs, Vt}, DAG);
+ SDValue M = getInstr(MpyOpc, dl, ExtTy, {Vs, Vt}, DAG);
// Discard high halves of the resulting values, collect the low halves.
for (unsigned I = 0; I < VecLen; I += 2) {
@@ -442,18 +1220,24 @@ HexagonTargetLowering::LowerHvxMul(SDValue Op, SelectionDAG &DAG) const {
ShuffMask.push_back(I+VecLen); // Pick odd element.
}
VectorPair P = opSplit(opCastElem(M, ElemTy, DAG), dl, DAG);
- return getByteShuffle(dl, P.first, P.second, ShuffMask, DAG);
+ SDValue BS = getByteShuffle(dl, P.first, P.second, ShuffMask, DAG);
+ return DAG.getBitcast(ResTy, BS);
}
+ case MVT::i16:
+ // For i16 there is V6_vmpyih, which acts exactly like the MUL opcode.
+ // (There is also V6_vmpyhv, which behaves in an analogous way to
+ // V6_vmpybv.)
+ return getInstr(Hexagon::V6_vmpyih, dl, ResTy, {Vs, Vt}, DAG);
case MVT::i32: {
// Use the following sequence for signed word multiply:
// T0 = V6_vmpyiowh Vs, Vt
// T1 = V6_vaslw T0, 16
// T2 = V6_vmpyiewuh_acc T1, Vs, Vt
SDValue S16 = DAG.getConstant(16, dl, MVT::i32);
- SDValue T0 = getNode(Hexagon::V6_vmpyiowh, dl, ResTy, {Vs, Vt}, DAG);
- SDValue T1 = getNode(Hexagon::V6_vaslw, dl, ResTy, {T0, S16}, DAG);
- SDValue T2 = getNode(Hexagon::V6_vmpyiewuh_acc, dl, ResTy,
- {T1, Vs, Vt}, DAG);
+ SDValue T0 = getInstr(Hexagon::V6_vmpyiowh, dl, ResTy, {Vs, Vt}, DAG);
+ SDValue T1 = getInstr(Hexagon::V6_vaslw, dl, ResTy, {T0, S16}, DAG);
+ SDValue T2 = getInstr(Hexagon::V6_vmpyiewuh_acc, dl, ResTy,
+ {T1, Vs, Vt}, DAG);
return T2;
}
default:
@@ -463,78 +1247,109 @@ HexagonTargetLowering::LowerHvxMul(SDValue Op, SelectionDAG &DAG) const {
}
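
// Scalar check of the decomposition behind the V6_vmpyiowh / V6_vaslw /
// V6_vmpyiewuh_acc sequence above: modulo 2^32,
//   a*b == ((a * hi16s(b)) << 16) + a * lo16u(b),
// where hi16s is the signed high halfword of b and lo16u the unsigned low
// halfword. (The instruction semantics are paraphrased here, not quoted.)
#include <cassert>
#include <cstdint>

int main() {
  uint32_t As[] = {0, 1, 0x7fffffffu, 0x80000000u, 0xdeadbeefu, 1234567u};
  uint32_t Bs[] = {0, 3, 0xffffffffu, 0x00010000u, 0x8000ffffu, 89u};
  for (uint32_t A : As) {
    for (uint32_t B : Bs) {
      int32_t HiB = (int32_t)B >> 16;        // Signed high halfword.
      uint32_t LoB = B & 0xffffu;            // Unsigned low halfword.
      uint32_t T0 = A * (uint32_t)HiB;       // Low 32 bits, like V6_vmpyiowh.
      uint32_t T1 = T0 << 16;                // V6_vaslw by 16.
      uint32_t T2 = T1 + A * LoB;            // Accumulate, like V6_vmpyiewuh_acc.
      assert(T2 == A * B);
    }
  }
}
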
SDValue
-HexagonTargetLowering::LowerHvxSetCC(SDValue Op, SelectionDAG &DAG) const {
- MVT VecTy = ty(Op.getOperand(0));
- assert(VecTy == ty(Op.getOperand(1)));
-
- SDValue Cmp = Op.getOperand(2);
- ISD::CondCode CC = cast<CondCodeSDNode>(Cmp)->get();
- bool Negate = false, Swap = false;
-
- // HVX has instructions for SETEQ, SETGT, SETUGT. The other comparisons
- // can be arranged as operand-swapped/negated versions of these. Since
- // the generated code will have the original CC expressed as
- // (negate (swap-op NewCmp)),
- // the condition code for the NewCmp should be calculated from the original
- // CC by applying these operations in the reverse order.
- //
- // This could also be done through setCondCodeAction, but for negation it
- // uses a xor with a vector of -1s, which it obtains from BUILD_VECTOR.
- // That is far too expensive for what can be done with a single instruction.
-
- switch (CC) {
- case ISD::SETNE: // !eq
- case ISD::SETLE: // !gt
- case ISD::SETGE: // !lt
- case ISD::SETULE: // !ugt
- case ISD::SETUGE: // !ult
- CC = ISD::getSetCCInverse(CC, true);
- Negate = true;
- break;
- default:
- break;
+HexagonTargetLowering::LowerHvxMulh(SDValue Op, SelectionDAG &DAG) const {
+ MVT ResTy = ty(Op);
+ assert(ResTy.isVector());
+ const SDLoc &dl(Op);
+ SmallVector<int,256> ShuffMask;
+
+ MVT ElemTy = ResTy.getVectorElementType();
+ unsigned VecLen = ResTy.getVectorNumElements();
+ SDValue Vs = Op.getOperand(0);
+ SDValue Vt = Op.getOperand(1);
+ bool IsSigned = Op.getOpcode() == ISD::MULHS;
+
+ if (ElemTy == MVT::i8 || ElemTy == MVT::i16) {
+ // For i8 vectors Vs = (a0, a1, ...), Vt = (b0, b1, ...),
+ // V6_vmpybv Vs, Vt produces a pair of i16 vectors Hi:Lo,
+ // where Lo = (a0*b0, a2*b2, ...), Hi = (a1*b1, a3*b3, ...).
+ // For i16, use V6_vmpyhv, which behaves in an analogous way to
+ // V6_vmpybv: results Lo and Hi are products of even/odd elements
+ // respectively.
+ MVT ExtTy = typeExtElem(ResTy, 2);
+ unsigned MpyOpc = ElemTy == MVT::i8
+ ? (IsSigned ? Hexagon::V6_vmpybv : Hexagon::V6_vmpyubv)
+ : (IsSigned ? Hexagon::V6_vmpyhv : Hexagon::V6_vmpyuhv);
+ SDValue M = getInstr(MpyOpc, dl, ExtTy, {Vs, Vt}, DAG);
+
+ // Discard low halves of the resulting values, collect the high halves.
+ for (unsigned I = 0; I < VecLen; I += 2) {
+ ShuffMask.push_back(I+1); // Pick even element.
+ ShuffMask.push_back(I+VecLen+1); // Pick odd element.
+ }
+ VectorPair P = opSplit(opCastElem(M, ElemTy, DAG), dl, DAG);
+ SDValue BS = getByteShuffle(dl, P.first, P.second, ShuffMask, DAG);
+ return DAG.getBitcast(ResTy, BS);
}
- switch (CC) {
- case ISD::SETLT: // swap gt
- case ISD::SETULT: // swap ugt
- CC = ISD::getSetCCSwappedOperands(CC);
- Swap = true;
- break;
- default:
- break;
+ assert(ElemTy == MVT::i32);
+ SDValue S16 = DAG.getConstant(16, dl, MVT::i32);
+
+ if (IsSigned) {
+ // mulhs(Vs,Vt) =
+ // = [(Hi(Vs)*2^16 + Lo(Vs)) *s (Hi(Vt)*2^16 + Lo(Vt))] >> 32
+ // = [Hi(Vs)*2^16 *s Hi(Vt)*2^16 + Hi(Vs) *su Lo(Vt)*2^16
+ // + Lo(Vs) *us (Hi(Vt)*2^16 + Lo(Vt))] >> 32
+ // = [Hi(Vs) *s Hi(Vt)*2^32 + Hi(Vs) *su Lo(Vt)*2^16
+ // + Lo(Vs) *us Vt] >> 32
+ // The low half of Lo(Vs)*Lo(Vt) will be discarded (it's not added to
+ // anything, so it cannot produce any carry over to higher bits),
+ // so everything in [] can be shifted by 16 without loss of precision.
+ // = [Hi(Vs) *s Hi(Vt)*2^16 + Hi(Vs)*su Lo(Vt) + Lo(Vs)*Vt >> 16] >> 16
+  //   = [Hi(Vs) *s Hi(Vt)*2^16 + Hi(Vs)*su Lo(Vt) + V6_vmpyewuh(Vt,Vs)] >> 16
+ // Denote Hi(Vs) = Vs':
+ // = [Vs'*s Hi(Vt)*2^16 + Vs' *su Lo(Vt) + V6_vmpyewuh(Vt,Vs)] >> 16
+ // = Vs'*s Hi(Vt) + (V6_vmpyiewuh(Vs',Vt) + V6_vmpyewuh(Vt,Vs)) >> 16
+ SDValue T0 = getInstr(Hexagon::V6_vmpyewuh, dl, ResTy, {Vt, Vs}, DAG);
+ // Get Vs':
+ SDValue S0 = getInstr(Hexagon::V6_vasrw, dl, ResTy, {Vs, S16}, DAG);
+ SDValue T1 = getInstr(Hexagon::V6_vmpyiewuh_acc, dl, ResTy,
+ {T0, S0, Vt}, DAG);
+ // Shift by 16:
+ SDValue S2 = getInstr(Hexagon::V6_vasrw, dl, ResTy, {T1, S16}, DAG);
+ // Get Vs'*Hi(Vt):
+ SDValue T2 = getInstr(Hexagon::V6_vmpyiowh, dl, ResTy, {S0, Vt}, DAG);
+ // Add:
+ SDValue T3 = DAG.getNode(ISD::ADD, dl, ResTy, {S2, T2});
+ return T3;
}
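The chain T0 -> T1 -> S2 -> T3 follows the derivation in the comment step by step. A scalar model using the same names (a sketch: 64-bit intermediates stand in for the 32-bit lane arithmetic of the instructions):

    #include <cstdint>

    int32_t mulhs32_model(int32_t s, int32_t t) {
      int64_t sHi = s >> 16;                  // Vs' = Hi(Vs), sign-extended
      uint32_t sLo = uint32_t(s) & 0xFFFFu;   // Lo(Vs), unsigned
      int64_t tHi = t >> 16;                  // Hi(Vt), sign-extended
      uint32_t tLo = uint32_t(t) & 0xFFFFu;   // Lo(Vt), unsigned
      int64_t T0 = (int64_t(sLo) * t) >> 16;  // Lo(Vs) *us Vt, then >> 16
      int64_t T1 = T0 + sHi * tLo;            // + Hi(Vs) *su Lo(Vt)
      int64_t S2 = T1 >> 16;                  // shift by 16
      return int32_t(S2 + sHi * tHi);         // + Hi(Vs) *s Hi(Vt)
    }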
- assert(CC == ISD::SETEQ || CC == ISD::SETGT || CC == ISD::SETUGT);
+ // Unsigned mulhw. (Would expansion using signed mulhw be better?)
- MVT ElemTy = VecTy.getVectorElementType();
- unsigned ElemWidth = ElemTy.getSizeInBits();
- assert(isPowerOf2_32(ElemWidth));
-
- auto getIdx = [] (unsigned Code) {
- static const unsigned Idx[] = { ISD::SETEQ, ISD::SETGT, ISD::SETUGT };
- for (unsigned I = 0, E = array_lengthof(Idx); I != E; ++I)
- if (Code == Idx[I])
- return I;
- llvm_unreachable("Unhandled CondCode");
+ auto LoVec = [&DAG,ResTy,dl] (SDValue Pair) {
+ return DAG.getTargetExtractSubreg(Hexagon::vsub_lo, dl, ResTy, Pair);
};
-
- static unsigned OpcTable[3][3] = {
- // SETEQ SETGT, SETUGT
- /* Byte */ { Hexagon::V6_veqb, Hexagon::V6_vgtb, Hexagon::V6_vgtub },
- /* Half */ { Hexagon::V6_veqh, Hexagon::V6_vgth, Hexagon::V6_vgtuh },
- /* Word */ { Hexagon::V6_veqw, Hexagon::V6_vgtw, Hexagon::V6_vgtuw }
+ auto HiVec = [&DAG,ResTy,dl] (SDValue Pair) {
+ return DAG.getTargetExtractSubreg(Hexagon::vsub_hi, dl, ResTy, Pair);
};
- unsigned CmpOpc = OpcTable[Log2_32(ElemWidth)-3][getIdx(CC)];
-
- MVT ResTy = ty(Op);
- const SDLoc &dl(Op);
- SDValue OpL = Swap ? Op.getOperand(1) : Op.getOperand(0);
- SDValue OpR = Swap ? Op.getOperand(0) : Op.getOperand(1);
- SDValue CmpV = getNode(CmpOpc, dl, ResTy, {OpL, OpR}, DAG);
- return Negate ? getNode(Hexagon::V6_pred_not, dl, ResTy, {CmpV}, DAG)
- : CmpV;
+ MVT PairTy = typeJoin({ResTy, ResTy});
+ SDValue P = getInstr(Hexagon::V6_lvsplatw, dl, ResTy,
+ {DAG.getConstant(0x02020202, dl, MVT::i32)}, DAG);
+  // Multiply unsigned halfwords:
+ // LoVec = Vs.uh[2i] * Vt.uh[2i],
+ // HiVec = Vs.uh[2i+1] * Vt.uh[2i+1]
+ SDValue T0 = getInstr(Hexagon::V6_vmpyuhv, dl, PairTy, {Vs, Vt}, DAG);
+  // The low halves of the words in LoVec can be discarded: they are not
+  // added to anything in the full-precision product, so they cannot
+  // produce a carry into the higher bits.
+ SDValue T1 = getInstr(Hexagon::V6_vlsrw, dl, ResTy, {LoVec(T0), S16}, DAG);
+ // Swap low and high halves in Vt, and do the halfword multiplication
+ // to get products Vs.uh[2i] * Vt.uh[2i+1] and Vs.uh[2i+1] * Vt.uh[2i].
+ SDValue D0 = getInstr(Hexagon::V6_vdelta, dl, ResTy, {Vt, P}, DAG);
+ SDValue T2 = getInstr(Hexagon::V6_vmpyuhv, dl, PairTy, {Vs, D0}, DAG);
+ // T2 has mixed products of halfwords: Lo(Vt)*Hi(Vs) and Hi(Vt)*Lo(Vs).
+ // These products are words, but cannot be added directly because the
+  // sums could overflow. Instead, add them halfword-wise: each sum of a
+  // pair of halfwords is widened into a full word, so no carry is lost.
+ SDValue T3 = getInstr(Hexagon::V6_vadduhw, dl, PairTy,
+ {LoVec(T2), HiVec(T2)}, DAG);
+ // Add the high halfwords from the products of the low halfwords.
+ SDValue T4 = DAG.getNode(ISD::ADD, dl, ResTy, {T1, LoVec(T3)});
+ SDValue T5 = getInstr(Hexagon::V6_vlsrw, dl, ResTy, {T4, S16}, DAG);
+ SDValue T6 = DAG.getNode(ISD::ADD, dl, ResTy, {HiVec(T0), HiVec(T3)});
+ SDValue T7 = DAG.getNode(ISD::ADD, dl, ResTy, {T5, T6});
+ return T7;
}
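The unsigned path can be sanity-checked the same way; midLo/midHi model the halfword-to-word sums that V6_vadduhw produces, and every intermediate fits in 32 bits (a sketch, not part of the patch):

    #include <cstdint>

    uint32_t mulhu32_model(uint32_t s, uint32_t t) {
      uint32_t sLo = s & 0xFFFFu, sHi = s >> 16;
      uint32_t tLo = t & 0xFFFFu, tHi = t >> 16;
      uint32_t lo    = sLo * tLo;                            // LoVec(T0) lane
      uint32_t hi    = sHi * tHi;                            // HiVec(T0) lane
      uint32_t midA  = sLo * tHi;                            // mixed products (T2)
      uint32_t midB  = sHi * tLo;
      uint32_t midLo = (midA & 0xFFFFu) + (midB & 0xFFFFu);  // LoVec(T3)
      uint32_t midHi = (midA >> 16) + (midB >> 16);          // HiVec(T3)
      uint32_t T4    = (lo >> 16) + midLo;                   // T1 + LoVec(T3)
      return (T4 >> 16) + hi + midHi;                        // T5 + T6 = T7
    }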
SDValue
@@ -543,3 +1358,163 @@ HexagonTargetLowering::LowerHvxExtend(SDValue Op, SelectionDAG &DAG) const {
assert(Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG);
return DAG.getZeroExtendVectorInReg(Op.getOperand(0), SDLoc(Op), ty(Op));
}
+
+SDValue
+HexagonTargetLowering::LowerHvxShift(SDValue Op, SelectionDAG &DAG) const {
+ if (SDValue S = getVectorShiftByInt(Op, DAG))
+ return S;
+ return Op;
+}
+
+SDValue
+HexagonTargetLowering::SplitHvxPairOp(SDValue Op, SelectionDAG &DAG) const {
+ assert(!Op.isMachineOpcode());
+ SmallVector<SDValue,2> OpsL, OpsH;
+ const SDLoc &dl(Op);
+
+ auto SplitVTNode = [&DAG,this] (const VTSDNode *N) {
+ MVT Ty = typeSplit(N->getVT().getSimpleVT()).first;
+ SDValue TV = DAG.getValueType(Ty);
+ return std::make_pair(TV, TV);
+ };
+
+ for (SDValue A : Op.getNode()->ops()) {
+ VectorPair P = Subtarget.isHVXVectorType(ty(A), true)
+ ? opSplit(A, dl, DAG)
+ : std::make_pair(A, A);
+ // Special case for type operand.
+ if (Op.getOpcode() == ISD::SIGN_EXTEND_INREG) {
+ if (const auto *N = dyn_cast<const VTSDNode>(A.getNode()))
+ P = SplitVTNode(N);
+ }
+ OpsL.push_back(P.first);
+ OpsH.push_back(P.second);
+ }
+
+ MVT ResTy = ty(Op);
+ MVT HalfTy = typeSplit(ResTy).first;
+ SDValue L = DAG.getNode(Op.getOpcode(), dl, HalfTy, OpsL);
+ SDValue H = DAG.getNode(Op.getOpcode(), dl, HalfTy, OpsH);
+ SDValue S = DAG.getNode(ISD::CONCAT_VECTORS, dl, ResTy, L, H);
+ return S;
+}
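The effect on a pair-typed node, sketched in pseudo-SelectionDAG form for a 64-byte HVX mode where v128i8 occupies a register pair (illustrative only):

    t = add v128i8 A, B
      ==>  lo = add v64i8 A.lo, B.lo
           hi = add v64i8 A.hi, B.hi
           t  = concat_vectors v128i8 lo, hi

The VTSDNode special case exists for SIGN_EXTEND_INREG: its type operand must be narrowed to the half type rather than split like a vector value.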
+
+SDValue
+HexagonTargetLowering::SplitHvxMemOp(SDValue Op, SelectionDAG &DAG) const {
+ LSBaseSDNode *BN = cast<LSBaseSDNode>(Op.getNode());
+ assert(BN->isUnindexed());
+ MVT MemTy = BN->getMemoryVT().getSimpleVT();
+ if (!isHvxPairTy(MemTy))
+ return Op;
+
+ const SDLoc &dl(Op);
+ unsigned HwLen = Subtarget.getVectorLength();
+ MVT SingleTy = typeSplit(MemTy).first;
+ SDValue Chain = BN->getChain();
+ SDValue Base0 = BN->getBasePtr();
+ SDValue Base1 = DAG.getMemBasePlusOffset(Base0, HwLen, dl);
+
+ MachineMemOperand *MOp0 = nullptr, *MOp1 = nullptr;
+ if (MachineMemOperand *MMO = BN->getMemOperand()) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MOp0 = MF.getMachineMemOperand(MMO, 0, HwLen);
+ MOp1 = MF.getMachineMemOperand(MMO, HwLen, HwLen);
+ }
+
+ unsigned MemOpc = BN->getOpcode();
+ SDValue NewOp;
+
+ if (MemOpc == ISD::LOAD) {
+ SDValue Load0 = DAG.getLoad(SingleTy, dl, Chain, Base0, MOp0);
+ SDValue Load1 = DAG.getLoad(SingleTy, dl, Chain, Base1, MOp1);
+ NewOp = DAG.getMergeValues(
+ { DAG.getNode(ISD::CONCAT_VECTORS, dl, MemTy, Load0, Load1),
+ DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ Load0.getValue(1), Load1.getValue(1)) }, dl);
+ } else {
+ assert(MemOpc == ISD::STORE);
+ VectorPair Vals = opSplit(cast<StoreSDNode>(Op)->getValue(), dl, DAG);
+ SDValue Store0 = DAG.getStore(Chain, dl, Vals.first, Base0, MOp0);
+ SDValue Store1 = DAG.getStore(Chain, dl, Vals.second, Base1, MOp1);
+ NewOp = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store0, Store1);
+ }
+
+ return NewOp;
+}
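The load case, sketched in the same 64-byte mode (HwLen = 64); the two machine memory operands partition the original one:

    v = load v128i8, ptr
      ==>  v0 = load v64i8, ptr       ; MMO offset 0,  size 64
           v1 = load v64i8, ptr + 64  ; MMO offset 64, size 64
           v  = concat_vectors v0, v1 ; chain = TokenFactor(ch0, ch1)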
+
+SDValue
+HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const {
+ unsigned Opc = Op.getOpcode();
+ bool IsPairOp = isHvxPairTy(ty(Op)) ||
+ llvm::any_of(Op.getNode()->ops(), [this] (SDValue V) {
+ return isHvxPairTy(ty(V));
+ });
+
+ if (IsPairOp) {
+ switch (Opc) {
+ default:
+ break;
+ case ISD::LOAD:
+ case ISD::STORE:
+ return SplitHvxMemOp(Op, DAG);
+ case ISD::CTPOP:
+ case ISD::CTLZ:
+ case ISD::CTTZ:
+ case ISD::MUL:
+ case ISD::MULHS:
+ case ISD::MULHU:
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ case ISD::SRA:
+ case ISD::SHL:
+ case ISD::SRL:
+ case ISD::SETCC:
+ case ISD::VSELECT:
+ case ISD::SIGN_EXTEND_INREG:
+ return SplitHvxPairOp(Op, DAG);
+ }
+ }
+
+ switch (Opc) {
+ default:
+ break;
+ case ISD::BUILD_VECTOR: return LowerHvxBuildVector(Op, DAG);
+ case ISD::CONCAT_VECTORS: return LowerHvxConcatVectors(Op, DAG);
+ case ISD::INSERT_SUBVECTOR: return LowerHvxInsertSubvector(Op, DAG);
+ case ISD::INSERT_VECTOR_ELT: return LowerHvxInsertElement(Op, DAG);
+ case ISD::EXTRACT_SUBVECTOR: return LowerHvxExtractSubvector(Op, DAG);
+ case ISD::EXTRACT_VECTOR_ELT: return LowerHvxExtractElement(Op, DAG);
+
+ case ISD::ANY_EXTEND: return LowerHvxAnyExt(Op, DAG);
+ case ISD::SIGN_EXTEND: return LowerHvxSignExt(Op, DAG);
+ case ISD::ZERO_EXTEND: return LowerHvxZeroExt(Op, DAG);
+ case ISD::CTTZ: return LowerHvxCttz(Op, DAG);
+ case ISD::SRA:
+ case ISD::SHL:
+ case ISD::SRL: return LowerHvxShift(Op, DAG);
+ case ISD::MUL: return LowerHvxMul(Op, DAG);
+ case ISD::MULHS:
+ case ISD::MULHU: return LowerHvxMulh(Op, DAG);
+ case ISD::ANY_EXTEND_VECTOR_INREG: return LowerHvxExtend(Op, DAG);
+ case ISD::SETCC:
+ case ISD::INTRINSIC_VOID: return Op;
+ // Unaligned loads will be handled by the default lowering.
+ case ISD::LOAD: return SDValue();
+ }
+#ifndef NDEBUG
+ Op.dumpr(&DAG);
+#endif
+ llvm_unreachable("Unhandled HVX operation");
+}
+
+bool
+HexagonTargetLowering::isHvxOperation(SDValue Op) const {
+  // If the type of the result or the type of any operand is an HVX vector
+  // type, this is an HVX operation.
+ return Subtarget.isHVXVectorType(ty(Op), true) ||
+ llvm::any_of(Op.getNode()->ops(),
+ [this] (SDValue V) {
+ return Subtarget.isHVXVectorType(ty(V), true);
+ });
+}
diff --git a/lib/Target/Hexagon/HexagonInstrFormatsV60.td b/lib/Target/Hexagon/HexagonInstrFormatsV60.td
index 14bda0e0107d..1347a655353f 100644
--- a/lib/Target/Hexagon/HexagonInstrFormatsV60.td
+++ b/lib/Target/Hexagon/HexagonInstrFormatsV60.td
@@ -19,4 +19,4 @@ class CVI_VA_Resource<dag outs, dag ins, string asmstr,
list<dag> pattern = [], string cstr = "",
InstrItinClass itin = CVI_VA>
: InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VA>,
- OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+ OpcodeHexagon, Requires<[HasV60, UseHVX]>;
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp
index b82a0157e81f..6019c7c5d024 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.cpp
+++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -34,7 +34,6 @@
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
@@ -49,6 +48,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
@@ -134,7 +134,7 @@ static unsigned nonDbgMICount(MachineBasicBlock::const_instr_iterator MIB,
MachineBasicBlock::const_instr_iterator MIE) {
unsigned Count = 0;
for (; MIB != MIE; ++MIB) {
- if (!MIB->isDebugValue())
+ if (!MIB->isDebugInstr())
++Count;
}
return Count;
@@ -144,9 +144,9 @@ static unsigned nonDbgMICount(MachineBasicBlock::const_instr_iterator MIB,
/// On Hexagon, we have two instructions used to set up the hardware loop
/// (LOOP0, LOOP1) with corresponding endloop (ENDLOOP0, ENDLOOP1) instructions
/// to indicate the end of a loop.
-static MachineInstr *findLoopInstr(MachineBasicBlock *BB, unsigned EndLoopOp,
- MachineBasicBlock *TargetBB,
- SmallPtrSet<MachineBasicBlock *, 8> &Visited) {
+MachineInstr *HexagonInstrInfo::findLoopInstr(MachineBasicBlock *BB,
+ unsigned EndLoopOp, MachineBasicBlock *TargetBB,
+ SmallPtrSet<MachineBasicBlock *, 8> &Visited) const {
unsigned LOOPi;
unsigned LOOPr;
if (EndLoopOp == Hexagon::ENDLOOP0) {
@@ -240,41 +240,41 @@ static bool isDuplexPairMatch(unsigned Ga, unsigned Gb) {
unsigned HexagonInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
int &FrameIndex) const {
switch (MI.getOpcode()) {
- default:
- break;
- case Hexagon::L2_loadri_io:
- case Hexagon::L2_loadrd_io:
- case Hexagon::V6_vL32b_ai:
- case Hexagon::V6_vL32b_nt_ai:
- case Hexagon::V6_vL32Ub_ai:
- case Hexagon::LDriw_pred:
- case Hexagon::LDriw_mod:
- case Hexagon::PS_vloadrq_ai:
- case Hexagon::PS_vloadrw_ai:
- case Hexagon::PS_vloadrw_nt_ai: {
- const MachineOperand OpFI = MI.getOperand(1);
- if (!OpFI.isFI())
- return 0;
- const MachineOperand OpOff = MI.getOperand(2);
- if (!OpOff.isImm() || OpOff.getImm() != 0)
- return 0;
- FrameIndex = OpFI.getIndex();
- return MI.getOperand(0).getReg();
- }
+ default:
+ break;
+ case Hexagon::L2_loadri_io:
+ case Hexagon::L2_loadrd_io:
+ case Hexagon::V6_vL32b_ai:
+ case Hexagon::V6_vL32b_nt_ai:
+ case Hexagon::V6_vL32Ub_ai:
+ case Hexagon::LDriw_pred:
+ case Hexagon::LDriw_ctr:
+ case Hexagon::PS_vloadrq_ai:
+ case Hexagon::PS_vloadrw_ai:
+ case Hexagon::PS_vloadrw_nt_ai: {
+ const MachineOperand OpFI = MI.getOperand(1);
+ if (!OpFI.isFI())
+ return 0;
+ const MachineOperand OpOff = MI.getOperand(2);
+ if (!OpOff.isImm() || OpOff.getImm() != 0)
+ return 0;
+ FrameIndex = OpFI.getIndex();
+ return MI.getOperand(0).getReg();
+ }
- case Hexagon::L2_ploadrit_io:
- case Hexagon::L2_ploadrif_io:
- case Hexagon::L2_ploadrdt_io:
- case Hexagon::L2_ploadrdf_io: {
- const MachineOperand OpFI = MI.getOperand(2);
- if (!OpFI.isFI())
- return 0;
- const MachineOperand OpOff = MI.getOperand(3);
- if (!OpOff.isImm() || OpOff.getImm() != 0)
- return 0;
- FrameIndex = OpFI.getIndex();
- return MI.getOperand(0).getReg();
- }
+ case Hexagon::L2_ploadrit_io:
+ case Hexagon::L2_ploadrif_io:
+ case Hexagon::L2_ploadrdt_io:
+ case Hexagon::L2_ploadrdf_io: {
+ const MachineOperand OpFI = MI.getOperand(2);
+ if (!OpFI.isFI())
+ return 0;
+ const MachineOperand OpOff = MI.getOperand(3);
+ if (!OpOff.isImm() || OpOff.getImm() != 0)
+ return 0;
+ FrameIndex = OpFI.getIndex();
+ return MI.getOperand(0).getReg();
+ }
}
return 0;
@@ -288,48 +288,84 @@ unsigned HexagonInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
unsigned HexagonInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
int &FrameIndex) const {
switch (MI.getOpcode()) {
- default:
- break;
- case Hexagon::S2_storerb_io:
- case Hexagon::S2_storerh_io:
- case Hexagon::S2_storeri_io:
- case Hexagon::S2_storerd_io:
- case Hexagon::V6_vS32b_ai:
- case Hexagon::V6_vS32Ub_ai:
- case Hexagon::STriw_pred:
- case Hexagon::STriw_mod:
- case Hexagon::PS_vstorerq_ai:
- case Hexagon::PS_vstorerw_ai: {
- const MachineOperand &OpFI = MI.getOperand(0);
- if (!OpFI.isFI())
- return 0;
- const MachineOperand &OpOff = MI.getOperand(1);
- if (!OpOff.isImm() || OpOff.getImm() != 0)
- return 0;
- FrameIndex = OpFI.getIndex();
- return MI.getOperand(2).getReg();
+ default:
+ break;
+ case Hexagon::S2_storerb_io:
+ case Hexagon::S2_storerh_io:
+ case Hexagon::S2_storeri_io:
+ case Hexagon::S2_storerd_io:
+ case Hexagon::V6_vS32b_ai:
+ case Hexagon::V6_vS32Ub_ai:
+ case Hexagon::STriw_pred:
+ case Hexagon::STriw_ctr:
+ case Hexagon::PS_vstorerq_ai:
+ case Hexagon::PS_vstorerw_ai: {
+ const MachineOperand &OpFI = MI.getOperand(0);
+ if (!OpFI.isFI())
+ return 0;
+ const MachineOperand &OpOff = MI.getOperand(1);
+ if (!OpOff.isImm() || OpOff.getImm() != 0)
+ return 0;
+ FrameIndex = OpFI.getIndex();
+ return MI.getOperand(2).getReg();
+ }
+
+ case Hexagon::S2_pstorerbt_io:
+ case Hexagon::S2_pstorerbf_io:
+ case Hexagon::S2_pstorerht_io:
+ case Hexagon::S2_pstorerhf_io:
+ case Hexagon::S2_pstorerit_io:
+ case Hexagon::S2_pstorerif_io:
+ case Hexagon::S2_pstorerdt_io:
+ case Hexagon::S2_pstorerdf_io: {
+ const MachineOperand &OpFI = MI.getOperand(1);
+ if (!OpFI.isFI())
+ return 0;
+ const MachineOperand &OpOff = MI.getOperand(2);
+ if (!OpOff.isImm() || OpOff.getImm() != 0)
+ return 0;
+ FrameIndex = OpFI.getIndex();
+ return MI.getOperand(3).getReg();
+ }
}
- case Hexagon::S2_pstorerbt_io:
- case Hexagon::S2_pstorerbf_io:
- case Hexagon::S2_pstorerht_io:
- case Hexagon::S2_pstorerhf_io:
- case Hexagon::S2_pstorerit_io:
- case Hexagon::S2_pstorerif_io:
- case Hexagon::S2_pstorerdt_io:
- case Hexagon::S2_pstorerdf_io: {
- const MachineOperand &OpFI = MI.getOperand(1);
- if (!OpFI.isFI())
- return 0;
- const MachineOperand &OpOff = MI.getOperand(2);
- if (!OpOff.isImm() || OpOff.getImm() != 0)
- return 0;
- FrameIndex = OpFI.getIndex();
- return MI.getOperand(3).getReg();
+ return 0;
+}
+
+/// This function checks if the instruction or bundle of instructions
+/// has a load from a stack slot, and if so returns the frame index and
+/// the machine memory operand of that instruction.
+bool HexagonInstrInfo::hasLoadFromStackSlot(const MachineInstr &MI,
+ const MachineMemOperand *&MMO,
+ int &FrameIndex) const {
+ if (MI.isBundle()) {
+ const MachineBasicBlock *MBB = MI.getParent();
+ MachineBasicBlock::const_instr_iterator MII = MI.getIterator();
+ for (++MII; MII != MBB->instr_end() && MII->isInsideBundle(); ++MII)
+ if (TargetInstrInfo::hasLoadFromStackSlot(*MII, MMO, FrameIndex))
+ return true;
+ return false;
}
+
+ return TargetInstrInfo::hasLoadFromStackSlot(MI, MMO, FrameIndex);
+}
+
+/// This function checks if the instruction or bundle of instructions
+/// has a store to a stack slot, and if so returns the frame index and
+/// the machine memory operand of that instruction.
+bool HexagonInstrInfo::hasStoreToStackSlot(const MachineInstr &MI,
+ const MachineMemOperand *&MMO,
+ int &FrameIndex) const {
+ if (MI.isBundle()) {
+ const MachineBasicBlock *MBB = MI.getParent();
+ MachineBasicBlock::const_instr_iterator MII = MI.getIterator();
+ for (++MII; MII != MBB->instr_end() && MII->isInsideBundle(); ++MII)
+ if (TargetInstrInfo::hasStoreToStackSlot(*MII, MMO, FrameIndex))
+ return true;
+ return false;
}
- return 0;
+ return TargetInstrInfo::hasStoreToStackSlot(MI, MMO, FrameIndex);
}
/// This function can analyze one/two way branching only and should (mostly) be
@@ -383,7 +419,7 @@ bool HexagonInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
I = MBB.instr_end();
--I;
- while (I->isDebugValue()) {
+ while (I->isDebugInstr()) {
if (I == MBB.instr_begin())
return false;
--I;
@@ -394,7 +430,7 @@ bool HexagonInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
// Delete the J2_jump if it's equivalent to a fall-through.
if (AllowModify && JumpToBlock &&
MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
- DEBUG(dbgs() << "\nErasing the jump to successor block\n";);
+ LLVM_DEBUG(dbgs() << "\nErasing the jump to successor block\n";);
I->eraseFromParent();
I = MBB.instr_end();
if (I == MBB.instr_begin())
@@ -463,8 +499,8 @@ bool HexagonInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
Cond.push_back(LastInst->getOperand(1));
return false;
}
- DEBUG(dbgs() << "\nCant analyze " << printMBBReference(MBB)
- << " with one jump\n";);
+  LLVM_DEBUG(dbgs() << "\nCan't analyze " << printMBBReference(MBB)
+                    << " with one jump\n";);
// Otherwise, don't know what this is.
return true;
}
@@ -511,8 +547,8 @@ bool HexagonInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
FBB = LastInst->getOperand(0).getMBB();
return false;
}
- DEBUG(dbgs() << "\nCant analyze " << printMBBReference(MBB)
- << " with two jumps";);
+  LLVM_DEBUG(dbgs() << "\nCan't analyze " << printMBBReference(MBB)
+                    << " with two jumps";);
// Otherwise, can't handle this.
return true;
}
@@ -521,12 +557,12 @@ unsigned HexagonInstrInfo::removeBranch(MachineBasicBlock &MBB,
int *BytesRemoved) const {
assert(!BytesRemoved && "code size not handled");
- DEBUG(dbgs() << "\nRemoving branches out of " << printMBBReference(MBB));
+ LLVM_DEBUG(dbgs() << "\nRemoving branches out of " << printMBBReference(MBB));
MachineBasicBlock::iterator I = MBB.end();
unsigned Count = 0;
while (I != MBB.begin()) {
--I;
- if (I->isDebugValue())
+ if (I->isDebugInstr())
continue;
// Only removing branches from end of MBB.
if (!I->isBranch())
@@ -593,7 +629,8 @@ unsigned HexagonInstrInfo::insertBranch(MachineBasicBlock &MBB,
// (ins IntRegs:$src1, IntRegs:$src2, brtarget:$offset)
// (ins IntRegs:$src1, u5Imm:$src2, brtarget:$offset)
unsigned Flags1 = getUndefRegState(Cond[1].isUndef());
- DEBUG(dbgs() << "\nInserting NVJump for " << printMBBReference(MBB););
+ LLVM_DEBUG(dbgs() << "\nInserting NVJump for "
+ << printMBBReference(MBB););
if (Cond[2].isReg()) {
unsigned Flags2 = getUndefRegState(Cond[2].isUndef());
BuildMI(&MBB, DL, get(BccOpc)).addReg(Cond[1].getReg(), Flags1).
@@ -864,7 +901,7 @@ void HexagonInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
.addFrameIndex(FI).addImm(0)
.addReg(SrcReg, KillFlag).addMemOperand(MMO);
} else if (Hexagon::ModRegsRegClass.hasSubClassEq(RC)) {
- BuildMI(MBB, I, DL, get(Hexagon::STriw_mod))
+ BuildMI(MBB, I, DL, get(Hexagon::STriw_ctr))
.addFrameIndex(FI).addImm(0)
.addReg(SrcReg, KillFlag).addMemOperand(MMO);
} else if (Hexagon::HvxQRRegClass.hasSubClassEq(RC)) {
@@ -926,7 +963,7 @@ void HexagonInstrInfo::loadRegFromStackSlot(
BuildMI(MBB, I, DL, get(Hexagon::LDriw_pred), DestReg)
.addFrameIndex(FI).addImm(0).addMemOperand(MMO);
} else if (Hexagon::ModRegsRegClass.hasSubClassEq(RC)) {
- BuildMI(MBB, I, DL, get(Hexagon::LDriw_mod), DestReg)
+ BuildMI(MBB, I, DL, get(Hexagon::LDriw_ctr), DestReg)
.addFrameIndex(FI).addImm(0).addMemOperand(MMO);
} else if (Hexagon::HvxQRRegClass.hasSubClassEq(RC)) {
BuildMI(MBB, I, DL, get(Hexagon::PS_vloadrq_ai), DestReg)
@@ -980,6 +1017,20 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
DebugLoc DL = MI.getDebugLoc();
unsigned Opc = MI.getOpcode();
+ auto RealCirc = [&](unsigned Opc, bool HasImm, unsigned MxOp) {
+ unsigned Mx = MI.getOperand(MxOp).getReg();
+ unsigned CSx = (Mx == Hexagon::M0 ? Hexagon::CS0 : Hexagon::CS1);
+ BuildMI(MBB, MI, DL, get(Hexagon::A2_tfrrcr), CSx)
+ .add(MI.getOperand((HasImm ? 5 : 4)));
+ auto MIB = BuildMI(MBB, MI, DL, get(Opc)).add(MI.getOperand(0))
+ .add(MI.getOperand(1)).add(MI.getOperand(2)).add(MI.getOperand(3));
+ if (HasImm)
+ MIB.add(MI.getOperand(4));
+ MIB.addReg(CSx, RegState::Implicit);
+ MBB.erase(MI);
+ return true;
+ };
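In words: RealCirc copies the circular-buffer start value (the pseudo's trailing operand) into the CS control register paired with the selected modifier register (M0 with CS0, M1 with CS1) via A2_tfrrcr, re-emits the real circular-addressing opcode with the pseudo's leading operands, and attaches CSx as an implicit use so the dependence on the transfer stays visible. Illustrative shape, operand lists elided:

    PS_loadri_pci ..., %m0, %start
      ==>  $cs0 = A2_tfrrcr %start
           L2_loadri_pci ..., %m0, implicit $cs0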
+
switch (Opc) {
case TargetOpcode::COPY: {
MachineOperand &MD = MI.getOperand(0);
@@ -1088,6 +1139,28 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
MBB.erase(MI);
return true;
}
+ case Hexagon::PS_qtrue: {
+ BuildMI(MBB, MI, DL, get(Hexagon::V6_veqw), MI.getOperand(0).getReg())
+ .addReg(Hexagon::V0, RegState::Undef)
+ .addReg(Hexagon::V0, RegState::Undef);
+ MBB.erase(MI);
+ return true;
+ }
+ case Hexagon::PS_qfalse: {
+ BuildMI(MBB, MI, DL, get(Hexagon::V6_vgtw), MI.getOperand(0).getReg())
+ .addReg(Hexagon::V0, RegState::Undef)
+ .addReg(Hexagon::V0, RegState::Undef);
+ MBB.erase(MI);
+ return true;
+ }
+ case Hexagon::PS_vdd0: {
+ unsigned Vd = MI.getOperand(0).getReg();
+ BuildMI(MBB, MI, DL, get(Hexagon::V6_vsubw_dv), Vd)
+ .addReg(Vd, RegState::Undef)
+ .addReg(Vd, RegState::Undef);
+ MBB.erase(MI);
+ return true;
+ }
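All three expansions lean on reflexive identities, which is why undef inputs are fine: both reads of the undefined register see the same bits, so x == x is always true (V6_veqw yields the all-ones predicate), x > x is always false (V6_vgtw yields all-zeros), and x - x is zero in every lane (V6_vsubw_dv clears the whole register pair).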
case Hexagon::PS_vmulw: {
// Expand a 64-bit vector multiply into 2 32-bit scalar multiplies.
unsigned DstReg = MI.getOperand(0).getReg();
@@ -1344,6 +1417,50 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
MBB.erase(MI);
return true;
+ case Hexagon::PS_loadrub_pci:
+ return RealCirc(Hexagon::L2_loadrub_pci, /*HasImm*/true, /*MxOp*/4);
+ case Hexagon::PS_loadrb_pci:
+ return RealCirc(Hexagon::L2_loadrb_pci, /*HasImm*/true, /*MxOp*/4);
+ case Hexagon::PS_loadruh_pci:
+ return RealCirc(Hexagon::L2_loadruh_pci, /*HasImm*/true, /*MxOp*/4);
+ case Hexagon::PS_loadrh_pci:
+ return RealCirc(Hexagon::L2_loadrh_pci, /*HasImm*/true, /*MxOp*/4);
+ case Hexagon::PS_loadri_pci:
+ return RealCirc(Hexagon::L2_loadri_pci, /*HasImm*/true, /*MxOp*/4);
+ case Hexagon::PS_loadrd_pci:
+ return RealCirc(Hexagon::L2_loadrd_pci, /*HasImm*/true, /*MxOp*/4);
+ case Hexagon::PS_loadrub_pcr:
+ return RealCirc(Hexagon::L2_loadrub_pcr, /*HasImm*/false, /*MxOp*/3);
+ case Hexagon::PS_loadrb_pcr:
+ return RealCirc(Hexagon::L2_loadrb_pcr, /*HasImm*/false, /*MxOp*/3);
+ case Hexagon::PS_loadruh_pcr:
+ return RealCirc(Hexagon::L2_loadruh_pcr, /*HasImm*/false, /*MxOp*/3);
+ case Hexagon::PS_loadrh_pcr:
+ return RealCirc(Hexagon::L2_loadrh_pcr, /*HasImm*/false, /*MxOp*/3);
+ case Hexagon::PS_loadri_pcr:
+ return RealCirc(Hexagon::L2_loadri_pcr, /*HasImm*/false, /*MxOp*/3);
+ case Hexagon::PS_loadrd_pcr:
+ return RealCirc(Hexagon::L2_loadrd_pcr, /*HasImm*/false, /*MxOp*/3);
+ case Hexagon::PS_storerb_pci:
+ return RealCirc(Hexagon::S2_storerb_pci, /*HasImm*/true, /*MxOp*/3);
+ case Hexagon::PS_storerh_pci:
+ return RealCirc(Hexagon::S2_storerh_pci, /*HasImm*/true, /*MxOp*/3);
+ case Hexagon::PS_storerf_pci:
+ return RealCirc(Hexagon::S2_storerf_pci, /*HasImm*/true, /*MxOp*/3);
+ case Hexagon::PS_storeri_pci:
+ return RealCirc(Hexagon::S2_storeri_pci, /*HasImm*/true, /*MxOp*/3);
+ case Hexagon::PS_storerd_pci:
+ return RealCirc(Hexagon::S2_storerd_pci, /*HasImm*/true, /*MxOp*/3);
+ case Hexagon::PS_storerb_pcr:
+ return RealCirc(Hexagon::S2_storerb_pcr, /*HasImm*/false, /*MxOp*/2);
+ case Hexagon::PS_storerh_pcr:
+ return RealCirc(Hexagon::S2_storerh_pcr, /*HasImm*/false, /*MxOp*/2);
+ case Hexagon::PS_storerf_pcr:
+ return RealCirc(Hexagon::S2_storerf_pcr, /*HasImm*/false, /*MxOp*/2);
+ case Hexagon::PS_storeri_pcr:
+ return RealCirc(Hexagon::S2_storeri_pcr, /*HasImm*/false, /*MxOp*/2);
+ case Hexagon::PS_storerd_pcr:
+ return RealCirc(Hexagon::S2_storerd_pcr, /*HasImm*/false, /*MxOp*/2);
}
return false;
@@ -1393,7 +1510,7 @@ bool HexagonInstrInfo::PredicateInstruction(
MachineInstr &MI, ArrayRef<MachineOperand> Cond) const {
if (Cond.empty() || isNewValueJump(Cond[0].getImm()) ||
isEndLoopN(Cond[0].getImm())) {
- DEBUG(dbgs() << "\nCannot predicate:"; MI.dump(););
+ LLVM_DEBUG(dbgs() << "\nCannot predicate:"; MI.dump(););
return false;
}
int Opc = MI.getOpcode();
@@ -1483,7 +1600,7 @@ bool HexagonInstrInfo::isPredicable(const MachineInstr &MI) const {
}
// HVX loads are not predicable on v60, but are on v62.
- if (!Subtarget.hasV62TOps()) {
+ if (!Subtarget.hasV62Ops()) {
switch (MI.getOpcode()) {
case Hexagon::V6_vL32b_ai:
case Hexagon::V6_vL32b_pi:
@@ -1518,7 +1635,7 @@ bool HexagonInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
// considered a scheduling hazard, which is wrong. It should be the actual
// instruction preceding the dbg_value instruction(s), just like it is
// when debug info is not present.
- if (MI.isDebugValue())
+ if (MI.isDebugInstr())
return false;
// Throwing call is a boundary.
@@ -1586,7 +1703,7 @@ HexagonInstrInfo::CreateTargetPostRAHazardRecognizer(
return TargetInstrInfo::CreateTargetPostRAHazardRecognizer(II, DAG);
}
-/// \brief For a comparison instruction, return the source registers in
+/// For a comparison instruction, return the source registers in
/// \p SrcReg and \p SrcReg2 if having two register operands, and the value it
/// compares against in CmpValue. Return true if the comparison instruction
/// can be analyzed.
@@ -1836,6 +1953,10 @@ bool HexagonInstrInfo::isAccumulator(const MachineInstr &MI) const {
return((F >> HexagonII::AccumulatorPos) & HexagonII::AccumulatorMask);
}
+bool HexagonInstrInfo::isBaseImmOffset(const MachineInstr &MI) const {
+ return getAddrMode(MI) == HexagonII::BaseImmOffset;
+}
+
bool HexagonInstrInfo::isComplex(const MachineInstr &MI) const {
return !isTC1(MI) && !isTC2Early(MI) && !MI.getDesc().mayLoad() &&
!MI.getDesc().mayStore() &&
@@ -2139,13 +2260,13 @@ bool HexagonInstrInfo::isLateInstrFeedsEarlyInstr(const MachineInstr &LRMI,
bool isLate = isLateResultInstr(LRMI);
bool isEarly = isEarlySourceInstr(ESMI);
- DEBUG(dbgs() << "V60" << (isLate ? "-LR " : " -- "));
- DEBUG(LRMI.dump());
- DEBUG(dbgs() << "V60" << (isEarly ? "-ES " : " -- "));
- DEBUG(ESMI.dump());
+ LLVM_DEBUG(dbgs() << "V60" << (isLate ? "-LR " : " -- "));
+ LLVM_DEBUG(LRMI.dump());
+ LLVM_DEBUG(dbgs() << "V60" << (isEarly ? "-ES " : " -- "));
+ LLVM_DEBUG(ESMI.dump());
if (isLate && isEarly) {
- DEBUG(dbgs() << "++Is Late Result feeding Early Source\n");
+ LLVM_DEBUG(dbgs() << "++Is Late Result feeding Early Source\n");
return true;
}
@@ -2472,6 +2593,13 @@ bool HexagonInstrInfo::isValidAutoIncImm(const EVT VT, int Offset) const {
case MVT::i16:
case MVT::i32:
case MVT::i64:
+ case MVT::f32:
+ case MVT::f64:
+ case MVT::v2i16:
+ case MVT::v2i32:
+ case MVT::v4i8:
+ case MVT::v4i16:
+ case MVT::v8i8:
return isInt<4>(Count);
// For HVX vectors the auto-inc is s3
case MVT::v64i8:
@@ -2599,8 +2727,8 @@ bool HexagonInstrInfo::isValidOffset(unsigned Opcode, int Offset,
// any size. Later pass knows how to handle it.
case Hexagon::STriw_pred:
case Hexagon::LDriw_pred:
- case Hexagon::STriw_mod:
- case Hexagon::LDriw_mod:
+ case Hexagon::STriw_ctr:
+ case Hexagon::LDriw_ctr:
return true;
case Hexagon::PS_fi:
@@ -2754,7 +2882,7 @@ bool HexagonInstrInfo::addLatencyToSchedule(const MachineInstr &MI1,
return false;
}
-/// \brief Get the base register and byte offset of a load/store instr.
+/// Get the base register and byte offset of a load/store instr.
bool HexagonInstrInfo::getMemOpBaseRegImmOfs(MachineInstr &LdSt,
unsigned &BaseReg, int64_t &Offset, const TargetRegisterInfo *TRI)
const {
@@ -2765,7 +2893,7 @@ bool HexagonInstrInfo::getMemOpBaseRegImmOfs(MachineInstr &LdSt,
return BaseReg != 0;
}
-/// \brief Can these instructions execute at the same time in a bundle.
+/// Can these instructions execute at the same time in a bundle.
bool HexagonInstrInfo::canExecuteInBundle(const MachineInstr &First,
const MachineInstr &Second) const {
if (Second.mayStore() && First.getOpcode() == Hexagon::S2_allocframe) {
@@ -2860,11 +2988,14 @@ bool HexagonInstrInfo::hasUncondBranch(const MachineBasicBlock *B)
bool HexagonInstrInfo::mayBeCurLoad(const MachineInstr &MI) const {
const uint64_t F = MI.getDesc().TSFlags;
return ((F >> HexagonII::mayCVLoadPos) & HexagonII::mayCVLoadMask) &&
- Subtarget.hasV60TOps();
+ Subtarget.hasV60Ops();
}
// Returns true, if a ST insn can be promoted to a new-value store.
bool HexagonInstrInfo::mayBeNewStore(const MachineInstr &MI) const {
+ if (MI.mayStore() && !Subtarget.useNewValueStores())
+ return false;
+
const uint64_t F = MI.getDesc().TSFlags;
return (F >> HexagonII::mayNVStorePos) & HexagonII::mayNVStoreMask;
}
@@ -2917,10 +3048,29 @@ bool HexagonInstrInfo::predCanBeUsedAsDotNew(const MachineInstr &MI,
return false;
}
- // Hexagon Programmer's Reference says that decbin, memw_locked, and
- // memd_locked cannot be used as .new as well,
- // but we don't seem to have these instructions defined.
- return MI.getOpcode() != Hexagon::A4_tlbmatch;
+  // Instructions that produce a late predicate cannot be used as sources of
+  // dot-new.
+ switch (MI.getOpcode()) {
+ case Hexagon::A4_addp_c:
+ case Hexagon::A4_subp_c:
+ case Hexagon::A4_tlbmatch:
+ case Hexagon::A5_ACS:
+ case Hexagon::F2_sfinvsqrta:
+ case Hexagon::F2_sfrecipa:
+ case Hexagon::J2_endloop0:
+ case Hexagon::J2_endloop01:
+ case Hexagon::J2_ploop1si:
+ case Hexagon::J2_ploop1sr:
+ case Hexagon::J2_ploop2si:
+ case Hexagon::J2_ploop2sr:
+ case Hexagon::J2_ploop3si:
+ case Hexagon::J2_ploop3sr:
+ case Hexagon::S2_cabacdecbin:
+ case Hexagon::S2_storew_locked:
+ case Hexagon::S4_stored_locked:
+ return false;
+ }
+ return true;
}
bool HexagonInstrInfo::PredOpcodeHasJMP_c(unsigned Opcode) const {
@@ -3047,7 +3197,7 @@ SmallVector<MachineInstr*, 2> HexagonInstrInfo::getBranchingInstrs(
I = MBB.instr_end();
--I;
- while (I->isDebugValue()) {
+ while (I->isDebugInstr()) {
if (I == MBB.instr_begin())
return Jumpers;
--I;
@@ -3496,7 +3646,7 @@ int HexagonInstrInfo::getDotOldOp(const MachineInstr &MI) const {
assert(NewOp >= 0 && "Couldn't change new-value store to its old form.");
}
- if (Subtarget.hasV60TOps())
+ if (Subtarget.hasV60Ops())
return NewOp;
// Subtargets prior to V60 didn't support 'taken' forms of predicated jumps.
@@ -3893,9 +4043,9 @@ int HexagonInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo();
// Get DefIdx and UseIdx for super registers.
- MachineOperand DefMO = DefMI.getOperand(DefIdx);
+ const MachineOperand &DefMO = DefMI.getOperand(DefIdx);
- if (HRI.isPhysicalRegister(DefMO.getReg())) {
+ if (DefMO.isReg() && HRI.isPhysicalRegister(DefMO.getReg())) {
if (DefMO.isImplicit()) {
for (MCSuperRegIterator SR(DefMO.getReg(), &HRI); SR.isValid(); ++SR) {
int Idx = DefMI.findRegisterDefOperandIdx(*SR, false, false, &HRI);
@@ -3906,7 +4056,7 @@ int HexagonInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
}
}
- MachineOperand UseMO = UseMI.getOperand(UseIdx);
+ const MachineOperand &UseMO = UseMI.getOperand(UseIdx);
if (UseMO.isImplicit()) {
for (MCSuperRegIterator SR(UseMO.getReg(), &HRI); SR.isValid(); ++SR) {
int Idx = UseMI.findRegisterUseOperandIdx(*SR, false, &HRI);
@@ -4057,7 +4207,7 @@ bool HexagonInstrInfo::getPredReg(ArrayRef<MachineOperand> Cond,
return false;
assert(Cond.size() == 2);
if (isNewValueJump(Cond[0].getImm()) || Cond[1].isMBB()) {
- DEBUG(dbgs() << "No predregs for new-value jumps/endloop");
+ LLVM_DEBUG(dbgs() << "No predregs for new-value jumps/endloop");
return false;
}
PredReg = Cond[1].getReg();
@@ -4084,7 +4234,7 @@ short HexagonInstrInfo::getRegForm(const MachineInstr &MI) const {
// use a constant extender, which requires another 4 bytes.
// For debug instructions and prolog labels, return 0.
unsigned HexagonInstrInfo::getSize(const MachineInstr &MI) const {
- if (MI.isDebugValue() || MI.isPosition())
+ if (MI.isDebugInstr() || MI.isPosition())
return 0;
unsigned Size = MI.getDesc().getSize();
@@ -4159,9 +4309,9 @@ void HexagonInstrInfo::immediateExtend(MachineInstr &MI) const {
bool HexagonInstrInfo::invertAndChangeJumpTarget(
MachineInstr &MI, MachineBasicBlock *NewTarget) const {
- DEBUG(dbgs() << "\n[invertAndChangeJumpTarget] to "
- << printMBBReference(*NewTarget);
- MI.dump(););
+ LLVM_DEBUG(dbgs() << "\n[invertAndChangeJumpTarget] to "
+ << printMBBReference(*NewTarget);
+ MI.dump(););
assert(MI.isBranch());
unsigned NewOpcode = getInvertedPredicatedOpcode(MI.getOpcode());
int TargetPos = MI.getNumOperands() - 1;
@@ -4189,8 +4339,9 @@ void HexagonInstrInfo::genAllInsnTimingClasses(MachineFunction &MF) const {
for (unsigned insn = TargetOpcode::GENERIC_OP_END+1;
insn < Hexagon::INSTRUCTION_LIST_END; ++insn) {
NewMI = BuildMI(B, I, DL, get(insn));
- DEBUG(dbgs() << "\n" << getName(NewMI->getOpcode()) <<
- " Class: " << NewMI->getDesc().getSchedClass());
+ LLVM_DEBUG(dbgs() << "\n"
+ << getName(NewMI->getOpcode())
+ << " Class: " << NewMI->getDesc().getSchedClass());
NewMI->eraseFromParent();
}
  /* --- The code above is used to generate a complete set of Hexagon Insn --- */
@@ -4200,7 +4351,7 @@ void HexagonInstrInfo::genAllInsnTimingClasses(MachineFunction &MF) const {
// p -> NotP
// NotP -> P
bool HexagonInstrInfo::reversePredSense(MachineInstr &MI) const {
- DEBUG(dbgs() << "\nTrying to reverse pred. sense of:"; MI.dump());
+ LLVM_DEBUG(dbgs() << "\nTrying to reverse pred. sense of:"; MI.dump());
MI.setDesc(get(getInvertedPredicatedOpcode(MI.getOpcode())));
return true;
}
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.h b/lib/Target/Hexagon/HexagonInstrInfo.h
index 4530d3b999cc..96b4ffaba02f 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.h
+++ b/lib/Target/Hexagon/HexagonInstrInfo.h
@@ -18,9 +18,9 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Support/MachineValueType.h"
#include <cstdint>
#include <vector>
@@ -66,6 +66,20 @@ public:
unsigned isStoreToStackSlot(const MachineInstr &MI,
int &FrameIndex) const override;
+  /// Check if the instruction or the bundle of instructions has a
+  /// load from a stack slot. If so, return the frame index and the
+  /// machine memory operand.
+ bool hasLoadFromStackSlot(const MachineInstr &MI,
+ const MachineMemOperand *&MMO,
+ int &FrameIndex) const override;
+
+  /// Check if the instruction or the bundle of instructions has a
+  /// store to a stack slot. If so, return the frame index and the
+  /// machine memory operand.
+ bool hasStoreToStackSlot(const MachineInstr &MI,
+ const MachineMemOperand *&MMO,
+ int &FrameIndex) const override;
+
/// Analyze the branching code at the end of MBB, returning
/// true if it cannot be understood (e.g. it's a switch dispatch or isn't
/// implemented for a target). Upon success, this returns false and returns
@@ -122,8 +136,8 @@ public:
bool analyzeLoop(MachineLoop &L, MachineInstr *&IndVarInst,
MachineInstr *&CmpInst) const override;
- /// Generate code to reduce the loop iteration by one and check if the loop is
- /// finished. Return the value/register of the the new loop count. We need
+ /// Generate code to reduce the loop iteration by one and check if the loop
+ /// is finished. Return the value/register of the new loop count. We need
/// this function when peeling off one or more iterations of a loop. This
/// function assumes the nth iteration is peeled first.
unsigned reduceLoopCount(MachineBasicBlock &MBB,
@@ -201,7 +215,7 @@ public:
/// anything was changed.
bool expandPostRAPseudo(MachineInstr &MI) const override;
- /// \brief Get the base register and byte offset of a load/store instr.
+ /// Get the base register and byte offset of a load/store instr.
bool getMemOpBaseRegImmOfs(MachineInstr &LdSt, unsigned &BaseReg,
int64_t &Offset,
const TargetRegisterInfo *TRI) const override;
@@ -332,7 +346,11 @@ public:
/// HexagonInstrInfo specifics.
unsigned createVR(MachineFunction *MF, MVT VT) const;
+ MachineInstr *findLoopInstr(MachineBasicBlock *BB, unsigned EndLoopOp,
+ MachineBasicBlock *TargetBB,
+ SmallPtrSet<MachineBasicBlock *, 8> &Visited) const;
+ bool isBaseImmOffset(const MachineInstr &MI) const;
bool isAbsoluteSet(const MachineInstr &MI) const;
bool isAccumulator(const MachineInstr &MI) const;
bool isAddrModeWithOffset(const MachineInstr &MI) const;
diff --git a/lib/Target/Hexagon/HexagonIntrinsics.td b/lib/Target/Hexagon/HexagonIntrinsics.td
index 1df143de6e80..b25e316709c5 100644
--- a/lib/Target/Hexagon/HexagonIntrinsics.td
+++ b/lib/Target/Hexagon/HexagonIntrinsics.td
@@ -807,7 +807,6 @@ def : T_QII_pat<C2_muxii, int_hexagon_C2_muxii, s32_0ImmPred, s8_0ImmPred>;
// Shift halfword
def : T_R_pat<A2_aslh, int_hexagon_A2_aslh>;
def : T_R_pat<A2_asrh, int_hexagon_A2_asrh>;
-def : T_R_pat<A2_asrh, int_hexagon_SI_to_SXTHI_asrh>;
// Sign/zero extend
def : T_R_pat<A2_sxth, int_hexagon_A2_sxth>;
@@ -1353,11 +1352,11 @@ class T_stb_pat <InstHexagon MI, Intrinsic IntID, PatLeaf Val>
: Pat<(IntID I32:$Rs, Val:$Rt, I32:$Ru),
(MI I32:$Rs, I32:$Ru, Val:$Rt)>;
-def : T_stb_pat <S2_storerh_pbr, int_hexagon_brev_sth, I32>;
-def : T_stb_pat <S2_storerb_pbr, int_hexagon_brev_stb, I32>;
-def : T_stb_pat <S2_storeri_pbr, int_hexagon_brev_stw, I32>;
-def : T_stb_pat <S2_storerf_pbr, int_hexagon_brev_sthhi, I32>;
-def : T_stb_pat <S2_storerd_pbr, int_hexagon_brev_std, I64>;
+def : T_stb_pat <S2_storerh_pbr, int_hexagon_S2_storerh_pbr, I32>;
+def : T_stb_pat <S2_storerb_pbr, int_hexagon_S2_storerb_pbr, I32>;
+def : T_stb_pat <S2_storeri_pbr, int_hexagon_S2_storeri_pbr, I32>;
+def : T_stb_pat <S2_storerf_pbr, int_hexagon_S2_storerf_pbr, I32>;
+def : T_stb_pat <S2_storerd_pbr, int_hexagon_S2_storerd_pbr, I64>;
class T_stc_pat <InstHexagon MI, Intrinsic IntID, PatLeaf Imm, PatLeaf Val>
: Pat<(IntID I32:$Rs, Val:$Rt, I32:$Ru, Imm:$s),
diff --git a/lib/Target/Hexagon/HexagonIntrinsicsV5.td b/lib/Target/Hexagon/HexagonIntrinsicsV5.td
index f27a63e20e61..29f67cffcf89 100644
--- a/lib/Target/Hexagon/HexagonIntrinsicsV5.td
+++ b/lib/Target/Hexagon/HexagonIntrinsicsV5.td
@@ -9,7 +9,7 @@
//Rdd[+]=vrmpybsu(Rss,Rtt)
//Rdd[+]=vrmpybuu(Rss,Rtt)
-let Predicates = [HasV5T] in {
+let Predicates = [HasV5] in {
def : T_PP_pat <M5_vrmpybsu, int_hexagon_M5_vrmpybsu>;
def : T_PP_pat <M5_vrmpybuu, int_hexagon_M5_vrmpybuu>;
diff --git a/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp b/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
index fd602257934a..f9ed03909233 100644
--- a/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
+++ b/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
@@ -26,6 +26,7 @@
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
@@ -56,7 +57,7 @@
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils.h"
#include <algorithm>
#include <array>
#include <cassert>
@@ -243,8 +244,8 @@ namespace {
const Value *V;
};
- raw_ostream &operator<< (raw_ostream &OS, const PE &P) LLVM_ATTRIBUTE_USED;
- raw_ostream &operator<< (raw_ostream &OS, const PE &P) {
+ LLVM_ATTRIBUTE_USED
+ raw_ostream &operator<<(raw_ostream &OS, const PE &P) {
P.C.print(OS, P.V ? P.V : P.C.Root);
return OS;
}
@@ -608,9 +609,9 @@ namespace {
unsigned getInverseMxN(unsigned QP);
Value *generate(BasicBlock::iterator At, ParsedValues &PV);
- void setupSimplifier();
+ void setupPreSimplifier(Simplifier &S);
+ void setupPostSimplifier(Simplifier &S);
- Simplifier Simp;
Loop *CurLoop;
const DataLayout &DL;
const DominatorTree &DT;
@@ -985,6 +986,7 @@ bool PolynomialMultiplyRecognize::isPromotableTo(Value *Val,
case Instruction::Xor:
case Instruction::LShr: // Shift right is ok.
case Instruction::Select:
+ case Instruction::Trunc:
return true;
case Instruction::ICmp:
if (CmpInst *CI = cast<CmpInst>(In))
@@ -998,6 +1000,8 @@ bool PolynomialMultiplyRecognize::isPromotableTo(Value *Val,
void PolynomialMultiplyRecognize::promoteTo(Instruction *In,
IntegerType *DestTy, BasicBlock *LoopB) {
+ Type *OrigTy = In->getType();
+
// Leave boolean values alone.
if (!In->getType()->isIntegerTy(1))
In->mutateType(DestTy);
@@ -1028,6 +1032,14 @@ void PolynomialMultiplyRecognize::promoteTo(Instruction *In,
Z->eraseFromParent();
return;
}
+ if (TruncInst *T = dyn_cast<TruncInst>(In)) {
+ IntegerType *TruncTy = cast<IntegerType>(OrigTy);
+ Value *Mask = ConstantInt::get(DestTy, (1u << TruncTy->getBitWidth()) - 1);
+ Value *And = IRBuilder<>(In).CreateAnd(T->getOperand(0), Mask);
+ T->replaceAllUsesWith(And);
+ T->eraseFromParent();
+ return;
+ }
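Once every promoted value lives in DestTy, a trunc can only mean "keep the original narrow type's low bits", so it is rewritten as a mask. For example, with DestTy = i32, "%t = trunc i32 %x to i8" becomes "%t = and i32 %x, 255".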
// Promote immediates.
for (unsigned i = 0, n = In->getNumOperands(); i != n; ++i) {
@@ -1050,14 +1062,11 @@ bool PolynomialMultiplyRecognize::promoteTypes(BasicBlock *LoopB,
// Check if the exit values have types that are no wider than the type
// that we want to promote to.
unsigned DestBW = DestTy->getBitWidth();
- for (Instruction &In : *ExitB) {
- PHINode *P = dyn_cast<PHINode>(&In);
- if (!P)
- break;
- if (P->getNumIncomingValues() != 1)
+ for (PHINode &P : ExitB->phis()) {
+ if (P.getNumIncomingValues() != 1)
return false;
- assert(P->getIncomingBlock(0) == LoopB);
- IntegerType *T = dyn_cast<IntegerType>(P->getType());
+ assert(P.getIncomingBlock(0) == LoopB);
+ IntegerType *T = dyn_cast<IntegerType>(P.getType());
if (!T || T->getBitWidth() > DestBW)
return false;
}
@@ -1572,8 +1581,8 @@ static bool hasZeroSignBit(const Value *V) {
return false;
}
-void PolynomialMultiplyRecognize::setupSimplifier() {
- Simp.addRule("sink-zext",
+void PolynomialMultiplyRecognize::setupPreSimplifier(Simplifier &S) {
+ S.addRule("sink-zext",
// Sink zext past bitwise operations.
[](Instruction *I, LLVMContext &Ctx) -> Value* {
if (I->getOpcode() != Instruction::ZExt)
@@ -1594,7 +1603,7 @@ void PolynomialMultiplyRecognize::setupSimplifier() {
B.CreateZExt(T->getOperand(0), I->getType()),
B.CreateZExt(T->getOperand(1), I->getType()));
});
- Simp.addRule("xor/and -> and/xor",
+ S.addRule("xor/and -> and/xor",
// (xor (and x a) (and y a)) -> (and (xor x y) a)
[](Instruction *I, LLVMContext &Ctx) -> Value* {
if (I->getOpcode() != Instruction::Xor)
@@ -1612,7 +1621,7 @@ void PolynomialMultiplyRecognize::setupSimplifier() {
return B.CreateAnd(B.CreateXor(And0->getOperand(0), And1->getOperand(0)),
And0->getOperand(1));
});
- Simp.addRule("sink binop into select",
+ S.addRule("sink binop into select",
// (Op (select c x y) z) -> (select c (Op x z) (Op y z))
// (Op x (select c y z)) -> (select c (Op x y) (Op x z))
[](Instruction *I, LLVMContext &Ctx) -> Value* {
@@ -1638,7 +1647,7 @@ void PolynomialMultiplyRecognize::setupSimplifier() {
}
return nullptr;
});
- Simp.addRule("fold select-select",
+ S.addRule("fold select-select",
// (select c (select c x y) z) -> (select c x z)
// (select c x (select c y z)) -> (select c x z)
[](Instruction *I, LLVMContext &Ctx) -> Value* {
@@ -1657,7 +1666,7 @@ void PolynomialMultiplyRecognize::setupSimplifier() {
}
return nullptr;
});
- Simp.addRule("or-signbit -> xor-signbit",
+ S.addRule("or-signbit -> xor-signbit",
// (or (lshr x 1) 0x800.0) -> (xor (lshr x 1) 0x800.0)
[](Instruction *I, LLVMContext &Ctx) -> Value* {
if (I->getOpcode() != Instruction::Or)
@@ -1669,7 +1678,7 @@ void PolynomialMultiplyRecognize::setupSimplifier() {
return nullptr;
return IRBuilder<>(Ctx).CreateXor(I->getOperand(0), Msb);
});
- Simp.addRule("sink lshr into binop",
+ S.addRule("sink lshr into binop",
// (lshr (BitOp x y) c) -> (BitOp (lshr x c) (lshr y c))
[](Instruction *I, LLVMContext &Ctx) -> Value* {
if (I->getOpcode() != Instruction::LShr)
@@ -1691,7 +1700,7 @@ void PolynomialMultiplyRecognize::setupSimplifier() {
B.CreateLShr(BitOp->getOperand(0), S),
B.CreateLShr(BitOp->getOperand(1), S));
});
- Simp.addRule("expose bitop-const",
+ S.addRule("expose bitop-const",
// (BitOp1 (BitOp2 x a) b) -> (BitOp2 x (BitOp1 a b))
[](Instruction *I, LLVMContext &Ctx) -> Value* {
auto IsBitOp = [](unsigned Op) -> bool {
@@ -1720,16 +1729,44 @@ void PolynomialMultiplyRecognize::setupSimplifier() {
});
}
+void PolynomialMultiplyRecognize::setupPostSimplifier(Simplifier &S) {
+ S.addRule("(and (xor (and x a) y) b) -> (and (xor x y) b), if b == b&a",
+ [](Instruction *I, LLVMContext &Ctx) -> Value* {
+ if (I->getOpcode() != Instruction::And)
+ return nullptr;
+ Instruction *Xor = dyn_cast<Instruction>(I->getOperand(0));
+ ConstantInt *C0 = dyn_cast<ConstantInt>(I->getOperand(1));
+ if (!Xor || !C0)
+ return nullptr;
+ if (Xor->getOpcode() != Instruction::Xor)
+ return nullptr;
+ Instruction *And0 = dyn_cast<Instruction>(Xor->getOperand(0));
+ Instruction *And1 = dyn_cast<Instruction>(Xor->getOperand(1));
+      // Make And0 the operand that is an 'and' instruction, if either is.
+      if (!And0 || And0->getOpcode() != Instruction::And)
+        std::swap(And0, And1);
+      if (!And0 || And0->getOpcode() != Instruction::And)
+        return nullptr;  // Neither operand is an 'and'; nothing to do.
+      ConstantInt *C1 = dyn_cast<ConstantInt>(And0->getOperand(1));
+ if (!C1)
+ return nullptr;
+ uint32_t V0 = C0->getZExtValue();
+ uint32_t V1 = C1->getZExtValue();
+ if (V0 != (V0 & V1))
+ return nullptr;
+ IRBuilder<> B(Ctx);
+ return B.CreateAnd(B.CreateXor(And0->getOperand(0), And1), C0);
+ });
+}
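The guard V0 == (V0 & V1), i.e. b is a submask of a, is what makes this rewrite sound: the outer mask by b hides every bit that the inner and-by-a could have cleared. A scalar check of the identity (hypothetical helper, not part of the pass):

    #include <cassert>
    #include <cstdint>

    // ((x & a) ^ y) & b == (x ^ y) & b  whenever (b & a) == b.
    void checkAndXorAndRule(uint32_t x, uint32_t y, uint32_t a, uint32_t b) {
      if ((b & a) != b)
        return;  // the rule would not fire in this case
      assert((((x & a) ^ y) & b) == ((x ^ y) & b));
    }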
+
bool PolynomialMultiplyRecognize::recognize() {
- DEBUG(dbgs() << "Starting PolynomialMultiplyRecognize on loop\n"
- << *CurLoop << '\n');
+ LLVM_DEBUG(dbgs() << "Starting PolynomialMultiplyRecognize on loop\n"
+ << *CurLoop << '\n');
// Restrictions:
// - The loop must consist of a single block.
// - The iteration count must be known at compile-time.
// - The loop must have an induction variable starting from 0, and
// incremented in each iteration of the loop.
BasicBlock *LoopB = CurLoop->getHeader();
- DEBUG(dbgs() << "Loop header:\n" << *LoopB);
+ LLVM_DEBUG(dbgs() << "Loop header:\n" << *LoopB);
if (LoopB != CurLoop->getLoopLatch())
return false;
@@ -1749,10 +1786,12 @@ bool PolynomialMultiplyRecognize::recognize() {
Value *CIV = getCountIV(LoopB);
ParsedValues PV;
+ Simplifier PreSimp;
PV.IterCount = IterCount;
- DEBUG(dbgs() << "Loop IV: " << *CIV << "\nIterCount: " << IterCount << '\n');
+ LLVM_DEBUG(dbgs() << "Loop IV: " << *CIV << "\nIterCount: " << IterCount
+ << '\n');
- setupSimplifier();
+ setupPreSimplifier(PreSimp);
// Perform a preliminary scan of select instructions to see if any of them
// looks like a generator of the polynomial multiply steps. Assume that a
@@ -1775,9 +1814,9 @@ bool PolynomialMultiplyRecognize::recognize() {
continue;
Simplifier::Context C(SI);
- Value *T = Simp.simplify(C);
+ Value *T = PreSimp.simplify(C);
SelectInst *SelI = (T && isa<SelectInst>(T)) ? cast<SelectInst>(T) : SI;
- DEBUG(dbgs() << "scanSelect(pre-scan): " << PE(C, SelI) << '\n');
+ LLVM_DEBUG(dbgs() << "scanSelect(pre-scan): " << PE(C, SelI) << '\n');
if (scanSelect(SelI, LoopB, EntryB, CIV, PV, true)) {
FoundPreScan = true;
if (SelI != SI) {
@@ -1790,7 +1829,7 @@ bool PolynomialMultiplyRecognize::recognize() {
}
if (!FoundPreScan) {
- DEBUG(dbgs() << "Have not found candidates for pmpy\n");
+ LLVM_DEBUG(dbgs() << "Have not found candidates for pmpy\n");
return false;
}
@@ -1801,6 +1840,24 @@ bool PolynomialMultiplyRecognize::recognize() {
// wide as the target's pmpy instruction.
if (!promoteTypes(LoopB, ExitB))
return false;
+ // Run post-promotion simplifications.
+ Simplifier PostSimp;
+ setupPostSimplifier(PostSimp);
+ for (Instruction &In : *LoopB) {
+ SelectInst *SI = dyn_cast<SelectInst>(&In);
+ if (!SI || !FeedsPHI(SI))
+ continue;
+ Simplifier::Context C(SI);
+ Value *T = PostSimp.simplify(C);
+ SelectInst *SelI = dyn_cast_or_null<SelectInst>(T);
+ if (SelI != SI) {
+ Value *NewSel = C.materialize(LoopB, SI->getIterator());
+ SI->replaceAllUsesWith(NewSel);
+ RecursivelyDeleteTriviallyDeadInstructions(SI, &TLI);
+ }
+ break;
+ }
+
if (!convertShiftsToLeft(LoopB, ExitB, IterCount))
return false;
cleanupLoopBody(LoopB);
@@ -1812,14 +1869,14 @@ bool PolynomialMultiplyRecognize::recognize() {
SelectInst *SelI = dyn_cast<SelectInst>(&In);
if (!SelI)
continue;
- DEBUG(dbgs() << "scanSelect: " << *SelI << '\n');
+ LLVM_DEBUG(dbgs() << "scanSelect: " << *SelI << '\n');
FoundScan = scanSelect(SelI, LoopB, EntryB, CIV, PV, false);
if (FoundScan)
break;
}
assert(FoundScan);
- DEBUG({
+ LLVM_DEBUG({
StringRef PP = (PV.M ? "(P+M)" : "P");
if (!PV.Inv)
dbgs() << "Found pmpy idiom: R = " << PP << ".Q\n";
@@ -1913,7 +1970,7 @@ mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L,
// Get the location that may be stored across the loop. Since the access
// is strided positively through memory, we say that the modified location
// starts at the pointer and has infinite size.
- uint64_t AccessSize = MemoryLocation::UnknownSize;
+ LocationSize AccessSize = MemoryLocation::UnknownSize;
// If the loop iterates a fixed number of times, we can refine the access
// size to be exactly the size of the memset, which is (BECount+1)*StoreSize
@@ -2083,7 +2140,6 @@ CleanupAndExit:
// pointer size if it isn't already.
LLVMContext &Ctx = SI->getContext();
BECount = SE->getTruncateOrZeroExtend(BECount, IntPtrTy);
- unsigned Alignment = std::min(SI->getAlignment(), LI->getAlignment());
DebugLoc DLoc = SI->getDebugLoc();
const SCEV *NumBytesS =
@@ -2217,12 +2273,14 @@ CleanupAndExit:
: CondBuilder.CreateBitCast(LoadBasePtr, Int32PtrTy);
NewCall = CondBuilder.CreateCall(Fn, {Op0, Op1, NumWords});
} else {
- NewCall = CondBuilder.CreateMemMove(StoreBasePtr, LoadBasePtr,
- NumBytes, Alignment);
+ NewCall = CondBuilder.CreateMemMove(StoreBasePtr, SI->getAlignment(),
+ LoadBasePtr, LI->getAlignment(),
+ NumBytes);
}
} else {
- NewCall = Builder.CreateMemCpy(StoreBasePtr, LoadBasePtr,
- NumBytes, Alignment);
+ NewCall = Builder.CreateMemCpy(StoreBasePtr, SI->getAlignment(),
+ LoadBasePtr, LI->getAlignment(),
+ NumBytes);
// Okay, the memcpy has been formed. Zap the original store and
// anything that feeds into it.
RecursivelyDeleteTriviallyDeadInstructions(SI, TLI);
@@ -2230,15 +2288,16 @@ CleanupAndExit:
NewCall->setDebugLoc(DLoc);
- DEBUG(dbgs() << " Formed " << (Overlap ? "memmove: " : "memcpy: ")
- << *NewCall << "\n"
- << " from load ptr=" << *LoadEv << " at: " << *LI << "\n"
- << " from store ptr=" << *StoreEv << " at: " << *SI << "\n");
+ LLVM_DEBUG(dbgs() << " Formed " << (Overlap ? "memmove: " : "memcpy: ")
+ << *NewCall << "\n"
+ << " from load ptr=" << *LoadEv << " at: " << *LI << "\n"
+ << " from store ptr=" << *StoreEv << " at: " << *SI
+ << "\n");
return true;
}
-// \brief Check if the instructions in Insts, together with their dependencies
+// Check if the instructions in Insts, together with their dependencies,
// cover the loop in the sense that the loop could be safely eliminated once
// the instructions in Insts are removed.
bool HexagonLoopIdiomRecognize::coverLoop(Loop *L,
diff --git a/lib/Target/Hexagon/HexagonMachineScheduler.cpp b/lib/Target/Hexagon/HexagonMachineScheduler.cpp
index b1c549aa13fa..74c550ce8226 100644
--- a/lib/Target/Hexagon/HexagonMachineScheduler.cpp
+++ b/lib/Target/Hexagon/HexagonMachineScheduler.cpp
@@ -21,6 +21,7 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/RegisterPressure.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
@@ -47,26 +48,46 @@ using namespace llvm;
static cl::opt<bool> IgnoreBBRegPressure("ignore-bb-reg-pressure",
cl::Hidden, cl::ZeroOrMore, cl::init(false));
+static cl::opt<bool> UseNewerCandidate("use-newer-candidate",
+ cl::Hidden, cl::ZeroOrMore, cl::init(true));
+
static cl::opt<unsigned> SchedDebugVerboseLevel("misched-verbose-level",
cl::Hidden, cl::ZeroOrMore, cl::init(1));
-static cl::opt<bool> TopUseShorterTie("top-use-shorter-tie",
- cl::Hidden, cl::ZeroOrMore, cl::init(false));
-
-static cl::opt<bool> BotUseShorterTie("bot-use-shorter-tie",
- cl::Hidden, cl::ZeroOrMore, cl::init(false));
-
-static cl::opt<bool> DisableTCTie("disable-tc-tie",
- cl::Hidden, cl::ZeroOrMore, cl::init(false));
-
// Check if the scheduler should penalize instructions that are available too
// early due to a zero-latency dependence.
static cl::opt<bool> CheckEarlyAvail("check-early-avail", cl::Hidden,
cl::ZeroOrMore, cl::init(true));
-/// Save the last formed packet
-void VLIWResourceModel::savePacket() {
- OldPacket = Packet;
+// This value is used to determine if a register class is a high pressure set.
+// We compute the maximum number of registers needed and divide it by the
+// total number available. Then, we compare the result to this value.
+static cl::opt<float> RPThreshold("hexagon-reg-pressure", cl::Hidden,
+  cl::init(0.75f), cl::desc("High register pressure threshold."));
+
+/// Return true if there is a dependence between SUd and SUu.
+static bool hasDependence(const SUnit *SUd, const SUnit *SUu,
+ const HexagonInstrInfo &QII) {
+ if (SUd->Succs.size() == 0)
+ return false;
+
+ // Enable .cur formation.
+ if (QII.mayBeCurLoad(*SUd->getInstr()))
+ return false;
+
+ if (QII.canExecuteInBundle(*SUd->getInstr(), *SUu->getInstr()))
+ return false;
+
+ for (const auto &S : SUd->Succs) {
+ // Since we do not add pseudos to packets, might as well
+ // ignore order dependencies.
+ if (S.isCtrl())
+ continue;
+
+ if (S.getSUnit() == SUu && S.getLatency() > 0)
+ return true;
+ }
+ return false;
}
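Note the final latency test: only a data edge with nonzero latency counts as a packet-breaking dependence. Zero-latency edges (results consumable in the same packet), candidate .cur loads, and pairs accepted by canExecuteInBundle all report no dependence, and order (control) edges are skipped since pseudos are never added to packets.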
/// Check if scheduling of this SU is possible
@@ -74,7 +95,7 @@ void VLIWResourceModel::savePacket() {
/// It is _not_ precise (stateful); it is more like
/// another heuristic. Many corner cases are figured out
/// empirically.
-bool VLIWResourceModel::isResourceAvailable(SUnit *SU) {
+bool VLIWResourceModel::isResourceAvailable(SUnit *SU, bool IsTop) {
if (!SU || !SU->getInstr())
return false;
@@ -94,49 +115,39 @@ bool VLIWResourceModel::isResourceAvailable(SUnit *SU) {
break;
}
- MachineFunction &MF = *SU->getInstr()->getParent()->getParent();
- auto &QII = *MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
+ MachineBasicBlock *MBB = SU->getInstr()->getParent();
+ auto &QST = MBB->getParent()->getSubtarget<HexagonSubtarget>();
+ const auto &QII = *QST.getInstrInfo();
// Now see if there are no other dependencies to instructions already
// in the packet.
- for (unsigned i = 0, e = Packet.size(); i != e; ++i) {
- if (Packet[i]->Succs.size() == 0)
- continue;
-
- // Enable .cur formation.
- if (QII.mayBeCurLoad(*Packet[i]->getInstr()))
- continue;
-
- for (SUnit::const_succ_iterator I = Packet[i]->Succs.begin(),
- E = Packet[i]->Succs.end(); I != E; ++I) {
- // Since we do not add pseudos to packets, might as well
- // ignore order dependencies.
- if (I->isCtrl())
- continue;
-
- if (I->getSUnit() == SU)
+ if (IsTop) {
+ for (unsigned i = 0, e = Packet.size(); i != e; ++i)
+ if (hasDependence(Packet[i], SU, QII))
+ return false;
+ } else {
+ for (unsigned i = 0, e = Packet.size(); i != e; ++i)
+ if (hasDependence(SU, Packet[i], QII))
return false;
- }
}
return true;
}
/// Keep track of available resources.
-bool VLIWResourceModel::reserveResources(SUnit *SU) {
+bool VLIWResourceModel::reserveResources(SUnit *SU, bool IsTop) {
bool startNewCycle = false;
// Artificially reset state.
if (!SU) {
ResourcesModel->clearResources();
- savePacket();
Packet.clear();
TotalPackets++;
return false;
}
- // If this SU does not fit in the packet
+  // If this SU does not fit in the packet or the packet is now full,
// start a new one.
- if (!isResourceAvailable(SU)) {
+ if (!isResourceAvailable(SU, IsTop) ||
+ Packet.size() >= SchedModel->getIssueWidth()) {
ResourcesModel->clearResources();
- savePacket();
Packet.clear();
TotalPackets++;
startNewCycle = true;
@@ -161,24 +172,14 @@ bool VLIWResourceModel::reserveResources(SUnit *SU) {
Packet.push_back(SU);
#ifndef NDEBUG
- DEBUG(dbgs() << "Packet[" << TotalPackets << "]:\n");
+ LLVM_DEBUG(dbgs() << "Packet[" << TotalPackets << "]:\n");
for (unsigned i = 0, e = Packet.size(); i != e; ++i) {
- DEBUG(dbgs() << "\t[" << i << "] SU(");
- DEBUG(dbgs() << Packet[i]->NodeNum << ")\t");
- DEBUG(Packet[i]->getInstr()->dump());
+ LLVM_DEBUG(dbgs() << "\t[" << i << "] SU(");
+ LLVM_DEBUG(dbgs() << Packet[i]->NodeNum << ")\t");
+ LLVM_DEBUG(Packet[i]->getInstr()->dump());
}
#endif
- // If packet is now full, reset the state so in the next cycle
- // we start fresh.
- if (Packet.size() >= SchedModel->getIssueWidth()) {
- ResourcesModel->clearResources();
- savePacket();
- Packet.clear();
- TotalPackets++;
- startNewCycle = true;
- }
-
return startNewCycle;
}
@@ -186,37 +187,43 @@ bool VLIWResourceModel::reserveResources(SUnit *SU) {
/// after setting up the current scheduling region. [RegionBegin, RegionEnd)
/// only includes instructions that have DAG nodes, not scheduling boundaries.
void VLIWMachineScheduler::schedule() {
- DEBUG(dbgs() << "********** MI Converging Scheduling VLIW "
- << printMBBReference(*BB) << " " << BB->getName() << " in_func "
- << BB->getParent()->getName() << " at loop depth "
- << MLI->getLoopDepth(BB) << " \n");
+ LLVM_DEBUG(dbgs() << "********** MI Converging Scheduling VLIW "
+ << printMBBReference(*BB) << " " << BB->getName()
+ << " in_func " << BB->getParent()->getName()
+ << " at loop depth " << MLI->getLoopDepth(BB) << " \n");
buildDAGWithRegPressure();
+ Topo.InitDAGTopologicalSorting();
+
+ // Postprocess the DAG to add platform-specific artificial dependencies.
+ postprocessDAG();
+
SmallVector<SUnit*, 8> TopRoots, BotRoots;
findRootsAndBiasEdges(TopRoots, BotRoots);
// Initialize the strategy before modifying the DAG.
SchedImpl->initialize(this);
- DEBUG(unsigned maxH = 0;
- for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
- if (SUnits[su].getHeight() > maxH)
- maxH = SUnits[su].getHeight();
- dbgs() << "Max Height " << maxH << "\n";);
- DEBUG(unsigned maxD = 0;
- for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
- if (SUnits[su].getDepth() > maxD)
- maxD = SUnits[su].getDepth();
- dbgs() << "Max Depth " << maxD << "\n";);
- DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
- SUnits[su].dumpAll(this));
+  LLVM_DEBUG(unsigned maxH = 0;
+             for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
+               if (SUnits[su].getHeight() > maxH)
+                 maxH = SUnits[su].getHeight();
+             dbgs() << "Max Height " << maxH << "\n";);
+  LLVM_DEBUG(unsigned maxD = 0;
+             for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
+               if (SUnits[su].getDepth() > maxD)
+                 maxD = SUnits[su].getDepth();
+             dbgs() << "Max Depth " << maxD << "\n";);
+  LLVM_DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
+               SUnits[su].dumpAll(this));
initQueues(TopRoots, BotRoots);
bool IsTopNode = false;
while (true) {
- DEBUG(dbgs() << "** VLIWMachineScheduler::schedule picking next node\n");
+ LLVM_DEBUG(
+ dbgs() << "** VLIWMachineScheduler::schedule picking next node\n");
SUnit *SU = SchedImpl->pickNode(IsTopNode);
if (!SU) break;
@@ -225,16 +232,16 @@ void VLIWMachineScheduler::schedule() {
scheduleMI(SU, IsTopNode);
- updateQueues(SU, IsTopNode);
-
// Notify the scheduling strategy after updating the DAG.
SchedImpl->schedNode(SU, IsTopNode);
+
+ updateQueues(SU, IsTopNode);
}
assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
placeDebugValues();
- DEBUG({
+ LLVM_DEBUG({
dbgs() << "*** Final schedule for "
<< printMBBReference(*begin()->getParent()) << " ***\n";
dumpSchedule();
@@ -264,6 +271,15 @@ void ConvergingVLIWScheduler::initialize(ScheduleDAGMI *dag) {
Top.ResourceModel = new VLIWResourceModel(STI, DAG->getSchedModel());
Bot.ResourceModel = new VLIWResourceModel(STI, DAG->getSchedModel());
+ const std::vector<unsigned> &MaxPressure =
+ DAG->getRegPressure().MaxSetPressure;
+ HighPressureSets.assign(MaxPressure.size(), 0);
+ for (unsigned i = 0, e = MaxPressure.size(); i < e; ++i) {
+ unsigned Limit = DAG->getRegClassInfo()->getRegPressureSetLimit(i);
+ HighPressureSets[i] =
+ ((float) MaxPressure[i] > ((float) Limit * RPThreshold));
+ }
+
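
To make the initialization concrete, here is a worked example with invented numbers (the limits and maxima below are hypothetical, not taken from any target): with the default threshold of 0.75, a set whose limit is 32 registers is marked high-pressure once the region's maximum pressure exceeds 24.

  #include <cstdio>
  #include <vector>

  int main() {
    const float RPThreshold = 0.75f;              // default from the option above
    std::vector<unsigned> MaxPressure = {30, 12}; // hypothetical per-set maxima
    std::vector<unsigned> Limit = {32, 32};       // hypothetical per-set limits
    std::vector<bool> HighPressureSets(MaxPressure.size());
    for (unsigned i = 0, e = MaxPressure.size(); i < e; ++i)
      HighPressureSets[i] = (float)MaxPressure[i] > (float)Limit[i] * RPThreshold;
    // Set 0: 30 > 24, so high pressure; set 1: 12 > 24 fails, so not.
    std::printf("%d %d\n", (int)HighPressureSets[0], (int)HighPressureSets[1]);
  }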
assert((!ForceTopDown || !ForceBottomUp) &&
"-misched-topdown incompatible with -misched-bottomup");
}
@@ -364,8 +380,8 @@ void ConvergingVLIWScheduler::VLIWSchedBoundary::bumpCycle() {
}
CheckPending = true;
- DEBUG(dbgs() << "*** Next cycle " << Available.getName() << " cycle "
- << CurrCycle << '\n');
+ LLVM_DEBUG(dbgs() << "*** Next cycle " << Available.getName() << " cycle "
+ << CurrCycle << '\n');
}
/// Move the boundary of scheduled code by one SUnit.
@@ -383,18 +399,18 @@ void ConvergingVLIWScheduler::VLIWSchedBoundary::bumpNode(SUnit *SU) {
}
// Update DFA model.
- startNewCycle = ResourceModel->reserveResources(SU);
+ startNewCycle = ResourceModel->reserveResources(SU, isTop());
// Check the instruction group dispatch limit.
// TODO: Check if this SU must end a dispatch group.
IssueCount += SchedModel->getNumMicroOps(SU->getInstr());
if (startNewCycle) {
- DEBUG(dbgs() << "*** Max instrs at cycle " << CurrCycle << '\n');
+ LLVM_DEBUG(dbgs() << "*** Max instrs at cycle " << CurrCycle << '\n');
bumpCycle();
}
else
- DEBUG(dbgs() << "*** IssueCount " << IssueCount
- << " at cycle " << CurrCycle << '\n');
+ LLVM_DEBUG(dbgs() << "*** IssueCount " << IssueCount << " at cycle "
+ << CurrCycle << '\n');
}
/// Release pending ready nodes in to the available queue. This makes them
@@ -443,10 +459,18 @@ SUnit *ConvergingVLIWScheduler::VLIWSchedBoundary::pickOnlyChoice() {
if (CheckPending)
releasePending();
- for (unsigned i = 0; Available.empty(); ++i) {
+ auto AdvanceCycle = [this]() {
+ if (Available.empty())
+ return true;
+ if (Available.size() == 1 && Pending.size() > 0)
+ return !ResourceModel->isResourceAvailable(*Available.begin(), isTop()) ||
+ getWeakLeft(*Available.begin(), isTop()) != 0;
+ return false;
+ };
+ for (unsigned i = 0; AdvanceCycle(); ++i) {
assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) &&
"permanent hazard"); (void)i;
- ResourceModel->reserveResources(nullptr);
+ ResourceModel->reserveResources(nullptr, isTop());
bumpCycle();
releasePending();
}
@@ -520,13 +544,31 @@ static inline bool isSingleUnscheduledSucc(SUnit *SU, SUnit *SU2) {
return true;
}
+/// Check if the instruction changes the register pressure of a register in the
+/// high pressure set. The function returns a negative value if the pressure
+/// decreases and a positive value if the pressure increases. If the instruction
+/// doesn't use a high pressure register or doesn't change the register
+/// pressure, then return 0.
+int ConvergingVLIWScheduler::pressureChange(const SUnit *SU, bool isBotUp) {
+ PressureDiff &PD = DAG->getPressureDiff(SU);
+ for (auto &P : PD) {
+ if (!P.isValid())
+ continue;
+    // The pressure differences are computed bottom-up, so the comparison for
+ // an increase is positive in the bottom direction, but negative in the
+ // top-down direction.
+ if (HighPressureSets[P.getPSet()])
+ return (isBotUp ? P.getUnitInc() : -P.getUnitInc());
+ }
+ return 0;
+}
+
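
A short illustration of the sign convention, with a hypothetical diff value: pressure diffs are recorded bottom-up, so a +1 unit increase on a high-pressure set means pressure rises when the instruction is scheduled bottom-up and falls by the same amount when it is placed top-down.

  #include <cstdio>
  #include <initializer_list>

  int main() {
    int UnitInc = 1; // hypothetical recorded diff for a high-pressure set
    for (bool isBotUp : {true, false}) {
      int Change = isBotUp ? UnitInc : -UnitInc;
      std::printf("%s: %+d\n", isBotUp ? "bottom-up" : "top-down", Change);
    }
  }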
// Constants used to denote relative importance of
// heuristic components for cost computation.
static const unsigned PriorityOne = 200;
static const unsigned PriorityTwo = 50;
static const unsigned PriorityThree = 75;
static const unsigned ScaleTwo = 10;
-static const unsigned FactorOne = 2;
/// Single point to compute overall scheduling cost.
/// TODO: More heuristics will be used soon.
@@ -541,20 +583,23 @@ int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU,
if (!SU || SU->isScheduled)
return ResCount;
- MachineInstr &Instr = *SU->getInstr();
-
- DEBUG(if (verbose) dbgs() << ((Q.getID() == TopQID) ? "(top|" : "(bot|"));
+ LLVM_DEBUG(if (verbose) dbgs()
+ << ((Q.getID() == TopQID) ? "(top|" : "(bot|"));
// Forced priority is high.
if (SU->isScheduleHigh) {
ResCount += PriorityOne;
- DEBUG(dbgs() << "H|");
+ LLVM_DEBUG(dbgs() << "H|");
}
+ unsigned IsAvailableAmt = 0;
// Critical path first.
if (Q.getID() == TopQID) {
- ResCount += (SU->getHeight() * ScaleTwo);
+ if (Top.isLatencyBound(SU)) {
+ LLVM_DEBUG(if (verbose) dbgs() << "LB|");
+ ResCount += (SU->getHeight() * ScaleTwo);
+ }
- DEBUG(if (verbose) {
+ LLVM_DEBUG(if (verbose) {
std::stringstream dbgstr;
dbgstr << "h" << std::setw(3) << SU->getHeight() << "|";
dbgs() << dbgstr.str();
@@ -562,16 +607,19 @@ int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU,
// If resources are available for it, multiply the
// chance of scheduling.
- if (Top.ResourceModel->isResourceAvailable(SU)) {
- ResCount <<= FactorOne;
- ResCount += PriorityThree;
- DEBUG(if (verbose) dbgs() << "A|");
+ if (Top.ResourceModel->isResourceAvailable(SU, true)) {
+ IsAvailableAmt = (PriorityTwo + PriorityThree);
+ ResCount += IsAvailableAmt;
+ LLVM_DEBUG(if (verbose) dbgs() << "A|");
} else
- DEBUG(if (verbose) dbgs() << " |");
+ LLVM_DEBUG(if (verbose) dbgs() << " |");
} else {
- ResCount += (SU->getDepth() * ScaleTwo);
+ if (Bot.isLatencyBound(SU)) {
+ LLVM_DEBUG(if (verbose) dbgs() << "LB|");
+ ResCount += (SU->getDepth() * ScaleTwo);
+ }
- DEBUG(if (verbose) {
+ LLVM_DEBUG(if (verbose) {
std::stringstream dbgstr;
dbgstr << "d" << std::setw(3) << SU->getDepth() << "|";
dbgs() << dbgstr.str();
@@ -579,12 +627,12 @@ int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU,
// If resources are available for it, multiply the
// chance of scheduling.
- if (Bot.ResourceModel->isResourceAvailable(SU)) {
- ResCount <<= FactorOne;
- ResCount += PriorityThree;
- DEBUG(if (verbose) dbgs() << "A|");
+ if (Bot.ResourceModel->isResourceAvailable(SU, false)) {
+ IsAvailableAmt = (PriorityTwo + PriorityThree);
+ ResCount += IsAvailableAmt;
+ LLVM_DEBUG(if (verbose) dbgs() << "A|");
} else
- DEBUG(if (verbose) dbgs() << " |");
+ LLVM_DEBUG(if (verbose) dbgs() << " |");
}
unsigned NumNodesBlocking = 0;
@@ -593,18 +641,20 @@ int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU,
// Look at all of the successors of this node.
// Count the number of nodes that
// this node is the sole unscheduled node for.
- for (const SDep &SI : SU->Succs)
- if (isSingleUnscheduledPred(SI.getSUnit(), SU))
- ++NumNodesBlocking;
+ if (Top.isLatencyBound(SU))
+ for (const SDep &SI : SU->Succs)
+ if (isSingleUnscheduledPred(SI.getSUnit(), SU))
+ ++NumNodesBlocking;
} else {
// How many unscheduled predecessors block this node?
- for (const SDep &PI : SU->Preds)
- if (isSingleUnscheduledSucc(PI.getSUnit(), SU))
- ++NumNodesBlocking;
+ if (Bot.isLatencyBound(SU))
+ for (const SDep &PI : SU->Preds)
+ if (isSingleUnscheduledSucc(PI.getSUnit(), SU))
+ ++NumNodesBlocking;
}
ResCount += (NumNodesBlocking * ScaleTwo);
- DEBUG(if (verbose) {
+ LLVM_DEBUG(if (verbose) {
std::stringstream dbgstr;
dbgstr << "blk " << std::setw(2) << NumNodesBlocking << ")|";
dbgs() << dbgstr.str();
@@ -619,10 +669,17 @@ int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU,
// Decrease priority slightly if register pressure would increase over the
// current maximum.
ResCount -= (Delta.CurrentMax.getUnitInc()*PriorityTwo);
- DEBUG(if (verbose) {
- dbgs() << "RP " << Delta.Excess.getUnitInc() << "/"
- << Delta.CriticalMax.getUnitInc() <<"/"
- << Delta.CurrentMax.getUnitInc() << ")|";
+ // If there are register pressure issues, then we remove the value added for
+ // the instruction being available. The rationale is that we really don't
+ // want to schedule an instruction that causes a spill.
+ if (IsAvailableAmt && pressureChange(SU, Q.getID() != TopQID) > 0 &&
+ (Delta.Excess.getUnitInc() || Delta.CriticalMax.getUnitInc() ||
+ Delta.CurrentMax.getUnitInc()))
+ ResCount -= IsAvailableAmt;
+ LLVM_DEBUG(if (verbose) {
+ dbgs() << "RP " << Delta.Excess.getUnitInc() << "/"
+ << Delta.CriticalMax.getUnitInc() << "/"
+ << Delta.CurrentMax.getUnitInc() << ")|";
});
}
@@ -631,53 +688,39 @@ int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU,
auto &QST = DAG->MF.getSubtarget<HexagonSubtarget>();
auto &QII = *QST.getInstrInfo();
if (SU->isInstr() && QII.mayBeCurLoad(*SU->getInstr())) {
- if (Q.getID() == TopQID && Top.ResourceModel->isResourceAvailable(SU)) {
+ if (Q.getID() == TopQID &&
+ Top.ResourceModel->isResourceAvailable(SU, true)) {
ResCount += PriorityTwo;
- DEBUG(if (verbose) dbgs() << "C|");
+ LLVM_DEBUG(if (verbose) dbgs() << "C|");
} else if (Q.getID() == BotQID &&
- Bot.ResourceModel->isResourceAvailable(SU)) {
+ Bot.ResourceModel->isResourceAvailable(SU, false)) {
ResCount += PriorityTwo;
- DEBUG(if (verbose) dbgs() << "C|");
+ LLVM_DEBUG(if (verbose) dbgs() << "C|");
}
}
// Give preference to a zero latency instruction if the dependent
// instruction is in the current packet.
- if (Q.getID() == TopQID) {
+ if (Q.getID() == TopQID && getWeakLeft(SU, true) == 0) {
for (const SDep &PI : SU->Preds) {
if (!PI.getSUnit()->getInstr()->isPseudo() && PI.isAssignedRegDep() &&
PI.getLatency() == 0 &&
Top.ResourceModel->isInPacket(PI.getSUnit())) {
ResCount += PriorityThree;
- DEBUG(if (verbose) dbgs() << "Z|");
+ LLVM_DEBUG(if (verbose) dbgs() << "Z|");
}
}
- } else {
+ } else if (Q.getID() == BotQID && getWeakLeft(SU, false) == 0) {
for (const SDep &SI : SU->Succs) {
if (!SI.getSUnit()->getInstr()->isPseudo() && SI.isAssignedRegDep() &&
SI.getLatency() == 0 &&
Bot.ResourceModel->isInPacket(SI.getSUnit())) {
ResCount += PriorityThree;
- DEBUG(if (verbose) dbgs() << "Z|");
+ LLVM_DEBUG(if (verbose) dbgs() << "Z|");
}
}
}
- // Give less preference to an instruction that will cause a stall with
- // an instruction in the previous packet.
- if (QII.isHVXVec(Instr)) {
- // Check for stalls in the previous packet.
- if (Q.getID() == TopQID) {
- for (auto J : Top.ResourceModel->OldPacket)
- if (QII.producesStall(*J->getInstr(), Instr))
- ResCount -= PriorityOne;
- } else {
- for (auto J : Bot.ResourceModel->OldPacket)
- if (QII.producesStall(Instr, *J->getInstr()))
- ResCount -= PriorityOne;
- }
- }
-
// If the instruction has a non-zero latency dependence with an instruction in
// the current packet, then it should not be scheduled yet. The case occurs
// when the dependent instruction is scheduled in a new packet, so the
@@ -689,7 +732,7 @@ int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU,
if (PI.getLatency() > 0 &&
Top.ResourceModel->isInPacket(PI.getSUnit())) {
ResCount -= PriorityOne;
- DEBUG(if (verbose) dbgs() << "D|");
+ LLVM_DEBUG(if (verbose) dbgs() << "D|");
}
}
} else {
@@ -697,13 +740,13 @@ int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU,
if (SI.getLatency() > 0 &&
Bot.ResourceModel->isInPacket(SI.getSUnit())) {
ResCount -= PriorityOne;
- DEBUG(if (verbose) dbgs() << "D|");
+ LLVM_DEBUG(if (verbose) dbgs() << "D|");
}
}
}
}
- DEBUG(if (verbose) {
+ LLVM_DEBUG(if (verbose) {
std::stringstream dbgstr;
dbgstr << "Total " << std::setw(4) << ResCount << ")";
dbgs() << dbgstr.str();
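
As a back-of-the-envelope check on how these weights combine (all inputs below are hypothetical), consider a latency-bound top candidate of height 8 that fits in the current packet, blocks two successors, and would raise pressure on a tracked set:

  #include <cstdio>

  int main() {
    const unsigned PriorityTwo = 50, PriorityThree = 75, ScaleTwo = 10;
    int ResCount = 0;
    ResCount += 8 * ScaleTwo;                 // height 8, latency bound: +80
    unsigned IsAvailableAmt = PriorityTwo + PriorityThree;
    ResCount += IsAvailableAmt;               // resources available:    +125
    ResCount += 2 * ScaleTwo;                 // blocks two successors:  +20
    bool PressureWouldRise = true;            // hypothetical RP delta
    if (PressureWouldRise)
      ResCount -= IsAvailableAmt;             // take the bonus back:    -125
    std::printf("ResCount = %d\n", ResCount); // prints 100
  }

The availability bonus is granted and then revoked, so a candidate that would spill ends up competing on its latency and blocking terms alone.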
@@ -718,11 +761,12 @@ int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU,
/// DAG building. To adjust for the current scheduling location we need to
/// maintain the number of vreg uses remaining to be top-scheduled.
ConvergingVLIWScheduler::CandResult ConvergingVLIWScheduler::
-pickNodeFromQueue(ReadyQueue &Q, const RegPressureTracker &RPTracker,
+pickNodeFromQueue(VLIWSchedBoundary &Zone, const RegPressureTracker &RPTracker,
SchedCandidate &Candidate) {
- DEBUG(if (SchedDebugVerboseLevel > 1)
- readyQueueVerboseDump(RPTracker, Candidate, Q);
- else Q.dump(););
+ ReadyQueue &Q = Zone.Available;
+ LLVM_DEBUG(if (SchedDebugVerboseLevel > 1)
+ readyQueueVerboseDump(RPTracker, Candidate, Q);
+ else Q.dump(););
// getMaxPressureDelta temporarily modifies the tracker.
RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
@@ -739,7 +783,7 @@ pickNodeFromQueue(ReadyQueue &Q, const RegPressureTracker &RPTracker,
// Initialize the candidate if needed.
if (!Candidate.SU) {
- DEBUG(traceCandidate("DCAND", Q, *I, CurrentCost));
+ LLVM_DEBUG(traceCandidate("DCAND", Q, *I, CurrentCost));
Candidate.SU = *I;
Candidate.RPDelta = RPDelta;
Candidate.SCost = CurrentCost;
@@ -747,9 +791,23 @@ pickNodeFromQueue(ReadyQueue &Q, const RegPressureTracker &RPTracker,
continue;
}
+ // Choose node order for negative cost candidates. There is no good
+ // candidate in this case.
+ if (CurrentCost < 0 && Candidate.SCost < 0) {
+ if ((Q.getID() == TopQID && (*I)->NodeNum < Candidate.SU->NodeNum)
+ || (Q.getID() == BotQID && (*I)->NodeNum > Candidate.SU->NodeNum)) {
+ LLVM_DEBUG(traceCandidate("NCAND", Q, *I, CurrentCost));
+ Candidate.SU = *I;
+ Candidate.RPDelta = RPDelta;
+ Candidate.SCost = CurrentCost;
+ FoundCandidate = NodeOrder;
+ }
+ continue;
+ }
+
// Best cost.
if (CurrentCost > Candidate.SCost) {
- DEBUG(traceCandidate("CCAND", Q, *I, CurrentCost));
+ LLVM_DEBUG(traceCandidate("CCAND", Q, *I, CurrentCost));
Candidate.SU = *I;
Candidate.RPDelta = RPDelta;
Candidate.SCost = CurrentCost;
@@ -757,65 +815,53 @@ pickNodeFromQueue(ReadyQueue &Q, const RegPressureTracker &RPTracker,
continue;
}
- // Tie breaker using Timing Class.
- if (!DisableTCTie) {
- auto &QST = DAG->MF.getSubtarget<HexagonSubtarget>();
- auto &QII = *QST.getInstrInfo();
-
- const MachineInstr *MI = (*I)->getInstr();
- const MachineInstr *CandI = Candidate.SU->getInstr();
- const InstrItineraryData *InstrItins = QST.getInstrItineraryData();
-
- unsigned InstrLatency = QII.getInstrTimingClassLatency(InstrItins, *MI);
- unsigned CandLatency = QII.getInstrTimingClassLatency(InstrItins, *CandI);
- DEBUG(dbgs() << "TC Tie Breaker Cand: "
- << CandLatency << " Instr:" << InstrLatency << "\n"
- << *MI << *CandI << "\n");
- if (Q.getID() == TopQID && CurrentCost == Candidate.SCost) {
- if (InstrLatency < CandLatency && TopUseShorterTie) {
- Candidate.SU = *I;
- Candidate.RPDelta = RPDelta;
- Candidate.SCost = CurrentCost;
- FoundCandidate = BestCost;
- DEBUG(dbgs() << "Used top shorter tie breaker\n");
- continue;
- } else if (InstrLatency > CandLatency && !TopUseShorterTie) {
- Candidate.SU = *I;
- Candidate.RPDelta = RPDelta;
- Candidate.SCost = CurrentCost;
- FoundCandidate = BestCost;
- DEBUG(dbgs() << "Used top longer tie breaker\n");
- continue;
- }
- } else if (Q.getID() == BotQID && CurrentCost == Candidate.SCost) {
- if (InstrLatency < CandLatency && BotUseShorterTie) {
- Candidate.SU = *I;
- Candidate.RPDelta = RPDelta;
- Candidate.SCost = CurrentCost;
- FoundCandidate = BestCost;
- DEBUG(dbgs() << "Used Bot shorter tie breaker\n");
- continue;
- } else if (InstrLatency > CandLatency && !BotUseShorterTie) {
- Candidate.SU = *I;
- Candidate.RPDelta = RPDelta;
- Candidate.SCost = CurrentCost;
- FoundCandidate = BestCost;
- DEBUG(dbgs() << "Used Bot longer tie breaker\n");
- continue;
- }
+ // Choose an instruction that does not depend on an artificial edge.
+ unsigned CurrWeak = getWeakLeft(*I, (Q.getID() == TopQID));
+ unsigned CandWeak = getWeakLeft(Candidate.SU, (Q.getID() == TopQID));
+ if (CurrWeak != CandWeak) {
+ if (CurrWeak < CandWeak) {
+ LLVM_DEBUG(traceCandidate("WCAND", Q, *I, CurrentCost));
+ Candidate.SU = *I;
+ Candidate.RPDelta = RPDelta;
+ Candidate.SCost = CurrentCost;
+ FoundCandidate = Weak;
}
+ continue;
}
- if (CurrentCost == Candidate.SCost) {
- if ((Q.getID() == TopQID &&
- (*I)->Succs.size() > Candidate.SU->Succs.size()) ||
- (Q.getID() == BotQID &&
- (*I)->Preds.size() < Candidate.SU->Preds.size())) {
- DEBUG(traceCandidate("SPCAND", Q, *I, CurrentCost));
+ if (CurrentCost == Candidate.SCost && Zone.isLatencyBound(*I)) {
+ unsigned CurrSize, CandSize;
+ if (Q.getID() == TopQID) {
+ CurrSize = (*I)->Succs.size();
+ CandSize = Candidate.SU->Succs.size();
+ } else {
+ CurrSize = (*I)->Preds.size();
+ CandSize = Candidate.SU->Preds.size();
+ }
+ if (CurrSize > CandSize) {
+ LLVM_DEBUG(traceCandidate("SPCAND", Q, *I, CurrentCost));
Candidate.SU = *I;
Candidate.RPDelta = RPDelta;
Candidate.SCost = CurrentCost;
FoundCandidate = BestCost;
+ }
+      // Keep the old candidate if it is the better one. That is, don't use
+      // the subsequent tie breaker.
+ if (CurrSize != CandSize)
+ continue;
+ }
+
+ // Tie breaker.
+ // To avoid scheduling indeterminism, we need a tie breaker
+ // for the case when cost is identical for two nodes.
+ if (UseNewerCandidate && CurrentCost == Candidate.SCost) {
+ if ((Q.getID() == TopQID && (*I)->NodeNum < Candidate.SU->NodeNum)
+ || (Q.getID() == BotQID && (*I)->NodeNum > Candidate.SU->NodeNum)) {
+ LLVM_DEBUG(traceCandidate("TCAND", Q, *I, CurrentCost));
+ Candidate.SU = *I;
+ Candidate.RPDelta = RPDelta;
+ Candidate.SCost = CurrentCost;
+ FoundCandidate = NodeOrder;
continue;
}
}
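
For illustration, with UseNewerCandidate enabled an equal-cost tie falls back to node order: the top queue prefers the lower NodeNum (earlier in the original instruction order), the bottom queue the higher one, which keeps the schedule deterministic across runs. A toy version of that comparison (names simplified):

  #include <cstdio>
  #include <initializer_list>

  int main() {
    unsigned CandNum = 7, NewNum = 4; // hypothetical DAG node numbers
    for (bool IsTopQueue : {true, false}) {
      bool TakeNew = IsTopQueue ? NewNum < CandNum : NewNum > CandNum;
      std::printf("%s queue takes SU(%u): %d\n",
                  IsTopQueue ? "top" : "bot", NewNum, TakeNew);
    }
  }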
@@ -833,18 +879,18 @@ SUnit *ConvergingVLIWScheduler::pickNodeBidrectional(bool &IsTopNode) {
// Schedule as far as possible in the direction of no choice. This is most
// efficient, but also provides the best heuristics for CriticalPSets.
if (SUnit *SU = Bot.pickOnlyChoice()) {
- DEBUG(dbgs() << "Picked only Bottom\n");
+ LLVM_DEBUG(dbgs() << "Picked only Bottom\n");
IsTopNode = false;
return SU;
}
if (SUnit *SU = Top.pickOnlyChoice()) {
- DEBUG(dbgs() << "Picked only Top\n");
+ LLVM_DEBUG(dbgs() << "Picked only Top\n");
IsTopNode = true;
return SU;
}
SchedCandidate BotCand;
// Prefer bottom scheduling when heuristics are silent.
- CandResult BotResult = pickNodeFromQueue(Bot.Available,
+ CandResult BotResult = pickNodeFromQueue(Bot,
DAG->getBotRPTracker(), BotCand);
assert(BotResult != NoCand && "failed to find the first candidate");
@@ -856,40 +902,40 @@ SUnit *ConvergingVLIWScheduler::pickNodeBidrectional(bool &IsTopNode) {
// increase pressure for one of the excess PSets, then schedule in that
// direction first to provide more freedom in the other direction.
if (BotResult == SingleExcess || BotResult == SingleCritical) {
- DEBUG(dbgs() << "Prefered Bottom Node\n");
+  LLVM_DEBUG(dbgs() << "Preferred Bottom Node\n");
IsTopNode = false;
return BotCand.SU;
}
// Check if the top Q has a better candidate.
SchedCandidate TopCand;
- CandResult TopResult = pickNodeFromQueue(Top.Available,
+ CandResult TopResult = pickNodeFromQueue(Top,
DAG->getTopRPTracker(), TopCand);
assert(TopResult != NoCand && "failed to find the first candidate");
if (TopResult == SingleExcess || TopResult == SingleCritical) {
- DEBUG(dbgs() << "Prefered Top Node\n");
+    LLVM_DEBUG(dbgs() << "Preferred Top Node\n");
IsTopNode = true;
return TopCand.SU;
}
// If either Q has a single candidate that minimizes pressure above the
  // original region's pressure, pick it.
if (BotResult == SingleMax) {
- DEBUG(dbgs() << "Prefered Bottom Node SingleMax\n");
+    LLVM_DEBUG(dbgs() << "Preferred Bottom Node SingleMax\n");
IsTopNode = false;
return BotCand.SU;
}
if (TopResult == SingleMax) {
- DEBUG(dbgs() << "Prefered Top Node SingleMax\n");
+    LLVM_DEBUG(dbgs() << "Preferred Top Node SingleMax\n");
IsTopNode = true;
return TopCand.SU;
}
if (TopCand.SCost > BotCand.SCost) {
- DEBUG(dbgs() << "Prefered Top Node Cost\n");
+    LLVM_DEBUG(dbgs() << "Preferred Top Node Cost\n");
IsTopNode = true;
return TopCand.SU;
}
// Otherwise prefer the bottom candidate in node order.
- DEBUG(dbgs() << "Prefered Bottom in Node order\n");
+  LLVM_DEBUG(dbgs() << "Preferred Bottom in Node order\n");
IsTopNode = false;
return BotCand.SU;
}
@@ -907,7 +953,7 @@ SUnit *ConvergingVLIWScheduler::pickNode(bool &IsTopNode) {
if (!SU) {
SchedCandidate TopCand;
CandResult TopResult =
- pickNodeFromQueue(Top.Available, DAG->getTopRPTracker(), TopCand);
+ pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
assert(TopResult != NoCand && "failed to find the first candidate");
(void)TopResult;
SU = TopCand.SU;
@@ -918,7 +964,7 @@ SUnit *ConvergingVLIWScheduler::pickNode(bool &IsTopNode) {
if (!SU) {
SchedCandidate BotCand;
CandResult BotResult =
- pickNodeFromQueue(Bot.Available, DAG->getBotRPTracker(), BotCand);
+ pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
assert(BotResult != NoCand && "failed to find the first candidate");
(void)BotResult;
SU = BotCand.SU;
@@ -932,10 +978,11 @@ SUnit *ConvergingVLIWScheduler::pickNode(bool &IsTopNode) {
if (SU->isBottomReady())
Bot.removeReady(SU);
- DEBUG(dbgs() << "*** " << (IsTopNode ? "Top" : "Bottom")
- << " Scheduling Instruction in cycle "
- << (IsTopNode ? Top.CurrCycle : Bot.CurrCycle) << '\n';
- SU->dump(DAG));
+ LLVM_DEBUG(dbgs() << "*** " << (IsTopNode ? "Top" : "Bottom")
+ << " Scheduling instruction in cycle "
+ << (IsTopNode ? Top.CurrCycle : Bot.CurrCycle) << " ("
+ << reportPackets() << ")\n";
+ SU->dump(DAG));
return SU;
}
@@ -945,10 +992,10 @@ SUnit *ConvergingVLIWScheduler::pickNode(bool &IsTopNode) {
/// does.
void ConvergingVLIWScheduler::schedNode(SUnit *SU, bool IsTopNode) {
if (IsTopNode) {
- SU->TopReadyCycle = Top.CurrCycle;
Top.bumpNode(SU);
+ SU->TopReadyCycle = Top.CurrCycle;
} else {
- SU->BotReadyCycle = Bot.CurrCycle;
Bot.bumpNode(SU);
+ SU->BotReadyCycle = Bot.CurrCycle;
}
}
diff --git a/lib/Target/Hexagon/HexagonMachineScheduler.h b/lib/Target/Hexagon/HexagonMachineScheduler.h
index bf7fe2d484a2..585a7858ad2b 100644
--- a/lib/Target/Hexagon/HexagonMachineScheduler.h
+++ b/lib/Target/Hexagon/HexagonMachineScheduler.h
@@ -49,9 +49,6 @@ class VLIWResourceModel {
unsigned TotalPackets = 0;
public:
- /// Save the last formed packet.
- std::vector<SUnit *> OldPacket;
-
VLIWResourceModel(const TargetSubtargetInfo &STI, const TargetSchedModel *SM)
: SchedModel(SM) {
ResourcesModel = STI.getInstrInfo()->CreateTargetScheduleState(STI);
@@ -62,8 +59,6 @@ public:
Packet.resize(SchedModel->getIssueWidth());
Packet.clear();
- OldPacket.resize(SchedModel->getIssueWidth());
- OldPacket.clear();
ResourcesModel->clearResources();
}
@@ -84,9 +79,8 @@ public:
ResourcesModel->clearResources();
}
- bool isResourceAvailable(SUnit *SU);
- bool reserveResources(SUnit *SU);
- void savePacket();
+ bool isResourceAvailable(SUnit *SU, bool IsTop);
+ bool reserveResources(SUnit *SU, bool IsTop);
unsigned getTotalPackets() const { return TotalPackets; }
bool isInPacket(SUnit *SU) const { return is_contained(Packet, SU); }
};
@@ -102,6 +96,9 @@ public:
/// Schedule - This is called back from ScheduleDAGInstrs::Run() when it's
/// time to do some work.
void schedule() override;
+
+ RegisterClassInfo *getRegClassInfo() { return RegClassInfo; }
+ int getBBSize() { return BB->size(); }
};
//===----------------------------------------------------------------------===//
@@ -129,7 +126,7 @@ class ConvergingVLIWScheduler : public MachineSchedStrategy {
/// Represent the type of SchedCandidate found within a single queue.
enum CandResult {
NoCand, NodeOrder, SingleExcess, SingleCritical, SingleMax, MultiPressure,
- BestCost};
+ BestCost, Weak};
/// Each Scheduling boundary is associated with ready queues. It tracks the
 /// current cycle in whichever direction it has moved, and maintains the state
@@ -147,6 +144,7 @@ class ConvergingVLIWScheduler : public MachineSchedStrategy {
unsigned CurrCycle = 0;
unsigned IssueCount = 0;
+ unsigned CriticalPathLength = 0;
/// MinReadyCycle - Cycle of the soonest available instruction.
unsigned MinReadyCycle = std::numeric_limits<unsigned>::max();
@@ -168,7 +166,27 @@ class ConvergingVLIWScheduler : public MachineSchedStrategy {
void init(VLIWMachineScheduler *dag, const TargetSchedModel *smodel) {
DAG = dag;
SchedModel = smodel;
+ CurrCycle = 0;
IssueCount = 0;
+    // Initialize the critical path length limit, which is used by the scheduling
+ // cost model to determine the value for scheduling an instruction. We use
+ // a slightly different heuristic for small and large functions. For small
+ // functions, it's important to use the height/depth of the instruction.
+ // For large functions, prioritizing by height or depth increases spills.
+ CriticalPathLength = DAG->getBBSize() / SchedModel->getIssueWidth();
+ if (DAG->getBBSize() < 50)
+ // We divide by two as a cheap and simple heuristic to reduce the
+      // critical path length, which increases the priority of using the graph
+ // height/depth in the scheduler's cost computation.
+ CriticalPathLength >>= 1;
+ else {
+ // For large basic blocks, we prefer a larger critical path length to
+ // decrease the priority of using the graph height/depth.
+ unsigned MaxPath = 0;
+ for (auto &SU : DAG->SUnits)
+ MaxPath = std::max(MaxPath, isTop() ? SU.getHeight() : SU.getDepth());
+ CriticalPathLength = std::max(CriticalPathLength, MaxPath) + 1;
+ }
}
bool isTop() const {
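
A worked instance of the limit computation above (block sizes and path length are hypothetical): with an issue width of 4, a 40-instruction block yields 40/4 = 10, halved to 5, so the height/depth term kicks in almost immediately; a 200-instruction block yields 200/4 = 50, raised to at least the longest graph path plus one, which defers that term.

  #include <algorithm>
  #include <cstdio>
  #include <initializer_list>

  int main() {
    const unsigned IssueWidth = 4;
    for (unsigned BBSize : {40u, 200u}) {
      unsigned MaxPath = 60;              // hypothetical longest graph path
      unsigned CPL = BBSize / IssueWidth;
      if (BBSize < 50)
        CPL >>= 1;                        // small block: halve the limit
      else
        CPL = std::max(CPL, MaxPath) + 1; // large block: at least MaxPath + 1
      std::printf("BBSize %u -> CriticalPathLength %u\n", BBSize, CPL);
    }
  }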
@@ -188,6 +206,13 @@ class ConvergingVLIWScheduler : public MachineSchedStrategy {
void removeReady(SUnit *SU);
SUnit *pickOnlyChoice();
+
+ bool isLatencyBound(SUnit *SU) {
+ if (CurrCycle >= CriticalPathLength)
+ return true;
+ unsigned PathLength = isTop() ? SU->getHeight() : SU->getDepth();
+ return CriticalPathLength - CurrCycle <= PathLength;
+ }
};
VLIWMachineScheduler *DAG = nullptr;
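
And a small numeric check of isLatencyBound (values hypothetical): with CriticalPathLength = 20 and CurrCycle = 5, only units whose remaining height/depth is at least 15 count as latency bound; once CurrCycle reaches the limit, every unit does.

  #include <cstdio>
  #include <initializer_list>

  int main() {
    unsigned CriticalPathLength = 20, CurrCycle = 5;
    for (unsigned PathLength : {18u, 10u}) {
      bool Bound = CurrCycle >= CriticalPathLength ||
                   CriticalPathLength - CurrCycle <= PathLength;
      std::printf("PathLength %u -> latency bound: %d\n", PathLength, Bound);
    }
  }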
@@ -197,6 +222,9 @@ class ConvergingVLIWScheduler : public MachineSchedStrategy {
VLIWSchedBoundary Top;
VLIWSchedBoundary Bot;
+ /// List of pressure sets that have a high pressure level in the region.
+ std::vector<bool> HighPressureSets;
+
public:
/// SUnit::NodeQueueId: 0 (none), 1 (top), 2 (bot), 3 (both)
enum {
@@ -217,7 +245,7 @@ public:
void releaseBottomNode(SUnit *SU) override;
- unsigned ReportPackets() {
+ unsigned reportPackets() {
return Top.ResourceModel->getTotalPackets() +
Bot.ResourceModel->getTotalPackets();
}
@@ -225,11 +253,13 @@ public:
protected:
SUnit *pickNodeBidrectional(bool &IsTopNode);
+ int pressureChange(const SUnit *SU, bool isBotUp);
+
int SchedulingCost(ReadyQueue &Q,
SUnit *SU, SchedCandidate &Candidate,
RegPressureDelta &Delta, bool verbose);
- CandResult pickNodeFromQueue(ReadyQueue &Q,
+ CandResult pickNodeFromQueue(VLIWSchedBoundary &Zone,
const RegPressureTracker &RPTracker,
SchedCandidate &Candidate);
#ifndef NDEBUG
diff --git a/lib/Target/Hexagon/HexagonMapAsm2IntrinV65.gen.td b/lib/Target/Hexagon/HexagonMapAsm2IntrinV65.gen.td
index 718d3ac7d45a..c29a75e6fe74 100644
--- a/lib/Target/Hexagon/HexagonMapAsm2IntrinV65.gen.td
+++ b/lib/Target/Hexagon/HexagonMapAsm2IntrinV65.gen.td
@@ -7,80 +7,80 @@
//
//===----------------------------------------------------------------------===//
-def: Pat<(int_hexagon_A6_vcmpbeq_notany DoubleRegs:$src1, DoubleRegs:$src2), (A6_vcmpbeq_notany DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV65T]>;
-def: Pat<(int_hexagon_V6_vasruwuhsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3), (V6_vasruwuhsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV65T, UseHVX]>;
-def: Pat<(int_hexagon_V6_vasruwuhsat_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3), (V6_vasruwuhsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV65T, UseHVX]>;
-def: Pat<(int_hexagon_V6_vasruhubsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3), (V6_vasruhubsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV65T, UseHVX]>;
-def: Pat<(int_hexagon_V6_vasruhubsat_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3), (V6_vasruhubsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV65T, UseHVX]>;
-def: Pat<(int_hexagon_V6_vasruhubrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3), (V6_vasruhubrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV65T, UseHVX]>;
-def: Pat<(int_hexagon_V6_vasruhubrndsat_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3), (V6_vasruhubrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV65T, UseHVX]>;
-def: Pat<(int_hexagon_V6_vabsb HvxVR:$src1), (V6_vabsb HvxVR:$src1)>, Requires<[HasV65T, UseHVX]>;
-def: Pat<(int_hexagon_V6_vabsb_128B HvxVR:$src1), (V6_vabsb HvxVR:$src1)>, Requires<[HasV65T, UseHVX]>;
-def: Pat<(int_hexagon_V6_vabsb_sat HvxVR:$src1), (V6_vabsb_sat HvxVR:$src1)>, Requires<[HasV65T, UseHVX]>;
-def: Pat<(int_hexagon_V6_vabsb_sat_128B HvxVR:$src1), (V6_vabsb_sat HvxVR:$src1)>, Requires<[HasV65T, UseHVX]>;
-def: Pat<(int_hexagon_V6_vaslh_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3), (V6_vaslh_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV65T, UseHVX]>;
-def: Pat<(int_hexagon_V6_vaslh_acc_128B HvxVR:$src1, HvxVR:$src2, IntRegs:$src3), (V6_vaslh_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV65T, UseHVX]>;
-def: Pat<(int_hexagon_V6_vasrh_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3), (V6_vasrh_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV65T, UseHVX]>;
-def: Pat<(int_hexagon_V6_vasrh_acc_128B HvxVR:$src1, HvxVR:$src2, IntRegs:$src3), (V6_vasrh_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV65T, UseHVX]>;
-def: Pat<(int_hexagon_V6_vavguw HvxVR:$src1, HvxVR:$src2), (V6_vavguw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV65T, UseHVX]>;
-def: Pat<(int_hexagon_V6_vavguw_128B HvxVR:$src1, HvxVR:$src2), (V6_vavguw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV65T, UseHVX]>;
-def: Pat<(int_hexagon_V6_vavguwrnd HvxVR:$src1, HvxVR:$src2), (V6_vavguwrnd HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV65T, UseHVX]>;
-def: Pat<(int_hexagon_V6_vavguwrnd_128B HvxVR:$src1, HvxVR:$src2), (V6_vavguwrnd HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV65T, UseHVX]>;
-def: Pat<(int_hexagon_V6_vavgb HvxVR:$src1, HvxVR:$src2), (V6_vavgb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV65T, UseHVX]>;
-def: Pat<(int_hexagon_V6_vavgb_128B HvxVR:$src1, HvxVR:$src2), (V6_vavgb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV65T, UseHVX]>;
-def: Pat<(int_hexagon_V6_vavgbrnd HvxVR:$src1, HvxVR:$src2), (V6_vavgbrnd HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV65T, UseHVX]>;
-def: Pat<(int_hexagon_V6_vavgbrnd_128B HvxVR:$src1, HvxVR:$src2), (V6_vavgbrnd HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV65T, UseHVX]>;
-def: Pat<(int_hexagon_V6_vlut4 HvxVR:$src1, DoubleRegs:$src2), (V6_vlut4 HvxVR:$src1, DoubleRegs:$src2)>, Requires<[HasV65T, UseHVX]>;
-def: Pat<(int_hexagon_V6_vlut4_128B HvxVR:$src1, DoubleRegs:$src2), (V6_vlut4 HvxVR:$src1, DoubleRegs:$src2)>, Requires<[HasV65T, UseHVX]>;
-def: Pat<(int_hexagon_V6_vnavgb HvxVR:$src1, HvxVR:$src2), (V6_vnavgb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV65T, UseHVX]>;
-def: Pat<(int_hexagon_V6_vnavgb_128B HvxVR:$src1, HvxVR:$src2), (V6_vnavgb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV65T, UseHVX]>;
-def: Pat<(int_hexagon_V6_vmpabuu HvxWR:$src1, IntRegs:$src2), (V6_vmpabuu HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV65T, UseHVX]>;
-def: Pat<(int_hexagon_V6_vmpabuu_128B HvxWR:$src1, IntRegs:$src2), (V6_vmpabuu HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV65T, UseHVX]>;
-def: Pat<(int_hexagon_V6_vmpabuu_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3), (V6_vmpabuu_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV65T, UseHVX]>;
-def: Pat<(int_hexagon_V6_vmpabuu_acc_128B HvxWR:$src1, HvxWR:$src2, IntRegs:$src3), (V6_vmpabuu_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV65T, UseHVX]>;
-def: Pat<(int_hexagon_V6_vmpahhsat HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3), (V6_vmpahhsat HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3)>, Requires<[HasV65T, UseHVX]>;
-def: Pat<(int_hexagon_V6_vmpahhsat_128B HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3), (V6_vmpahhsat HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3)>, Requires<[HasV65T, UseHVX]>;
-def: Pat<(int_hexagon_V6_vmpauhuhsat HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3), (V6_vmpauhuhsat HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3)>, Requires<[HasV65T, UseHVX]>;
-def: Pat<(int_hexagon_V6_vmpauhuhsat_128B HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3), (V6_vmpauhuhsat HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3)>, Requires<[HasV65T, UseHVX]>;
-def: Pat<(int_hexagon_V6_vmpsuhuhsat HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3), (V6_vmpsuhuhsat HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3)>, Requires<[HasV65T, UseHVX]>;
-def: Pat<(int_hexagon_V6_vmpsuhuhsat_128B HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3), (V6_vmpsuhuhsat HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3)>, Requires<[HasV65T, UseHVX]>;
-def: Pat<(int_hexagon_V6_vmpyh_acc HvxWR:$src1, HvxVR:$src2, IntRegs:$src3), (V6_vmpyh_acc HvxWR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV65T, UseHVX]>;
-def: Pat<(int_hexagon_V6_vmpyh_acc_128B HvxWR:$src1, HvxVR:$src2, IntRegs:$src3), (V6_vmpyh_acc HvxWR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV65T, UseHVX]>;
-def: Pat<(int_hexagon_V6_vmpyuhe HvxVR:$src1, IntRegs:$src2), (V6_vmpyuhe HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV65T, UseHVX]>;
-def: Pat<(int_hexagon_V6_vmpyuhe_128B HvxVR:$src1, IntRegs:$src2), (V6_vmpyuhe HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV65T, UseHVX]>;
-def: Pat<(int_hexagon_V6_vmpyuhe_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3), (V6_vmpyuhe_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV65T, UseHVX]>;
-def: Pat<(int_hexagon_V6_vmpyuhe_acc_128B HvxVR:$src1, HvxVR:$src2, IntRegs:$src3), (V6_vmpyuhe_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV65T, UseHVX]>;
-def: Pat<(int_hexagon_V6_vprefixqb HvxQR:$src1), (V6_vprefixqb HvxQR:$src1)>, Requires<[HasV65T, UseHVX]>;
-def: Pat<(int_hexagon_V6_vprefixqb_128B HvxQR:$src1), (V6_vprefixqb HvxQR:$src1)>, Requires<[HasV65T, UseHVX]>;
-def: Pat<(int_hexagon_V6_vprefixqh HvxQR:$src1), (V6_vprefixqh HvxQR:$src1)>, Requires<[HasV65T, UseHVX]>;
-def: Pat<(int_hexagon_V6_vprefixqh_128B HvxQR:$src1), (V6_vprefixqh HvxQR:$src1)>, Requires<[HasV65T, UseHVX]>;
-def: Pat<(int_hexagon_V6_vprefixqw HvxQR:$src1), (V6_vprefixqw HvxQR:$src1)>, Requires<[HasV65T, UseHVX]>;
-def: Pat<(int_hexagon_V6_vprefixqw_128B HvxQR:$src1), (V6_vprefixqw HvxQR:$src1)>, Requires<[HasV65T, UseHVX]>;
-def: Pat<(int_hexagon_V6_vrmpyub_rtt HvxVR:$src1, DoubleRegs:$src2), (V6_vrmpyub_rtt HvxVR:$src1, DoubleRegs:$src2)>, Requires<[HasV65T, UseHVX]>;
-def: Pat<(int_hexagon_V6_vrmpyub_rtt_128B HvxVR:$src1, DoubleRegs:$src2), (V6_vrmpyub_rtt HvxVR:$src1, DoubleRegs:$src2)>, Requires<[HasV65T, UseHVX]>;
-def: Pat<(int_hexagon_V6_vrmpyub_rtt_acc HvxWR:$src1, HvxVR:$src2, DoubleRegs:$src3), (V6_vrmpyub_rtt_acc HvxWR:$src1, HvxVR:$src2, DoubleRegs:$src3)>, Requires<[HasV65T, UseHVX]>;
-def: Pat<(int_hexagon_V6_vrmpyub_rtt_acc_128B HvxWR:$src1, HvxVR:$src2, DoubleRegs:$src3), (V6_vrmpyub_rtt_acc HvxWR:$src1, HvxVR:$src2, DoubleRegs:$src3)>, Requires<[HasV65T, UseHVX]>;
-def: Pat<(int_hexagon_V6_vrmpybub_rtt HvxVR:$src1, DoubleRegs:$src2), (V6_vrmpybub_rtt HvxVR:$src1, DoubleRegs:$src2)>, Requires<[HasV65T, UseHVX]>;
-def: Pat<(int_hexagon_V6_vrmpybub_rtt_128B HvxVR:$src1, DoubleRegs:$src2), (V6_vrmpybub_rtt HvxVR:$src1, DoubleRegs:$src2)>, Requires<[HasV65T, UseHVX]>;
-def: Pat<(int_hexagon_V6_vrmpybub_rtt_acc HvxWR:$src1, HvxVR:$src2, DoubleRegs:$src3), (V6_vrmpybub_rtt_acc HvxWR:$src1, HvxVR:$src2, DoubleRegs:$src3)>, Requires<[HasV65T, UseHVX]>;
-def: Pat<(int_hexagon_V6_vrmpybub_rtt_acc_128B HvxWR:$src1, HvxVR:$src2, DoubleRegs:$src3), (V6_vrmpybub_rtt_acc HvxWR:$src1, HvxVR:$src2, DoubleRegs:$src3)>, Requires<[HasV65T, UseHVX]>;
-def: Pat<(int_hexagon_V6_vscattermw IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4), (V6_vscattermw IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4)>, Requires<[HasV65T, UseHVX]>;
-def: Pat<(int_hexagon_V6_vscattermh IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4), (V6_vscattermh IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4)>, Requires<[HasV65T, UseHVX]>;
-def: Pat<(int_hexagon_V6_vscattermw_add IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4), (V6_vscattermw_add IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4)>, Requires<[HasV65T, UseHVX]>;
-def: Pat<(int_hexagon_V6_vscattermh_add IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4), (V6_vscattermh_add IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4)>, Requires<[HasV65T, UseHVX]>;
-def: Pat<(int_hexagon_V6_vscattermwq HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxVR:$src4, HvxVR:$src5), (V6_vscattermwq HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxVR:$src4, HvxVR:$src5)>, Requires<[HasV65T, UseHVX]>;
-def: Pat<(int_hexagon_V6_vscattermhq HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxVR:$src4, HvxVR:$src5), (V6_vscattermhq HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxVR:$src4, HvxVR:$src5)>, Requires<[HasV65T, UseHVX]>;
-def: Pat<(int_hexagon_V6_vscattermhw IntRegs:$src1, ModRegs:$src2, HvxWR:$src3, HvxVR:$src4), (V6_vscattermhw IntRegs:$src1, ModRegs:$src2, HvxWR:$src3, HvxVR:$src4)>, Requires<[HasV65T, UseHVX]>;
-def: Pat<(int_hexagon_V6_vscattermhw_add IntRegs:$src1, ModRegs:$src2, HvxWR:$src3, HvxVR:$src4), (V6_vscattermhw_add IntRegs:$src1, ModRegs:$src2, HvxWR:$src3, HvxVR:$src4)>, Requires<[HasV65T, UseHVX]>;
-def: Pat<(int_hexagon_V6_vscattermhwq HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxWR:$src4, HvxVR:$src5), (V6_vscattermhwq HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxWR:$src4, HvxVR:$src5)>, Requires<[HasV65T, UseHVX]>;
-def: Pat<(int_hexagon_V6_vscattermw_128B IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4), (V6_vscattermw IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4)>, Requires<[HasV65T, UseHVX]>;
-def: Pat<(int_hexagon_V6_vscattermh_128B IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4), (V6_vscattermh IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4)>, Requires<[HasV65T, UseHVX]>;
-def: Pat<(int_hexagon_V6_vscattermw_add_128B IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4), (V6_vscattermw_add IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4)>, Requires<[HasV65T, UseHVX]>;
-def: Pat<(int_hexagon_V6_vscattermh_add_128B IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4), (V6_vscattermh_add IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4)>, Requires<[HasV65T, UseHVX]>;
-def: Pat<(int_hexagon_V6_vscattermwq_128B HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxVR:$src4, HvxVR:$src5), (V6_vscattermwq HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxVR:$src4, HvxVR:$src5)>, Requires<[HasV65T, UseHVX]>;
-def: Pat<(int_hexagon_V6_vscattermhq_128B HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxVR:$src4, HvxVR:$src5), (V6_vscattermhq HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxVR:$src4, HvxVR:$src5)>, Requires<[HasV65T, UseHVX]>;
-def: Pat<(int_hexagon_V6_vscattermhw_128B IntRegs:$src1, ModRegs:$src2, HvxWR:$src3, HvxVR:$src4), (V6_vscattermhw IntRegs:$src1, ModRegs:$src2, HvxWR:$src3, HvxVR:$src4)>, Requires<[HasV65T, UseHVX]>;
-def: Pat<(int_hexagon_V6_vscattermhw_add_128B IntRegs:$src1, ModRegs:$src2, HvxWR:$src3, HvxVR:$src4), (V6_vscattermhw_add IntRegs:$src1, ModRegs:$src2, HvxWR:$src3, HvxVR:$src4)>, Requires<[HasV65T, UseHVX]>;
-def: Pat<(int_hexagon_V6_vscattermhwq_128B HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxWR:$src4, HvxVR:$src5), (V6_vscattermhwq HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxWR:$src4, HvxVR:$src5)>, Requires<[HasV65T, UseHVX]>;
-def: Pat<(int_hexagon_V6_vdd0), (V6_vdd0)>, Requires<[HasV65T, UseHVX]>;
-def: Pat<(int_hexagon_V6_vdd0_128B), (V6_vdd0)>, Requires<[HasV65T, UseHVX]>;
+def: Pat<(int_hexagon_A6_vcmpbeq_notany DoubleRegs:$src1, DoubleRegs:$src2), (A6_vcmpbeq_notany DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV65]>;
+def: Pat<(int_hexagon_V6_vasruwuhsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3), (V6_vasruwuhsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vasruwuhsat_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3), (V6_vasruwuhsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vasruhubsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3), (V6_vasruhubsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vasruhubsat_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3), (V6_vasruhubsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vasruhubrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3), (V6_vasruhubrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vasruhubrndsat_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3), (V6_vasruhubrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vabsb HvxVR:$src1), (V6_vabsb HvxVR:$src1)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vabsb_128B HvxVR:$src1), (V6_vabsb HvxVR:$src1)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vabsb_sat HvxVR:$src1), (V6_vabsb_sat HvxVR:$src1)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vabsb_sat_128B HvxVR:$src1), (V6_vabsb_sat HvxVR:$src1)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vaslh_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3), (V6_vaslh_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vaslh_acc_128B HvxVR:$src1, HvxVR:$src2, IntRegs:$src3), (V6_vaslh_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vasrh_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3), (V6_vasrh_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vasrh_acc_128B HvxVR:$src1, HvxVR:$src2, IntRegs:$src3), (V6_vasrh_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vavguw HvxVR:$src1, HvxVR:$src2), (V6_vavguw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vavguw_128B HvxVR:$src1, HvxVR:$src2), (V6_vavguw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vavguwrnd HvxVR:$src1, HvxVR:$src2), (V6_vavguwrnd HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vavguwrnd_128B HvxVR:$src1, HvxVR:$src2), (V6_vavguwrnd HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vavgb HvxVR:$src1, HvxVR:$src2), (V6_vavgb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vavgb_128B HvxVR:$src1, HvxVR:$src2), (V6_vavgb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vavgbrnd HvxVR:$src1, HvxVR:$src2), (V6_vavgbrnd HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vavgbrnd_128B HvxVR:$src1, HvxVR:$src2), (V6_vavgbrnd HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vlut4 HvxVR:$src1, DoubleRegs:$src2), (V6_vlut4 HvxVR:$src1, DoubleRegs:$src2)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vlut4_128B HvxVR:$src1, DoubleRegs:$src2), (V6_vlut4 HvxVR:$src1, DoubleRegs:$src2)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vnavgb HvxVR:$src1, HvxVR:$src2), (V6_vnavgb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vnavgb_128B HvxVR:$src1, HvxVR:$src2), (V6_vnavgb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vmpabuu HvxWR:$src1, IntRegs:$src2), (V6_vmpabuu HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vmpabuu_128B HvxWR:$src1, IntRegs:$src2), (V6_vmpabuu HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vmpabuu_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3), (V6_vmpabuu_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vmpabuu_acc_128B HvxWR:$src1, HvxWR:$src2, IntRegs:$src3), (V6_vmpabuu_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vmpahhsat HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3), (V6_vmpahhsat HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vmpahhsat_128B HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3), (V6_vmpahhsat HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vmpauhuhsat HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3), (V6_vmpauhuhsat HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vmpauhuhsat_128B HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3), (V6_vmpauhuhsat HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vmpsuhuhsat HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3), (V6_vmpsuhuhsat HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vmpsuhuhsat_128B HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3), (V6_vmpsuhuhsat HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vmpyh_acc HvxWR:$src1, HvxVR:$src2, IntRegs:$src3), (V6_vmpyh_acc HvxWR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vmpyh_acc_128B HvxWR:$src1, HvxVR:$src2, IntRegs:$src3), (V6_vmpyh_acc HvxWR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vmpyuhe HvxVR:$src1, IntRegs:$src2), (V6_vmpyuhe HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vmpyuhe_128B HvxVR:$src1, IntRegs:$src2), (V6_vmpyuhe HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vmpyuhe_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3), (V6_vmpyuhe_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vmpyuhe_acc_128B HvxVR:$src1, HvxVR:$src2, IntRegs:$src3), (V6_vmpyuhe_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vprefixqb HvxQR:$src1), (V6_vprefixqb HvxQR:$src1)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vprefixqb_128B HvxQR:$src1), (V6_vprefixqb HvxQR:$src1)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vprefixqh HvxQR:$src1), (V6_vprefixqh HvxQR:$src1)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vprefixqh_128B HvxQR:$src1), (V6_vprefixqh HvxQR:$src1)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vprefixqw HvxQR:$src1), (V6_vprefixqw HvxQR:$src1)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vprefixqw_128B HvxQR:$src1), (V6_vprefixqw HvxQR:$src1)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vrmpyub_rtt HvxVR:$src1, DoubleRegs:$src2), (V6_vrmpyub_rtt HvxVR:$src1, DoubleRegs:$src2)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vrmpyub_rtt_128B HvxVR:$src1, DoubleRegs:$src2), (V6_vrmpyub_rtt HvxVR:$src1, DoubleRegs:$src2)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vrmpyub_rtt_acc HvxWR:$src1, HvxVR:$src2, DoubleRegs:$src3), (V6_vrmpyub_rtt_acc HvxWR:$src1, HvxVR:$src2, DoubleRegs:$src3)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vrmpyub_rtt_acc_128B HvxWR:$src1, HvxVR:$src2, DoubleRegs:$src3), (V6_vrmpyub_rtt_acc HvxWR:$src1, HvxVR:$src2, DoubleRegs:$src3)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vrmpybub_rtt HvxVR:$src1, DoubleRegs:$src2), (V6_vrmpybub_rtt HvxVR:$src1, DoubleRegs:$src2)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vrmpybub_rtt_128B HvxVR:$src1, DoubleRegs:$src2), (V6_vrmpybub_rtt HvxVR:$src1, DoubleRegs:$src2)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vrmpybub_rtt_acc HvxWR:$src1, HvxVR:$src2, DoubleRegs:$src3), (V6_vrmpybub_rtt_acc HvxWR:$src1, HvxVR:$src2, DoubleRegs:$src3)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vrmpybub_rtt_acc_128B HvxWR:$src1, HvxVR:$src2, DoubleRegs:$src3), (V6_vrmpybub_rtt_acc HvxWR:$src1, HvxVR:$src2, DoubleRegs:$src3)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vscattermw IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4), (V6_vscattermw IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vscattermh IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4), (V6_vscattermh IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vscattermw_add IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4), (V6_vscattermw_add IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vscattermh_add IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4), (V6_vscattermh_add IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vscattermwq HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxVR:$src4, HvxVR:$src5), (V6_vscattermwq HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxVR:$src4, HvxVR:$src5)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vscattermhq HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxVR:$src4, HvxVR:$src5), (V6_vscattermhq HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxVR:$src4, HvxVR:$src5)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vscattermhw IntRegs:$src1, ModRegs:$src2, HvxWR:$src3, HvxVR:$src4), (V6_vscattermhw IntRegs:$src1, ModRegs:$src2, HvxWR:$src3, HvxVR:$src4)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vscattermhw_add IntRegs:$src1, ModRegs:$src2, HvxWR:$src3, HvxVR:$src4), (V6_vscattermhw_add IntRegs:$src1, ModRegs:$src2, HvxWR:$src3, HvxVR:$src4)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vscattermhwq HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxWR:$src4, HvxVR:$src5), (V6_vscattermhwq HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxWR:$src4, HvxVR:$src5)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vscattermw_128B IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4), (V6_vscattermw IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vscattermh_128B IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4), (V6_vscattermh IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vscattermw_add_128B IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4), (V6_vscattermw_add IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vscattermh_add_128B IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4), (V6_vscattermh_add IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vscattermwq_128B HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxVR:$src4, HvxVR:$src5), (V6_vscattermwq HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxVR:$src4, HvxVR:$src5)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vscattermhq_128B HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxVR:$src4, HvxVR:$src5), (V6_vscattermhq HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxVR:$src4, HvxVR:$src5)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vscattermhw_128B IntRegs:$src1, ModRegs:$src2, HvxWR:$src3, HvxVR:$src4), (V6_vscattermhw IntRegs:$src1, ModRegs:$src2, HvxWR:$src3, HvxVR:$src4)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vscattermhw_add_128B IntRegs:$src1, ModRegs:$src2, HvxWR:$src3, HvxVR:$src4), (V6_vscattermhw_add IntRegs:$src1, ModRegs:$src2, HvxWR:$src3, HvxVR:$src4)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vscattermhwq_128B HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxWR:$src4, HvxVR:$src5), (V6_vscattermhwq HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxWR:$src4, HvxVR:$src5)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vdd0), (V6_vdd0)>, Requires<[HasV65, UseHVX]>;
+def: Pat<(int_hexagon_V6_vdd0_128B), (V6_vdd0)>, Requires<[HasV65, UseHVX]>;
diff --git a/lib/Target/Hexagon/HexagonNewValueJump.cpp b/lib/Target/Hexagon/HexagonNewValueJump.cpp
index ffa447cc1311..f2a6627c99be 100644
--- a/lib/Target/Hexagon/HexagonNewValueJump.cpp
+++ b/lib/Target/Hexagon/HexagonNewValueJump.cpp
@@ -16,7 +16,7 @@
// The basic approach looks for a sequence of a predicated jump, the compare instruction
// that generates the predicate, and the feeder to the predicate. Once it finds
-// all, it collapses compare and jump instruction into a new valu jump
+// all, it collapses the compare and jump instructions into a new value jump
// instruction.
//
//===----------------------------------------------------------------------===//
@@ -24,6 +24,7 @@
#include "Hexagon.h"
#include "HexagonInstrInfo.h"
#include "HexagonRegisterInfo.h"
+#include "HexagonSubtarget.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
@@ -95,7 +96,7 @@ namespace {
const HexagonInstrInfo *QII;
const HexagonRegisterInfo *QRI;
- /// \brief A handle to the branch probability pass.
+ /// A handle to the branch probability pass.
const MachineBranchProbabilityInfo *MBPI;
bool isNewValueJumpCandidate(const MachineInstr &MI) const;
@@ -142,8 +143,24 @@ static bool canBeFeederToNewValueJump(const HexagonInstrInfo *QII,
if (QII->isSolo(*II))
return false;
- // Make sure there there is no 'def' or 'use' of any of the uses of
- // feeder insn between it's definition, this MI and jump, jmpInst
+ if (QII->isFloat(*II))
+ return false;
+
+ // Make sure that the (unique) def operand is a register from IntRegs.
+ bool HadDef = false;
+ for (const MachineOperand &Op : II->operands()) {
+ if (!Op.isReg() || !Op.isDef())
+ continue;
+ if (HadDef)
+ return false;
+ HadDef = true;
+ if (!Hexagon::IntRegsRegClass.contains(Op.getReg()))
+ return false;
+ }
+ assert(HadDef);
+
+ // Make sure there is no 'def' or 'use' of any of the uses of
+ // feeder insn between its definition, this MI and jump, jmpInst
// skipping compare, cmpInst.
// Here's the example.
// r21=memub(r22+r24<<#0)
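The check above amounts to a linear scan over the window between the feeder and
the jump. A minimal sketch of that scan, assuming the usual MachineInstr query
API (the helper name and iteration bounds are illustrative, not the in-tree
code):

    // Reject the feeder if any register it uses is defined or used again
    // between the feeder and the jump; the compare itself is exempt,
    // since it is merged into the new-value jump.
    static bool feederWindowIsClean(MachineBasicBlock::iterator Feeder,
                                    MachineBasicBlock::iterator Jump,
                                    const MachineInstr &Cmp) {
      for (auto I = std::next(Feeder); I != Jump; ++I) {
        if (&*I == &Cmp || I->isDebugInstr())
          continue;
        for (const MachineOperand &MO : Feeder->uses())
          if (MO.isReg() && (I->readsRegister(MO.getReg()) ||
                             I->modifiesRegister(MO.getReg())))
            return false;
      }
      return true;
    }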
@@ -270,8 +287,8 @@ static bool canCompareBeNewValueJump(const HexagonInstrInfo *QII,
if (cmpReg1 == cmpOp2)
return false;
- // Make sure that that second register is not from COPY
- // At machine code level, we don't need this, but if we decide
+ // Make sure that the second register is not from COPY.
+ // At the machine code level we don't need this, but if we decide
 // to move new value jump prior to RA, we would need it.
MachineRegisterInfo &MRI = MF.getRegInfo();
if (secondReg && !TargetRegisterInfo::isPhysicalRegister(cmpOp2)) {
@@ -285,7 +302,7 @@ static bool canCompareBeNewValueJump(const HexagonInstrInfo *QII,
// and satisfy the following conditions.
++II;
for (MachineBasicBlock::iterator localII = II; localII != end; ++localII) {
- if (localII->isDebugValue())
+ if (localII->isDebugInstr())
continue;
// Check 1.
@@ -431,8 +448,8 @@ bool HexagonNewValueJump::isNewValueJumpCandidate(
}
bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) {
- DEBUG(dbgs() << "********** Hexagon New Value Jump **********\n"
- << "********** Function: " << MF.getName() << "\n");
+ LLVM_DEBUG(dbgs() << "********** Hexagon New Value Jump **********\n"
+ << "********** Function: " << MF.getName() << "\n");
if (skipFunction(MF.getFunction()))
return false;
@@ -445,9 +462,9 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) {
MF.getSubtarget().getRegisterInfo());
MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
- if (DisableNewValueJumps) {
+ if (DisableNewValueJumps ||
+ !MF.getSubtarget<HexagonSubtarget>().useNewValueJumps())
return false;
- }
int nvjCount = DbgNVJCount;
int nvjGenerated = 0;
@@ -457,9 +474,10 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) {
MBBb != MBBe; ++MBBb) {
MachineBasicBlock *MBB = &*MBBb;
- DEBUG(dbgs() << "** dumping bb ** " << MBB->getNumber() << "\n");
- DEBUG(MBB->dump());
- DEBUG(dbgs() << "\n" << "********** dumping instr bottom up **********\n");
+ LLVM_DEBUG(dbgs() << "** dumping bb ** " << MBB->getNumber() << "\n");
+ LLVM_DEBUG(MBB->dump());
+ LLVM_DEBUG(dbgs() << "\n"
+ << "********** dumping instr bottom up **********\n");
bool foundJump = false;
bool foundCompare = false;
bool invertPredicate = false;
@@ -477,14 +495,14 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) {
for (MachineBasicBlock::iterator MII = MBB->end(), E = MBB->begin();
MII != E;) {
MachineInstr &MI = *--MII;
- if (MI.isDebugValue()) {
+ if (MI.isDebugInstr()) {
continue;
}
if ((nvjCount == 0) || (nvjCount > -1 && nvjCount <= nvjGenerated))
break;
- DEBUG(dbgs() << "Instr: "; MI.dump(); dbgs() << "\n");
+ LLVM_DEBUG(dbgs() << "Instr: "; MI.dump(); dbgs() << "\n");
if (!foundJump && (MI.getOpcode() == Hexagon::J2_jumpt ||
MI.getOpcode() == Hexagon::J2_jumptpt ||
@@ -505,7 +523,7 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) {
// operands, the following check on the kill flag would suffice.
// if(!jmpInstr->getOperand(0).isKill()) break;
- // This predicate register is live out out of BB
+ // This predicate register is live out of BB
// this would only work if we can actually use Live
 // variable analysis on phys regs - but LLVM does not
// provide LV analysis on phys regs.
diff --git a/lib/Target/Hexagon/HexagonOptAddrMode.cpp b/lib/Target/Hexagon/HexagonOptAddrMode.cpp
index 4738a4d32409..29c044b3b729 100644
--- a/lib/Target/Hexagon/HexagonOptAddrMode.cpp
+++ b/lib/Target/Hexagon/HexagonOptAddrMode.cpp
@@ -27,6 +27,7 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Pass.h"
@@ -78,7 +79,9 @@ private:
using MISetType = DenseSet<MachineInstr *>;
using InstrEvalMap = DenseMap<MachineInstr *, bool>;
+ MachineRegisterInfo *MRI = nullptr;
const HexagonInstrInfo *HII = nullptr;
+ const HexagonRegisterInfo *HRI = nullptr;
MachineDominatorTree *MDT = nullptr;
DataFlowGraph *DFG = nullptr;
DataFlowGraph::DefStackMap DefM;
@@ -88,11 +91,16 @@ private:
bool processBlock(NodeAddr<BlockNode *> BA);
bool xformUseMI(MachineInstr *TfrMI, MachineInstr *UseMI,
NodeAddr<UseNode *> UseN, unsigned UseMOnum);
+ bool processAddUses(NodeAddr<StmtNode *> AddSN, MachineInstr *AddMI,
+ const NodeList &UNodeList);
+ bool updateAddUses(MachineInstr *AddMI, MachineInstr *UseMI);
bool analyzeUses(unsigned DefR, const NodeList &UNodeList,
InstrEvalMap &InstrEvalResult, short &SizeInc);
bool hasRepForm(MachineInstr &MI, unsigned TfrDefR);
bool canRemoveAddasl(NodeAddr<StmtNode *> AddAslSN, MachineInstr &MI,
const NodeList &UNodeList);
+ bool isSafeToExtLR(NodeAddr<StmtNode *> SN, MachineInstr *MI,
+ unsigned LRExtReg, const NodeList &UNodeList);
void getAllRealUses(NodeAddr<StmtNode *> SN, NodeList &UNodeList);
bool allValidCandidates(NodeAddr<StmtNode *> SA, NodeList &UNodeList);
short getBaseWithLongOffset(const MachineInstr &MI) const;
@@ -101,6 +109,7 @@ private:
bool changeLoad(MachineInstr *OldMI, MachineOperand ImmOp, unsigned ImmOpNum);
bool changeAddAsl(NodeAddr<UseNode *> AddAslUN, MachineInstr *AddAslMI,
const MachineOperand &ImmOp, unsigned ImmOpNum);
+ bool isValidOffset(MachineInstr *MI, int Offset);
};
} // end anonymous namespace
@@ -208,7 +217,7 @@ bool HexagonOptAddrMode::allValidCandidates(NodeAddr<StmtNode *> SA,
NodeSet Visited, Defs;
const auto &P = LV->getAllReachingDefsRec(UR, UN, Visited, Defs);
if (!P.second) {
- DEBUG({
+ LLVM_DEBUG({
dbgs() << "*** Unable to collect all reaching defs for use ***\n"
<< PrintNode<UseNode*>(UN, *DFG) << '\n'
<< "The program's complexity may exceed the limits.\n";
@@ -217,7 +226,7 @@ bool HexagonOptAddrMode::allValidCandidates(NodeAddr<StmtNode *> SA,
}
const auto &ReachingDefs = P.first;
if (ReachingDefs.size() > 1) {
- DEBUG({
+ LLVM_DEBUG({
dbgs() << "*** Multiple Reaching Defs found!!! ***\n";
for (auto DI : ReachingDefs) {
NodeAddr<UseNode *> DA = DFG->addr<UseNode *>(DI);
@@ -235,15 +244,15 @@ bool HexagonOptAddrMode::allValidCandidates(NodeAddr<StmtNode *> SA,
void HexagonOptAddrMode::getAllRealUses(NodeAddr<StmtNode *> SA,
NodeList &UNodeList) {
for (NodeAddr<DefNode *> DA : SA.Addr->members_if(DFG->IsDef, *DFG)) {
- DEBUG(dbgs() << "\t\t[DefNode]: " << Print<NodeAddr<DefNode *>>(DA, *DFG)
- << "\n");
+ LLVM_DEBUG(dbgs() << "\t\t[DefNode]: "
+ << Print<NodeAddr<DefNode *>>(DA, *DFG) << "\n");
RegisterRef DR = DFG->getPRI().normalize(DA.Addr->getRegRef(*DFG));
auto UseSet = LV->getAllReachedUses(DR, DA);
for (auto UI : UseSet) {
NodeAddr<UseNode *> UA = DFG->addr<UseNode *>(UI);
- DEBUG({
+ LLVM_DEBUG({
NodeAddr<StmtNode *> TempIA = UA.Addr->getOwner(*DFG);
dbgs() << "\t\t\t[Reached Use]: "
<< Print<NodeAddr<InstrNode *>>(TempIA, *DFG) << "\n";
@@ -253,8 +262,8 @@ void HexagonOptAddrMode::getAllRealUses(NodeAddr<StmtNode *> SA,
NodeAddr<PhiNode *> PA = UA.Addr->getOwner(*DFG);
NodeId id = PA.Id;
const Liveness::RefMap &phiUse = LV->getRealUses(id);
- DEBUG(dbgs() << "\t\t\t\tphi real Uses"
- << Print<Liveness::RefMap>(phiUse, *DFG) << "\n");
+ LLVM_DEBUG(dbgs() << "\t\t\t\tphi real Uses"
+ << Print<Liveness::RefMap>(phiUse, *DFG) << "\n");
if (!phiUse.empty()) {
for (auto I : phiUse) {
if (!DFG->getPRI().alias(RegisterRef(I.first), DR))
@@ -272,6 +281,153 @@ void HexagonOptAddrMode::getAllRealUses(NodeAddr<StmtNode *> SA,
}
}
+bool HexagonOptAddrMode::isSafeToExtLR(NodeAddr<StmtNode *> SN,
+ MachineInstr *MI, unsigned LRExtReg,
+ const NodeList &UNodeList) {
+ RegisterRef LRExtRR;
+ NodeId LRExtRegRD = 0;
+ // Iterate through all the UseNodes in SN and find the reaching def
+ // for the LRExtReg.
+ for (NodeAddr<UseNode *> UA : SN.Addr->members_if(DFG->IsUse, *DFG)) {
+ RegisterRef RR = UA.Addr->getRegRef(*DFG);
+ if (LRExtReg == RR.Reg) {
+ LRExtRR = RR;
+ LRExtRegRD = UA.Addr->getReachingDef();
+ }
+ }
+
+ for (auto I = UNodeList.rbegin(), E = UNodeList.rend(); I != E; ++I) {
+ NodeAddr<UseNode *> UA = *I;
+ NodeAddr<InstrNode *> IA = UA.Addr->getOwner(*DFG);
+ // The reaching def of LRExtRR at load/store node should be same as the
+ // one reaching at the SN.
+ if (UA.Addr->getFlags() & NodeAttrs::PhiRef)
+ return false;
+ NodeAddr<RefNode*> AA = LV->getNearestAliasedRef(LRExtRR, IA);
+ if ((DFG->IsDef(AA) && AA.Id != LRExtRegRD) ||
+ AA.Addr->getReachingDef() != LRExtRegRD) {
+ LLVM_DEBUG(
+ dbgs() << "isSafeToExtLR: Returning false; another reaching def\n");
+ return false;
+ }
+
+ MachineInstr *UseMI = NodeAddr<StmtNode *>(IA).Addr->getCode();
+ NodeAddr<DefNode *> LRExtRegDN = DFG->addr<DefNode *>(LRExtRegRD);
+ // Reaching Def to LRExtReg can't be a phi.
+ if ((LRExtRegDN.Addr->getFlags() & NodeAttrs::PhiRef) &&
+ MI->getParent() != UseMI->getParent())
+ return false;
+ }
+ return true;
+}
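+// For illustration (hypothetical registers), the extension is unsafe when
+// the base register is redefined inside the window:
+//   Rx = add(Rt,#4)
+//   Rt = ...              <- another reaching def of Rt
+//   memw(Rx+#0) = Rs      <- must not be rewritten to memw(Rt+#4) = Rs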
+
+bool HexagonOptAddrMode::isValidOffset(MachineInstr *MI, int Offset) {
+ unsigned AlignMask = 0;
+ switch (HII->getMemAccessSize(*MI)) {
+ case HexagonII::MemAccessSize::DoubleWordAccess:
+ AlignMask = 0x7;
+ break;
+ case HexagonII::MemAccessSize::WordAccess:
+ AlignMask = 0x3;
+ break;
+ case HexagonII::MemAccessSize::HalfWordAccess:
+ AlignMask = 0x1;
+ break;
+ case HexagonII::MemAccessSize::ByteAccess:
+ AlignMask = 0x0;
+ break;
+ default:
+ return false;
+ }
+
+ if ((AlignMask & Offset) != 0)
+ return false;
+ return HII->isValidOffset(MI->getOpcode(), Offset, HRI, false);
+}
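+// For example, a word access has AlignMask 0x3: offset 6 fails the test
+// (6 & 0x3 == 2), while offset 8 passes (8 & 0x3 == 0) and is then
+// range-checked by HII->isValidOffset().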
+
+bool HexagonOptAddrMode::processAddUses(NodeAddr<StmtNode *> AddSN,
+ MachineInstr *AddMI,
+ const NodeList &UNodeList) {
+
+ unsigned AddDefR = AddMI->getOperand(0).getReg();
+ for (auto I = UNodeList.rbegin(), E = UNodeList.rend(); I != E; ++I) {
+ NodeAddr<UseNode *> UN = *I;
+ NodeAddr<StmtNode *> SN = UN.Addr->getOwner(*DFG);
+ MachineInstr *MI = SN.Addr->getCode();
+ const MCInstrDesc &MID = MI->getDesc();
+ if ((!MID.mayLoad() && !MID.mayStore()) ||
+ HII->getAddrMode(*MI) != HexagonII::BaseImmOffset ||
+ HII->isHVXVec(*MI))
+ return false;
+
+ MachineOperand BaseOp = MID.mayLoad() ? MI->getOperand(1)
+ : MI->getOperand(0);
+
+ if (!BaseOp.isReg() || BaseOp.getReg() != AddDefR)
+ return false;
+
+ MachineOperand OffsetOp = MID.mayLoad() ? MI->getOperand(2)
+ : MI->getOperand(1);
+ if (!OffsetOp.isImm())
+ return false;
+
+ int64_t newOffset = OffsetOp.getImm() + AddMI->getOperand(2).getImm();
+ if (!isValidOffset(MI, newOffset))
+ return false;
+
+ // Since we'll be extending the live range of Rt in the following example,
+ // make sure that this is safe: another definition of Rt must not exist
+ // between the 'add' and the load/store instruction.
+ //
+ // Ex: Rx= add(Rt,#10)
+ // memw(Rx+#0) = Rs
+ // will be replaced with => memw(Rt+#10) = Rs
+ unsigned BaseReg = AddMI->getOperand(1).getReg();
+ if (!isSafeToExtLR(AddSN, AddMI, BaseReg, UNodeList))
+ return false;
+ }
+
+ // Update all the uses of 'add' with the appropriate base and offset
+ // values.
+ bool Changed = false;
+ for (auto I = UNodeList.rbegin(), E = UNodeList.rend(); I != E; ++I) {
+ NodeAddr<UseNode *> UseN = *I;
+ assert(!(UseN.Addr->getFlags() & NodeAttrs::PhiRef) &&
+ "Found a PhiRef node as a real reached use!!");
+
+ NodeAddr<StmtNode *> OwnerN = UseN.Addr->getOwner(*DFG);
+ MachineInstr *UseMI = OwnerN.Addr->getCode();
+ LLVM_DEBUG(dbgs() << "\t\t[MI <BB#" << UseMI->getParent()->getNumber()
+ << ">]: " << *UseMI << "\n");
+ Changed |= updateAddUses(AddMI, UseMI);
+ }
+
+ if (Changed)
+ Deleted.insert(AddMI);
+
+ return Changed;
+}
+
+bool HexagonOptAddrMode::updateAddUses(MachineInstr *AddMI,
+ MachineInstr *UseMI) {
+ const MachineOperand ImmOp = AddMI->getOperand(2);
+ const MachineOperand AddRegOp = AddMI->getOperand(1);
+ unsigned newReg = AddRegOp.getReg();
+ const MCInstrDesc &MID = UseMI->getDesc();
+
+ MachineOperand &BaseOp = MID.mayLoad() ? UseMI->getOperand(1)
+ : UseMI->getOperand(0);
+ MachineOperand &OffsetOp = MID.mayLoad() ? UseMI->getOperand(2)
+ : UseMI->getOperand(1);
+ BaseOp.setReg(newReg);
+ BaseOp.setIsUndef(AddRegOp.isUndef());
+ BaseOp.setImplicit(AddRegOp.isImplicit());
+ OffsetOp.setImm(ImmOp.getImm() + OffsetOp.getImm());
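+ // The rewrite extends the live range of the add's source register past
+ // any old kill point, so stale kill flags on it must be cleared.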
+ MRI->clearKillFlags(newReg);
+
+ return true;
+}
+
bool HexagonOptAddrMode::analyzeUses(unsigned tfrDefR,
const NodeList &UNodeList,
InstrEvalMap &InstrEvalResult,
@@ -296,7 +452,7 @@ bool HexagonOptAddrMode::analyzeUses(unsigned tfrDefR,
} else if (MI.getOpcode() == Hexagon::S2_addasl_rrri) {
NodeList AddaslUseList;
- DEBUG(dbgs() << "\nGetting ReachedUses for === " << MI << "\n");
+ LLVM_DEBUG(dbgs() << "\nGetting ReachedUses for === " << MI << "\n");
getAllRealUses(SN, AddaslUseList);
// Process phi nodes.
if (allValidCandidates(SN, AddaslUseList) &&
@@ -360,8 +516,8 @@ bool HexagonOptAddrMode::changeLoad(MachineInstr *OldMI, MachineOperand ImmOp,
} else
Changed = false;
- DEBUG(dbgs() << "[Changing]: " << *OldMI << "\n");
- DEBUG(dbgs() << "[TO]: " << *MIB << "\n");
+ LLVM_DEBUG(dbgs() << "[Changing]: " << *OldMI << "\n");
+ LLVM_DEBUG(dbgs() << "[TO]: " << *MIB << "\n");
} else if (ImmOpNum == 2 && OldMI->getOperand(3).getImm() == 0) {
short NewOpCode = HII->changeAddrMode_rr_io(*OldMI);
assert(NewOpCode >= 0 && "Invalid New opcode\n");
@@ -371,8 +527,8 @@ bool HexagonOptAddrMode::changeLoad(MachineInstr *OldMI, MachineOperand ImmOp,
MIB.add(ImmOp);
OpStart = 4;
Changed = true;
- DEBUG(dbgs() << "[Changing]: " << *OldMI << "\n");
- DEBUG(dbgs() << "[TO]: " << *MIB << "\n");
+ LLVM_DEBUG(dbgs() << "[Changing]: " << *OldMI << "\n");
+ LLVM_DEBUG(dbgs() << "[TO]: " << *MIB << "\n");
}
if (Changed)
@@ -413,8 +569,8 @@ bool HexagonOptAddrMode::changeStore(MachineInstr *OldMI, MachineOperand ImmOp,
OpStart = 3;
}
Changed = true;
- DEBUG(dbgs() << "[Changing]: " << *OldMI << "\n");
- DEBUG(dbgs() << "[TO]: " << *MIB << "\n");
+ LLVM_DEBUG(dbgs() << "[Changing]: " << *OldMI << "\n");
+ LLVM_DEBUG(dbgs() << "[TO]: " << *MIB << "\n");
} else if (ImmOpNum == 1 && OldMI->getOperand(2).getImm() == 0) {
short NewOpCode = HII->changeAddrMode_rr_io(*OldMI);
assert(NewOpCode >= 0 && "Invalid New opcode\n");
@@ -423,8 +579,8 @@ bool HexagonOptAddrMode::changeStore(MachineInstr *OldMI, MachineOperand ImmOp,
MIB.add(ImmOp);
OpStart = 3;
Changed = true;
- DEBUG(dbgs() << "[Changing]: " << *OldMI << "\n");
- DEBUG(dbgs() << "[TO]: " << *MIB << "\n");
+ LLVM_DEBUG(dbgs() << "[Changing]: " << *OldMI << "\n");
+ LLVM_DEBUG(dbgs() << "[TO]: " << *MIB << "\n");
}
if (Changed)
for (unsigned i = OpStart; i < OpEnd; ++i)
@@ -447,7 +603,7 @@ bool HexagonOptAddrMode::changeAddAsl(NodeAddr<UseNode *> AddAslUN,
unsigned ImmOpNum) {
NodeAddr<StmtNode *> SA = AddAslUN.Addr->getOwner(*DFG);
- DEBUG(dbgs() << "Processing addasl :" << *AddAslMI << "\n");
+ LLVM_DEBUG(dbgs() << "Processing addasl :" << *AddAslMI << "\n");
NodeList UNodeList;
getAllRealUses(SA, UNodeList);
@@ -458,11 +614,11 @@ bool HexagonOptAddrMode::changeAddAsl(NodeAddr<UseNode *> AddAslUN,
"Can't transform this 'AddAsl' instruction!");
NodeAddr<StmtNode *> UseIA = UseUN.Addr->getOwner(*DFG);
- DEBUG(dbgs() << "[InstrNode]: " << Print<NodeAddr<InstrNode *>>(UseIA, *DFG)
- << "\n");
+ LLVM_DEBUG(dbgs() << "[InstrNode]: "
+ << Print<NodeAddr<InstrNode *>>(UseIA, *DFG) << "\n");
MachineInstr *UseMI = UseIA.Addr->getCode();
- DEBUG(dbgs() << "[MI <" << printMBBReference(*UseMI->getParent())
- << ">]: " << *UseMI << "\n");
+ LLVM_DEBUG(dbgs() << "[MI <" << printMBBReference(*UseMI->getParent())
+ << ">]: " << *UseMI << "\n");
const MCInstrDesc &UseMID = UseMI->getDesc();
assert(HII->getAddrMode(*UseMI) == HexagonII::BaseImmOffset);
@@ -534,13 +690,15 @@ bool HexagonOptAddrMode::processBlock(NodeAddr<BlockNode *> BA) {
NodeAddr<StmtNode *> SA = IA;
MachineInstr *MI = SA.Addr->getCode();
- if (MI->getOpcode() != Hexagon::A2_tfrsi ||
- !MI->getOperand(1).isGlobal())
- continue;
+ if ((MI->getOpcode() != Hexagon::A2_tfrsi ||
+ !MI->getOperand(1).isGlobal()) &&
+ (MI->getOpcode() != Hexagon::A2_addi ||
+ !MI->getOperand(2).isImm() || HII->isConstExtended(*MI)))
+ continue;
- DEBUG(dbgs() << "[Analyzing " << HII->getName(MI->getOpcode()) << "]: "
- << *MI << "\n\t[InstrNode]: "
- << Print<NodeAddr<InstrNode *>>(IA, *DFG) << '\n');
+ LLVM_DEBUG(dbgs() << "[Analyzing " << HII->getName(MI->getOpcode())
+ << "]: " << *MI << "\n\t[InstrNode]: "
+ << Print<NodeAddr<InstrNode *>>(IA, *DFG) << '\n');
NodeList UNodeList;
getAllRealUses(SA, UNodeList);
@@ -548,6 +706,21 @@ bool HexagonOptAddrMode::processBlock(NodeAddr<BlockNode *> BA) {
if (!allValidCandidates(SA, UNodeList))
continue;
+ // Analyze all uses of 'add'. If the output of 'add' is used as an address
+ // in the base+immediate addressing mode load/store instructions, see if
+ // they can be updated to use the immediate value as an offset, thus
+ // giving us the opportunity to eliminate the 'add'.
+ // Ex: Rx= add(Rt,#12)
+ // memw(Rx+#0) = Rs
+ // This can be replaced with memw(Rt+#12) = Rs
+ //
+ // This transformation is only performed if all uses can be updated and
+ // the offset isn't required to be constant extended.
+ if (MI->getOpcode() == Hexagon::A2_addi) {
+ Changed |= processAddUses(SA, MI, UNodeList);
+ continue;
+ }
+
short SizeInc = 0;
unsigned DefR = MI->getOperand(0).getReg();
InstrEvalMap InstrEvalResult;
@@ -561,8 +734,9 @@ bool HexagonOptAddrMode::processBlock(NodeAddr<BlockNode *> BA) {
bool KeepTfr = false;
- DEBUG(dbgs() << "\t[Total reached uses] : " << UNodeList.size() << "\n");
- DEBUG(dbgs() << "\t[Processing Reached Uses] ===\n");
+ LLVM_DEBUG(dbgs() << "\t[Total reached uses] : " << UNodeList.size()
+ << "\n");
+ LLVM_DEBUG(dbgs() << "\t[Processing Reached Uses] ===\n");
for (auto I = UNodeList.rbegin(), E = UNodeList.rend(); I != E; ++I) {
NodeAddr<UseNode *> UseN = *I;
assert(!(UseN.Addr->getFlags() & NodeAttrs::PhiRef) &&
@@ -570,8 +744,8 @@ bool HexagonOptAddrMode::processBlock(NodeAddr<BlockNode *> BA) {
NodeAddr<StmtNode *> OwnerN = UseN.Addr->getOwner(*DFG);
MachineInstr *UseMI = OwnerN.Addr->getCode();
- DEBUG(dbgs() << "\t\t[MI <" << printMBBReference(*UseMI->getParent())
- << ">]: " << *UseMI << "\n");
+ LLVM_DEBUG(dbgs() << "\t\t[MI <" << printMBBReference(*UseMI->getParent())
+ << ">]: " << *UseMI << "\n");
int UseMOnum = -1;
unsigned NumOperands = UseMI->getNumOperands();
@@ -580,9 +754,11 @@ bool HexagonOptAddrMode::processBlock(NodeAddr<BlockNode *> BA) {
if (op.isReg() && op.isUse() && DefR == op.getReg())
UseMOnum = j;
}
- assert(UseMOnum >= 0 && "Invalid reached use!");
+ // It is possible that the register will not be found in any operand.
+ // This could happen, for example, when DefR = R4, but the used
+ // register is D2.
- if (InstrEvalResult[UseMI])
+ if (UseMOnum >= 0 && InstrEvalResult[UseMI])
// Change UseMI if replacement is possible.
Changed |= xformUseMI(MI, UseMI, UseN, UseMOnum);
else
@@ -600,27 +776,27 @@ bool HexagonOptAddrMode::runOnMachineFunction(MachineFunction &MF) {
bool Changed = false;
auto &HST = MF.getSubtarget<HexagonSubtarget>();
- auto &MRI = MF.getRegInfo();
+ MRI = &MF.getRegInfo();
HII = HST.getInstrInfo();
+ HRI = HST.getRegisterInfo();
const auto &MDF = getAnalysis<MachineDominanceFrontier>();
MDT = &getAnalysis<MachineDominatorTree>();
- const auto &TRI = *MF.getSubtarget().getRegisterInfo();
const TargetOperandInfo TOI(*HII);
- DataFlowGraph G(MF, *HII, TRI, *MDT, MDF, TOI);
+ DataFlowGraph G(MF, *HII, *HRI, *MDT, MDF, TOI);
// Need to keep dead phis because we can propagate uses of registers into
// nodes dominated by those would-be phis.
G.build(BuildOptions::KeepDeadPhis);
DFG = &G;
- Liveness L(MRI, *DFG);
+ Liveness L(*MRI, *DFG);
L.computePhiInfo();
LV = &L;
Deleted.clear();
NodeAddr<FuncNode *> FA = DFG->getFunc();
- DEBUG(dbgs() << "==== [RefMap#]=====:\n "
- << Print<NodeAddr<FuncNode *>>(FA, *DFG) << "\n");
+ LLVM_DEBUG(dbgs() << "==== [RefMap#]=====:\n "
+ << Print<NodeAddr<FuncNode *>>(FA, *DFG) << "\n");
for (NodeAddr<BlockNode *> BA : FA.Addr->members(*DFG))
Changed |= processBlock(BA);
diff --git a/lib/Target/Hexagon/HexagonPatterns.td b/lib/Target/Hexagon/HexagonPatterns.td
index cdc2085986a5..384fda4ce39a 100644
--- a/lib/Target/Hexagon/HexagonPatterns.td
+++ b/lib/Target/Hexagon/HexagonPatterns.td
@@ -100,6 +100,17 @@ def HWI8: PatLeaf<(VecPI8 HvxWR:$R)>;
def HWI16: PatLeaf<(VecPI16 HvxWR:$R)>;
def HWI32: PatLeaf<(VecPI32 HvxWR:$R)>;
+def SDTVecVecIntOp:
+ SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisVec<1>, SDTCisSameAs<1,2>,
+ SDTCisVT<3,i32>]>;
+
+def HexagonVALIGN: SDNode<"HexagonISD::VALIGN", SDTVecVecIntOp>;
+def HexagonVALIGNADDR: SDNode<"HexagonISD::VALIGNADDR", SDTIntUnaryOp>;
+
+def valign: PatFrag<(ops node:$Vt, node:$Vs, node:$Ru),
+ (HexagonVALIGN node:$Vt, node:$Vs, node:$Ru)>;
+def valignaddr: PatFrag<(ops node:$Addr), (HexagonVALIGNADDR node:$Addr)>;
+
// Pattern fragments to extract the low and high subregisters from a
// 64-bit value.
def LoReg: OutPatFrag<(ops node:$Rs), (EXTRACT_SUBREG (i64 $Rs), isub_lo)>;
@@ -109,16 +120,6 @@ def IsOrAdd: PatFrag<(ops node:$A, node:$B), (or node:$A, node:$B), [{
return isOrEquivalentToAdd(N);
}]>;
-def IsVecOff : PatLeaf<(i32 imm), [{
- int32_t V = N->getSExtValue();
- int32_t VecSize = HRI->getSpillSize(Hexagon::HvxVRRegClass);
- assert(isPowerOf2_32(VecSize));
- if ((uint32_t(V) & (uint32_t(VecSize)-1)) != 0)
- return false;
- int32_t L = Log2_32(VecSize);
- return isInt<4>(V >> L);
-}]>;
-
def IsPow2_32: PatLeaf<(i32 imm), [{
uint32_t V = N->getZExtValue();
return isPowerOf2_32(V);
@@ -214,7 +215,7 @@ def NegImm32: SDNodeXForm<imm, [{
// Helpers for type promotions/contractions.
def I1toI32: OutPatFrag<(ops node:$Rs), (C2_muxii (i1 $Rs), 1, 0)>;
-def I32toI1: OutPatFrag<(ops node:$Rs), (i1 (C2_tfrrp (i32 $Rs)))>;
+def I32toI1: OutPatFrag<(ops node:$Rs), (i1 (C2_cmpgtui (i32 $Rs), (i32 0)))>;
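+// Note: cmp.gtu(Rs,#0) is true exactly when Rs is nonzero, giving the
+// usual integer-to-bool semantics; C2_tfrrp merely reinterpreted the low
+// bits of Rs as a predicate.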
def ToZext64: OutPatFrag<(ops node:$Rs), (i64 (A4_combineir 0, (i32 $Rs)))>;
def ToSext64: OutPatFrag<(ops node:$Rs), (i64 (A2_sxtw (i32 $Rs)))>;
@@ -249,23 +250,6 @@ def: Pat<(IsOrAdd (i32 AddrFI:$Rs), s32_0ImmPred:$off),
(PS_fi (i32 AddrFI:$Rs), imm:$off)>;
-def alignedload: PatFrag<(ops node:$a), (load $a), [{
- return isAlignedMemNode(dyn_cast<MemSDNode>(N));
-}]>;
-
-def unalignedload: PatFrag<(ops node:$a), (load $a), [{
- return !isAlignedMemNode(dyn_cast<MemSDNode>(N));
-}]>;
-
-def alignedstore: PatFrag<(ops node:$v, node:$a), (store $v, $a), [{
- return isAlignedMemNode(dyn_cast<MemSDNode>(N));
-}]>;
-
-def unalignedstore: PatFrag<(ops node:$v, node:$a), (store $v, $a), [{
- return !isAlignedMemNode(dyn_cast<MemSDNode>(N));
-}]>;
-
-
// Converters from unary/binary SDNode to PatFrag.
class pf1<SDNode Op> : PatFrag<(ops node:$a), (Op node:$a)>;
class pf2<SDNode Op> : PatFrag<(ops node:$a, node:$b), (Op node:$a, node:$b)>;
@@ -274,7 +258,7 @@ class Not2<PatFrag P>
: PatFrag<(ops node:$A, node:$B), (P node:$A, (not node:$B))>;
class Su<PatFrag Op>
- : PatFrag<Op.Operands, Op.Fragment, [{ return hasOneUse(N); }],
+ : PatFrag<Op.Operands, !head(Op.Fragments), [{ return hasOneUse(N); }],
Op.OperandTransform>;
// Main selection macros.
@@ -298,9 +282,9 @@ class AccRRI_pat<InstHexagon MI, PatFrag AccOp, PatFrag Op,
(MI RegPred:$Rx, RegPred:$Rs, imm:$I)>;
class AccRRR_pat<InstHexagon MI, PatFrag AccOp, PatFrag Op,
- PatFrag RsPred, PatFrag RtPred>
- : Pat<(AccOp RsPred:$Rx, (Op RsPred:$Rs, RtPred:$Rt)),
- (MI RsPred:$Rx, RsPred:$Rs, RtPred:$Rt)>;
+ PatFrag RxPred, PatFrag RsPred, PatFrag RtPred>
+ : Pat<(AccOp RxPred:$Rx, (Op RsPred:$Rs, RtPred:$Rt)),
+ (MI RxPred:$Rx, RsPred:$Rs, RtPred:$Rt)>;
multiclass SelMinMax_pats<PatFrag CmpOp, PatFrag Val,
InstHexagon InstA, InstHexagon InstB> {
@@ -316,6 +300,7 @@ def Add: pf2<add>; def And: pf2<and>; def Sra: pf2<sra>;
def Sub: pf2<sub>; def Or: pf2<or>; def Srl: pf2<srl>;
def Mul: pf2<mul>; def Xor: pf2<xor>; def Shl: pf2<shl>;
+def Rol: pf2<rotl>;
// --(1) Immediate -------------------------------------------------------
//
@@ -363,7 +348,7 @@ def ToI32: OutPatFrag<(ops node:$V), (A2_tfrsi $V)>;
// --(2) Type cast -------------------------------------------------------
//
-let Predicates = [HasV5T] in {
+let Predicates = [HasV5] in {
def: OpR_R_pat<F2_conv_sf2df, pf1<fpextend>, f64, F32>;
def: OpR_R_pat<F2_conv_df2sf, pf1<fpround>, f32, F64>;
@@ -389,7 +374,7 @@ let Predicates = [HasV5T] in {
}
// Bitcast is different than [fp|sint|uint]_to_[sint|uint|fp].
-let Predicates = [HasV5T] in {
+let Predicates = [HasV5] in {
def: Pat<(i32 (bitconvert F32:$v)), (I32:$v)>;
def: Pat<(f32 (bitconvert I32:$v)), (F32:$v)>;
def: Pat<(i64 (bitconvert F64:$v)), (I64:$v)>;
@@ -422,9 +407,14 @@ def: Pat<(i64 (sext I1:$Pu)),
(Combinew (C2_muxii PredRegs:$Pu, -1, 0),
(C2_muxii PredRegs:$Pu, -1, 0))>;
-def: Pat<(i32 (sext I1:$Pu)), (C2_muxii I1:$Pu, -1, 0)>;
-def: Pat<(i32 (zext I1:$Pu)), (C2_muxii I1:$Pu, 1, 0)>;
-def: Pat<(i64 (zext I1:$Pu)), (ToZext64 (C2_muxii I1:$Pu, 1, 0))>;
+def: Pat<(i32 (sext I1:$Pu)), (C2_muxii I1:$Pu, -1, 0)>;
+def: Pat<(i32 (zext I1:$Pu)), (C2_muxii I1:$Pu, 1, 0)>;
+def: Pat<(i64 (zext I1:$Pu)), (ToZext64 (C2_muxii I1:$Pu, 1, 0))>;
+def: Pat<(v2i16 (sext V2I1:$Pu)), (S2_vtrunehb (C2_mask V2I1:$Pu))>;
+def: Pat<(v2i32 (sext V2I1:$Pu)), (C2_mask V2I1:$Pu)>;
+def: Pat<(v4i8 (sext V4I1:$Pu)), (S2_vtrunehb (C2_mask V4I1:$Pu))>;
+def: Pat<(v4i16 (sext V4I1:$Pu)), (C2_mask V4I1:$Pu)>;
+def: Pat<(v8i8 (sext V8I1:$Pu)), (C2_mask V8I1:$Pu)>;
def: Pat<(i64 (sext I32:$Rs)), (A2_sxtw I32:$Rs)>;
def: Pat<(Zext64 I32:$Rs), (ToZext64 $Rs)>;
@@ -441,6 +431,20 @@ let AddedComplexity = 20 in {
def: Pat<(i32 (anyext I1:$Pu)), (C2_muxii I1:$Pu, 1, 0)>;
def: Pat<(i64 (anyext I1:$Pu)), (ToZext64 (C2_muxii I1:$Pu, 1, 0))>;
+def Vsplatpi: OutPatFrag<(ops node:$V),
+ (Combinew (A2_tfrsi $V), (A2_tfrsi $V))>;
+def: Pat<(v8i8 (zext V8I1:$Pu)),
+ (A2_andp (C2_mask V8I1:$Pu), (Vsplatpi (i32 0x01010101)))>;
+def: Pat<(v4i16 (zext V4I1:$Pu)),
+ (A2_andp (C2_mask V4I1:$Pu), (Vsplatpi (i32 0x00010001)))>;
+def: Pat<(v2i32 (zext V2I1:$Pu)),
+ (A2_andp (C2_mask V2I1:$Pu), (A2_combineii (i32 1), (i32 1)))>;
+
+def: Pat<(v4i8 (zext V4I1:$Pu)),
+ (A2_andir (LoReg (C2_mask V4I1:$Pu)), (i32 0x01010101))>;
+def: Pat<(v2i16 (zext V2I1:$Pu)),
+ (A2_andir (LoReg (C2_mask V2I1:$Pu)), (i32 0x00010001))>;
+
def: Pat<(v4i16 (zext V4I8:$Rs)), (S2_vzxtbh V4I8:$Rs)>;
def: Pat<(v2i32 (zext V2I16:$Rs)), (S2_vzxthw V2I16:$Rs)>;
def: Pat<(v4i16 (anyext V4I8:$Rs)), (S2_vzxtbh V4I8:$Rs)>;
@@ -475,25 +479,40 @@ def: Pat<(v2i16 (trunc V2I32:$Rs)),
//
def: Pat<(not I1:$Ps), (C2_not I1:$Ps)>;
+def: Pat<(not V8I1:$Ps), (C2_not V8I1:$Ps)>;
def: Pat<(add I1:$Ps, -1), (C2_not I1:$Ps)>;
-def: OpR_RR_pat<C2_and, And, i1, I1>;
-def: OpR_RR_pat<C2_or, Or, i1, I1>;
-def: OpR_RR_pat<C2_xor, Xor, i1, I1>;
-def: OpR_RR_pat<C2_andn, Not2<And>, i1, I1>;
-def: OpR_RR_pat<C2_orn, Not2<Or>, i1, I1>;
+multiclass BoolOpR_RR_pat<InstHexagon MI, PatFrag Op> {
+ def: OpR_RR_pat<MI, Op, i1, I1>;
+ def: OpR_RR_pat<MI, Op, v2i1, V2I1>;
+ def: OpR_RR_pat<MI, Op, v4i1, V4I1>;
+ def: OpR_RR_pat<MI, Op, v8i1, V8I1>;
+}
+
+multiclass BoolAccRRR_pat<InstHexagon MI, PatFrag AccOp, PatFrag Op> {
+ def: AccRRR_pat<MI, AccOp, Op, I1, I1, I1>;
+ def: AccRRR_pat<MI, AccOp, Op, V2I1, V2I1, V2I1>;
+ def: AccRRR_pat<MI, AccOp, Op, V4I1, V4I1, V4I1>;
+ def: AccRRR_pat<MI, AccOp, Op, V8I1, V8I1, V8I1>;
+}
+
+defm: BoolOpR_RR_pat<C2_and, And>;
+defm: BoolOpR_RR_pat<C2_or, Or>;
+defm: BoolOpR_RR_pat<C2_xor, Xor>;
+defm: BoolOpR_RR_pat<C2_andn, Not2<And>>;
+defm: BoolOpR_RR_pat<C2_orn, Not2<Or>>;
// op(Ps, op(Pt, Pu))
-def: AccRRR_pat<C4_and_and, And, Su<And>, I1, I1>;
-def: AccRRR_pat<C4_and_or, And, Su<Or>, I1, I1>;
-def: AccRRR_pat<C4_or_and, Or, Su<And>, I1, I1>;
-def: AccRRR_pat<C4_or_or, Or, Su<Or>, I1, I1>;
+defm: BoolAccRRR_pat<C4_and_and, And, Su<And>>;
+defm: BoolAccRRR_pat<C4_and_or, And, Su<Or>>;
+defm: BoolAccRRR_pat<C4_or_and, Or, Su<And>>;
+defm: BoolAccRRR_pat<C4_or_or, Or, Su<Or>>;
// op(Ps, op(Pt, ~Pu))
-def: AccRRR_pat<C4_and_andn, And, Su<Not2<And>>, I1, I1>;
-def: AccRRR_pat<C4_and_orn, And, Su<Not2<Or>>, I1, I1>;
-def: AccRRR_pat<C4_or_andn, Or, Su<Not2<And>>, I1, I1>;
-def: AccRRR_pat<C4_or_orn, Or, Su<Not2<Or>>, I1, I1>;
+defm: BoolAccRRR_pat<C4_and_andn, And, Su<Not2<And>>>;
+defm: BoolAccRRR_pat<C4_and_orn, And, Su<Not2<Or>>>;
+defm: BoolAccRRR_pat<C4_or_andn, Or, Su<Not2<And>>>;
+defm: BoolAccRRR_pat<C4_or_orn, Or, Su<Not2<Or>>>;
// --(5) Compare ---------------------------------------------------------
@@ -519,7 +538,7 @@ def: Pat<(i1 (setult I32:$Rs, u32_0ImmPred:$u9)),
// Patfrag to convert the usual comparison patfrags (e.g. setlt) to ones
// that reverse the order of the operands.
class RevCmp<PatFrag F>
- : PatFrag<(ops node:$rhs, node:$lhs), F.Fragment, F.PredicateCode,
+ : PatFrag<(ops node:$rhs, node:$lhs), !head(F.Fragments), F.PredicateCode,
F.OperandTransform>;
def: OpR_RR_pat<C2_cmpeq, seteq, i1, I32>;
@@ -563,7 +582,7 @@ def: OpR_RR_pat<A2_vcmpwgtu, RevCmp<setult>, v2i1, V2I32>;
def: OpR_RR_pat<A2_vcmpwgtu, setugt, i1, V2I32>;
def: OpR_RR_pat<A2_vcmpwgtu, setugt, v2i1, V2I32>;
-let Predicates = [HasV5T] in {
+let Predicates = [HasV5] in {
def: OpR_RR_pat<F2_sfcmpeq, seteq, i1, F32>;
def: OpR_RR_pat<F2_sfcmpgt, setgt, i1, F32>;
def: OpR_RR_pat<F2_sfcmpge, setge, i1, F32>;
@@ -598,27 +617,40 @@ def: Pat<(i1 (setle I32:$Rs, anyimm:$u5)),
def: Pat<(i1 (setule I32:$Rs, anyimm:$u5)),
(C2_not (C2_cmpgtui I32:$Rs, imm:$u5))>;
-def: Pat<(i1 (setne I32:$Rs, I32:$Rt)),
- (C2_not (C2_cmpeq I32:$Rs, I32:$Rt))>;
-def: Pat<(i1 (setle I32:$Rs, I32:$Rt)),
- (C2_not (C2_cmpgt I32:$Rs, I32:$Rt))>;
-def: Pat<(i1 (setule I32:$Rs, I32:$Rt)),
- (C2_not (C2_cmpgtu I32:$Rs, I32:$Rt))>;
-def: Pat<(i1 (setge I32:$Rs, I32:$Rt)),
- (C2_not (C2_cmpgt I32:$Rt, I32:$Rs))>;
-def: Pat<(i1 (setuge I32:$Rs, I32:$Rt)),
- (C2_not (C2_cmpgtu I32:$Rt, I32:$Rs))>;
-
-def: Pat<(i1 (setle I64:$Rs, I64:$Rt)),
- (C2_not (C2_cmpgtp I64:$Rs, I64:$Rt))>;
-def: Pat<(i1 (setne I64:$Rs, I64:$Rt)),
- (C2_not (C2_cmpeqp I64:$Rs, I64:$Rt))>;
-def: Pat<(i1 (setge I64:$Rs, I64:$Rt)),
- (C2_not (C2_cmpgtp I64:$Rt, I64:$Rs))>;
-def: Pat<(i1 (setuge I64:$Rs, I64:$Rt)),
- (C2_not (C2_cmpgtup I64:$Rt, I64:$Rs))>;
-def: Pat<(i1 (setule I64:$Rs, I64:$Rt)),
- (C2_not (C2_cmpgtup I64:$Rs, I64:$Rt))>;
+class OpmR_RR_pat<PatFrag Output, PatFrag Op, ValueType ResType,
+ PatFrag RsPred, PatFrag RtPred = RsPred>
+ : Pat<(ResType (Op RsPred:$Rs, RtPred:$Rt)),
+ (Output RsPred:$Rs, RtPred:$Rt)>;
+
+class Outn<InstHexagon MI>
+ : OutPatFrag<(ops node:$Rs, node:$Rt),
+ (C2_not (MI $Rs, $Rt))>;
+
+def: OpmR_RR_pat<Outn<C2_cmpeq>, setne, i1, I32>;
+def: OpmR_RR_pat<Outn<C2_cmpgt>, setle, i1, I32>;
+def: OpmR_RR_pat<Outn<C2_cmpgtu>, setule, i1, I32>;
+def: OpmR_RR_pat<Outn<C2_cmpgt>, RevCmp<setge>, i1, I32>;
+def: OpmR_RR_pat<Outn<C2_cmpgtu>, RevCmp<setuge>, i1, I32>;
+def: OpmR_RR_pat<Outn<C2_cmpeqp>, setne, i1, I64>;
+def: OpmR_RR_pat<Outn<C2_cmpgtp>, setle, i1, I64>;
+def: OpmR_RR_pat<Outn<C2_cmpgtup>, setule, i1, I64>;
+def: OpmR_RR_pat<Outn<C2_cmpgtp>, RevCmp<setge>, i1, I64>;
+def: OpmR_RR_pat<Outn<C2_cmpgtup>, RevCmp<setuge>, i1, I64>;
+def: OpmR_RR_pat<Outn<A2_vcmpbeq>, setne, v8i1, V8I8>;
+def: OpmR_RR_pat<Outn<A4_vcmpbgt>, setle, v8i1, V8I8>;
+def: OpmR_RR_pat<Outn<A2_vcmpbgtu>, setule, v8i1, V8I8>;
+def: OpmR_RR_pat<Outn<A4_vcmpbgt>, RevCmp<setge>, v8i1, V8I8>;
+def: OpmR_RR_pat<Outn<A2_vcmpbgtu>, RevCmp<setuge>, v8i1, V8I8>;
+def: OpmR_RR_pat<Outn<A2_vcmpheq>, setne, v4i1, V4I16>;
+def: OpmR_RR_pat<Outn<A2_vcmphgt>, setle, v4i1, V4I16>;
+def: OpmR_RR_pat<Outn<A2_vcmphgtu>, setule, v4i1, V4I16>;
+def: OpmR_RR_pat<Outn<A2_vcmphgt>, RevCmp<setge>, v4i1, V4I16>;
+def: OpmR_RR_pat<Outn<A2_vcmphgtu>, RevCmp<setuge>, v4i1, V4I16>;
+def: OpmR_RR_pat<Outn<A2_vcmpweq>, setne, v2i1, V2I32>;
+def: OpmR_RR_pat<Outn<A2_vcmpwgt>, setle, v2i1, V2I32>;
+def: OpmR_RR_pat<Outn<A2_vcmpwgtu>, setule, v2i1, V2I32>;
+def: OpmR_RR_pat<Outn<A2_vcmpwgt>, RevCmp<setge>, v2i1, V2I32>;
+def: OpmR_RR_pat<Outn<A2_vcmpwgtu>, RevCmp<setuge>, v2i1, V2I32>;
let AddedComplexity = 100 in {
def: Pat<(i1 (seteq (and (xor I32:$Rs, I32:$Rt), 255), 0)),
@@ -680,25 +712,10 @@ def: Pat<(i32 (zext (i1 (seteq I32:$Rs, anyimm:$s8)))),
def: Pat<(i32 (zext (i1 (setne I32:$Rs, anyimm:$s8)))),
(A4_rcmpneqi I32:$Rs, imm:$s8)>;
-def: Pat<(i1 (setne I1:$Ps, I1:$Pt)),
- (C2_xor I1:$Ps, I1:$Pt)>;
-
-def: Pat<(i1 (seteq V4I8:$Rs, V4I8:$Rt)),
- (A2_vcmpbeq (ToZext64 $Rs), (ToZext64 $Rt))>;
-def: Pat<(i1 (setgt V4I8:$Rs, V4I8:$Rt)),
- (A4_vcmpbgt (ToZext64 $Rs), (ToZext64 $Rt))>;
-def: Pat<(i1 (setugt V4I8:$Rs, V4I8:$Rt)),
- (A2_vcmpbgtu (ToZext64 $Rs), (ToZext64 $Rt))>;
-
-def: Pat<(i1 (seteq V2I16:$Rs, V2I16:$Rt)),
- (A2_vcmpheq (ToZext64 $Rs), (ToZext64 $Rt))>;
-def: Pat<(i1 (setgt V2I16:$Rs, V2I16:$Rt)),
- (A2_vcmphgt (ToZext64 $Rs), (ToZext64 $Rt))>;
-def: Pat<(i1 (setugt V2I16:$Rs, V2I16:$Rt)),
- (A2_vcmphgtu (ToZext64 $Rs), (ToZext64 $Rt))>;
-
-def: Pat<(v2i1 (setne V2I32:$Rs, V2I32:$Rt)),
- (C2_not (v2i1 (A2_vcmpbeq V2I32:$Rs, V2I32:$Rt)))>;
+def: Pat<(i1 (seteq I1:$Ps, (i1 -1))), (I1:$Ps)>;
+def: Pat<(i1 (setne I1:$Ps, (i1 -1))), (C2_not I1:$Ps)>;
+def: Pat<(i1 (seteq I1:$Ps, I1:$Pt)), (C2_xor I1:$Ps, (C2_not I1:$Pt))>;
+def: Pat<(i1 (setne I1:$Ps, I1:$Pt)), (C2_xor I1:$Ps, I1:$Pt)>;
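+// Predicate equality uses the identity p == q  <=>  p ^ ~q.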
// Floating-point comparisons with checks for ordered/unordered status.
@@ -706,18 +723,13 @@ class T3<InstHexagon MI1, InstHexagon MI2, InstHexagon MI3>
: OutPatFrag<(ops node:$Rs, node:$Rt),
(MI1 (MI2 $Rs, $Rt), (MI3 $Rs, $Rt))>;
-class OpmR_RR_pat<PatFrag Output, PatFrag Op, ValueType ResType,
- PatFrag RsPred, PatFrag RtPred = RsPred>
- : Pat<(ResType (Op RsPred:$Rs, RtPred:$Rt)),
- (Output RsPred:$Rs, RtPred:$Rt)>;
-
class Cmpuf<InstHexagon MI>: T3<C2_or, F2_sfcmpuo, MI>;
class Cmpud<InstHexagon MI>: T3<C2_or, F2_dfcmpuo, MI>;
class Cmpufn<InstHexagon MI>: T3<C2_orn, F2_sfcmpuo, MI>;
class Cmpudn<InstHexagon MI>: T3<C2_orn, F2_dfcmpuo, MI>;
-let Predicates = [HasV5T] in {
+let Predicates = [HasV5] in {
def: OpmR_RR_pat<Cmpuf<F2_sfcmpeq>, setueq, i1, F32>;
def: OpmR_RR_pat<Cmpuf<F2_sfcmpge>, setuge, i1, F32>;
def: OpmR_RR_pat<Cmpuf<F2_sfcmpgt>, setugt, i1, F32>;
@@ -733,11 +745,7 @@ let Predicates = [HasV5T] in {
def: OpmR_RR_pat<Cmpudn<F2_dfcmpeq>, setune, i1, F64>;
}
-class Outn<InstHexagon MI>
- : OutPatFrag<(ops node:$Rs, node:$Rt),
- (C2_not (MI $Rs, $Rt))>;
-
-let Predicates = [HasV5T] in {
+let Predicates = [HasV5] in {
def: OpmR_RR_pat<Outn<F2_sfcmpeq>, setone, i1, F32>;
def: OpmR_RR_pat<Outn<F2_sfcmpeq>, setne, i1, F32>;
@@ -776,7 +784,7 @@ def: Pat<(select I1:$Pu, I64:$Rs, I64:$Rt),
(Combinew (C2_mux I1:$Pu, (HiReg $Rs), (HiReg $Rt)),
(C2_mux I1:$Pu, (LoReg $Rs), (LoReg $Rt)))>;
-let Predicates = [HasV5T] in {
+let Predicates = [HasV5] in {
def: Pat<(select I1:$Pu, F32:$Rs, f32ImmPred:$I),
(C2_muxir I1:$Pu, F32:$Rs, (ftoi $I))>;
def: Pat<(select I1:$Pu, f32ImmPred:$I, F32:$Rt),
@@ -813,20 +821,6 @@ def: Pat<(vselect V4I1:$Pu, V4I16:$Rs, V4I16:$Rt),
def: Pat<(vselect V2I1:$Pu, V2I32:$Rs, V2I32:$Rt),
(C2_vmux V2I1:$Pu, V2I32:$Rs, V2I32:$Rt)>;
-
-class HvxSel_pat<InstHexagon MI, PatFrag RegPred>
- : Pat<(select I1:$Pu, RegPred:$Vs, RegPred:$Vt),
- (MI I1:$Pu, RegPred:$Vs, RegPred:$Vt)>;
-
-let Predicates = [HasV60T,UseHVX] in {
- def: HvxSel_pat<PS_vselect, HVI8>;
- def: HvxSel_pat<PS_vselect, HVI16>;
- def: HvxSel_pat<PS_vselect, HVI32>;
- def: HvxSel_pat<PS_wselect, HWI8>;
- def: HvxSel_pat<PS_wselect, HWI16>;
- def: HvxSel_pat<PS_wselect, HWI32>;
-}
-
// From LegalizeDAG.cpp: (Pu ? Pv : Pw) <=> (Pu & Pv) | (!Pu & Pw).
def: Pat<(select I1:$Pu, I1:$Pv, I1:$Pw),
(C2_or (C2_and I1:$Pu, I1:$Pv),
@@ -878,7 +872,7 @@ let AddedComplexity = 200 in {
defm: SelMinMax_pats<setult, I64, A2_minup, A2_maxup>;
}
-let AddedComplexity = 100, Predicates = [HasV5T] in {
+let AddedComplexity = 100, Predicates = [HasV5] in {
defm: SelMinMax_pats<setolt, F32, F2_sfmin, F2_sfmax>;
defm: SelMinMax_pats<setole, F32, F2_sfmin, F2_sfmax>;
defm: SelMinMax_pats<setogt, F32, F2_sfmax, F2_sfmin>;
@@ -892,40 +886,34 @@ let AddedComplexity = 100, Predicates = [HasV5T] in {
def SDTHexagonINSERT:
SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
SDTCisInt<0>, SDTCisVT<3, i32>, SDTCisVT<4, i32>]>;
-def SDTHexagonINSERTRP:
- SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
- SDTCisInt<0>, SDTCisVT<3, i64>]>;
-
def HexagonINSERT: SDNode<"HexagonISD::INSERT", SDTHexagonINSERT>;
-def HexagonINSERTRP: SDNode<"HexagonISD::INSERTRP", SDTHexagonINSERTRP>;
-def: Pat<(HexagonINSERT I32:$Rs, I32:$Rt, u5_0ImmPred:$u1, u5_0ImmPred:$u2),
- (S2_insert I32:$Rs, I32:$Rt, imm:$u1, imm:$u2)>;
-def: Pat<(HexagonINSERT I64:$Rs, I64:$Rt, u6_0ImmPred:$u1, u6_0ImmPred:$u2),
- (S2_insertp I64:$Rs, I64:$Rt, imm:$u1, imm:$u2)>;
-def: Pat<(HexagonINSERTRP I32:$Rs, I32:$Rt, I64:$Ru),
- (S2_insert_rp I32:$Rs, I32:$Rt, I64:$Ru)>;
-def: Pat<(HexagonINSERTRP I64:$Rs, I64:$Rt, I64:$Ru),
- (S2_insertp_rp I64:$Rs, I64:$Rt, I64:$Ru)>;
+let AddedComplexity = 10 in {
+ def: Pat<(HexagonINSERT I32:$Rs, I32:$Rt, u5_0ImmPred:$u1, u5_0ImmPred:$u2),
+ (S2_insert I32:$Rs, I32:$Rt, imm:$u1, imm:$u2)>;
+ def: Pat<(HexagonINSERT I64:$Rs, I64:$Rt, u6_0ImmPred:$u1, u6_0ImmPred:$u2),
+ (S2_insertp I64:$Rs, I64:$Rt, imm:$u1, imm:$u2)>;
+}
+def: Pat<(HexagonINSERT I32:$Rs, I32:$Rt, I32:$Width, I32:$Off),
+ (S2_insert_rp I32:$Rs, I32:$Rt, (Combinew $Width, $Off))>;
+def: Pat<(HexagonINSERT I64:$Rs, I64:$Rt, I32:$Width, I32:$Off),
+ (S2_insertp_rp I64:$Rs, I64:$Rt, (Combinew $Width, $Off))>;
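+// S2_insert_rp and S2_insertp_rp expect the width and offset packed into
+// a register pair, which the patterns above materialize with Combinew.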
def SDTHexagonEXTRACTU
: SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisInt<0>, SDTCisInt<1>,
SDTCisVT<2, i32>, SDTCisVT<3, i32>]>;
-def SDTHexagonEXTRACTURP
- : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, SDTCisInt<0>, SDTCisInt<1>,
- SDTCisVT<2, i64>]>;
-
def HexagonEXTRACTU: SDNode<"HexagonISD::EXTRACTU", SDTHexagonEXTRACTU>;
-def HexagonEXTRACTURP: SDNode<"HexagonISD::EXTRACTURP", SDTHexagonEXTRACTURP>;
-def: Pat<(HexagonEXTRACTU I32:$Rs, u5_0ImmPred:$u5, u5_0ImmPred:$U5),
- (S2_extractu I32:$Rs, imm:$u5, imm:$U5)>;
-def: Pat<(HexagonEXTRACTU I64:$Rs, u6_0ImmPred:$u6, u6_0ImmPred:$U6),
- (S2_extractup I64:$Rs, imm:$u6, imm:$U6)>;
-def: Pat<(HexagonEXTRACTURP I32:$Rs, I64:$Rt),
- (S2_extractu_rp I32:$Rs, I64:$Rt)>;
-def: Pat<(HexagonEXTRACTURP I64:$Rs, I64:$Rt),
- (S2_extractup_rp I64:$Rs, I64:$Rt)>;
+let AddedComplexity = 10 in {
+ def: Pat<(HexagonEXTRACTU I32:$Rs, u5_0ImmPred:$u5, u5_0ImmPred:$U5),
+ (S2_extractu I32:$Rs, imm:$u5, imm:$U5)>;
+ def: Pat<(HexagonEXTRACTU I64:$Rs, u6_0ImmPred:$u6, u6_0ImmPred:$U6),
+ (S2_extractup I64:$Rs, imm:$u6, imm:$U6)>;
+}
+def: Pat<(HexagonEXTRACTU I32:$Rs, I32:$Width, I32:$Off),
+ (S2_extractu_rp I32:$Rs, (Combinew $Width, $Off))>;
+def: Pat<(HexagonEXTRACTU I64:$Rs, I32:$Width, I32:$Off),
+ (S2_extractup_rp I64:$Rs, (Combinew $Width, $Off))>;
def SDTHexagonVSPLAT:
SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>;
@@ -938,20 +926,20 @@ def: Pat<(v2i32 (HexagonVSPLAT s8_0ImmPred:$s8)),
(A2_combineii imm:$s8, imm:$s8)>;
def: Pat<(v2i32 (HexagonVSPLAT I32:$Rs)), (Combinew I32:$Rs, I32:$Rs)>;
+let AddedComplexity = 10 in
+def: Pat<(v8i8 (HexagonVSPLAT I32:$Rs)), (S6_vsplatrbp I32:$Rs)>,
+ Requires<[HasV62]>;
+def: Pat<(v8i8 (HexagonVSPLAT I32:$Rs)),
+ (Combinew (S2_vsplatrb I32:$Rs), (S2_vsplatrb I32:$Rs))>;
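+// The AddedComplexity above makes selection prefer the single V62 splat
+// instruction; pre-V62 targets splat each word half and combine.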
+
// --(8) Shift/permute ---------------------------------------------------
//
def SDTHexagonI64I32I32: SDTypeProfile<1, 2,
[SDTCisVT<0, i64>, SDTCisVT<1, i32>, SDTCisSameAs<1, 2>]>;
-def SDTHexagonVCOMBINE: SDTypeProfile<1, 2, [SDTCisSameAs<1, 2>,
- SDTCisSubVecOfVec<1, 0>]>;
-def SDTHexagonVPACK: SDTypeProfile<1, 2, [SDTCisSameAs<1, 2>, SDTCisVec<1>]>;
def HexagonCOMBINE: SDNode<"HexagonISD::COMBINE", SDTHexagonI64I32I32>;
-def HexagonVCOMBINE: SDNode<"HexagonISD::VCOMBINE", SDTHexagonVCOMBINE>;
-def HexagonVPACKE: SDNode<"HexagonISD::VPACKE", SDTHexagonVPACK>;
-def HexagonVPACKO: SDNode<"HexagonISD::VPACKO", SDTHexagonVPACK>;
def: Pat<(HexagonCOMBINE I32:$Rs, I32:$Rt), (Combinew $Rs, $Rt)>;
@@ -1001,11 +989,15 @@ def: OpR_RR_pat<S2_asr_r_p, Sra, i64, I64, I32>;
def: OpR_RR_pat<S2_lsr_r_p, Srl, i64, I64, I32>;
def: OpR_RR_pat<S2_asl_r_p, Shl, i64, I64, I32>;
+let Predicates = [HasV60] in {
+ def: OpR_RI_pat<S6_rol_i_r, Rol, i32, I32, u5_0ImmPred>;
+ def: OpR_RI_pat<S6_rol_i_p, Rol, i64, I64, u6_0ImmPred>;
+}
def: Pat<(sra (add (sra I32:$Rs, u5_0ImmPred:$u5), 1), (i32 1)),
(S2_asr_i_r_rnd I32:$Rs, imm:$u5)>;
def: Pat<(sra (add (sra I64:$Rs, u6_0ImmPred:$u6), 1), (i32 1)),
- (S2_asr_i_p_rnd I64:$Rs, imm:$u6)>, Requires<[HasV5T]>;
+ (S2_asr_i_p_rnd I64:$Rs, imm:$u6)>, Requires<[HasV5]>;
// Prefer S2_addasl_rrri over S2_asl_i_r_acc.
let AddedComplexity = 120 in
@@ -1046,41 +1038,55 @@ let AddedComplexity = 100 in {
def: AccRRI_pat<S2_asl_i_p_and, And, Su<Shl>, I64, u6_0ImmPred>;
def: AccRRI_pat<S2_asl_i_p_or, Or, Su<Shl>, I64, u6_0ImmPred>;
def: AccRRI_pat<S2_asl_i_p_xacc, Xor, Su<Shl>, I64, u6_0ImmPred>;
+
+ let Predicates = [HasV60] in {
+ def: AccRRI_pat<S6_rol_i_r_acc, Add, Su<Rol>, I32, u5_0ImmPred>;
+ def: AccRRI_pat<S6_rol_i_r_nac, Sub, Su<Rol>, I32, u5_0ImmPred>;
+ def: AccRRI_pat<S6_rol_i_r_and, And, Su<Rol>, I32, u5_0ImmPred>;
+ def: AccRRI_pat<S6_rol_i_r_or, Or, Su<Rol>, I32, u5_0ImmPred>;
+ def: AccRRI_pat<S6_rol_i_r_xacc, Xor, Su<Rol>, I32, u5_0ImmPred>;
+
+ def: AccRRI_pat<S6_rol_i_p_acc, Add, Su<Rol>, I64, u6_0ImmPred>;
+ def: AccRRI_pat<S6_rol_i_p_nac, Sub, Su<Rol>, I64, u6_0ImmPred>;
+ def: AccRRI_pat<S6_rol_i_p_and, And, Su<Rol>, I64, u6_0ImmPred>;
+ def: AccRRI_pat<S6_rol_i_p_or, Or, Su<Rol>, I64, u6_0ImmPred>;
+ def: AccRRI_pat<S6_rol_i_p_xacc, Xor, Su<Rol>, I64, u6_0ImmPred>;
+ }
}
let AddedComplexity = 100 in {
- def: AccRRR_pat<S2_asr_r_r_acc, Add, Su<Sra>, I32, I32>;
- def: AccRRR_pat<S2_asr_r_r_nac, Sub, Su<Sra>, I32, I32>;
- def: AccRRR_pat<S2_asr_r_r_and, And, Su<Sra>, I32, I32>;
- def: AccRRR_pat<S2_asr_r_r_or, Or, Su<Sra>, I32, I32>;
+ def: AccRRR_pat<S2_asr_r_r_acc, Add, Su<Sra>, I32, I32, I32>;
+ def: AccRRR_pat<S2_asr_r_r_nac, Sub, Su<Sra>, I32, I32, I32>;
+ def: AccRRR_pat<S2_asr_r_r_and, And, Su<Sra>, I32, I32, I32>;
+ def: AccRRR_pat<S2_asr_r_r_or, Or, Su<Sra>, I32, I32, I32>;
- def: AccRRR_pat<S2_asr_r_p_acc, Add, Su<Sra>, I64, I32>;
- def: AccRRR_pat<S2_asr_r_p_nac, Sub, Su<Sra>, I64, I32>;
- def: AccRRR_pat<S2_asr_r_p_and, And, Su<Sra>, I64, I32>;
- def: AccRRR_pat<S2_asr_r_p_or, Or, Su<Sra>, I64, I32>;
- def: AccRRR_pat<S2_asr_r_p_xor, Xor, Su<Sra>, I64, I32>;
+ def: AccRRR_pat<S2_asr_r_p_acc, Add, Su<Sra>, I64, I64, I32>;
+ def: AccRRR_pat<S2_asr_r_p_nac, Sub, Su<Sra>, I64, I64, I32>;
+ def: AccRRR_pat<S2_asr_r_p_and, And, Su<Sra>, I64, I64, I32>;
+ def: AccRRR_pat<S2_asr_r_p_or, Or, Su<Sra>, I64, I64, I32>;
+ def: AccRRR_pat<S2_asr_r_p_xor, Xor, Su<Sra>, I64, I64, I32>;
- def: AccRRR_pat<S2_lsr_r_r_acc, Add, Su<Srl>, I32, I32>;
- def: AccRRR_pat<S2_lsr_r_r_nac, Sub, Su<Srl>, I32, I32>;
- def: AccRRR_pat<S2_lsr_r_r_and, And, Su<Srl>, I32, I32>;
- def: AccRRR_pat<S2_lsr_r_r_or, Or, Su<Srl>, I32, I32>;
+ def: AccRRR_pat<S2_lsr_r_r_acc, Add, Su<Srl>, I32, I32, I32>;
+ def: AccRRR_pat<S2_lsr_r_r_nac, Sub, Su<Srl>, I32, I32, I32>;
+ def: AccRRR_pat<S2_lsr_r_r_and, And, Su<Srl>, I32, I32, I32>;
+ def: AccRRR_pat<S2_lsr_r_r_or, Or, Su<Srl>, I32, I32, I32>;
- def: AccRRR_pat<S2_lsr_r_p_acc, Add, Su<Srl>, I64, I32>;
- def: AccRRR_pat<S2_lsr_r_p_nac, Sub, Su<Srl>, I64, I32>;
- def: AccRRR_pat<S2_lsr_r_p_and, And, Su<Srl>, I64, I32>;
- def: AccRRR_pat<S2_lsr_r_p_or, Or, Su<Srl>, I64, I32>;
- def: AccRRR_pat<S2_lsr_r_p_xor, Xor, Su<Srl>, I64, I32>;
+ def: AccRRR_pat<S2_lsr_r_p_acc, Add, Su<Srl>, I64, I64, I32>;
+ def: AccRRR_pat<S2_lsr_r_p_nac, Sub, Su<Srl>, I64, I64, I32>;
+ def: AccRRR_pat<S2_lsr_r_p_and, And, Su<Srl>, I64, I64, I32>;
+ def: AccRRR_pat<S2_lsr_r_p_or, Or, Su<Srl>, I64, I64, I32>;
+ def: AccRRR_pat<S2_lsr_r_p_xor, Xor, Su<Srl>, I64, I64, I32>;
- def: AccRRR_pat<S2_asl_r_r_acc, Add, Su<Shl>, I32, I32>;
- def: AccRRR_pat<S2_asl_r_r_nac, Sub, Su<Shl>, I32, I32>;
- def: AccRRR_pat<S2_asl_r_r_and, And, Su<Shl>, I32, I32>;
- def: AccRRR_pat<S2_asl_r_r_or, Or, Su<Shl>, I32, I32>;
+ def: AccRRR_pat<S2_asl_r_r_acc, Add, Su<Shl>, I32, I32, I32>;
+ def: AccRRR_pat<S2_asl_r_r_nac, Sub, Su<Shl>, I32, I32, I32>;
+ def: AccRRR_pat<S2_asl_r_r_and, And, Su<Shl>, I32, I32, I32>;
+ def: AccRRR_pat<S2_asl_r_r_or, Or, Su<Shl>, I32, I32, I32>;
- def: AccRRR_pat<S2_asl_r_p_acc, Add, Su<Shl>, I64, I32>;
- def: AccRRR_pat<S2_asl_r_p_nac, Sub, Su<Shl>, I64, I32>;
- def: AccRRR_pat<S2_asl_r_p_and, And, Su<Shl>, I64, I32>;
- def: AccRRR_pat<S2_asl_r_p_or, Or, Su<Shl>, I64, I32>;
- def: AccRRR_pat<S2_asl_r_p_xor, Xor, Su<Shl>, I64, I32>;
+ def: AccRRR_pat<S2_asl_r_p_acc, Add, Su<Shl>, I64, I64, I32>;
+ def: AccRRR_pat<S2_asl_r_p_nac, Sub, Su<Shl>, I64, I64, I32>;
+ def: AccRRR_pat<S2_asl_r_p_and, And, Su<Shl>, I64, I64, I32>;
+ def: AccRRR_pat<S2_asl_r_p_or, Or, Su<Shl>, I64, I64, I32>;
+ def: AccRRR_pat<S2_asl_r_p_xor, Xor, Su<Shl>, I64, I64, I32>;
}
@@ -1170,11 +1176,13 @@ def: Pat<(shl V4I16:$b, (v4i16 (HexagonVSPLAT u4_0ImmPred:$c))),
// --(9) Arithmetic/bitwise ----------------------------------------------
//
-def: Pat<(abs I32:$Rs), (A2_abs I32:$Rs)>;
-def: Pat<(not I32:$Rs), (A2_subri -1, I32:$Rs)>;
-def: Pat<(not I64:$Rs), (A2_notp I64:$Rs)>;
+def: Pat<(abs I32:$Rs), (A2_abs I32:$Rs)>;
+def: Pat<(abs I64:$Rs), (A2_absp I64:$Rs)>;
+def: Pat<(not I32:$Rs), (A2_subri -1, I32:$Rs)>;
+def: Pat<(not I64:$Rs), (A2_notp I64:$Rs)>;
+def: Pat<(ineg I64:$Rs), (A2_negp I64:$Rs)>;
-let Predicates = [HasV5T] in {
+let Predicates = [HasV5] in {
def: Pat<(fabs F32:$Rs), (S2_clrbit_i F32:$Rs, 31)>;
def: Pat<(fneg F32:$Rs), (S2_togglebit_i F32:$Rs, 31)>;
@@ -1186,13 +1194,6 @@ let Predicates = [HasV5T] in {
(i32 (LoReg $Rs)))>;
}
-let AddedComplexity = 50 in
-def: Pat<(xor (add (sra I32:$Rs, (i32 31)),
- I32:$Rs),
- (sra I32:$Rs, (i32 31))),
- (A2_abs I32:$Rs)>;
-
-
def: Pat<(add I32:$Rs, anyimm:$s16), (A2_addi I32:$Rs, imm:$s16)>;
def: Pat<(or I32:$Rs, anyimm:$s10), (A2_orir I32:$Rs, imm:$s10)>;
def: Pat<(and I32:$Rs, anyimm:$s10), (A2_andir I32:$Rs, imm:$s10)>;
@@ -1221,18 +1222,20 @@ def: OpR_RR_pat<A2_vsubub, Sub, v8i8, V8I8>;
def: OpR_RR_pat<A2_vsubh, Sub, v4i16, V4I16>;
def: OpR_RR_pat<A2_vsubw, Sub, v2i32, V2I32>;
+def: OpR_RR_pat<A2_and, And, v4i8, V4I8>;
+def: OpR_RR_pat<A2_xor, Xor, v4i8, V4I8>;
+def: OpR_RR_pat<A2_or, Or, v4i8, V4I8>;
def: OpR_RR_pat<A2_and, And, v2i16, V2I16>;
def: OpR_RR_pat<A2_xor, Xor, v2i16, V2I16>;
def: OpR_RR_pat<A2_or, Or, v2i16, V2I16>;
-
def: OpR_RR_pat<A2_andp, And, v8i8, V8I8>;
-def: OpR_RR_pat<A2_andp, And, v4i16, V4I16>;
-def: OpR_RR_pat<A2_andp, And, v2i32, V2I32>;
def: OpR_RR_pat<A2_orp, Or, v8i8, V8I8>;
-def: OpR_RR_pat<A2_orp, Or, v4i16, V4I16>;
-def: OpR_RR_pat<A2_orp, Or, v2i32, V2I32>;
def: OpR_RR_pat<A2_xorp, Xor, v8i8, V8I8>;
+def: OpR_RR_pat<A2_andp, And, v4i16, V4I16>;
+def: OpR_RR_pat<A2_orp, Or, v4i16, V4I16>;
def: OpR_RR_pat<A2_xorp, Xor, v4i16, V4I16>;
+def: OpR_RR_pat<A2_andp, And, v2i32, V2I32>;
+def: OpR_RR_pat<A2_orp, Or, v2i32, V2I32>;
def: OpR_RR_pat<A2_xorp, Xor, v2i32, V2I32>;
def: OpR_RR_pat<M2_mpyi, Mul, i32, I32>;
@@ -1255,7 +1258,7 @@ def: OpR_RR_pat<C2_and, Mul, v2i1, V2I1>;
def: OpR_RR_pat<C2_and, Mul, v4i1, V4I1>;
def: OpR_RR_pat<C2_and, Mul, v8i1, V8I1>;
-let Predicates = [HasV5T] in {
+let Predicates = [HasV5] in {
def: OpR_RR_pat<F2_sfadd, pf2<fadd>, f32, F32>;
def: OpR_RR_pat<F2_sfsub, pf2<fsub>, f32, F32>;
def: OpR_RR_pat<F2_sfmpy, pf2<fmul>, f32, F32>;
@@ -1268,12 +1271,62 @@ let Predicates = [HasV5T] in {
let AddedComplexity = 10 in {
def: AccRRI_pat<M2_macsip, Add, Su<Mul>, I32, u32_0ImmPred>;
def: AccRRI_pat<M2_macsin, Sub, Su<Mul>, I32, u32_0ImmPred>;
- def: AccRRR_pat<M2_maci, Add, Su<Mul>, I32, I32>;
+ def: AccRRR_pat<M2_maci, Add, Su<Mul>, I32, I32, I32>;
}
def: AccRRI_pat<M2_naccii, Sub, Su<Add>, I32, s32_0ImmPred>;
def: AccRRI_pat<M2_accii, Add, Su<Add>, I32, s32_0ImmPred>;
-def: AccRRR_pat<M2_acci, Add, Su<Add>, I32, I32>;
+def: AccRRR_pat<M2_acci, Add, Su<Add>, I32, I32, I32>;
+
+// Mulh for vectors
+//
+def: Pat<(v2i32 (mulhu V2I32:$Rss, V2I32:$Rtt)),
+ (Combinew (M2_mpyu_up (HiReg $Rss), (HiReg $Rtt)),
+ (M2_mpyu_up (LoReg $Rss), (LoReg $Rtt)))>;
+
+def: Pat<(v2i32 (mulhs V2I32:$Rss, V2I32:$Rtt)),
+ (Combinew (M2_mpy_up (HiReg $Rss), (HiReg $Rtt)),
+ (M2_mpy_up (LoReg $Rss), (LoReg $Rtt)))>;
+
+def Mulhub:
+ OutPatFrag<(ops node:$Rss, node:$Rtt),
+ (Combinew (S2_vtrunohb (M5_vmpybuu (HiReg $Rss), (HiReg $Rtt))),
+ (S2_vtrunohb (M5_vmpybuu (LoReg $Rss), (LoReg $Rtt))))>;
+
+// Equivalent of byte-wise arithmetic shift right by 7 in v8i8.
+def Asr7:
+ OutPatFrag<(ops node:$Rss), (C2_mask (C2_not (A4_vcmpbgti $Rss, -1)))>;
+
+def: Pat<(v8i8 (mulhu V8I8:$Rss, V8I8:$Rtt)),
+ (Mulhub $Rss, $Rtt)>;
+
+def: Pat<(v8i8 (mulhs V8I8:$Rss, V8I8:$Rtt)),
+ (A2_vsubub
+ (Mulhub $Rss, $Rtt),
+ (A2_vaddub (A2_andp V8I8:$Rss, (Asr7 $Rtt)),
+ (A2_andp V8I8:$Rtt, (Asr7 $Rss))))>;
+
+def Mpysh:
+ OutPatFrag<(ops node:$Rs, node:$Rt), (M2_vmpy2s_s0 $Rs, $Rt)>;
+def Mpyshh:
+ OutPatFrag<(ops node:$Rss, node:$Rtt), (Mpysh (HiReg $Rss), (HiReg $Rtt))>;
+def Mpyshl:
+ OutPatFrag<(ops node:$Rss, node:$Rtt), (Mpysh (LoReg $Rss), (LoReg $Rtt))>;
+
+def Mulhsh:
+ OutPatFrag<(ops node:$Rss, node:$Rtt),
+ (Combinew (A2_combine_hh (HiReg (Mpyshh $Rss, $Rtt)),
+ (LoReg (Mpyshh $Rss, $Rtt))),
+ (A2_combine_hh (HiReg (Mpyshl $Rss, $Rtt)),
+ (LoReg (Mpyshl $Rss, $Rtt))))>;
+
+def: Pat<(v4i16 (mulhs V4I16:$Rss, V4I16:$Rtt)), (Mulhsh $Rss, $Rtt)>;
+
+def: Pat<(v4i16 (mulhu V4I16:$Rss, V4I16:$Rtt)),
+ (A2_vaddh
+ (Mulhsh $Rss, $Rtt),
+ (A2_vaddh (A2_andp V4I16:$Rss, (S2_asr_i_vh $Rtt, 15)),
+ (A2_andp V4I16:$Rtt, (S2_asr_i_vh $Rss, 15))))>;
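+// The mulhu/mulhs patterns above rely on the standard identity between the
+// high halves of signed and unsigned products, here per 16-bit lane:
+//   mulhu(x,y) = mulhs(x,y) + (x < 0 ? y : 0) + (y < 0 ? x : 0)  (mod 2^16)
+// with the correction terms computed as x & (y >>s 15) and y & (x >>s 15).
+// A compact C check of the identity, for uint16_t x, y (illustrative only):
+//   uint16_t hu = uint16_t((uint32_t(x) * uint32_t(y)) >> 16);
+//   uint16_t hs = uint16_t((int32_t(int16_t(x)) * int32_t(int16_t(y))) >> 16);
+//   assert(hu == uint16_t(hs + (int16_t(x) < 0 ? y : 0)
+//                            + (int16_t(y) < 0 ? x : 0)));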
def: Pat<(ineg (mul I32:$Rs, u8_0ImmPred:$u8)),
@@ -1291,24 +1344,24 @@ def: Pat<(mul I32:$Rs, n8_0ImmPred:$n8),
def: Pat<(add Sext64:$Rs, I64:$Rt),
(A2_addsp (LoReg Sext64:$Rs), I64:$Rt)>;
-def: AccRRR_pat<M4_and_and, And, Su<And>, I32, I32>;
-def: AccRRR_pat<M4_and_or, And, Su<Or>, I32, I32>;
-def: AccRRR_pat<M4_and_xor, And, Su<Xor>, I32, I32>;
-def: AccRRR_pat<M4_or_and, Or, Su<And>, I32, I32>;
-def: AccRRR_pat<M4_or_or, Or, Su<Or>, I32, I32>;
-def: AccRRR_pat<M4_or_xor, Or, Su<Xor>, I32, I32>;
-def: AccRRR_pat<M4_xor_and, Xor, Su<And>, I32, I32>;
-def: AccRRR_pat<M4_xor_or, Xor, Su<Or>, I32, I32>;
-def: AccRRR_pat<M2_xor_xacc, Xor, Su<Xor>, I32, I32>;
-def: AccRRR_pat<M4_xor_xacc, Xor, Su<Xor>, I64, I64>;
+def: AccRRR_pat<M4_and_and, And, Su<And>, I32, I32, I32>;
+def: AccRRR_pat<M4_and_or, And, Su<Or>, I32, I32, I32>;
+def: AccRRR_pat<M4_and_xor, And, Su<Xor>, I32, I32, I32>;
+def: AccRRR_pat<M4_or_and, Or, Su<And>, I32, I32, I32>;
+def: AccRRR_pat<M4_or_or, Or, Su<Or>, I32, I32, I32>;
+def: AccRRR_pat<M4_or_xor, Or, Su<Xor>, I32, I32, I32>;
+def: AccRRR_pat<M4_xor_and, Xor, Su<And>, I32, I32, I32>;
+def: AccRRR_pat<M4_xor_or, Xor, Su<Or>, I32, I32, I32>;
+def: AccRRR_pat<M2_xor_xacc, Xor, Su<Xor>, I32, I32, I32>;
+def: AccRRR_pat<M4_xor_xacc, Xor, Su<Xor>, I64, I64, I64>;
// For dags like (or (and (not _), _), (shl _, _)) where the "or" with
// one argument matches the patterns below, and with the other argument
// matches S2_asl_r_r_or, etc, prefer the patterns below.
let AddedComplexity = 110 in { // greater than S2_asl_r_r_and/or/xor.
- def: AccRRR_pat<M4_and_andn, And, Su<Not2<And>>, I32, I32>;
- def: AccRRR_pat<M4_or_andn, Or, Su<Not2<And>>, I32, I32>;
- def: AccRRR_pat<M4_xor_andn, Xor, Su<Not2<And>>, I32, I32>;
+ def: AccRRR_pat<M4_and_andn, And, Su<Not2<And>>, I32, I32, I32>;
+ def: AccRRR_pat<M4_or_andn, Or, Su<Not2<And>>, I32, I32, I32>;
+ def: AccRRR_pat<M4_xor_andn, Xor, Su<Not2<And>>, I32, I32, I32>;
}
// S4_addaddi and S4_subaddi don't have tied operands, so give them
@@ -1444,7 +1497,7 @@ def: Pat<(add I32:$Ru, (Su<Mul> I32:$Ry, I32:$Rs)),
(M4_mpyrr_addr IntRegs:$Ru, IntRegs:$Ry, IntRegs:$Rs)>;
-let Predicates = [HasV5T] in {
+let Predicates = [HasV5] in {
def: Pat<(fma F32:$Rs, F32:$Rt, F32:$Rx),
(F2_sffma F32:$Rx, F32:$Rs, F32:$Rt)>;
def: Pat<(fma (fneg F32:$Rs), F32:$Rt, F32:$Rx),
@@ -1479,13 +1532,13 @@ def: Pat<(v4i16 (mul V4I16:$Rs, V4I16:$Rt)),
// Multiplies two v4i8 vectors.
def: Pat<(v4i8 (mul V4I8:$Rs, V4I8:$Rt)),
(S2_vtrunehb (M5_vmpybuu V4I8:$Rs, V4I8:$Rt))>,
- Requires<[HasV5T]>;
+ Requires<[HasV5]>;
// Multiplies two v8i8 vectors.
def: Pat<(v8i8 (mul V8I8:$Rs, V8I8:$Rt)),
(Combinew (S2_vtrunehb (M5_vmpybuu (HiReg $Rs), (HiReg $Rt))),
(S2_vtrunehb (M5_vmpybuu (LoReg $Rs), (LoReg $Rt))))>,
- Requires<[HasV5T]>;
+ Requires<[HasV5]>;
// --(10) Bit ------------------------------------------------------------
@@ -1519,7 +1572,6 @@ def: Pat<(i32 (ctpop I32:$Rs)), (S5_popcountp (A4_combineir 0, I32:$Rs))>;
def: Pat<(bitreverse I32:$Rs), (S2_brev I32:$Rs)>;
def: Pat<(bitreverse I64:$Rss), (S2_brevp I64:$Rss)>;
-
let AddedComplexity = 20 in { // Complexity greater than and/or/xor
def: Pat<(and I32:$Rs, IsNPow2_32:$V),
(S2_clrbit_i IntRegs:$Rs, (LogN2_32 $V))>;
@@ -1582,6 +1634,15 @@ let AddedComplexity = 10 in // Complexity greater than compare reg-reg.
def: Pat<(i1 (seteq (and I32:$Rs, I32:$Rt), IntRegs:$Rt)),
(C2_bitsset IntRegs:$Rs, IntRegs:$Rt)>;
+def SDTTestBit:
+ SDTypeProfile<1, 2, [SDTCisVT<0, i1>, SDTCisVT<1, i32>, SDTCisVT<2, i32>]>;
+def HexagonTSTBIT: SDNode<"HexagonISD::TSTBIT", SDTTestBit>;
+
+def: Pat<(HexagonTSTBIT I32:$Rs, u5_0ImmPred:$u5),
+ (S2_tstbit_i I32:$Rs, imm:$u5)>;
+def: Pat<(HexagonTSTBIT I32:$Rs, I32:$Rt),
+ (S2_tstbit_r I32:$Rs, I32:$Rt)>;
+
let AddedComplexity = 20 in { // Complexity greater than cmp reg-imm.
def: Pat<(i1 (seteq (and (shl 1, u5_0ImmPred:$u5), I32:$Rs), 0)),
(S4_ntstbit_i I32:$Rs, imm:$u5)>;
@@ -1790,7 +1851,12 @@ let AddedComplexity = 20 in {
defm: Loadxi_pat<zextloadv2i8, v2i16, anyimm1, L2_loadbzw2_io>;
defm: Loadxi_pat<zextloadv4i8, v4i16, anyimm2, L2_loadbzw4_io>;
defm: Loadxi_pat<load, i32, anyimm2, L2_loadri_io>;
+ defm: Loadxi_pat<load, v2i16, anyimm2, L2_loadri_io>;
+ defm: Loadxi_pat<load, v4i8, anyimm2, L2_loadri_io>;
defm: Loadxi_pat<load, i64, anyimm3, L2_loadrd_io>;
+ defm: Loadxi_pat<load, v2i32, anyimm3, L2_loadrd_io>;
+ defm: Loadxi_pat<load, v4i16, anyimm3, L2_loadrd_io>;
+ defm: Loadxi_pat<load, v8i8, anyimm3, L2_loadrd_io>;
defm: Loadxi_pat<load, f32, anyimm2, L2_loadri_io>;
defm: Loadxi_pat<load, f64, anyimm3, L2_loadrd_io>;
// No sextloadi1.
@@ -1828,10 +1894,15 @@ let AddedComplexity = 60 in {
def: Loadxu_pat<zextloadi16, i32, anyimm1, L4_loadruh_ur>;
def: Loadxu_pat<zextloadv2i8, v2i16, anyimm1, L4_loadbzw2_ur>;
def: Loadxu_pat<zextloadv4i8, v4i16, anyimm2, L4_loadbzw4_ur>;
- def: Loadxu_pat<load, f32, anyimm2, L4_loadri_ur>;
- def: Loadxu_pat<load, f64, anyimm3, L4_loadrd_ur>;
def: Loadxu_pat<load, i32, anyimm2, L4_loadri_ur>;
+ def: Loadxu_pat<load, v2i16, anyimm2, L4_loadri_ur>;
+ def: Loadxu_pat<load, v4i8, anyimm2, L4_loadri_ur>;
def: Loadxu_pat<load, i64, anyimm3, L4_loadrd_ur>;
+ def: Loadxu_pat<load, v2i32, anyimm3, L4_loadrd_ur>;
+ def: Loadxu_pat<load, v4i16, anyimm3, L4_loadrd_ur>;
+ def: Loadxu_pat<load, v8i8, anyimm3, L4_loadrd_ur>;
+ def: Loadxu_pat<load, f32, anyimm2, L4_loadri_ur>;
+ def: Loadxu_pat<load, f64, anyimm3, L4_loadrd_ur>;
def: Loadxum_pat<sextloadi8, i64, anyimm0, ToSext64, L4_loadrb_ur>;
def: Loadxum_pat<zextloadi8, i64, anyimm0, ToZext64, L4_loadrub_ur>;
@@ -1845,29 +1916,39 @@ let AddedComplexity = 60 in {
}
let AddedComplexity = 40 in {
- def: Loadxr_shl_pat<extloadi8, i32, L4_loadrub_rr>;
- def: Loadxr_shl_pat<zextloadi8, i32, L4_loadrub_rr>;
- def: Loadxr_shl_pat<sextloadi8, i32, L4_loadrb_rr>;
- def: Loadxr_shl_pat<extloadi16, i32, L4_loadruh_rr>;
- def: Loadxr_shl_pat<zextloadi16, i32, L4_loadruh_rr>;
- def: Loadxr_shl_pat<sextloadi16, i32, L4_loadrh_rr>;
- def: Loadxr_shl_pat<load, i32, L4_loadri_rr>;
- def: Loadxr_shl_pat<load, i64, L4_loadrd_rr>;
- def: Loadxr_shl_pat<load, f32, L4_loadri_rr>;
- def: Loadxr_shl_pat<load, f64, L4_loadrd_rr>;
+ def: Loadxr_shl_pat<extloadi8, i32, L4_loadrub_rr>;
+ def: Loadxr_shl_pat<zextloadi8, i32, L4_loadrub_rr>;
+ def: Loadxr_shl_pat<sextloadi8, i32, L4_loadrb_rr>;
+ def: Loadxr_shl_pat<extloadi16, i32, L4_loadruh_rr>;
+ def: Loadxr_shl_pat<zextloadi16, i32, L4_loadruh_rr>;
+ def: Loadxr_shl_pat<sextloadi16, i32, L4_loadrh_rr>;
+ def: Loadxr_shl_pat<load, i32, L4_loadri_rr>;
+ def: Loadxr_shl_pat<load, v2i16, L4_loadri_rr>;
+ def: Loadxr_shl_pat<load, v4i8, L4_loadri_rr>;
+ def: Loadxr_shl_pat<load, i64, L4_loadrd_rr>;
+ def: Loadxr_shl_pat<load, v2i32, L4_loadrd_rr>;
+ def: Loadxr_shl_pat<load, v4i16, L4_loadrd_rr>;
+ def: Loadxr_shl_pat<load, v8i8, L4_loadrd_rr>;
+ def: Loadxr_shl_pat<load, f32, L4_loadri_rr>;
+ def: Loadxr_shl_pat<load, f64, L4_loadrd_rr>;
}
let AddedComplexity = 20 in {
- def: Loadxr_add_pat<extloadi8, i32, L4_loadrub_rr>;
- def: Loadxr_add_pat<zextloadi8, i32, L4_loadrub_rr>;
- def: Loadxr_add_pat<sextloadi8, i32, L4_loadrb_rr>;
- def: Loadxr_add_pat<extloadi16, i32, L4_loadruh_rr>;
- def: Loadxr_add_pat<zextloadi16, i32, L4_loadruh_rr>;
- def: Loadxr_add_pat<sextloadi16, i32, L4_loadrh_rr>;
- def: Loadxr_add_pat<load, i32, L4_loadri_rr>;
- def: Loadxr_add_pat<load, i64, L4_loadrd_rr>;
- def: Loadxr_add_pat<load, f32, L4_loadri_rr>;
- def: Loadxr_add_pat<load, f64, L4_loadrd_rr>;
+ def: Loadxr_add_pat<extloadi8, i32, L4_loadrub_rr>;
+ def: Loadxr_add_pat<zextloadi8, i32, L4_loadrub_rr>;
+ def: Loadxr_add_pat<sextloadi8, i32, L4_loadrb_rr>;
+ def: Loadxr_add_pat<extloadi16, i32, L4_loadruh_rr>;
+ def: Loadxr_add_pat<zextloadi16, i32, L4_loadruh_rr>;
+ def: Loadxr_add_pat<sextloadi16, i32, L4_loadrh_rr>;
+ def: Loadxr_add_pat<load, i32, L4_loadri_rr>;
+ def: Loadxr_add_pat<load, v2i16, L4_loadri_rr>;
+ def: Loadxr_add_pat<load, v4i8, L4_loadri_rr>;
+ def: Loadxr_add_pat<load, i64, L4_loadrd_rr>;
+ def: Loadxr_add_pat<load, v2i32, L4_loadrd_rr>;
+ def: Loadxr_add_pat<load, v4i16, L4_loadrd_rr>;
+ def: Loadxr_add_pat<load, v8i8, L4_loadrd_rr>;
+ def: Loadxr_add_pat<load, f32, L4_loadri_rr>;
+ def: Loadxr_add_pat<load, f64, L4_loadrd_rr>;
}
let AddedComplexity = 40 in {
@@ -1897,17 +1978,22 @@ let AddedComplexity = 20 in {
// Absolute address
let AddedComplexity = 60 in {
- def: Loada_pat<zextloadi1, i32, anyimm0, PS_loadrubabs>;
- def: Loada_pat<sextloadi8, i32, anyimm0, PS_loadrbabs>;
- def: Loada_pat<extloadi8, i32, anyimm0, PS_loadrubabs>;
- def: Loada_pat<zextloadi8, i32, anyimm0, PS_loadrubabs>;
- def: Loada_pat<sextloadi16, i32, anyimm1, PS_loadrhabs>;
- def: Loada_pat<extloadi16, i32, anyimm1, PS_loadruhabs>;
- def: Loada_pat<zextloadi16, i32, anyimm1, PS_loadruhabs>;
- def: Loada_pat<load, i32, anyimm2, PS_loadriabs>;
- def: Loada_pat<load, i64, anyimm3, PS_loadrdabs>;
- def: Loada_pat<load, f32, anyimm2, PS_loadriabs>;
- def: Loada_pat<load, f64, anyimm3, PS_loadrdabs>;
+ def: Loada_pat<zextloadi1, i32, anyimm0, PS_loadrubabs>;
+ def: Loada_pat<sextloadi8, i32, anyimm0, PS_loadrbabs>;
+ def: Loada_pat<extloadi8, i32, anyimm0, PS_loadrubabs>;
+ def: Loada_pat<zextloadi8, i32, anyimm0, PS_loadrubabs>;
+ def: Loada_pat<sextloadi16, i32, anyimm1, PS_loadrhabs>;
+ def: Loada_pat<extloadi16, i32, anyimm1, PS_loadruhabs>;
+ def: Loada_pat<zextloadi16, i32, anyimm1, PS_loadruhabs>;
+ def: Loada_pat<load, i32, anyimm2, PS_loadriabs>;
+ def: Loada_pat<load, v2i16, anyimm2, PS_loadriabs>;
+ def: Loada_pat<load, v4i8, anyimm2, PS_loadriabs>;
+ def: Loada_pat<load, i64, anyimm3, PS_loadrdabs>;
+ def: Loada_pat<load, v2i32, anyimm3, PS_loadrdabs>;
+ def: Loada_pat<load, v4i16, anyimm3, PS_loadrdabs>;
+ def: Loada_pat<load, v8i8, anyimm3, PS_loadrdabs>;
+ def: Loada_pat<load, f32, anyimm2, PS_loadriabs>;
+ def: Loada_pat<load, f64, anyimm3, PS_loadrdabs>;
def: Loada_pat<atomic_load_8, i32, anyimm0, PS_loadrubabs>;
def: Loada_pat<atomic_load_16, i32, anyimm1, PS_loadruhabs>;
@@ -1933,18 +2019,23 @@ let AddedComplexity = 30 in {
// GP-relative address
let AddedComplexity = 100 in {
- def: Loada_pat<extloadi1, i32, addrgp, L2_loadrubgp>;
- def: Loada_pat<zextloadi1, i32, addrgp, L2_loadrubgp>;
- def: Loada_pat<extloadi8, i32, addrgp, L2_loadrubgp>;
- def: Loada_pat<sextloadi8, i32, addrgp, L2_loadrbgp>;
- def: Loada_pat<zextloadi8, i32, addrgp, L2_loadrubgp>;
- def: Loada_pat<extloadi16, i32, addrgp, L2_loadruhgp>;
- def: Loada_pat<sextloadi16, i32, addrgp, L2_loadrhgp>;
- def: Loada_pat<zextloadi16, i32, addrgp, L2_loadruhgp>;
- def: Loada_pat<load, i32, addrgp, L2_loadrigp>;
- def: Loada_pat<load, i64, addrgp, L2_loadrdgp>;
- def: Loada_pat<load, f32, addrgp, L2_loadrigp>;
- def: Loada_pat<load, f64, addrgp, L2_loadrdgp>;
+ def: Loada_pat<extloadi1, i32, addrgp, L2_loadrubgp>;
+ def: Loada_pat<zextloadi1, i32, addrgp, L2_loadrubgp>;
+ def: Loada_pat<extloadi8, i32, addrgp, L2_loadrubgp>;
+ def: Loada_pat<sextloadi8, i32, addrgp, L2_loadrbgp>;
+ def: Loada_pat<zextloadi8, i32, addrgp, L2_loadrubgp>;
+ def: Loada_pat<extloadi16, i32, addrgp, L2_loadruhgp>;
+ def: Loada_pat<sextloadi16, i32, addrgp, L2_loadrhgp>;
+ def: Loada_pat<zextloadi16, i32, addrgp, L2_loadruhgp>;
+ def: Loada_pat<load, i32, addrgp, L2_loadrigp>;
+ def: Loada_pat<load, v2i16, addrgp, L2_loadrigp>;
+ def: Loada_pat<load, v4i8, addrgp, L2_loadrigp>;
+ def: Loada_pat<load, i64, addrgp, L2_loadrdgp>;
+ def: Loada_pat<load, v2i32, addrgp, L2_loadrdgp>;
+ def: Loada_pat<load, v4i16, addrgp, L2_loadrdgp>;
+ def: Loada_pat<load, v8i8, addrgp, L2_loadrdgp>;
+ def: Loada_pat<load, f32, addrgp, L2_loadrigp>;
+ def: Loada_pat<load, f64, addrgp, L2_loadrdgp>;
def: Loada_pat<atomic_load_8, i32, addrgp, L2_loadrubgp>;
def: Loada_pat<atomic_load_16, i32, addrgp, L2_loadruhgp>;
@@ -1983,46 +2074,10 @@ def: Pat<(i1 (load (add I32:$Rs, anyimm0:$Off))),
def: Pat<(i1 (load I32:$Rs)),
(C2_tfrrp (L2_loadrub_io IntRegs:$Rs, 0))>;
-// HVX loads
-
-multiclass HvxLd_pat<InstHexagon MI, PatFrag Load, ValueType VT,
- PatFrag ImmPred> {
- def: Pat<(VT (Load I32:$Rt)), (MI I32:$Rt, 0)>;
- def: Pat<(VT (Load (add I32:$Rt, ImmPred:$s))), (MI I32:$Rt, imm:$s)>;
- // The HVX selection code for shuffles can generate vector constants.
- // Calling "Select" on the resulting loads from CP fails without these
- // patterns.
- def: Pat<(VT (Load (HexagonCP tconstpool:$A))), (MI (A2_tfrsi imm:$A), 0)>;
- def: Pat<(VT (Load (HexagonAtPcrel tconstpool:$A))),
- (MI (C4_addipc imm:$A), 0)>;
-}
-
-
-let Predicates = [UseHVX] in {
- multiclass HvxLdVs_pat<InstHexagon MI, PatFrag Load> {
- defm: HvxLd_pat<MI, Load, VecI8, IsVecOff>;
- defm: HvxLd_pat<MI, Load, VecI16, IsVecOff>;
- defm: HvxLd_pat<MI, Load, VecI32, IsVecOff>;
- }
- defm: HvxLdVs_pat<V6_vL32b_nt_ai, alignednontemporalload>;
- defm: HvxLdVs_pat<V6_vL32b_ai, alignedload>;
- defm: HvxLdVs_pat<V6_vL32Ub_ai, unalignedload>;
-
- multiclass HvxLdWs_pat<InstHexagon MI, PatFrag Load> {
- defm: HvxLd_pat<MI, Load, VecPI8, IsVecOff>;
- defm: HvxLd_pat<MI, Load, VecPI16, IsVecOff>;
- defm: HvxLd_pat<MI, Load, VecPI32, IsVecOff>;
- }
- defm: HvxLdWs_pat<PS_vloadrw_nt_ai, alignednontemporalload>;
- defm: HvxLdWs_pat<PS_vloadrw_ai, alignedload>;
- defm: HvxLdWs_pat<PS_vloadrwu_ai, unalignedload>;
-}
-
// --(13) Store ----------------------------------------------------------
//
-
class Storepi_pat<PatFrag Store, PatFrag Value, PatFrag Offset, InstHexagon MI>
: Pat<(Store Value:$Rt, I32:$Rx, Offset:$s4),
(MI I32:$Rx, imm:$s4, Value:$Rt)>;
@@ -2135,7 +2190,7 @@ class Stoream_pat<PatFrag Store, PatFrag Value, PatFrag Addr, PatFrag ValueMod,
// swapped. This relies on the knowledge that the F.Fragment uses names
// "ptr" and "val".
class AtomSt<PatFrag F>
- : PatFrag<(ops node:$val, node:$ptr), F.Fragment, F.PredicateCode,
+ : PatFrag<(ops node:$val, node:$ptr), !head(F.Fragments), F.PredicateCode,
F.OperandTransform> {
let IsAtomic = F.IsAtomic;
let MemoryVT = F.MemoryVT;
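  // (!head returns the first element of a TableGen list; these PatFrags
  // carry a single pattern, so !head(F.Fragments) selects it, matching
  // what the singular F.Fragment field used to provide.)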
@@ -2459,36 +2514,6 @@ let AddedComplexity = 10 in {
def: Storexi_base_pat<AtomSt<atomic_store_64>, I64, S2_storerd_io>;
}
-// HVX stores
-
-multiclass HvxSt_pat<InstHexagon MI, PatFrag Store, PatFrag ImmPred,
- PatFrag Value> {
- def: Pat<(Store Value:$Vs, I32:$Rt),
- (MI I32:$Rt, 0, Value:$Vs)>;
- def: Pat<(Store Value:$Vs, (add I32:$Rt, ImmPred:$s)),
- (MI I32:$Rt, imm:$s, Value:$Vs)>;
-}
-
-let Predicates = [UseHVX] in {
- multiclass HvxStVs_pat<InstHexagon MI, PatFrag Store> {
- defm: HvxSt_pat<MI, Store, IsVecOff, HVI8>;
- defm: HvxSt_pat<MI, Store, IsVecOff, HVI16>;
- defm: HvxSt_pat<MI, Store, IsVecOff, HVI32>;
- }
- defm: HvxStVs_pat<V6_vS32b_nt_ai, alignednontemporalstore>;
- defm: HvxStVs_pat<V6_vS32b_ai, alignedstore>;
- defm: HvxStVs_pat<V6_vS32Ub_ai, unalignedstore>;
-
- multiclass HvxStWs_pat<InstHexagon MI, PatFrag Store> {
- defm: HvxSt_pat<MI, Store, IsVecOff, HWI8>;
- defm: HvxSt_pat<MI, Store, IsVecOff, HWI16>;
- defm: HvxSt_pat<MI, Store, IsVecOff, HWI32>;
- }
- defm: HvxStWs_pat<PS_vstorerw_nt_ai, alignednontemporalstore>;
- defm: HvxStWs_pat<PS_vstorerw_ai, alignedstore>;
- defm: HvxStWs_pat<PS_vstorerwu_ai, unalignedstore>;
-}
-
// --(14) Memop ----------------------------------------------------------
//
@@ -2570,8 +2595,10 @@ multiclass Memopxr_add_pat<PatFrag Load, PatFrag Store, PatFrag ImmPred,
multiclass Memopxr_pat<PatFrag Load, PatFrag Store, PatFrag ImmPred,
SDNode Oper, InstHexagon MI> {
- defm: Memopxr_base_pat <Load, Store, Oper, MI>;
- defm: Memopxr_add_pat <Load, Store, ImmPred, Oper, MI>;
+ let Predicates = [UseMEMOPS] in {
+ defm: Memopxr_base_pat <Load, Store, Oper, MI>;
+ defm: Memopxr_add_pat <Load, Store, ImmPred, Oper, MI>;
+ }
}
let AddedComplexity = 200 in {
@@ -2669,8 +2696,10 @@ multiclass Memopxi_add_pat<PatFrag Load, PatFrag Store, PatFrag ImmPred,
multiclass Memopxi_pat<PatFrag Load, PatFrag Store, PatFrag ImmPred,
SDNode Oper, PatFrag Arg, SDNodeXForm ArgMod,
InstHexagon MI> {
- defm: Memopxi_base_pat <Load, Store, Oper, Arg, ArgMod, MI>;
- defm: Memopxi_add_pat <Load, Store, ImmPred, Oper, Arg, ArgMod, MI>;
+ let Predicates = [UseMEMOPS] in {
+ defm: Memopxi_base_pat <Load, Store, Oper, Arg, ArgMod, MI>;
+ defm: Memopxi_add_pat <Load, Store, ImmPred, Oper, Arg, ArgMod, MI>;
+ }
}
let AddedComplexity = 220 in {
@@ -2829,6 +2858,8 @@ def: Pat<(brcond (not I1:$Pu), bb:$dst),
(J2_jumpf I1:$Pu, bb:$dst)>;
def: Pat<(brcond (i1 (setne I1:$Pu, -1)), bb:$dst),
(J2_jumpf I1:$Pu, bb:$dst)>;
+def: Pat<(brcond (i1 (seteq I1:$Pu, 0)), bb:$dst),
+ (J2_jumpf I1:$Pu, bb:$dst)>;
def: Pat<(brcond (i1 (setne I1:$Pu, 0)), bb:$dst),
(J2_jumpt I1:$Pu, bb:$dst)>;
@@ -2898,97 +2929,17 @@ def HexagonREADCYCLE: SDNode<"HexagonISD::READCYCLE", SDTInt64Leaf,
def: Pat<(HexagonREADCYCLE), (A4_tfrcpp UPCYCLE)>;
-
-def SDTVecLeaf: SDTypeProfile<1, 0, [SDTCisVec<0>]>;
-
-def SDTHexagonVEXTRACTW: SDTypeProfile<1, 2,
- [SDTCisVT<0, i32>, SDTCisVec<1>, SDTCisVT<2, i32>]>;
-def HexagonVEXTRACTW : SDNode<"HexagonISD::VEXTRACTW", SDTHexagonVEXTRACTW>;
-
-def SDTHexagonVINSERTW0: SDTypeProfile<1, 2,
- [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisVT<2, i32>]>;
-def HexagonVINSERTW0 : SDNode<"HexagonISD::VINSERTW0", SDTHexagonVINSERTW0>;
-
-def Combinev: OutPatFrag<(ops node:$Rs, node:$Rt),
- (REG_SEQUENCE HvxWR, $Rs, vsub_hi, $Rt, vsub_lo)>;
-
-def LoVec: OutPatFrag<(ops node:$Vs), (EXTRACT_SUBREG $Vs, vsub_lo)>;
-def HiVec: OutPatFrag<(ops node:$Vs), (EXTRACT_SUBREG $Vs, vsub_hi)>;
-
-let Predicates = [UseHVX] in {
- def: OpR_RR_pat<V6_vpackeb, pf2<HexagonVPACKE>, VecI8, HVI8>;
- def: OpR_RR_pat<V6_vpackob, pf2<HexagonVPACKO>, VecI8, HVI8>;
- def: OpR_RR_pat<V6_vpackeh, pf2<HexagonVPACKE>, VecI16, HVI16>;
- def: OpR_RR_pat<V6_vpackoh, pf2<HexagonVPACKO>, VecI16, HVI16>;
-}
-
-def HexagonVZERO: SDNode<"HexagonISD::VZERO", SDTVecLeaf>;
-def vzero: PatFrag<(ops), (HexagonVZERO)>;
-
-let Predicates = [UseHVX] in {
- def: Pat<(VecI8 vzero), (V6_vd0)>;
- def: Pat<(VecI16 vzero), (V6_vd0)>;
- def: Pat<(VecI32 vzero), (V6_vd0)>;
-
- def: Pat<(VecPI8 (concat_vectors HVI8:$Vs, HVI8:$Vt)),
- (Combinev HvxVR:$Vt, HvxVR:$Vs)>;
- def: Pat<(VecPI16 (concat_vectors HVI16:$Vs, HVI16:$Vt)),
- (Combinev HvxVR:$Vt, HvxVR:$Vs)>;
- def: Pat<(VecPI32 (concat_vectors HVI32:$Vs, HVI32:$Vt)),
- (Combinev HvxVR:$Vt, HvxVR:$Vs)>;
-
- def: Pat<(HexagonVEXTRACTW HVI8:$Vu, I32:$Rs),
- (V6_extractw HvxVR:$Vu, I32:$Rs)>;
- def: Pat<(HexagonVEXTRACTW HVI16:$Vu, I32:$Rs),
- (V6_extractw HvxVR:$Vu, I32:$Rs)>;
- def: Pat<(HexagonVEXTRACTW HVI32:$Vu, I32:$Rs),
- (V6_extractw HvxVR:$Vu, I32:$Rs)>;
-
- def: Pat<(HexagonVINSERTW0 HVI8:$Vu, I32:$Rt),
- (V6_vinsertwr HvxVR:$Vu, I32:$Rt)>;
- def: Pat<(HexagonVINSERTW0 HVI16:$Vu, I32:$Rt),
- (V6_vinsertwr HvxVR:$Vu, I32:$Rt)>;
- def: Pat<(HexagonVINSERTW0 HVI32:$Vu, I32:$Rt),
- (V6_vinsertwr HvxVR:$Vu, I32:$Rt)>;
-
- def: Pat<(add HVI8:$Vs, HVI8:$Vt), (V6_vaddb HvxVR:$Vs, HvxVR:$Vt)>;
- def: Pat<(add HVI16:$Vs, HVI16:$Vt), (V6_vaddh HvxVR:$Vs, HvxVR:$Vt)>;
- def: Pat<(add HVI32:$Vs, HVI32:$Vt), (V6_vaddw HvxVR:$Vs, HvxVR:$Vt)>;
-
- def: Pat<(sub HVI8:$Vs, HVI8:$Vt), (V6_vsubb HvxVR:$Vs, HvxVR:$Vt)>;
- def: Pat<(sub HVI16:$Vs, HVI16:$Vt), (V6_vsubh HvxVR:$Vs, HvxVR:$Vt)>;
- def: Pat<(sub HVI32:$Vs, HVI32:$Vt), (V6_vsubw HvxVR:$Vs, HvxVR:$Vt)>;
-
- def: Pat<(and HVI8:$Vs, HVI8:$Vt), (V6_vand HvxVR:$Vs, HvxVR:$Vt)>;
- def: Pat<(or HVI8:$Vs, HVI8:$Vt), (V6_vor HvxVR:$Vs, HvxVR:$Vt)>;
- def: Pat<(xor HVI8:$Vs, HVI8:$Vt), (V6_vxor HvxVR:$Vs, HvxVR:$Vt)>;
-
- def: Pat<(vselect HQ8:$Qu, HVI8:$Vs, HVI8:$Vt),
- (V6_vmux HvxQR:$Qu, HvxVR:$Vs, HvxVR:$Vt)>;
- def: Pat<(vselect HQ16:$Qu, HVI16:$Vs, HVI16:$Vt),
- (V6_vmux HvxQR:$Qu, HvxVR:$Vs, HvxVR:$Vt)>;
- def: Pat<(vselect HQ32:$Qu, HVI32:$Vs, HVI32:$Vt),
- (V6_vmux HvxQR:$Qu, HvxVR:$Vs, HvxVR:$Vt)>;
-
- def: Pat<(VecPI16 (sext HVI8:$Vs)), (V6_vsb HvxVR:$Vs)>;
- def: Pat<(VecPI32 (sext HVI16:$Vs)), (V6_vsh HvxVR:$Vs)>;
- def: Pat<(VecPI16 (zext HVI8:$Vs)), (V6_vzb HvxVR:$Vs)>;
- def: Pat<(VecPI32 (zext HVI16:$Vs)), (V6_vzh HvxVR:$Vs)>;
-
- def: Pat<(sext_inreg HVI32:$Vs, v16i16),
- (V6_vpackeb (LoVec (V6_vsh HvxVR:$Vs)),
- (HiVec (V6_vsh HvxVR:$Vs)))>;
- def: Pat<(sext_inreg HVI32:$Vs, v32i16),
- (V6_vpackeb (LoVec (V6_vsh HvxVR:$Vs)),
- (HiVec (V6_vsh HvxVR:$Vs)))>;
-
- def: Pat<(VecI16 (sext_invec HVI8:$Vs)), (LoVec (V6_vsb HvxVR:$Vs))>;
- def: Pat<(VecI32 (sext_invec HVI16:$Vs)), (LoVec (V6_vsh HvxVR:$Vs))>;
- def: Pat<(VecI32 (sext_invec HVI8:$Vs)),
- (LoVec (V6_vsh (LoVec (V6_vsb HvxVR:$Vs))))>;
-
- def: Pat<(VecI16 (zext_invec HVI8:$Vs)), (LoVec (V6_vzb HvxVR:$Vs))>;
- def: Pat<(VecI32 (zext_invec HVI16:$Vs)), (LoVec (V6_vzh HvxVR:$Vs))>;
- def: Pat<(VecI32 (zext_invec HVI8:$Vs)),
- (LoVec (V6_vzh (LoVec (V6_vzb HvxVR:$Vs))))>;
+// The declared return value of the store-locked intrinsics is i32, but
+// the instructions actually define i1. To avoid register copies from
+// IntRegs to PredRegs and back, fold the entire pattern that checks the
+// result against zero/non-zero.
+let AddedComplexity = 100 in {
+ def: Pat<(i1 (setne (int_hexagon_S2_storew_locked I32:$Rs, I32:$Rt), 0)),
+ (S2_storew_locked I32:$Rs, I32:$Rt)>;
+ def: Pat<(i1 (seteq (int_hexagon_S2_storew_locked I32:$Rs, I32:$Rt), 0)),
+ (C2_not (S2_storew_locked I32:$Rs, I32:$Rt))>;
+ def: Pat<(i1 (setne (int_hexagon_S4_stored_locked I32:$Rs, I64:$Rt), 0)),
+ (S4_stored_locked I32:$Rs, I64:$Rt)>;
+ def: Pat<(i1 (seteq (int_hexagon_S4_stored_locked I32:$Rs, I64:$Rt), 0)),
+ (C2_not (S4_stored_locked I32:$Rs, I64:$Rt))>;
}
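+// A rough illustration (hypothetical C source; Hexagon builtins follow the
+// __builtin_HEXAGON_<instr> naming convention):
+//   do { } while (__builtin_HEXAGON_S2_storew_locked(p, v) == 0);
+// With the patterns above, the compare folds away and the predicate result
+// of S2_storew_locked feeds the conditional branch directly.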
diff --git a/lib/Target/Hexagon/HexagonPatternsHVX.td b/lib/Target/Hexagon/HexagonPatternsHVX.td
new file mode 100644
index 000000000000..a4cfca9ac7d7
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonPatternsHVX.td
@@ -0,0 +1,497 @@
+def SDTVecLeaf:
+ SDTypeProfile<1, 0, [SDTCisVec<0>]>;
+def SDTVecBinOp:
+ SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, SDTCisSameAs<1,2>]>;
+
+def SDTHexagonVEXTRACTW: SDTypeProfile<1, 2,
+ [SDTCisVT<0, i32>, SDTCisVec<1>, SDTCisVT<2, i32>]>;
+def HexagonVEXTRACTW : SDNode<"HexagonISD::VEXTRACTW", SDTHexagonVEXTRACTW>;
+
+def SDTHexagonVINSERTW0: SDTypeProfile<1, 2,
+ [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisVT<2, i32>]>;
+def HexagonVINSERTW0: SDNode<"HexagonISD::VINSERTW0", SDTHexagonVINSERTW0>;
+
+def SDTHexagonVSPLATW: SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>;
+def HexagonVSPLATW: SDNode<"HexagonISD::VSPLATW", SDTHexagonVSPLATW>;
+
+def HwLen2: SDNodeXForm<imm, [{
+ const auto &ST = static_cast<const HexagonSubtarget&>(CurDAG->getSubtarget());
+ return CurDAG->getTargetConstant(ST.getVectorLength()/2, SDLoc(N), MVT::i32);
+}]>;
+
+def Q2V: OutPatFrag<(ops node:$Qs), (V6_vandqrt $Qs, (A2_tfrsi -1))>;
+
+def Combinev: OutPatFrag<(ops node:$Vs, node:$Vt),
+ (REG_SEQUENCE HvxWR, $Vs, vsub_hi, $Vt, vsub_lo)>;
+
+def Combineq: OutPatFrag<(ops node:$Qs, node:$Qt),
+ (V6_vandvrt
+ (V6_vor
+ (V6_vror (V6_vpackeb (V6_vd0), (Q2V $Qs)),
+ (A2_tfrsi (HwLen2 (i32 0)))), // Half the vector length
+ (V6_vpackeb (V6_vd0), (Q2V $Qt))),
+ (A2_tfrsi -1))>;
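+// Roughly, Combineq packs each predicate into the even bytes of a vector
+// (via Q2V and V6_vpackeb against a zero vector), rotates one copy by half
+// the vector length with V6_vror, ORs the two halves together, and then
+// converts the merged vector back to a predicate with V6_vandvrt.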
+
+def LoVec: OutPatFrag<(ops node:$Vs), (EXTRACT_SUBREG $Vs, vsub_lo)>;
+def HiVec: OutPatFrag<(ops node:$Vs), (EXTRACT_SUBREG $Vs, vsub_hi)>;
+
+def HexagonVZERO: SDNode<"HexagonISD::VZERO", SDTVecLeaf>;
+def HexagonQCAT: SDNode<"HexagonISD::QCAT", SDTVecBinOp>;
+def HexagonQTRUE: SDNode<"HexagonISD::QTRUE", SDTVecLeaf>;
+def HexagonQFALSE: SDNode<"HexagonISD::QFALSE", SDTVecLeaf>;
+
+def vzero: PatFrag<(ops), (HexagonVZERO)>;
+def qtrue: PatFrag<(ops), (HexagonQTRUE)>;
+def qfalse: PatFrag<(ops), (HexagonQFALSE)>;
+def qcat: PatFrag<(ops node:$Qs, node:$Qt),
+ (HexagonQCAT node:$Qs, node:$Qt)>;
+
+def qnot: PatFrag<(ops node:$Qs), (xor node:$Qs, qtrue)>;
+
+def VSxtb: OutPatFrag<(ops node:$Vs), (V6_vunpackb $Vs)>;
+def VSxth: OutPatFrag<(ops node:$Vs), (V6_vunpackh $Vs)>;
+def VZxtb: OutPatFrag<(ops node:$Vs), (V6_vunpackub $Vs)>;
+def VZxth: OutPatFrag<(ops node:$Vs), (V6_vunpackuh $Vs)>;
+
+def SplatB: SDNodeXForm<imm, [{
+ uint32_t V = N->getZExtValue();
+ assert(isUInt<8>(V));
+ uint32_t S = V << 24 | V << 16 | V << 8 | V;
+ return CurDAG->getTargetConstant(S, SDLoc(N), MVT::i32);
+}]>;
+
+def SplatH: SDNodeXForm<imm, [{
+ uint32_t V = N->getZExtValue();
+ assert(isUInt<16>(V));
+ return CurDAG->getTargetConstant(V << 16 | V, SDLoc(N), MVT::i32);
+}]>;
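+// For example, SplatB turns the 8-bit immediate 0x2A into 0x2A2A2A2A and
+// SplatH turns the 16-bit immediate 0x1234 into 0x12341234, so a single
+// word splat (V6_lvsplatw) can replicate byte and halfword immediates.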
+
+def IsVecOff : PatLeaf<(i32 imm), [{
+ int32_t V = N->getSExtValue();
+ int32_t VecSize = HRI->getSpillSize(Hexagon::HvxVRRegClass);
+ assert(isPowerOf2_32(VecSize));
+ if ((uint32_t(V) & (uint32_t(VecSize)-1)) != 0)
+ return false;
+ int32_t L = Log2_32(VecSize);
+ return isInt<4>(V >> L);
+}]>;
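+// E.g. with 64-byte vectors the spill size is 64, so IsVecOff accepts
+// offsets that are multiples of 64 in the range -512..448, i.e. -8..7
+// vectors (the signed 4-bit range after scaling).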
+
+
+def alignedload: PatFrag<(ops node:$a), (load $a), [{
+ return isAlignedMemNode(dyn_cast<MemSDNode>(N));
+}]>;
+
+def unalignedload: PatFrag<(ops node:$a), (load $a), [{
+ return !isAlignedMemNode(dyn_cast<MemSDNode>(N));
+}]>;
+
+def alignedstore: PatFrag<(ops node:$v, node:$a), (store $v, $a), [{
+ return isAlignedMemNode(dyn_cast<MemSDNode>(N));
+}]>;
+
+def unalignedstore: PatFrag<(ops node:$v, node:$a), (store $v, $a), [{
+ return !isAlignedMemNode(dyn_cast<MemSDNode>(N));
+}]>;
+
+
+// HVX loads
+
+multiclass HvxLd_pat<InstHexagon MI, PatFrag Load, ValueType ResType,
+ PatFrag ImmPred> {
+ def: Pat<(ResType (Load I32:$Rt)),
+ (MI I32:$Rt, 0)>;
+ def: Pat<(ResType (Load (add I32:$Rt, ImmPred:$s))),
+ (MI I32:$Rt, imm:$s)>;
+ // The HVX selection code for shuffles can generate vector constants.
+  // Calling "Select" on the resulting loads from the constant pool fails
+  // without these patterns.
+ def: Pat<(ResType (Load (HexagonCP tconstpool:$A))),
+ (MI (A2_tfrsi imm:$A), 0)>;
+ def: Pat<(ResType (Load (HexagonAtPcrel tconstpool:$A))),
+ (MI (C4_addipc imm:$A), 0)>;
+}
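+// For instance, HvxLd_pat<V6_vL32Ub_ai, unalignedload, VecI8, IsVecOff>
+// yields patterns for an unaligned VecI8 load from a plain register, from
+// reg+vector-offset, and from the two constant-pool address forms.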
+
+multiclass HvxLda_pat<InstHexagon MI, PatFrag Load, ValueType ResType,
+ PatFrag ImmPred> {
+ let AddedComplexity = 50 in {
+ def: Pat<(ResType (Load (valignaddr I32:$Rt))),
+ (MI I32:$Rt, 0)>;
+ def: Pat<(ResType (Load (add (valignaddr I32:$Rt), ImmPred:$Off))),
+ (MI I32:$Rt, imm:$Off)>;
+ }
+ defm: HvxLd_pat<MI, Load, ResType, ImmPred>;
+}
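+// The AddedComplexity of 50 lets the valignaddr forms (addresses already
+// known to be vector-aligned) win over the plain HvxLd_pat patterns when
+// both could match the same load.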
+
+let Predicates = [UseHVX] in {
+ defm: HvxLda_pat<V6_vL32b_nt_ai, alignednontemporalload, VecI8, IsVecOff>;
+ defm: HvxLda_pat<V6_vL32b_nt_ai, alignednontemporalload, VecI16, IsVecOff>;
+ defm: HvxLda_pat<V6_vL32b_nt_ai, alignednontemporalload, VecI32, IsVecOff>;
+
+ defm: HvxLda_pat<V6_vL32b_ai, alignedload, VecI8, IsVecOff>;
+ defm: HvxLda_pat<V6_vL32b_ai, alignedload, VecI16, IsVecOff>;
+ defm: HvxLda_pat<V6_vL32b_ai, alignedload, VecI32, IsVecOff>;
+
+ defm: HvxLd_pat<V6_vL32Ub_ai, unalignedload, VecI8, IsVecOff>;
+ defm: HvxLd_pat<V6_vL32Ub_ai, unalignedload, VecI16, IsVecOff>;
+ defm: HvxLd_pat<V6_vL32Ub_ai, unalignedload, VecI32, IsVecOff>;
+}
+
+// HVX stores
+
+multiclass HvxSt_pat<InstHexagon MI, PatFrag Store, PatFrag ImmPred,
+ PatFrag Value> {
+ def: Pat<(Store Value:$Vs, I32:$Rt),
+ (MI I32:$Rt, 0, Value:$Vs)>;
+ def: Pat<(Store Value:$Vs, (add I32:$Rt, ImmPred:$s)),
+ (MI I32:$Rt, imm:$s, Value:$Vs)>;
+}
+
+let Predicates = [UseHVX] in {
+ defm: HvxSt_pat<V6_vS32b_nt_ai, alignednontemporalstore, IsVecOff, HVI8>;
+ defm: HvxSt_pat<V6_vS32b_nt_ai, alignednontemporalstore, IsVecOff, HVI16>;
+ defm: HvxSt_pat<V6_vS32b_nt_ai, alignednontemporalstore, IsVecOff, HVI32>;
+
+ defm: HvxSt_pat<V6_vS32b_ai, alignedstore, IsVecOff, HVI8>;
+ defm: HvxSt_pat<V6_vS32b_ai, alignedstore, IsVecOff, HVI16>;
+ defm: HvxSt_pat<V6_vS32b_ai, alignedstore, IsVecOff, HVI32>;
+
+ defm: HvxSt_pat<V6_vS32Ub_ai, unalignedstore, IsVecOff, HVI8>;
+ defm: HvxSt_pat<V6_vS32Ub_ai, unalignedstore, IsVecOff, HVI16>;
+ defm: HvxSt_pat<V6_vS32Ub_ai, unalignedstore, IsVecOff, HVI32>;
+}
+
+// Bitcasts between same-size vector types are no-ops: the bits stay the
+// same and only the type changes.
+class Bitcast<ValueType ResTy, ValueType InpTy, RegisterClass RC>
+ : Pat<(ResTy (bitconvert (InpTy RC:$Val))), (ResTy RC:$Val)>;
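+// E.g. Bitcast<VecI16, VecI8, HvxVR> matches (VecI16 (bitconvert VecI8:$V))
+// and simply reuses $V with its new type, emitting no instructions.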
+
+let Predicates = [UseHVX] in {
+ def: Bitcast<VecI8, VecI16, HvxVR>;
+ def: Bitcast<VecI8, VecI32, HvxVR>;
+ def: Bitcast<VecI16, VecI8, HvxVR>;
+ def: Bitcast<VecI16, VecI32, HvxVR>;
+ def: Bitcast<VecI32, VecI8, HvxVR>;
+ def: Bitcast<VecI32, VecI16, HvxVR>;
+
+ def: Bitcast<VecPI8, VecPI16, HvxWR>;
+ def: Bitcast<VecPI8, VecPI32, HvxWR>;
+ def: Bitcast<VecPI16, VecPI8, HvxWR>;
+ def: Bitcast<VecPI16, VecPI32, HvxWR>;
+ def: Bitcast<VecPI32, VecPI8, HvxWR>;
+ def: Bitcast<VecPI32, VecPI16, HvxWR>;
+}
+
+let Predicates = [UseHVX] in {
+ def: Pat<(VecI8 vzero), (V6_vd0)>;
+ def: Pat<(VecI16 vzero), (V6_vd0)>;
+ def: Pat<(VecI32 vzero), (V6_vd0)>;
+ def: Pat<(VecPI8 vzero), (PS_vdd0)>;
+ def: Pat<(VecPI16 vzero), (PS_vdd0)>;
+ def: Pat<(VecPI32 vzero), (PS_vdd0)>;
+
+ def: Pat<(concat_vectors (VecI8 vzero), (VecI8 vzero)), (PS_vdd0)>;
+ def: Pat<(concat_vectors (VecI16 vzero), (VecI16 vzero)), (PS_vdd0)>;
+ def: Pat<(concat_vectors (VecI32 vzero), (VecI32 vzero)), (PS_vdd0)>;
+
+ def: Pat<(VecPI8 (concat_vectors HVI8:$Vs, HVI8:$Vt)),
+ (Combinev HvxVR:$Vt, HvxVR:$Vs)>;
+ def: Pat<(VecPI16 (concat_vectors HVI16:$Vs, HVI16:$Vt)),
+ (Combinev HvxVR:$Vt, HvxVR:$Vs)>;
+ def: Pat<(VecPI32 (concat_vectors HVI32:$Vs, HVI32:$Vt)),
+ (Combinev HvxVR:$Vt, HvxVR:$Vs)>;
+
+ def: Pat<(VecQ8 (qcat HQ16:$Qs, HQ16:$Qt)), (Combineq $Qt, $Qs)>;
+ def: Pat<(VecQ16 (qcat HQ32:$Qs, HQ32:$Qt)), (Combineq $Qt, $Qs)>;
+
+ def: Pat<(HexagonVEXTRACTW HVI8:$Vu, I32:$Rs),
+ (V6_extractw HvxVR:$Vu, I32:$Rs)>;
+ def: Pat<(HexagonVEXTRACTW HVI16:$Vu, I32:$Rs),
+ (V6_extractw HvxVR:$Vu, I32:$Rs)>;
+ def: Pat<(HexagonVEXTRACTW HVI32:$Vu, I32:$Rs),
+ (V6_extractw HvxVR:$Vu, I32:$Rs)>;
+
+ def: Pat<(HexagonVINSERTW0 HVI8:$Vu, I32:$Rt),
+ (V6_vinsertwr HvxVR:$Vu, I32:$Rt)>;
+ def: Pat<(HexagonVINSERTW0 HVI16:$Vu, I32:$Rt),
+ (V6_vinsertwr HvxVR:$Vu, I32:$Rt)>;
+ def: Pat<(HexagonVINSERTW0 HVI32:$Vu, I32:$Rt),
+ (V6_vinsertwr HvxVR:$Vu, I32:$Rt)>;
+}
+
+def Vsplatib: OutPatFrag<(ops node:$V), (V6_lvsplatw (ToI32 (SplatB $V)))>;
+def Vsplatih: OutPatFrag<(ops node:$V), (V6_lvsplatw (ToI32 (SplatH $V)))>;
+def Vsplatiw: OutPatFrag<(ops node:$V), (V6_lvsplatw (ToI32 $V))>;
+
+def Vsplatrb: OutPatFrag<(ops node:$Rs), (V6_lvsplatw (S2_vsplatrb $Rs))>;
+def Vsplatrh: OutPatFrag<(ops node:$Rs),
+ (V6_lvsplatw (A2_combine_ll $Rs, $Rs))>;
+def Vsplatrw: OutPatFrag<(ops node:$Rs), (V6_lvsplatw $Rs)>;
+
+def Rep: OutPatFrag<(ops node:$N), (Combinev $N, $N)>;
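+// Rep builds a vector pair by repeating one vector in both halves; the
+// Vsplat* fragments above construct the 32-bit word to be splat, either
+// from an immediate (Vsplati*) or from a register (Vsplatr*).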
+
+let Predicates = [UseHVX] in {
+ let AddedComplexity = 10 in {
+ def: Pat<(VecI8 (HexagonVSPLAT u8_0ImmPred:$V)), (Vsplatib $V)>;
+ def: Pat<(VecI16 (HexagonVSPLAT u16_0ImmPred:$V)), (Vsplatih $V)>;
+ def: Pat<(VecI32 (HexagonVSPLAT anyimm:$V)), (Vsplatiw $V)>;
+ def: Pat<(VecPI8 (HexagonVSPLAT u8_0ImmPred:$V)), (Rep (Vsplatib $V))>;
+ def: Pat<(VecPI16 (HexagonVSPLAT u16_0ImmPred:$V)), (Rep (Vsplatih $V))>;
+ def: Pat<(VecPI32 (HexagonVSPLAT anyimm:$V)), (Rep (Vsplatiw $V))>;
+ }
+ def: Pat<(VecI8 (HexagonVSPLAT I32:$Rs)), (Vsplatrb $Rs)>;
+ def: Pat<(VecI16 (HexagonVSPLAT I32:$Rs)), (Vsplatrh $Rs)>;
+ def: Pat<(VecI32 (HexagonVSPLAT I32:$Rs)), (Vsplatrw $Rs)>;
+ def: Pat<(VecPI8 (HexagonVSPLAT I32:$Rs)), (Rep (Vsplatrb $Rs))>;
+ def: Pat<(VecPI16 (HexagonVSPLAT I32:$Rs)), (Rep (Vsplatrh $Rs))>;
+ def: Pat<(VecPI32 (HexagonVSPLAT I32:$Rs)), (Rep (Vsplatrw $Rs))>;
+
+ def: Pat<(VecI8 (HexagonVSPLATW I32:$Rs)), (Vsplatrw $Rs)>;
+ def: Pat<(VecI16 (HexagonVSPLATW I32:$Rs)), (Vsplatrw $Rs)>;
+ def: Pat<(VecI32 (HexagonVSPLATW I32:$Rs)), (Vsplatrw $Rs)>;
+ def: Pat<(VecPI8 (HexagonVSPLATW I32:$Rs)), (Rep (Vsplatrw $Rs))>;
+ def: Pat<(VecPI16 (HexagonVSPLATW I32:$Rs)), (Rep (Vsplatrw $Rs))>;
+ def: Pat<(VecPI32 (HexagonVSPLATW I32:$Rs)), (Rep (Vsplatrw $Rs))>;
+}
+
+class Vneg1<ValueType VecTy>
+ : PatFrag<(ops), (VecTy (HexagonVSPLATW (i32 -1)))>;
+
+class Vnot<ValueType VecTy>
+ : PatFrag<(ops node:$Vs), (xor $Vs, Vneg1<VecTy>)>;
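+// There is no generic vector "not" node, so Vnot recognizes the
+// xor-with-all-ones idiom; the AddedComplexity of 200 below makes the
+// single V6_vnot win over matching the xor and the -1 splat separately.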
+
+let Predicates = [UseHVX] in {
+ let AddedComplexity = 200 in {
+ def: Pat<(Vnot<VecI8> HVI8:$Vs), (V6_vnot HvxVR:$Vs)>;
+ def: Pat<(Vnot<VecI16> HVI16:$Vs), (V6_vnot HvxVR:$Vs)>;
+ def: Pat<(Vnot<VecI32> HVI32:$Vs), (V6_vnot HvxVR:$Vs)>;
+ }
+
+ def: OpR_RR_pat<V6_vaddb, Add, VecI8, HVI8>;
+ def: OpR_RR_pat<V6_vaddh, Add, VecI16, HVI16>;
+ def: OpR_RR_pat<V6_vaddw, Add, VecI32, HVI32>;
+ def: OpR_RR_pat<V6_vaddb_dv, Add, VecPI8, HWI8>;
+ def: OpR_RR_pat<V6_vaddh_dv, Add, VecPI16, HWI16>;
+ def: OpR_RR_pat<V6_vaddw_dv, Add, VecPI32, HWI32>;
+ def: OpR_RR_pat<V6_vsubb, Sub, VecI8, HVI8>;
+ def: OpR_RR_pat<V6_vsubh, Sub, VecI16, HVI16>;
+ def: OpR_RR_pat<V6_vsubw, Sub, VecI32, HVI32>;
+ def: OpR_RR_pat<V6_vsubb_dv, Sub, VecPI8, HWI8>;
+ def: OpR_RR_pat<V6_vsubh_dv, Sub, VecPI16, HWI16>;
+ def: OpR_RR_pat<V6_vsubw_dv, Sub, VecPI32, HWI32>;
+ def: OpR_RR_pat<V6_vand, And, VecI8, HVI8>;
+ def: OpR_RR_pat<V6_vand, And, VecI16, HVI16>;
+ def: OpR_RR_pat<V6_vand, And, VecI32, HVI32>;
+ def: OpR_RR_pat<V6_vor, Or, VecI8, HVI8>;
+ def: OpR_RR_pat<V6_vor, Or, VecI16, HVI16>;
+ def: OpR_RR_pat<V6_vor, Or, VecI32, HVI32>;
+ def: OpR_RR_pat<V6_vxor, Xor, VecI8, HVI8>;
+ def: OpR_RR_pat<V6_vxor, Xor, VecI16, HVI16>;
+ def: OpR_RR_pat<V6_vxor, Xor, VecI32, HVI32>;
+
+ def: Pat<(vselect HQ8:$Qu, HVI8:$Vs, HVI8:$Vt),
+ (V6_vmux HvxQR:$Qu, HvxVR:$Vs, HvxVR:$Vt)>;
+ def: Pat<(vselect HQ16:$Qu, HVI16:$Vs, HVI16:$Vt),
+ (V6_vmux HvxQR:$Qu, HvxVR:$Vs, HvxVR:$Vt)>;
+ def: Pat<(vselect HQ32:$Qu, HVI32:$Vs, HVI32:$Vt),
+ (V6_vmux HvxQR:$Qu, HvxVR:$Vs, HvxVR:$Vt)>;
+
+ def: Pat<(vselect (qnot HQ8:$Qu), HVI8:$Vs, HVI8:$Vt),
+ (V6_vmux HvxQR:$Qu, HvxVR:$Vt, HvxVR:$Vs)>;
+ def: Pat<(vselect (qnot HQ16:$Qu), HVI16:$Vs, HVI16:$Vt),
+ (V6_vmux HvxQR:$Qu, HvxVR:$Vt, HvxVR:$Vs)>;
+ def: Pat<(vselect (qnot HQ32:$Qu), HVI32:$Vs, HVI32:$Vt),
+ (V6_vmux HvxQR:$Qu, HvxVR:$Vt, HvxVR:$Vs)>;
+}
+
+let Predicates = [UseHVX] in {
+ def: Pat<(VecPI16 (sext HVI8:$Vs)), (VSxtb $Vs)>;
+ def: Pat<(VecPI32 (sext HVI16:$Vs)), (VSxth $Vs)>;
+ def: Pat<(VecPI16 (zext HVI8:$Vs)), (VZxtb $Vs)>;
+ def: Pat<(VecPI32 (zext HVI16:$Vs)), (VZxth $Vs)>;
+
+ def: Pat<(VecI16 (sext_invec HVI8:$Vs)), (LoVec (VSxtb $Vs))>;
+ def: Pat<(VecI32 (sext_invec HVI16:$Vs)), (LoVec (VSxth $Vs))>;
+ def: Pat<(VecI32 (sext_invec HVI8:$Vs)),
+ (LoVec (VSxth (LoVec (VSxtb $Vs))))>;
+ def: Pat<(VecPI16 (sext_invec HWI8:$Vss)), (VSxtb (LoVec $Vss))>;
+ def: Pat<(VecPI32 (sext_invec HWI16:$Vss)), (VSxth (LoVec $Vss))>;
+ def: Pat<(VecPI32 (sext_invec HWI8:$Vss)),
+ (VSxth (LoVec (VSxtb (LoVec $Vss))))>;
+
+ def: Pat<(VecI16 (zext_invec HVI8:$Vs)), (LoVec (VZxtb $Vs))>;
+ def: Pat<(VecI32 (zext_invec HVI16:$Vs)), (LoVec (VZxth $Vs))>;
+ def: Pat<(VecI32 (zext_invec HVI8:$Vs)),
+ (LoVec (VZxth (LoVec (VZxtb $Vs))))>;
+ def: Pat<(VecPI16 (zext_invec HWI8:$Vss)), (VZxtb (LoVec $Vss))>;
+ def: Pat<(VecPI32 (zext_invec HWI16:$Vss)), (VZxth (LoVec $Vss))>;
+ def: Pat<(VecPI32 (zext_invec HWI8:$Vss)),
+ (VZxth (LoVec (VZxtb (LoVec $Vss))))>;
+
+ def: Pat<(VecI8 (trunc HWI16:$Vss)),
+ (V6_vpackeb (HiVec $Vss), (LoVec $Vss))>;
+ def: Pat<(VecI16 (trunc HWI32:$Vss)),
+ (V6_vpackeh (HiVec $Vss), (LoVec $Vss))>;
+
+ def: Pat<(VecQ8 (trunc HVI8:$Vs)),
+ (V6_vandvrt HvxVR:$Vs, (A2_tfrsi 0x01010101))>;
+ def: Pat<(VecQ16 (trunc HVI16:$Vs)),
+ (V6_vandvrt HvxVR:$Vs, (A2_tfrsi 0x01010101))>;
+ def: Pat<(VecQ32 (trunc HVI32:$Vs)),
+ (V6_vandvrt HvxVR:$Vs, (A2_tfrsi 0x01010101))>;
+}
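+// In the VecQ truncation patterns above, the 0x01010101 mask makes
+// V6_vandvrt inspect (roughly) the low bit of each byte, which is what
+// truncating a vector to a one-bit-per-lane predicate amounts to.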
+
+let Predicates = [UseHVX] in {
+ // The "source" types are not legal, and there are no parameterized
+ // definitions for them, but they are length-specific.
+ let Predicates = [UseHVX,UseHVX64B] in {
+ def: Pat<(VecI16 (sext_inreg HVI16:$Vs, v32i8)),
+ (V6_vasrh (V6_vaslh HVI16:$Vs, (A2_tfrsi 8)), (A2_tfrsi 8))>;
+ def: Pat<(VecI32 (sext_inreg HVI32:$Vs, v16i8)),
+ (V6_vasrw (V6_vaslw HVI32:$Vs, (A2_tfrsi 24)), (A2_tfrsi 24))>;
+ def: Pat<(VecI32 (sext_inreg HVI32:$Vs, v16i16)),
+ (V6_vasrw (V6_vaslw HVI32:$Vs, (A2_tfrsi 16)), (A2_tfrsi 16))>;
+ }
+ let Predicates = [UseHVX,UseHVX128B] in {
+ def: Pat<(VecI16 (sext_inreg HVI16:$Vs, v64i8)),
+ (V6_vasrh (V6_vaslh HVI16:$Vs, (A2_tfrsi 8)), (A2_tfrsi 8))>;
+ def: Pat<(VecI32 (sext_inreg HVI32:$Vs, v32i8)),
+ (V6_vasrw (V6_vaslw HVI32:$Vs, (A2_tfrsi 24)), (A2_tfrsi 24))>;
+ def: Pat<(VecI32 (sext_inreg HVI32:$Vs, v32i16)),
+ (V6_vasrw (V6_vaslw HVI32:$Vs, (A2_tfrsi 16)), (A2_tfrsi 16))>;
+ }
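+  // Both blocks use the same shift trick: shifting left and then
+  // arithmetically right by (lane width - source width) bits sign-extends
+  // in-register, e.g. by 24 for i8-in-i32 and by 16 for i16-in-i32 lanes.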
+
+ def: Pat<(HexagonVASL HVI8:$Vs, I32:$Rt),
+ (V6_vpackeb (V6_vaslh (HiVec (VZxtb HvxVR:$Vs)), I32:$Rt),
+ (V6_vaslh (LoVec (VZxtb HvxVR:$Vs)), I32:$Rt))>;
+ def: Pat<(HexagonVASR HVI8:$Vs, I32:$Rt),
+ (V6_vpackeb (V6_vasrh (HiVec (VSxtb HvxVR:$Vs)), I32:$Rt),
+ (V6_vasrh (LoVec (VSxtb HvxVR:$Vs)), I32:$Rt))>;
+ def: Pat<(HexagonVLSR HVI8:$Vs, I32:$Rt),
+ (V6_vpackeb (V6_vlsrh (HiVec (VZxtb HvxVR:$Vs)), I32:$Rt),
+ (V6_vlsrh (LoVec (VZxtb HvxVR:$Vs)), I32:$Rt))>;
+
+ def: Pat<(HexagonVASL HVI16:$Vs, I32:$Rt), (V6_vaslh HvxVR:$Vs, I32:$Rt)>;
+ def: Pat<(HexagonVASL HVI32:$Vs, I32:$Rt), (V6_vaslw HvxVR:$Vs, I32:$Rt)>;
+ def: Pat<(HexagonVASR HVI16:$Vs, I32:$Rt), (V6_vasrh HvxVR:$Vs, I32:$Rt)>;
+ def: Pat<(HexagonVASR HVI32:$Vs, I32:$Rt), (V6_vasrw HvxVR:$Vs, I32:$Rt)>;
+ def: Pat<(HexagonVLSR HVI16:$Vs, I32:$Rt), (V6_vlsrh HvxVR:$Vs, I32:$Rt)>;
+ def: Pat<(HexagonVLSR HVI32:$Vs, I32:$Rt), (V6_vlsrw HvxVR:$Vs, I32:$Rt)>;
+
+ def: Pat<(add HVI32:$Vx, (HexagonVASL HVI32:$Vu, I32:$Rt)),
+ (V6_vaslw_acc HvxVR:$Vx, HvxVR:$Vu, I32:$Rt)>;
+ def: Pat<(add HVI32:$Vx, (HexagonVASR HVI32:$Vu, I32:$Rt)),
+ (V6_vasrw_acc HvxVR:$Vx, HvxVR:$Vu, I32:$Rt)>;
+
+ def: Pat<(shl HVI16:$Vs, HVI16:$Vt), (V6_vaslhv HvxVR:$Vs, HvxVR:$Vt)>;
+ def: Pat<(shl HVI32:$Vs, HVI32:$Vt), (V6_vaslwv HvxVR:$Vs, HvxVR:$Vt)>;
+ def: Pat<(sra HVI16:$Vs, HVI16:$Vt), (V6_vasrhv HvxVR:$Vs, HvxVR:$Vt)>;
+ def: Pat<(sra HVI32:$Vs, HVI32:$Vt), (V6_vasrwv HvxVR:$Vs, HvxVR:$Vt)>;
+ def: Pat<(srl HVI16:$Vs, HVI16:$Vt), (V6_vlsrhv HvxVR:$Vs, HvxVR:$Vt)>;
+ def: Pat<(srl HVI32:$Vs, HVI32:$Vt), (V6_vlsrwv HvxVR:$Vs, HvxVR:$Vt)>;
+
+ def: Pat<(VecI16 (bswap HVI16:$Vs)),
+ (V6_vdelta HvxVR:$Vs, (V6_lvsplatw (A2_tfrsi 0x01010101)))>;
+ def: Pat<(VecI32 (bswap HVI32:$Vs)),
+ (V6_vdelta HvxVR:$Vs, (V6_lvsplatw (A2_tfrsi 0x03030303)))>;
+
+ def: Pat<(VecI8 (ctpop HVI8:$Vs)),
+ (V6_vpackeb (V6_vpopcounth (HiVec (V6_vunpackub HvxVR:$Vs))),
+ (V6_vpopcounth (LoVec (V6_vunpackub HvxVR:$Vs))))>;
+ def: Pat<(VecI16 (ctpop HVI16:$Vs)), (V6_vpopcounth HvxVR:$Vs)>;
+ def: Pat<(VecI32 (ctpop HVI32:$Vs)),
+ (V6_vaddw (LoVec (V6_vzh (V6_vpopcounth HvxVR:$Vs))),
+ (HiVec (V6_vzh (V6_vpopcounth HvxVR:$Vs))))>;
+
+ def: Pat<(VecI8 (ctlz HVI8:$Vs)),
+ (V6_vsubb (V6_vpackeb (V6_vcl0h (HiVec (V6_vunpackub HvxVR:$Vs))),
+ (V6_vcl0h (LoVec (V6_vunpackub HvxVR:$Vs)))),
+ (V6_lvsplatw (A2_tfrsi 0x08080808)))>;
+ def: Pat<(VecI16 (ctlz HVI16:$Vs)), (V6_vcl0h HvxVR:$Vs)>;
+ def: Pat<(VecI32 (ctlz HVI32:$Vs)), (V6_vcl0w HvxVR:$Vs)>;
+}
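+// Note on the byte ctlz above: V6_vcl0h counts leading zeros in 16-bit
+// lanes, and a zero-extended byte always contributes 8 extra leading
+// zeros, hence the subtraction of the 0x08 byte splat at the end.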
+
+class HvxSel_pat<InstHexagon MI, PatFrag RegPred>
+ : Pat<(select I1:$Pu, RegPred:$Vs, RegPred:$Vt),
+ (MI I1:$Pu, RegPred:$Vs, RegPred:$Vt)>;
+
+let Predicates = [UseHVX] in {
+ def: HvxSel_pat<PS_vselect, HVI8>;
+ def: HvxSel_pat<PS_vselect, HVI16>;
+ def: HvxSel_pat<PS_vselect, HVI32>;
+ def: HvxSel_pat<PS_wselect, HWI8>;
+ def: HvxSel_pat<PS_wselect, HWI16>;
+ def: HvxSel_pat<PS_wselect, HWI32>;
+}
+
+let Predicates = [UseHVX] in {
+ def: Pat<(VecQ8 (qtrue)), (PS_qtrue)>;
+ def: Pat<(VecQ16 (qtrue)), (PS_qtrue)>;
+ def: Pat<(VecQ32 (qtrue)), (PS_qtrue)>;
+ def: Pat<(VecQ8 (qfalse)), (PS_qfalse)>;
+ def: Pat<(VecQ16 (qfalse)), (PS_qfalse)>;
+ def: Pat<(VecQ32 (qfalse)), (PS_qfalse)>;
+
+ def: Pat<(vnot HQ8:$Qs), (V6_pred_not HvxQR:$Qs)>;
+ def: Pat<(vnot HQ16:$Qs), (V6_pred_not HvxQR:$Qs)>;
+ def: Pat<(vnot HQ32:$Qs), (V6_pred_not HvxQR:$Qs)>;
+ def: Pat<(qnot HQ8:$Qs), (V6_pred_not HvxQR:$Qs)>;
+ def: Pat<(qnot HQ16:$Qs), (V6_pred_not HvxQR:$Qs)>;
+ def: Pat<(qnot HQ32:$Qs), (V6_pred_not HvxQR:$Qs)>;
+
+ def: OpR_RR_pat<V6_pred_and, And, VecQ8, HQ8>;
+ def: OpR_RR_pat<V6_pred_and, And, VecQ16, HQ16>;
+ def: OpR_RR_pat<V6_pred_and, And, VecQ32, HQ32>;
+ def: OpR_RR_pat<V6_pred_or, Or, VecQ8, HQ8>;
+ def: OpR_RR_pat<V6_pred_or, Or, VecQ16, HQ16>;
+ def: OpR_RR_pat<V6_pred_or, Or, VecQ32, HQ32>;
+ def: OpR_RR_pat<V6_pred_xor, Xor, VecQ8, HQ8>;
+ def: OpR_RR_pat<V6_pred_xor, Xor, VecQ16, HQ16>;
+ def: OpR_RR_pat<V6_pred_xor, Xor, VecQ32, HQ32>;
+
+ def: OpR_RR_pat<V6_pred_and_n, Not2<And>, VecQ8, HQ8>;
+ def: OpR_RR_pat<V6_pred_and_n, Not2<And>, VecQ16, HQ16>;
+ def: OpR_RR_pat<V6_pred_and_n, Not2<And>, VecQ32, HQ32>;
+ def: OpR_RR_pat<V6_pred_or_n, Not2<Or>, VecQ8, HQ8>;
+ def: OpR_RR_pat<V6_pred_or_n, Not2<Or>, VecQ16, HQ16>;
+ def: OpR_RR_pat<V6_pred_or_n, Not2<Or>, VecQ32, HQ32>;
+
+ def: OpR_RR_pat<V6_veqb, seteq, VecQ8, HVI8>;
+ def: OpR_RR_pat<V6_veqh, seteq, VecQ16, HVI16>;
+ def: OpR_RR_pat<V6_veqw, seteq, VecQ32, HVI32>;
+ def: OpR_RR_pat<V6_vgtb, setgt, VecQ8, HVI8>;
+ def: OpR_RR_pat<V6_vgth, setgt, VecQ16, HVI16>;
+ def: OpR_RR_pat<V6_vgtw, setgt, VecQ32, HVI32>;
+ def: OpR_RR_pat<V6_vgtub, setugt, VecQ8, HVI8>;
+ def: OpR_RR_pat<V6_vgtuh, setugt, VecQ16, HVI16>;
+ def: OpR_RR_pat<V6_vgtuw, setugt, VecQ32, HVI32>;
+
+ def: AccRRR_pat<V6_veqb_and, And, seteq, HQ8, HVI8, HVI8>;
+ def: AccRRR_pat<V6_veqb_or, Or, seteq, HQ8, HVI8, HVI8>;
+ def: AccRRR_pat<V6_veqb_xor, Xor, seteq, HQ8, HVI8, HVI8>;
+ def: AccRRR_pat<V6_veqh_and, And, seteq, HQ16, HVI16, HVI16>;
+ def: AccRRR_pat<V6_veqh_or, Or, seteq, HQ16, HVI16, HVI16>;
+ def: AccRRR_pat<V6_veqh_xor, Xor, seteq, HQ16, HVI16, HVI16>;
+ def: AccRRR_pat<V6_veqw_and, And, seteq, HQ32, HVI32, HVI32>;
+ def: AccRRR_pat<V6_veqw_or, Or, seteq, HQ32, HVI32, HVI32>;
+ def: AccRRR_pat<V6_veqw_xor, Xor, seteq, HQ32, HVI32, HVI32>;
+
+ def: AccRRR_pat<V6_vgtb_and, And, setgt, HQ8, HVI8, HVI8>;
+ def: AccRRR_pat<V6_vgtb_or, Or, setgt, HQ8, HVI8, HVI8>;
+ def: AccRRR_pat<V6_vgtb_xor, Xor, setgt, HQ8, HVI8, HVI8>;
+ def: AccRRR_pat<V6_vgth_and, And, setgt, HQ16, HVI16, HVI16>;
+ def: AccRRR_pat<V6_vgth_or, Or, setgt, HQ16, HVI16, HVI16>;
+ def: AccRRR_pat<V6_vgth_xor, Xor, setgt, HQ16, HVI16, HVI16>;
+ def: AccRRR_pat<V6_vgtw_and, And, setgt, HQ32, HVI32, HVI32>;
+ def: AccRRR_pat<V6_vgtw_or, Or, setgt, HQ32, HVI32, HVI32>;
+ def: AccRRR_pat<V6_vgtw_xor, Xor, setgt, HQ32, HVI32, HVI32>;
+
+ def: AccRRR_pat<V6_vgtub_and, And, setugt, HQ8, HVI8, HVI8>;
+ def: AccRRR_pat<V6_vgtub_or, Or, setugt, HQ8, HVI8, HVI8>;
+ def: AccRRR_pat<V6_vgtub_xor, Xor, setugt, HQ8, HVI8, HVI8>;
+ def: AccRRR_pat<V6_vgtuh_and, And, setugt, HQ16, HVI16, HVI16>;
+ def: AccRRR_pat<V6_vgtuh_or, Or, setugt, HQ16, HVI16, HVI16>;
+ def: AccRRR_pat<V6_vgtuh_xor, Xor, setugt, HQ16, HVI16, HVI16>;
+ def: AccRRR_pat<V6_vgtuw_and, And, setugt, HQ32, HVI32, HVI32>;
+ def: AccRRR_pat<V6_vgtuw_or, Or, setugt, HQ32, HVI32, HVI32>;
+ def: AccRRR_pat<V6_vgtuw_xor, Xor, setugt, HQ32, HVI32, HVI32>;
+}
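+// The AccRRR patterns above fuse a vector compare with a predicate
+// and/or/xor accumulation, e.g. (and HQ8:$Qx, (seteq $Vs, $Vt)) becomes a
+// single V6_veqb_and.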
diff --git a/lib/Target/Hexagon/HexagonPseudo.td b/lib/Target/Hexagon/HexagonPseudo.td
index b2d66317b66e..fd7466349ecd 100644
--- a/lib/Target/Hexagon/HexagonPseudo.td
+++ b/lib/Target/Hexagon/HexagonPseudo.td
@@ -24,7 +24,7 @@ let PrintMethod = "printGlobalOperand" in {
let isPseudo = 1 in {
let isCodeGenOnly = 0 in
def A2_iconst : Pseudo<(outs IntRegs:$Rd32),
- (ins s27_2Imm:$Ii), "${Rd32}=iconst(#${Ii})">;
+ (ins s27_2Imm:$Ii), "${Rd32} = iconst(#${Ii})">;
def DUPLEX_Pseudo : InstHexagon<(outs),
(ins s32_0Imm:$offset), "DUPLEX", [], "", DUPLEX, TypePSEUDO>;
@@ -34,7 +34,7 @@ let isExtendable = 1, opExtendable = 1, opExtentBits = 6,
isAsmParserOnly = 1 in
def TFRI64_V2_ext : InstHexagon<(outs DoubleRegs:$dst),
(ins s32_0Imm:$src1, s8_0Imm:$src2),
- "$dst=combine(#$src1,#$src2)", [], "",
+ "$dst = combine(#$src1,#$src2)", [], "",
A2_combineii.Itinerary, TypeALU32_2op>, OpcodeHexagon;
// HI/LO Instructions
@@ -44,7 +44,7 @@ class REG_IMMED<string RegHalf, bit Rs, bits<3> MajOp, bit MinOp,
InstHexagon rootInst>
: InstHexagon<(outs IntRegs:$dst),
(ins u16_0Imm:$imm_value),
- "$dst"#RegHalf#"=#$imm_value", [], "",
+ "$dst"#RegHalf#" = #$imm_value", [], "",
rootInst.Itinerary, rootInst.Type>, OpcodeHexagon {
bits<5> dst;
bits<32> imm_value;
@@ -102,6 +102,13 @@ def ENDLOOP1 : Endloop<(outs), (ins b30_2Imm:$offset),
[]>;
}
+let isBranch = 1, isTerminator = 1, hasSideEffects = 0,
+ Defs = [PC, LC0, LC1], Uses = [SA0, SA1, LC0, LC1] in {
+def ENDLOOP01 : Endloop<(outs), (ins b30_2Imm:$offset),
+ ":endloop01",
+ []>;
+}
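+// The :endloop01 marker closes both nested hardware loops at once, which
+// is why this pseudo defines and uses both the LC0/SA0 and LC1/SA1 pairs.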
+
let isExtendable = 1, isExtentSigned = 1, opExtentBits = 9, opExtentAlign = 2,
opExtendable = 0, hasSideEffects = 0 in
class LOOP_iBase<string mnemonic, InstHexagon rootInst>
@@ -316,7 +323,7 @@ def LDriw_pred : LDInst<(outs PredRegs:$dst),
// Load modifier.
let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 13,
isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 0 in
-def LDriw_mod : LDInst<(outs ModRegs:$dst),
+def LDriw_ctr : LDInst<(outs CtrRegs:$dst),
(ins IntRegs:$addr, s32_0Imm:$off),
".error \"should not emit\"", []>;
@@ -399,42 +406,42 @@ let isCall = 1, Uses = [R29, R31], isAsmParserOnly = 1 in {
}
// Vector store pseudos
-let Predicates = [HasV60T, UseHVX], isPseudo = 1, isCodeGenOnly = 1,
+let Predicates = [HasV60,UseHVX], isPseudo = 1, isCodeGenOnly = 1,
mayStore = 1, accessSize = HVXVectorAccess, hasSideEffects = 0 in
class STrivv_template<RegisterClass RC, InstHexagon rootInst>
: InstHexagon<(outs), (ins IntRegs:$addr, s32_0Imm:$off, RC:$src),
"", [], "", rootInst.Itinerary, rootInst.Type>;
def PS_vstorerw_ai: STrivv_template<HvxWR, V6_vS32b_ai>,
- Requires<[HasV60T,UseHVX]>;
+ Requires<[HasV60,UseHVX]>;
def PS_vstorerw_nt_ai: STrivv_template<HvxWR, V6_vS32b_nt_ai>,
- Requires<[HasV60T,UseHVX]>;
+ Requires<[HasV60,UseHVX]>;
def PS_vstorerwu_ai: STrivv_template<HvxWR, V6_vS32Ub_ai>,
- Requires<[HasV60T,UseHVX]>;
+ Requires<[HasV60,UseHVX]>;
let isPseudo = 1, isCodeGenOnly = 1, mayStore = 1, hasSideEffects = 0 in
def PS_vstorerq_ai: Pseudo<(outs),
(ins IntRegs:$Rs, s32_0Imm:$Off, HvxQR:$Qt), "", []>,
- Requires<[HasV60T,UseHVX]>;
+ Requires<[HasV60,UseHVX]>;
// Vector load pseudos
-let Predicates = [HasV60T, UseHVX], isPseudo = 1, isCodeGenOnly = 1,
+let Predicates = [HasV60, UseHVX], isPseudo = 1, isCodeGenOnly = 1,
mayLoad = 1, accessSize = HVXVectorAccess, hasSideEffects = 0 in
class LDrivv_template<RegisterClass RC, InstHexagon rootInst>
: InstHexagon<(outs RC:$dst), (ins IntRegs:$addr, s32_0Imm:$off),
"", [], "", rootInst.Itinerary, rootInst.Type>;
def PS_vloadrw_ai: LDrivv_template<HvxWR, V6_vL32b_ai>,
- Requires<[HasV60T,UseHVX]>;
+ Requires<[HasV60,UseHVX]>;
def PS_vloadrw_nt_ai: LDrivv_template<HvxWR, V6_vL32b_nt_ai>,
- Requires<[HasV60T,UseHVX]>;
+ Requires<[HasV60,UseHVX]>;
def PS_vloadrwu_ai: LDrivv_template<HvxWR, V6_vL32Ub_ai>,
- Requires<[HasV60T,UseHVX]>;
+ Requires<[HasV60,UseHVX]>;
let isPseudo = 1, isCodeGenOnly = 1, mayLoad = 1, hasSideEffects = 0 in
def PS_vloadrq_ai: Pseudo<(outs HvxQR:$Qd),
(ins IntRegs:$Rs, s32_0Imm:$Off), "", []>,
- Requires<[HasV60T,UseHVX]>;
+ Requires<[HasV60,UseHVX]>;
let isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 0 in
@@ -443,10 +450,20 @@ class VSELInst<dag outs, dag ins, InstHexagon rootInst>
def PS_vselect: VSELInst<(outs HvxVR:$dst),
(ins PredRegs:$src1, HvxVR:$src2, HvxVR:$src3), V6_vcmov>,
- Requires<[HasV60T,UseHVX]>;
+ Requires<[HasV60,UseHVX]>;
def PS_wselect: VSELInst<(outs HvxWR:$dst),
(ins PredRegs:$src1, HvxWR:$src2, HvxWR:$src3), V6_vccombine>,
- Requires<[HasV60T,UseHVX]>;
+ Requires<[HasV60,UseHVX]>;
+
+let hasSideEffects = 0, isReMaterializable = 1, isPseudo = 1,
+ isCodeGenOnly = 1 in {
+ def PS_qtrue: InstHexagon<(outs HvxQR:$Qd), (ins), "", [], "",
+ V6_veqw.Itinerary, TypeCVI_VA>;
+ def PS_qfalse: InstHexagon<(outs HvxQR:$Qd), (ins), "", [], "",
+ V6_vgtw.Itinerary, TypeCVI_VA>;
+ def PS_vdd0: InstHexagon<(outs HvxWR:$Vd), (ins), "", [], "",
+ V6_vsubw_dv.Itinerary, TypeCVI_VA_DV>;
+}
// Store predicate.
let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 13,
@@ -457,8 +474,8 @@ def STriw_pred : STInst<(outs),
// Store modifier.
let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 13,
isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 0 in
-def STriw_mod : STInst<(outs),
- (ins IntRegs:$addr, s32_0Imm:$off, ModRegs:$src1),
+def STriw_ctr : STInst<(outs),
+ (ins IntRegs:$addr, s32_0Imm:$off, CtrRegs:$src1),
".error \"should not emit\"", []>;
let isExtendable = 1, opExtendable = 1, opExtentBits = 6,
@@ -499,3 +516,46 @@ def DuplexIClassC: InstDuplex < 0xC >;
def DuplexIClassD: InstDuplex < 0xD >;
def DuplexIClassE: InstDuplex < 0xE >;
def DuplexIClassF: InstDuplex < 0xF >;
+
+// Pseudos for circular-buffer instructions. These are needed so that the
+// register allocator picks the matching pair of CSx and Mx registers.
+multiclass NewCircularLoad<RegisterClass RC, MemAccessSize MS> {
+
+let isCodeGenOnly = 1, isPseudo = 1, Defs = [CS], Uses = [CS],
+ addrMode = PostInc, accessSize = MS, hasSideEffects = 0 in {
+ def NAME#_pci : LDInst<(outs RC:$Rd32, IntRegs:$Rx32),
+ (ins IntRegs:$Rx32in, s4_0Imm:$Ii, ModRegs:$Mu2, IntRegs:$Cs),
+ ".error \"should not emit\" ", [], "$Rx32 = $Rx32in", tc_4403ca65>;
+
+ def NAME#_pcr : LDInst<(outs RC:$Rd32, IntRegs:$Rx32),
+ (ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Cs),
+ ".error \"should not emit\" ", [], "$Rx32 = $Rx32in", tc_2fc0c436>;
+}
+}
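+// The _pci variants post-increment the address by an immediate within the
+// circular buffer, while _pcr take the increment from the modifier
+// register; the extra IntRegs:$Cs input is what lets the allocator pair
+// the right CSx with the ModRegs:$Mu2 operand.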
+
+defm PS_loadrub : NewCircularLoad<IntRegs, ByteAccess>;
+defm PS_loadrb : NewCircularLoad<IntRegs, ByteAccess>;
+defm PS_loadruh : NewCircularLoad<IntRegs, HalfWordAccess>;
+defm PS_loadrh : NewCircularLoad<IntRegs, HalfWordAccess>;
+defm PS_loadri : NewCircularLoad<IntRegs, WordAccess>;
+defm PS_loadrd : NewCircularLoad<DoubleRegs, DoubleWordAccess>;
+
+multiclass NewCircularStore<RegisterClass RC, MemAccessSize MS> {
+
+let isCodeGenOnly = 1, isPseudo = 1, Defs = [CS], Uses = [CS],
+ addrMode = PostInc, accessSize = MS, hasSideEffects = 0 in {
+ def NAME#_pci : STInst<(outs IntRegs:$Rx32),
+ (ins IntRegs:$Rx32in, s4_0Imm:$Ii, ModRegs:$Mu2, RC:$Rt32, IntRegs:$Cs),
+ ".error \"should not emit\" ", [], "$Rx32 = $Rx32in", tc_9fdb5406>;
+
+ def NAME#_pcr : STInst<(outs IntRegs:$Rx32),
+ (ins IntRegs:$Rx32in, ModRegs:$Mu2, RC:$Rt32, IntRegs:$Cs),
+ ".error \"should not emit\" ", [], "$Rx32 = $Rx32in", tc_f86c328a>;
+}
+}
+
+defm PS_storerb : NewCircularStore<IntRegs, ByteAccess>;
+defm PS_storerh : NewCircularStore<IntRegs, HalfWordAccess>;
+defm PS_storerf : NewCircularStore<IntRegs, HalfWordAccess>;
+defm PS_storeri : NewCircularStore<IntRegs, WordAccess>;
+defm PS_storerd : NewCircularStore<DoubleRegs, DoubleWordAccess>;
diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/lib/Target/Hexagon/HexagonRegisterInfo.cpp
index 85d6a6b4089e..2e11f875c0f9 100644
--- a/lib/Target/Hexagon/HexagonRegisterInfo.cpp
+++ b/lib/Target/Hexagon/HexagonRegisterInfo.cpp
@@ -19,6 +19,7 @@
#include "HexagonTargetMachine.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -145,6 +146,13 @@ BitVector HexagonRegisterInfo::getReservedRegs(const MachineFunction &MF)
Reserved.set(Hexagon::R30);
Reserved.set(Hexagon::R31);
Reserved.set(Hexagon::VTMP);
+
+ // Guest registers.
+ Reserved.set(Hexagon::GELR); // G0
+ Reserved.set(Hexagon::GSR); // G1
+ Reserved.set(Hexagon::GOSP); // G2
+ Reserved.set(Hexagon::G3); // G3
+
// Control registers.
Reserved.set(Hexagon::SA0); // C0
Reserved.set(Hexagon::LC0); // C1
@@ -171,6 +179,9 @@ BitVector HexagonRegisterInfo::getReservedRegs(const MachineFunction &MF)
Reserved.set(Hexagon::C8);
Reserved.set(Hexagon::USR_OVF);
+ if (MF.getSubtarget<HexagonSubtarget>().hasReservedR19())
+ Reserved.set(Hexagon::R19);
+
for (int x = Reserved.find_first(); x >= 0; x = Reserved.find_next(x))
markSuperRegs(Reserved, x);
@@ -233,6 +244,55 @@ void HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
}
+bool HexagonRegisterInfo::shouldCoalesce(MachineInstr *MI,
+ const TargetRegisterClass *SrcRC, unsigned SubReg,
+ const TargetRegisterClass *DstRC, unsigned DstSubReg,
+ const TargetRegisterClass *NewRC, LiveIntervals &LIS) const {
+ // Coalescing will extend the live interval of the destination register.
+ // If the destination register is a vector pair, avoid introducing function
+  // calls into the interval, since that could force spilling a vector pair
+  // instead of a single vector.
+ MachineFunction &MF = *MI->getParent()->getParent();
+ const HexagonSubtarget &HST = MF.getSubtarget<HexagonSubtarget>();
+ if (!HST.useHVXOps() || NewRC->getID() != Hexagon::HvxWRRegClass.getID())
+ return true;
+ bool SmallSrc = SrcRC->getID() == Hexagon::HvxVRRegClass.getID();
+ bool SmallDst = DstRC->getID() == Hexagon::HvxVRRegClass.getID();
+ if (!SmallSrc && !SmallDst)
+ return true;
+
+ unsigned DstReg = MI->getOperand(0).getReg();
+ unsigned SrcReg = MI->getOperand(1).getReg();
+ const SlotIndexes &Indexes = *LIS.getSlotIndexes();
+ auto HasCall = [&Indexes] (const LiveInterval::Segment &S) {
+ for (SlotIndex I = S.start.getBaseIndex(), E = S.end.getBaseIndex();
+ I != E; I = I.getNextIndex()) {
+ if (const MachineInstr *MI = Indexes.getInstructionFromIndex(I))
+ if (MI->isCall())
+ return true;
+ }
+ return false;
+ };
+
+ if (SmallSrc == SmallDst) {
+ // Both must be true, because the case for both being false was
+ // checked earlier. Both registers will be coalesced into a register
+ // of a wider class (HvxWR), and we don't want its live range to
+ // span over calls.
+ return !any_of(LIS.getInterval(DstReg), HasCall) &&
+ !any_of(LIS.getInterval(SrcReg), HasCall);
+ }
+
+ // If one register is large (HvxWR) and the other is small (HvxVR), then
+  // coalescing is ok if the large one is already live across a function call,
+ // or if the small one is not.
+ unsigned SmallReg = SmallSrc ? SrcReg : DstReg;
+ unsigned LargeReg = SmallSrc ? DstReg : SrcReg;
+ return any_of(LIS.getInterval(LargeReg), HasCall) ||
+ !any_of(LIS.getInterval(SmallReg), HasCall);
+}
+
+
unsigned HexagonRegisterInfo::getRARegister() const {
return Hexagon::R31;
}
@@ -283,6 +343,11 @@ bool HexagonRegisterInfo::useFPForScavengingIndex(const MachineFunction &MF)
return MF.getSubtarget<HexagonSubtarget>().getFrameLowering()->hasFP(MF);
}
+const TargetRegisterClass *
+HexagonRegisterInfo::getPointerRegClass(const MachineFunction &MF,
+ unsigned Kind) const {
+ return &Hexagon::IntRegsRegClass;
+}
unsigned HexagonRegisterInfo::getFirstCallerSavedNonParamReg() const {
return Hexagon::R6;
diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.h b/lib/Target/Hexagon/HexagonRegisterInfo.h
index 4ead57da8fa1..497dc45236b1 100644
--- a/lib/Target/Hexagon/HexagonRegisterInfo.h
+++ b/lib/Target/Hexagon/HexagonRegisterInfo.h
@@ -39,6 +39,8 @@ public:
BitVector getReservedRegs(const MachineFunction &MF) const override;
+ bool enableMultipleCopyHints() const override { return true; }
+
void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
unsigned FIOperandNum, RegScavenger *RS = nullptr) const override;
@@ -61,6 +63,10 @@ public:
return true;
}
+ bool shouldCoalesce(MachineInstr *MI, const TargetRegisterClass *SrcRC,
+ unsigned SubReg, const TargetRegisterClass *DstRC, unsigned DstSubReg,
+ const TargetRegisterClass *NewRC, LiveIntervals &LIS) const override;
+
// Debug information queries.
unsigned getRARegister() const;
unsigned getFrameRegister(const MachineFunction &MF) const override;
@@ -75,6 +81,10 @@ public:
unsigned getFirstCallerSavedNonParamReg() const;
+ const TargetRegisterClass *
+ getPointerRegClass(const MachineFunction &MF,
+ unsigned Kind = 0) const override;
+
bool isEHReturnCalleeSaveReg(unsigned Reg) const;
};
diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.td b/lib/Target/Hexagon/HexagonRegisterInfo.td
index 1d1e85e7ac7e..1fe1ef4ac572 100644
--- a/lib/Target/Hexagon/HexagonRegisterInfo.td
+++ b/lib/Target/Hexagon/HexagonRegisterInfo.td
@@ -67,6 +67,17 @@ let Namespace = "Hexagon" in {
let HWEncoding{0} = num;
}
+ // Rg - Guest/Hypervisor registers
+ class Rg<bits<5> num, string n,
+ list<string> alt = [], list<Register> alias = []> :
+ HexagonReg<num, n, alt, alias>;
+
+ // Rgg - 64-bit Guest/Hypervisor registers
+ class Rgg<bits<5> num, string n, list<Register> subregs> :
+ HexagonDoubleReg<num, n, subregs> {
+ let SubRegs = subregs;
+ }
+
def isub_lo : SubRegIndex<32>;
def isub_hi : SubRegIndex<32, 32>;
def vsub_lo : SubRegIndex<512>;
@@ -200,40 +211,87 @@ let Namespace = "Hexagon" in {
def Q1 : Rq<1, "q1">, DwarfRegNum<[132]>;
def Q2 : Rq<2, "q2">, DwarfRegNum<[133]>;
def Q3 : Rq<3, "q3">, DwarfRegNum<[134]>;
+
+ // Guest Registers
+ def GELR: Rg<0, "gelr", ["g0"]>, DwarfRegNum<[220]>;
+ def GSR: Rg<1, "gsr", ["g1"]>, DwarfRegNum<[221]>;
+ def GOSP: Rg<2, "gosp", ["g2"]>, DwarfRegNum<[222]>;
+ def G3: Rg<3, "gbadva", ["g3"]>, DwarfRegNum<[223]>;
+ def G4: Rg<4, "g4">, DwarfRegNum<[224]>;
+ def G5: Rg<5, "g5">, DwarfRegNum<[225]>;
+ def G6: Rg<6, "g6">, DwarfRegNum<[226]>;
+ def G7: Rg<7, "g7">, DwarfRegNum<[227]>;
+ def G8: Rg<8, "g8">, DwarfRegNum<[228]>;
+ def G9: Rg<9, "g9">, DwarfRegNum<[229]>;
+ def G10: Rg<10, "g10">, DwarfRegNum<[230]>;
+ def G11: Rg<11, "g11">, DwarfRegNum<[231]>;
+ def G12: Rg<12, "g12">, DwarfRegNum<[232]>;
+ def G13: Rg<13, "g13">, DwarfRegNum<[233]>;
+ def G14: Rg<14, "g14">, DwarfRegNum<[234]>;
+ def G15: Rg<15, "g15">, DwarfRegNum<[235]>;
+ def GPMUCNT4: Rg<16, "gpmucnt4", ["g16"]>, DwarfRegNum<[236]>;
+ def GPMUCNT5: Rg<17, "gpmucnt5", ["g17"]>, DwarfRegNum<[237]>;
+ def GPMUCNT6: Rg<18, "gpmucnt6", ["g18"]>, DwarfRegNum<[238]>;
+ def GPMUCNT7: Rg<19, "gpmucnt7", ["g19"]>, DwarfRegNum<[239]>;
+ def G20: Rg<20, "g20">, DwarfRegNum<[240]>;
+ def G21: Rg<21, "g21">, DwarfRegNum<[241]>;
+ def G22: Rg<22, "g22">, DwarfRegNum<[242]>;
+ def G23: Rg<23, "g23">, DwarfRegNum<[243]>;
+ def GPCYCLELO: Rg<24, "gpcyclelo", ["g24"]>, DwarfRegNum<[244]>;
+ def GPCYCLEHI: Rg<25, "gpcyclehi", ["g25"]>, DwarfRegNum<[245]>;
+ def GPMUCNT0: Rg<26, "gpmucnt0", ["g26"]>, DwarfRegNum<[246]>;
+ def GPMUCNT1: Rg<27, "gpmucnt1", ["g27"]>, DwarfRegNum<[247]>;
+ def GPMUCNT2: Rg<28, "gpmucnt2", ["g28"]>, DwarfRegNum<[248]>;
+ def GPMUCNT3: Rg<29, "gpmucnt3", ["g29"]>, DwarfRegNum<[249]>;
+ def G30: Rg<30, "g30">, DwarfRegNum<[250]>;
+ def G31: Rg<31, "g31">, DwarfRegNum<[251]>;
+
+ // Guest Register Pairs
+ let SubRegIndices = [isub_lo, isub_hi], CoveredBySubRegs = 1 in {
+ def G1_0 : Rgg<0, "g1:0", [GELR, GSR]>, DwarfRegNum<[220]>;
+ def G3_2 : Rgg<2, "g3:2", [GOSP, G3]>, DwarfRegNum<[222]>;
+ def G5_4 : Rgg<4, "g5:4", [G4, G5]>, DwarfRegNum<[224]>;
+ def G7_6 : Rgg<6, "g7:6", [G6, G7]>, DwarfRegNum<[226]>;
+ def G9_8 : Rgg<8, "g9:8", [G8, G9]>, DwarfRegNum<[228]>;
+ def G11_10 : Rgg<10, "g11:10", [G10, G11]>, DwarfRegNum<[230]>;
+ def G13_12 : Rgg<12, "g13:12", [G12, G13]>, DwarfRegNum<[232]>;
+ def G15_14 : Rgg<14, "g15:14", [G14, G15]>, DwarfRegNum<[234]>;
+ def G17_16 : Rgg<16, "g17:16", [GPMUCNT4, GPMUCNT5]>, DwarfRegNum<[236]>;
+ def G19_18 : Rgg<18, "g19:18", [GPMUCNT6, GPMUCNT7]>, DwarfRegNum<[238]>;
+ def G21_20 : Rgg<20, "g21:20", [G20, G21]>, DwarfRegNum<[240]>;
+ def G23_22 : Rgg<22, "g23:22", [G22, G23]>, DwarfRegNum<[242]>;
+ def G25_24 : Rgg<24, "g25:24", [GPCYCLELO, GPCYCLEHI]>, DwarfRegNum<[244]>;
+ def G27_26 : Rgg<26, "g27:26", [GPMUCNT0, GPMUCNT1]>, DwarfRegNum<[246]>;
+ def G29_28 : Rgg<28, "g29:28", [GPMUCNT2, GPMUCNT3]>, DwarfRegNum<[248]>;
+ def G31_30 : Rgg<30, "g31:30", [G30, G31]>, DwarfRegNum<[250]>;
+ }
+
}
// HVX types
-def VecI1
- : ValueTypeByHwMode<[Hvx64, Hvx64old, Hvx128, Hvx128old, DefaultMode],
- [v512i1, v512i1, v1024i1, v1024i1, v512i1]>;
-def VecI8
- : ValueTypeByHwMode<[Hvx64, Hvx64old, Hvx128, Hvx128old, DefaultMode],
- [v64i8, v64i8, v128i8, v128i8, v64i8]>;
-def VecI16
- : ValueTypeByHwMode<[Hvx64, Hvx64old, Hvx128, Hvx128old, DefaultMode],
- [v32i16, v32i16, v64i16, v64i16, v32i16]>;
-def VecI32
- : ValueTypeByHwMode<[Hvx64, Hvx64old, Hvx128, Hvx128old, DefaultMode],
- [v16i32, v16i32, v32i32, v32i32, v16i32]>;
-def VecPI8
- : ValueTypeByHwMode<[Hvx64, Hvx64old, Hvx128, Hvx128old, DefaultMode],
- [v128i8, v128i8, v256i8, v256i8, v128i8]>;
-def VecPI16
- : ValueTypeByHwMode<[Hvx64, Hvx64old, Hvx128, Hvx128old, DefaultMode],
- [v64i16, v64i16, v128i16, v128i16, v64i16]>;
-def VecPI32
- : ValueTypeByHwMode<[Hvx64, Hvx64old, Hvx128, Hvx128old, DefaultMode],
- [v32i32, v32i32, v64i32, v64i32, v32i32]>;
-def VecQ8
- : ValueTypeByHwMode<[Hvx64, Hvx64old, Hvx128, Hvx128old, DefaultMode],
- [v64i1, v64i1, v128i1, v128i1, v64i1]>;
-def VecQ16
- : ValueTypeByHwMode<[Hvx64, Hvx64old, Hvx128, Hvx128old, DefaultMode],
- [v32i1, v32i1, v64i1, v64i1, v32i1]>;
-def VecQ32
- : ValueTypeByHwMode<[Hvx64, Hvx64old, Hvx128, Hvx128old, DefaultMode],
- [v16i1, v16i1, v32i1, v32i1, v16i1]>;
+def VecI1: ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode],
+ [v512i1, v1024i1, v512i1]>;
+def VecI8: ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode],
+ [v64i8, v128i8, v64i8]>;
+def VecI16: ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode],
+ [v32i16, v64i16, v32i16]>;
+def VecI32: ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode],
+ [v16i32, v32i32, v16i32]>;
+
+def VecPI8: ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode],
+ [v128i8, v256i8, v128i8]>;
+def VecPI16: ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode],
+ [v64i16, v128i16, v64i16]>;
+def VecPI32: ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode],
+ [v32i32, v64i32, v32i32]>;
+
+def VecQ8: ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode],
+ [v64i1, v128i1, v64i1]>;
+def VecQ16: ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode],
+ [v32i1, v64i1, v32i1]>;
+def VecQ32: ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode],
+ [v16i1, v32i1, v16i1]>;
// HVX register classes
@@ -242,7 +300,7 @@ def VecQ32
// FIXME: the register order should be defined in terms of the preferred
// allocation order...
//
-def IntRegs : RegisterClass<"Hexagon", [i32, f32, v32i1, v4i8, v2i16], 32,
+def IntRegs : RegisterClass<"Hexagon", [i32, f32, v4i8, v2i16], 32,
(add (sequence "R%u", 0, 9), (sequence "R%u", 12, 28),
R10, R11, R29, R30, R31)>;
@@ -254,8 +312,7 @@ def GeneralSubRegs : RegisterClass<"Hexagon", [i32], 32,
def IntRegsLow8 : RegisterClass<"Hexagon", [i32], 32,
(add R7, R6, R5, R4, R3, R2, R1, R0)> ;
-def DoubleRegs : RegisterClass<"Hexagon",
- [i64, f64, v64i1, v8i8, v4i16, v2i32], 64,
+def DoubleRegs : RegisterClass<"Hexagon", [i64, f64, v8i8, v4i16, v2i32], 64,
(add (sequence "D%u", 0, 4), (sequence "D%u", 6, 13), D5, D14, D15)>;
def GeneralDoubleLow8Regs : RegisterClass<"Hexagon", [i64], 64,
@@ -301,6 +358,25 @@ def CtrRegs64 : RegisterClass<"Hexagon", [i64], 64,
(add C1_0, C3_2, C5_4, C7_6, C9_8, C11_10, CS, UPCYCLE, C17_16,
PKTCOUNT, UTIMER)>;
+let Size = 32, isAllocatable = 0 in
+def GuestRegs : RegisterClass<"Hexagon", [i32], 32,
+ (add GELR, GSR, GOSP,
+ (sequence "G%u", 3, 15),
+ GPMUCNT4, GPMUCNT5, GPMUCNT6, GPMUCNT7,
+ G20, G21, G22, G23,
+ GPCYCLELO, GPCYCLEHI, GPMUCNT0, GPMUCNT1,
+ GPMUCNT2, GPMUCNT3,
+ G30, G31)>;
+
+let Size = 64, isAllocatable = 0 in
+def GuestRegs64 : RegisterClass<"Hexagon", [i64], 64,
+ (add G1_0, G3_2,
+ G5_4, G7_6, G9_8, G11_10, G13_12, G15_14,
+ G17_16, G19_18,
+ G21_20, G23_22,
+ G25_24, G27_26, G29_28,
+ G31_30)>;
+
// These registers are new for v62 and onward.
// The function RegisterMatchesArch() uses this list for validation.
let isAllocatable = 0 in
@@ -313,7 +389,6 @@ let Size = 32, isAllocatable = 0 in
def V65Regs : RegisterClass<"Hexagon", [i32], 32, (add VTMP)>;
-
def HexagonCSR
: CalleeSavedRegs<(add R16, R17, R18, R19, R20, R21, R22, R23,
R24, R25, R26, R27)>;
diff --git a/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp b/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp
index 3fe4cc73d2f3..c41f0d3c085c 100644
--- a/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp
+++ b/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp
@@ -60,14 +60,14 @@ INITIALIZE_PASS(HexagonSplitConst32AndConst64, "split-const-for-sdata",
"Hexagon Split Const32s and Const64s", false, false)
bool HexagonSplitConst32AndConst64::runOnMachineFunction(MachineFunction &Fn) {
- const HexagonTargetObjectFile &TLOF =
- *static_cast<const HexagonTargetObjectFile *>(
- Fn.getTarget().getObjFileLowering());
- if (TLOF.isSmallDataEnabled())
- return true;
+ auto &HST = Fn.getSubtarget<HexagonSubtarget>();
+ auto &HTM = static_cast<const HexagonTargetMachine&>(Fn.getTarget());
+ auto &TLOF = *HTM.getObjFileLowering();
+ if (HST.useSmallData() && TLOF.isSmallDataEnabled())
+ return false;
- const TargetInstrInfo *TII = Fn.getSubtarget().getInstrInfo();
- const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo();
+ const TargetInstrInfo *TII = HST.getInstrInfo();
+ const TargetRegisterInfo *TRI = HST.getRegisterInfo();
// Loop over all of the basic blocks
for (MachineBasicBlock &B : Fn) {
@@ -109,7 +109,6 @@ bool HexagonSplitConst32AndConst64::runOnMachineFunction(MachineFunction &Fn) {
//===----------------------------------------------------------------------===//
// Public Constructor Functions
//===----------------------------------------------------------------------===//
-
FunctionPass *llvm::createHexagonSplitConst32AndConst64() {
return new HexagonSplitConst32AndConst64();
}
diff --git a/lib/Target/Hexagon/HexagonSplitDouble.cpp b/lib/Target/Hexagon/HexagonSplitDouble.cpp
index c9f5400018e8..e018785f24d8 100644
--- a/lib/Target/Hexagon/HexagonSplitDouble.cpp
+++ b/lib/Target/Hexagon/HexagonSplitDouble.cpp
@@ -26,6 +26,7 @@
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/Config/llvm-config.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
@@ -55,6 +56,8 @@ static cl::opt<int> MaxHSDR("max-hsdr", cl::Hidden, cl::init(-1),
cl::desc("Maximum number of split partitions"));
static cl::opt<bool> MemRefsFixed("hsdr-no-mem", cl::Hidden, cl::init(true),
cl::desc("Do not split loads or stores"));
+ static cl::opt<bool> SplitAll("hsdr-split-all", cl::Hidden, cl::init(false),
+ cl::desc("Split all partitions"));
namespace {
@@ -62,9 +65,7 @@ namespace {
public:
static char ID;
- HexagonSplitDoubleRegs() : MachineFunctionPass(ID) {
- initializeHexagonSplitDoubleRegsPass(*PassRegistry::getPassRegistry());
- }
+ HexagonSplitDoubleRegs() : MachineFunctionPass(ID) {}
StringRef getPassName() const override {
return "Hexagon Split Double Registers";
@@ -97,6 +98,7 @@ namespace {
bool isFixedInstr(const MachineInstr *MI) const;
void partitionRegisters(UUSetMap &P2Rs);
int32_t profit(const MachineInstr *MI) const;
+ int32_t profit(unsigned Reg) const;
bool isProfitable(const USet &Part, LoopRegMap &IRM) const;
void collectIndRegsForLoop(const MachineLoop *L, USet &Rs);
@@ -161,7 +163,7 @@ bool HexagonSplitDoubleRegs::isFixedInstr(const MachineInstr *MI) const {
if (MI->mayLoad() || MI->mayStore())
if (MemRefsFixed || isVolatileInstr(MI))
return true;
- if (MI->isDebugValue())
+ if (MI->isDebugInstr())
return false;
unsigned Opc = MI->getOpcode();
@@ -244,7 +246,7 @@ void HexagonSplitDoubleRegs::partitionRegisters(UUSetMap &P2Rs) {
if (FixedRegs[x])
continue;
unsigned R = TargetRegisterInfo::index2VirtReg(x);
- DEBUG(dbgs() << printReg(R, TRI) << " ~~");
+ LLVM_DEBUG(dbgs() << printReg(R, TRI) << " ~~");
USet &Asc = AssocMap[R];
for (auto U = MRI->use_nodbg_begin(R), Z = MRI->use_nodbg_end();
U != Z; ++U) {
@@ -267,13 +269,13 @@ void HexagonSplitDoubleRegs::partitionRegisters(UUSetMap &P2Rs) {
unsigned u = TargetRegisterInfo::virtReg2Index(T);
if (FixedRegs[u])
continue;
- DEBUG(dbgs() << ' ' << printReg(T, TRI));
+ LLVM_DEBUG(dbgs() << ' ' << printReg(T, TRI));
Asc.insert(T);
// Make it symmetric.
AssocMap[T].insert(R);
}
}
- DEBUG(dbgs() << '\n');
+ LLVM_DEBUG(dbgs() << '\n');
}
UUMap R2P;
@@ -304,15 +306,10 @@ void HexagonSplitDoubleRegs::partitionRegisters(UUSetMap &P2Rs) {
P2Rs[I.second].insert(I.first);
}
-static inline int32_t profitImm(unsigned Lo, unsigned Hi) {
+static inline int32_t profitImm(unsigned Imm) {
int32_t P = 0;
- bool LoZ1 = false, HiZ1 = false;
- if (Lo == 0 || Lo == 0xFFFFFFFF)
- P += 10, LoZ1 = true;
- if (Hi == 0 || Hi == 0xFFFFFFFF)
- P += 10, HiZ1 = true;
- if (!LoZ1 && !HiZ1 && Lo == Hi)
- P += 3;
+ if (Imm == 0 || Imm == 0xFFFFFFFF)
+ P += 10;
return P;
}
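For reference, the rewritten profitImm above scores each 32-bit half of a 64-bit immediate independently; note that the old +3 bonus for equal, non-special halves does not survive the split. A minimal standalone sketch (helper re-declared here purely for illustration, not part of the patch):

    #include <cassert>
    #include <cstdint>

    // Each half scores 10 iff it is 0 or all-ones, matching the new helper.
    static int32_t profitImm(uint32_t Imm) {
      return (Imm == 0 || Imm == 0xFFFFFFFF) ? 10 : 0;
    }

    int main() {
      uint64_t D = 0x00000000FFFFFFFFULL;   // Hi = 0, Lo = all-ones
      uint32_t Lo = D & 0xFFFFFFFFULL, Hi = D >> 32;
      assert(profitImm(Lo) + profitImm(Hi) == 20);
      assert(profitImm(0x12345678) == 0);   // ordinary halves score nothing
      return 0;
    }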
@@ -342,21 +339,28 @@ int32_t HexagonSplitDoubleRegs::profit(const MachineInstr *MI) const {
uint64_t D = MI->getOperand(1).getImm();
unsigned Lo = D & 0xFFFFFFFFULL;
unsigned Hi = D >> 32;
- return profitImm(Lo, Hi);
+ return profitImm(Lo) + profitImm(Hi);
}
case Hexagon::A2_combineii:
- case Hexagon::A4_combineii:
- return profitImm(MI->getOperand(1).getImm(),
- MI->getOperand(2).getImm());
+ case Hexagon::A4_combineii: {
+ const MachineOperand &Op1 = MI->getOperand(1);
+ const MachineOperand &Op2 = MI->getOperand(2);
+ int32_t Prof1 = Op1.isImm() ? profitImm(Op1.getImm()) : 0;
+ int32_t Prof2 = Op2.isImm() ? profitImm(Op2.getImm()) : 0;
+ return Prof1 + Prof2;
+ }
case Hexagon::A4_combineri:
ImmX++;
// Fall through into A4_combineir.
LLVM_FALLTHROUGH;
case Hexagon::A4_combineir: {
ImmX++;
- int64_t V = MI->getOperand(ImmX).getImm();
- if (V == 0 || V == -1)
- return 10;
+ const MachineOperand &OpX = MI->getOperand(ImmX);
+ if (OpX.isImm()) {
+ int64_t V = OpX.getImm();
+ if (V == 0 || V == -1)
+ return 10;
+ }
// Fall through into A2_combinew.
LLVM_FALLTHROUGH;
}
@@ -368,8 +372,11 @@ int32_t HexagonSplitDoubleRegs::profit(const MachineInstr *MI) const {
case Hexagon::A2_andp:
case Hexagon::A2_orp:
- case Hexagon::A2_xorp:
- return 1;
+ case Hexagon::A2_xorp: {
+ unsigned Rs = MI->getOperand(1).getReg();
+ unsigned Rt = MI->getOperand(2).getReg();
+ return profit(Rs) + profit(Rt);
+ }
case Hexagon::S2_asl_i_p_or: {
unsigned S = MI->getOperand(3).getImm();
@@ -393,6 +400,25 @@ int32_t HexagonSplitDoubleRegs::profit(const MachineInstr *MI) const {
return 0;
}
+int32_t HexagonSplitDoubleRegs::profit(unsigned Reg) const {
+ assert(TargetRegisterInfo::isVirtualRegister(Reg));
+
+ const MachineInstr *DefI = MRI->getVRegDef(Reg);
+ switch (DefI->getOpcode()) {
+ case Hexagon::A2_tfrpi:
+ case Hexagon::CONST64:
+ case Hexagon::A2_combineii:
+ case Hexagon::A4_combineii:
+ case Hexagon::A4_combineri:
+ case Hexagon::A4_combineir:
+ case Hexagon::A2_combinew:
+ return profit(DefI);
+ default:
+ break;
+ }
+ return 0;
+}
+
bool HexagonSplitDoubleRegs::isProfitable(const USet &Part, LoopRegMap &IRM)
const {
unsigned FixedNum = 0, LoopPhiNum = 0;
@@ -442,7 +468,9 @@ bool HexagonSplitDoubleRegs::isProfitable(const USet &Part, LoopRegMap &IRM)
if (FixedNum > 0 && LoopPhiNum > 0)
TotalP -= 20*LoopPhiNum;
- DEBUG(dbgs() << "Partition profit: " << TotalP << '\n');
+ LLVM_DEBUG(dbgs() << "Partition profit: " << TotalP << '\n');
+ if (SplitAll)
+ return true;
return TotalP > 0;
}
@@ -535,7 +563,7 @@ void HexagonSplitDoubleRegs::collectIndRegsForLoop(const MachineLoop *L,
Rs.insert(CmpR1);
Rs.insert(CmpR2);
- DEBUG({
+ LLVM_DEBUG({
dbgs() << "For loop at " << printMBBReference(*HB) << " ind regs: ";
dump_partition(dbgs(), Rs, *TRI);
dbgs() << '\n';
@@ -710,23 +738,21 @@ void HexagonSplitDoubleRegs::splitCombine(MachineInstr *MI,
assert(F != PairMap.end());
const UUPair &P = F->second;
- if (Op1.isImm()) {
+ if (!Op1.isReg()) {
BuildMI(B, MI, DL, TII->get(Hexagon::A2_tfrsi), P.second)
- .addImm(Op1.getImm());
- } else if (Op1.isReg()) {
+ .add(Op1);
+ } else {
BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), P.second)
.addReg(Op1.getReg(), getRegState(Op1), Op1.getSubReg());
- } else
- llvm_unreachable("Unexpected operand");
+ }
- if (Op2.isImm()) {
+ if (!Op2.isReg()) {
BuildMI(B, MI, DL, TII->get(Hexagon::A2_tfrsi), P.first)
- .addImm(Op2.getImm());
- } else if (Op2.isReg()) {
+ .add(Op2);
+ } else {
BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), P.first)
.addReg(Op2.getReg(), getRegState(Op2), Op2.getSubReg());
- } else
- llvm_unreachable("Unexpected operand");
+ }
}
void HexagonSplitDoubleRegs::splitExt(MachineInstr *MI,
@@ -970,7 +996,7 @@ bool HexagonSplitDoubleRegs::splitInstr(MachineInstr *MI,
const UUPairMap &PairMap) {
using namespace Hexagon;
- DEBUG(dbgs() << "Splitting: " << *MI);
+ LLVM_DEBUG(dbgs() << "Splitting: " << *MI);
bool Split = false;
unsigned Opc = MI->getOpcode();
@@ -1104,8 +1130,8 @@ bool HexagonSplitDoubleRegs::splitPartition(const USet &Part) {
const TargetRegisterClass *IntRC = &Hexagon::IntRegsRegClass;
bool Changed = false;
- DEBUG(dbgs() << "Splitting partition: "; dump_partition(dbgs(), Part, *TRI);
- dbgs() << '\n');
+ LLVM_DEBUG(dbgs() << "Splitting partition: ";
+ dump_partition(dbgs(), Part, *TRI); dbgs() << '\n');
UUPairMap PairMap;
@@ -1122,8 +1148,9 @@ bool HexagonSplitDoubleRegs::splitPartition(const USet &Part) {
unsigned LoR = MRI->createVirtualRegister(IntRC);
unsigned HiR = MRI->createVirtualRegister(IntRC);
- DEBUG(dbgs() << "Created mapping: " << printReg(DR, TRI) << " -> "
- << printReg(HiR, TRI) << ':' << printReg(LoR, TRI) << '\n');
+ LLVM_DEBUG(dbgs() << "Created mapping: " << printReg(DR, TRI) << " -> "
+ << printReg(HiR, TRI) << ':' << printReg(LoR, TRI)
+ << '\n');
PairMap.insert(std::make_pair(DR, UUPair(LoR, HiR)));
}
@@ -1160,12 +1187,12 @@ bool HexagonSplitDoubleRegs::splitPartition(const USet &Part) {
}
bool HexagonSplitDoubleRegs::runOnMachineFunction(MachineFunction &MF) {
- DEBUG(dbgs() << "Splitting double registers in function: "
- << MF.getName() << '\n');
-
if (skipFunction(MF.getFunction()))
return false;
+ LLVM_DEBUG(dbgs() << "Splitting double registers in function: "
+ << MF.getName() << '\n');
+
auto &ST = MF.getSubtarget<HexagonSubtarget>();
TRI = ST.getRegisterInfo();
TII = ST.getInstrInfo();
@@ -1178,7 +1205,7 @@ bool HexagonSplitDoubleRegs::runOnMachineFunction(MachineFunction &MF) {
collectIndRegs(IRM);
partitionRegisters(P2Rs);
- DEBUG({
+ LLVM_DEBUG({
dbgs() << "Register partitioning: (partition #0 is fixed)\n";
for (UUSetMap::iterator I = P2Rs.begin(), E = P2Rs.end(); I != E; ++I) {
dbgs() << '#' << I->first << " -> ";
@@ -1196,7 +1223,8 @@ bool HexagonSplitDoubleRegs::runOnMachineFunction(MachineFunction &MF) {
if (Limit >= 0 && Counter >= Limit)
break;
USet &Part = I->second;
- DEBUG(dbgs() << "Calculating profit for partition #" << I->first << '\n');
+ LLVM_DEBUG(dbgs() << "Calculating profit for partition #" << I->first
+ << '\n');
if (!isProfitable(Part, IRM))
continue;
Counter++;
diff --git a/lib/Target/Hexagon/HexagonStoreWidening.cpp b/lib/Target/Hexagon/HexagonStoreWidening.cpp
index 300f6de33552..991af047387e 100644
--- a/lib/Target/Hexagon/HexagonStoreWidening.cpp
+++ b/lib/Target/Hexagon/HexagonStoreWidening.cpp
@@ -433,10 +433,11 @@ bool HexagonStoreWidening::createWideStores(InstrGroup &OG, InstrGroup &NG,
const MCInstrDesc &StD = TII->get(WOpc);
MachineOperand &MR = FirstSt->getOperand(0);
int64_t Off = FirstSt->getOperand(1).getImm();
- MachineInstr *StI = BuildMI(*MF, DL, StD)
- .addReg(MR.getReg(), getKillRegState(MR.isKill()))
- .addImm(Off)
- .addImm(Val);
+ MachineInstr *StI =
+ BuildMI(*MF, DL, StD)
+ .addReg(MR.getReg(), getKillRegState(MR.isKill()), MR.getSubReg())
+ .addImm(Off)
+ .addImm(Val);
StI->addMemOperand(*MF, NewM);
NG.push_back(StI);
} else {
@@ -455,10 +456,11 @@ bool HexagonStoreWidening::createWideStores(InstrGroup &OG, InstrGroup &NG,
const MCInstrDesc &StD = TII->get(WOpc);
MachineOperand &MR = FirstSt->getOperand(0);
int64_t Off = FirstSt->getOperand(1).getImm();
- MachineInstr *StI = BuildMI(*MF, DL, StD)
- .addReg(MR.getReg(), getKillRegState(MR.isKill()))
- .addImm(Off)
- .addReg(VReg, RegState::Kill);
+ MachineInstr *StI =
+ BuildMI(*MF, DL, StD)
+ .addReg(MR.getReg(), getKillRegState(MR.isKill()), MR.getSubReg())
+ .addImm(Off)
+ .addReg(VReg, RegState::Kill);
StI->addMemOperand(*MF, NewM);
NG.push_back(StI);
}
@@ -472,7 +474,7 @@ bool HexagonStoreWidening::createWideStores(InstrGroup &OG, InstrGroup &NG,
// from OG was (in the order in which they appeared in the basic block).
// (The ordering in OG does not have to match the order in the basic block.)
bool HexagonStoreWidening::replaceStores(InstrGroup &OG, InstrGroup &NG) {
- DEBUG({
+ LLVM_DEBUG({
dbgs() << "Replacing:\n";
for (auto I : OG)
dbgs() << " " << *I;
@@ -576,7 +578,7 @@ bool HexagonStoreWidening::processBasicBlock(MachineBasicBlock &MBB) {
};
for (auto &G : SGs) {
assert(G.size() > 1 && "Store group with fewer than 2 elements");
- std::sort(G.begin(), G.end(), Less);
+ llvm::sort(G.begin(), G.end(), Less);
Changed |= processStoreGroup(G);
}
diff --git a/lib/Target/Hexagon/HexagonSubtarget.cpp b/lib/Target/Hexagon/HexagonSubtarget.cpp
index 6f1f6c46a107..0686d6eb6118 100644
--- a/lib/Target/Hexagon/HexagonSubtarget.cpp
+++ b/lib/Target/Hexagon/HexagonSubtarget.cpp
@@ -15,13 +15,14 @@
#include "HexagonInstrInfo.h"
#include "HexagonRegisterInfo.h"
#include "HexagonSubtarget.h"
+#include "MCTargetDesc/HexagonMCTargetDesc.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
-#include "MCTargetDesc/HexagonMCTargetDesc.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/Support/CommandLine.h"
@@ -38,17 +39,6 @@ using namespace llvm;
#define GET_SUBTARGETINFO_TARGET_DESC
#include "HexagonGenSubtargetInfo.inc"
-static cl::opt<bool> EnableMemOps("enable-hexagon-memops",
- cl::Hidden, cl::ZeroOrMore, cl::ValueDisallowed, cl::init(true),
- cl::desc("Generate V4 MEMOP in code generation for Hexagon target"));
-
-static cl::opt<bool> DisableMemOps("disable-hexagon-memops",
- cl::Hidden, cl::ZeroOrMore, cl::ValueDisallowed, cl::init(false),
- cl::desc("Do not generate V4 MEMOP in code generation for Hexagon target"));
-
-static cl::opt<bool> EnableIEEERndNear("enable-hexagon-ieee-rnd-near",
- cl::Hidden, cl::ZeroOrMore, cl::init(false),
- cl::desc("Generate non-chopped conversion from fp to int."));
static cl::opt<bool> EnableBSBSched("enable-bsb-sched",
cl::Hidden, cl::ZeroOrMore, cl::init(true));
@@ -60,9 +50,6 @@ static cl::opt<bool> EnableDotCurSched("enable-cur-sched",
cl::Hidden, cl::ZeroOrMore, cl::init(true),
cl::desc("Enable the scheduler to generate .cur"));
-static cl::opt<bool> EnableVecFrwdSched("enable-evec-frwd-sched",
- cl::Hidden, cl::ZeroOrMore, cl::init(true));
-
static cl::opt<bool> DisableHexagonMISched("disable-hexagon-misched",
cl::Hidden, cl::ZeroOrMore, cl::init(false),
cl::desc("Disable Hexagon MI Scheduling"));
@@ -105,6 +92,7 @@ HexagonSubtarget::HexagonSubtarget(const Triple &TT, StringRef CPU,
HexagonSubtarget &
HexagonSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) {
static std::map<StringRef, Hexagon::ArchEnum> CpuTable{
+ {"generic", Hexagon::ArchEnum::V60},
{"hexagonv4", Hexagon::ArchEnum::V4},
{"hexagonv5", Hexagon::ArchEnum::V5},
{"hexagonv55", Hexagon::ArchEnum::V55},
@@ -123,9 +111,7 @@ HexagonSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) {
UseHVX64BOps = false;
UseLongCalls = false;
- UseMemOps = DisableMemOps ? false : EnableMemOps;
- ModeIEEERndNear = EnableIEEERndNear;
- UseBSBScheduling = hasV60TOps() && EnableBSBSched;
+ UseBSBScheduling = hasV60Ops() && EnableBSBSched;
ParseSubtargetFeatures(CPUString, FS);
@@ -204,11 +190,14 @@ bool HexagonSubtarget::CallMutation::shouldTFRICallBind(
Type == HexagonII::TypeALU64 || Type == HexagonII::TypeM;
}
-void HexagonSubtarget::CallMutation::apply(ScheduleDAGInstrs *DAG) {
+void HexagonSubtarget::CallMutation::apply(ScheduleDAGInstrs *DAGInstrs) {
+ ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
SUnit* LastSequentialCall = nullptr;
- unsigned VRegHoldingRet = 0;
- unsigned RetRegister;
- SUnit* LastUseOfRet = nullptr;
+ // Map from a virtual register to the physical register it was copied from.
+ DenseMap<unsigned, unsigned> VRegHoldingReg;
+ // Map from a physical register to the last instruction that used a virtual
+ // register holding its value. This is used to create the barrier edge.
+ DenseMap<unsigned, SUnit *> LastVRegUse;
auto &TRI = *DAG->MF.getSubtarget().getRegisterInfo();
auto &HII = *DAG->MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
@@ -220,13 +209,15 @@ void HexagonSubtarget::CallMutation::apply(ScheduleDAGInstrs *DAG) {
LastSequentialCall = &DAG->SUnits[su];
// Look for a compare that defines a predicate.
else if (DAG->SUnits[su].getInstr()->isCompare() && LastSequentialCall)
- DAG->SUnits[su].addPred(SDep(LastSequentialCall, SDep::Barrier));
+ DAG->addEdge(&DAG->SUnits[su], SDep(LastSequentialCall, SDep::Barrier));
// Look for call and tfri* instructions.
else if (SchedPredsCloser && LastSequentialCall && su > 1 && su < e-1 &&
shouldTFRICallBind(HII, DAG->SUnits[su], DAG->SUnits[su+1]))
- DAG->SUnits[su].addPred(SDep(&DAG->SUnits[su-1], SDep::Barrier));
- // Prevent redundant register copies between two calls, which are caused by
- // both the return value and the argument for the next call being in %r0.
+ DAG->addEdge(&DAG->SUnits[su], SDep(&DAG->SUnits[su-1], SDep::Barrier));
+ // Prevent redundant register copies due to reads and writes of physical
+ // registers. The original motivation for this was the code generated
+ // between two calls, which is caused by both the return value and the
+ // argument for the next call being in %r0.
// Example:
// 1: <call1>
// 2: %vreg = COPY %r0
@@ -235,21 +226,37 @@ void HexagonSubtarget::CallMutation::apply(ScheduleDAGInstrs *DAG) {
// 5: <call2>
// The scheduler would often swap 3 and 4, so an additional register is
// needed. This code inserts a Barrier dependence between 3 & 4 to prevent
- // this. The same applies for %d0 and %v0/%w0, which are also handled.
+ // this.
+ // The code below handles all physical registers, not just R0/D0/V0.
else if (SchedRetvalOptimization) {
const MachineInstr *MI = DAG->SUnits[su].getInstr();
- if (MI->isCopy() && (MI->readsRegister(Hexagon::R0, &TRI) ||
- MI->readsRegister(Hexagon::V0, &TRI))) {
- // %vreg = COPY %r0
- VRegHoldingRet = MI->getOperand(0).getReg();
- RetRegister = MI->getOperand(1).getReg();
- LastUseOfRet = nullptr;
- } else if (VRegHoldingRet && MI->readsVirtualRegister(VRegHoldingRet))
- // <use of %X>
- LastUseOfRet = &DAG->SUnits[su];
- else if (LastUseOfRet && MI->definesRegister(RetRegister, &TRI))
- // %r0 = ...
- DAG->SUnits[su].addPred(SDep(LastUseOfRet, SDep::Barrier));
+ if (MI->isCopy() &&
+ TargetRegisterInfo::isPhysicalRegister(MI->getOperand(1).getReg())) {
+ // %vregX = COPY %r0
+ VRegHoldingReg[MI->getOperand(0).getReg()] = MI->getOperand(1).getReg();
+ LastVRegUse.erase(MI->getOperand(1).getReg());
+ } else {
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ if (MO.isUse() && !MI->isCopy() &&
+ VRegHoldingReg.count(MO.getReg())) {
+ // <use of %vregX>
+ LastVRegUse[VRegHoldingReg[MO.getReg()]] = &DAG->SUnits[su];
+ } else if (MO.isDef() &&
+ TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
+ for (MCRegAliasIterator AI(MO.getReg(), &TRI, true); AI.isValid();
+ ++AI) {
+ if (LastVRegUse.count(*AI) &&
+ LastVRegUse[*AI] != &DAG->SUnits[su])
+ // %r0 = ...
+ DAG->addEdge(&DAG->SUnits[su], SDep(LastVRegUse[*AI], SDep::Barrier));
+ LastVRegUse.erase(*AI);
+ }
+ }
+ }
+ }
}
}
}
@@ -300,7 +307,7 @@ void HexagonSubtarget::BankConflictMutation::apply(ScheduleDAGInstrs *DAG) {
}
}
-/// \brief Enable use of alias analysis during code generation (during MI
+/// Enable use of alias analysis during code generation (during MI
/// scheduling, DAGCombine, etc.).
bool HexagonSubtarget::useAA() const {
if (OptLevel != CodeGenOpt::None)
@@ -308,7 +315,7 @@ bool HexagonSubtarget::useAA() const {
return false;
}
-/// \brief Perform target specific adjustments to the latency of a schedule
+/// Perform target specific adjustments to the latency of a schedule
/// dependency.
void HexagonSubtarget::adjustSchedDependency(SUnit *Src, SUnit *Dst,
SDep &Dep) const {
@@ -328,25 +335,30 @@ void HexagonSubtarget::adjustSchedDependency(SUnit *Src, SUnit *Dst,
return;
}
- if (!hasV60TOps())
+ if (!hasV60Ops())
return;
- // If it's a REG_SEQUENCE, use its destination instruction to determine
+ // Set the latency for a copy to zero since we hope that it will get removed.
+ if (DstInst->isCopy())
+ Dep.setLatency(0);
+
+ // If it's a REG_SEQUENCE/COPY, use its destination instruction to determine
// the correct latency.
- if (DstInst->isRegSequence() && Dst->NumSuccs == 1) {
- unsigned RSeqReg = DstInst->getOperand(0).getReg();
- MachineInstr *RSeqDst = Dst->Succs[0].getSUnit()->getInstr();
+ if ((DstInst->isRegSequence() || DstInst->isCopy()) && Dst->NumSuccs == 1) {
+ unsigned DReg = DstInst->getOperand(0).getReg();
+ MachineInstr *DDst = Dst->Succs[0].getSUnit()->getInstr();
unsigned UseIdx = -1;
- for (unsigned OpNum = 0; OpNum < RSeqDst->getNumOperands(); OpNum++) {
- const MachineOperand &MO = RSeqDst->getOperand(OpNum);
- if (MO.isReg() && MO.getReg() && MO.isUse() && MO.getReg() == RSeqReg) {
+ for (unsigned OpNum = 0; OpNum < DDst->getNumOperands(); OpNum++) {
+ const MachineOperand &MO = DDst->getOperand(OpNum);
+ if (MO.isReg() && MO.getReg() && MO.isUse() && MO.getReg() == DReg) {
UseIdx = OpNum;
break;
}
}
- unsigned RSeqLatency = (InstrInfo.getOperandLatency(&InstrItins, *SrcInst,
- 0, *RSeqDst, UseIdx));
- Dep.setLatency(RSeqLatency);
+ int DLatency = (InstrInfo.getOperandLatency(&InstrItins, *SrcInst,
+ 0, *DDst, UseIdx));
+ DLatency = std::max(DLatency, 0);
+ Dep.setLatency((unsigned)DLatency);
}
// Try to schedule uses near definitions to generate .cur.
@@ -394,7 +406,7 @@ void HexagonSubtarget::updateLatency(MachineInstr &SrcInst,
return;
}
- if (!hasV60TOps())
+ if (!hasV60Ops())
return;
auto &QII = static_cast<const HexagonInstrInfo&>(*getInstrInfo());
@@ -418,6 +430,7 @@ void HexagonSubtarget::restoreLatency(SUnit *Src, SUnit *Dst) const {
}
assert(DefIdx >= 0 && "Def Reg not found in Src MI");
MachineInstr *DstI = Dst->getInstr();
+ SDep T = I;
for (unsigned OpNum = 0; OpNum < DstI->getNumOperands(); OpNum++) {
const MachineOperand &MO = DstI->getOperand(OpNum);
if (MO.isReg() && MO.isUse() && MO.getReg() == DepR) {
@@ -426,8 +439,7 @@ void HexagonSubtarget::restoreLatency(SUnit *Src, SUnit *Dst) const {
// For some instructions (ex: COPY), we might end up with < 0 latency
// as they don't have any Itinerary class associated with them.
- if (Latency <= 0)
- Latency = 1;
+ Latency = std::max(Latency, 0);
I.setLatency(Latency);
updateLatency(*SrcI, *DstI, I);
@@ -435,11 +447,10 @@ void HexagonSubtarget::restoreLatency(SUnit *Src, SUnit *Dst) const {
}
// Update the latency of opposite edge too.
- for (auto &J : Dst->Preds) {
- if (J.getSUnit() != Src)
- continue;
- J.setLatency(I.getLatency());
- }
+ T.setSUnit(Src);
+ auto F = std::find(Dst->Preds.begin(), Dst->Preds.end(), T);
+ assert(F != Dst->Preds.end());
+ F->setLatency(I.getLatency());
}
}
@@ -447,7 +458,7 @@ void HexagonSubtarget::restoreLatency(SUnit *Src, SUnit *Dst) const {
void HexagonSubtarget::changeLatency(SUnit *Src, SUnit *Dst, unsigned Lat)
const {
for (auto &I : Src->Succs) {
- if (I.getSUnit() != Dst)
+ if (!I.isAssignedRegDep() || I.getSUnit() != Dst)
continue;
SDep T = I;
I.setLatency(Lat);
@@ -456,7 +467,7 @@ void HexagonSubtarget::changeLatency(SUnit *Src, SUnit *Dst, unsigned Lat)
T.setSUnit(Src);
auto F = std::find(Dst->Preds.begin(), Dst->Preds.end(), T);
assert(F != Dst->Preds.end());
- F->setLatency(I.getLatency());
+ F->setLatency(Lat);
}
}
@@ -519,13 +530,13 @@ bool HexagonSubtarget::isBestZeroLatency(SUnit *Src, SUnit *Dst,
// Reassign the latency for the previous bests, which requires setting
// the dependence edge in both directions.
if (SrcBest != nullptr) {
- if (!hasV60TOps())
+ if (!hasV60Ops())
changeLatency(SrcBest, Dst, 1);
else
restoreLatency(SrcBest, Dst);
}
if (DstBest != nullptr) {
- if (!hasV60TOps())
+ if (!hasV60Ops())
changeLatency(Src, DstBest, 1);
else
restoreLatency(Src, DstBest);
diff --git a/lib/Target/Hexagon/HexagonSubtarget.h b/lib/Target/Hexagon/HexagonSubtarget.h
index af93f20d97fc..dc8d173a5057 100644
--- a/lib/Target/Hexagon/HexagonSubtarget.h
+++ b/lib/Target/Hexagon/HexagonSubtarget.h
@@ -32,9 +32,6 @@
#define GET_SUBTARGETINFO_HEADER
#include "HexagonGenSubtargetInfo.inc"
-#define Hexagon_SMALL_DATA_THRESHOLD 8
-#define Hexagon_SLOTS 4
-
namespace llvm {
class MachineInstr;
@@ -46,12 +43,20 @@ class Triple;
class HexagonSubtarget : public HexagonGenSubtargetInfo {
virtual void anchor();
- bool UseMemOps, UseHVX64BOps, UseHVX128BOps;
- bool UseLongCalls;
- bool ModeIEEERndNear;
+ bool UseHVX64BOps = false;
+ bool UseHVX128BOps = false;
+
+ bool UseLongCalls = false;
+ bool UseMemops = false;
+ bool UsePackets = false;
+ bool UseNewValueJumps = false;
+ bool UseNewValueStores = false;
+ bool UseSmallData = false;
bool HasMemNoShuf = false;
bool EnableDuplex = false;
+ bool ReservedR19 = false;
+
public:
Hexagon::ArchEnum HexagonArchVersion;
Hexagon::ArchEnum HexagonHVXVersion = Hexagon::ArchEnum::V4;
@@ -115,44 +120,50 @@ public:
/// subtarget options. Definition of function is auto generated by tblgen.
void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
- bool useMemOps() const { return UseMemOps; }
- bool hasV5TOps() const {
+ bool hasV5Ops() const {
return getHexagonArchVersion() >= Hexagon::ArchEnum::V5;
}
- bool hasV5TOpsOnly() const {
+ bool hasV5OpsOnly() const {
return getHexagonArchVersion() == Hexagon::ArchEnum::V5;
}
- bool hasV55TOps() const {
+ bool hasV55Ops() const {
return getHexagonArchVersion() >= Hexagon::ArchEnum::V55;
}
- bool hasV55TOpsOnly() const {
+ bool hasV55OpsOnly() const {
return getHexagonArchVersion() == Hexagon::ArchEnum::V55;
}
- bool hasV60TOps() const {
+ bool hasV60Ops() const {
return getHexagonArchVersion() >= Hexagon::ArchEnum::V60;
}
- bool hasV60TOpsOnly() const {
+ bool hasV60OpsOnly() const {
return getHexagonArchVersion() == Hexagon::ArchEnum::V60;
}
- bool hasV62TOps() const {
+ bool hasV62Ops() const {
return getHexagonArchVersion() >= Hexagon::ArchEnum::V62;
}
- bool hasV62TOpsOnly() const {
+ bool hasV62OpsOnly() const {
return getHexagonArchVersion() == Hexagon::ArchEnum::V62;
}
- bool hasV65TOps() const {
+ bool hasV65Ops() const {
return getHexagonArchVersion() >= Hexagon::ArchEnum::V65;
}
- bool hasV65TOpsOnly() const {
+ bool hasV65OpsOnly() const {
return getHexagonArchVersion() == Hexagon::ArchEnum::V65;
}
- bool modeIEEERndNear() const { return ModeIEEERndNear; }
+ bool useLongCalls() const { return UseLongCalls; }
+ bool useMemops() const { return UseMemops; }
+ bool usePackets() const { return UsePackets; }
+ bool useNewValueJumps() const { return UseNewValueJumps; }
+ bool useNewValueStores() const { return UseNewValueStores; }
+ bool useSmallData() const { return UseSmallData; }
+
bool useHVXOps() const { return HexagonHVXVersion > Hexagon::ArchEnum::V4; }
bool useHVX128BOps() const { return useHVXOps() && UseHVX128BOps; }
bool useHVX64BOps() const { return useHVXOps() && UseHVX64BOps; }
+
bool hasMemNoShuf() const { return HasMemNoShuf; }
- bool useLongCalls() const { return UseLongCalls; }
+ bool hasReservedR19() const { return ReservedR19; }
bool usePredicatedCalls() const;
bool useBSBScheduling() const { return UseBSBScheduling; }
@@ -170,11 +181,6 @@ public:
const std::string &getCPUString () const { return CPUString; }
- // Threshold for small data section
- unsigned getSmallDataThreshold() const {
- return Hexagon_SMALL_DATA_THRESHOLD;
- }
-
const Hexagon::ArchEnum &getHexagonArchVersion() const {
return HexagonArchVersion;
}
@@ -187,11 +193,11 @@ public:
std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations)
const override;
- /// \brief Enable use of alias analysis during code generation (during MI
+ /// Enable use of alias analysis during code generation (during MI
/// scheduling, DAGCombine, etc.).
bool useAA() const override;
- /// \brief Perform target specific adjustments to the latency of a schedule
+ /// Perform target specific adjustments to the latency of a schedule
/// dependency.
void adjustSchedDependency(SUnit *def, SUnit *use, SDep& dep) const override;
@@ -238,6 +244,12 @@ public:
return llvm::any_of(ElemTypes, [ElemTy] (MVT T) { return ElemTy == T; });
}
+ unsigned getTypeAlignment(MVT Ty) const {
+ if (isHVXVectorType(Ty, true))
+ return getVectorLength();
+ return Ty.getSizeInBits() / 8;
+ }
+
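Assumed concrete values for the new getTypeAlignment helper above: with 128-byte HVX vectors enabled, an HVX type reports the full vector length as its alignment, while scalars fall back to their byte size. Illustrative only:

    // Assuming 128-byte HVX mode:
    //   getTypeAlignment(MVT::v32i32) == 128  // HVX vector -> vector length
    //   getTypeAlignment(MVT::i64)    == 8    // scalar -> size in bytes
    //   getTypeAlignment(MVT::f32)    == 4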
unsigned getL1CacheLineSize() const;
unsigned getL1PrefetchDistance() const;
diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp
index 363b703fef28..2c75e9139ad7 100644
--- a/lib/Target/Hexagon/HexagonTargetMachine.cpp
+++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -94,9 +94,8 @@ static cl::opt<bool> EnableVectorPrint("enable-hexagon-vector-print",
cl::Hidden, cl::ZeroOrMore, cl::init(false),
cl::desc("Enable Hexagon Vector print instr pass"));
-static cl::opt<bool> EnableTrapUnreachable("hexagon-trap-unreachable",
- cl::Hidden, cl::ZeroOrMore, cl::init(false),
- cl::desc("Enable generating trap for unreachable"));
+static cl::opt<bool> EnableVExtractOpt("hexagon-opt-vextract", cl::Hidden,
+ cl::ZeroOrMore, cl::init(true), cl::desc("Enable vextract optimization"));
/// HexagonTargetMachineModule - Note that this is used on hosts that
/// cannot link in a library unless there are references into the
@@ -122,7 +121,9 @@ SchedCustomRegistry("hexagon", "Run Hexagon's custom scheduler",
namespace llvm {
extern char &HexagonExpandCondsetsID;
+ void initializeHexagonBitSimplifyPass(PassRegistry&);
void initializeHexagonConstExtendersPass(PassRegistry&);
+ void initializeHexagonConstPropagationPass(PassRegistry&);
void initializeHexagonEarlyIfConversionPass(PassRegistry&);
void initializeHexagonExpandCondsetsPass(PassRegistry&);
void initializeHexagonGenMuxPass(PassRegistry&);
@@ -133,6 +134,8 @@ namespace llvm {
void initializeHexagonOptAddrModePass(PassRegistry&);
void initializeHexagonPacketizerPass(PassRegistry&);
void initializeHexagonRDFOptPass(PassRegistry&);
+ void initializeHexagonSplitDoubleRegsPass(PassRegistry&);
+ void initializeHexagonVExtractPass(PassRegistry&);
Pass *createHexagonLoopIdiomPass();
Pass *createHexagonVectorLoopCarriedReusePass();
@@ -165,6 +168,7 @@ namespace llvm {
FunctionPass *createHexagonSplitDoubleRegs();
FunctionPass *createHexagonStoreWidening();
FunctionPass *createHexagonVectorPrint();
+ FunctionPass *createHexagonVExtract();
} // end namespace llvm;
static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) {
@@ -184,7 +188,9 @@ extern "C" void LLVMInitializeHexagonTarget() {
RegisterTargetMachine<HexagonTargetMachine> X(getTheHexagonTarget());
PassRegistry &PR = *PassRegistry::getPassRegistry();
+ initializeHexagonBitSimplifyPass(PR);
initializeHexagonConstExtendersPass(PR);
+ initializeHexagonConstPropagationPass(PR);
initializeHexagonEarlyIfConversionPass(PR);
initializeHexagonGenMuxPass(PR);
initializeHexagonHardwareLoopsPass(PR);
@@ -194,6 +200,8 @@ extern "C" void LLVMInitializeHexagonTarget() {
initializeHexagonOptAddrModePass(PR);
initializeHexagonPacketizerPass(PR);
initializeHexagonRDFOptPass(PR);
+ initializeHexagonSplitDoubleRegsPass(PR);
+ initializeHexagonVExtractPass(PR);
}
HexagonTargetMachine::HexagonTargetMachine(const Target &T, const Triple &TT,
@@ -213,8 +221,6 @@ HexagonTargetMachine::HexagonTargetMachine(const Target &T, const Triple &TT,
TT, CPU, FS, Options, getEffectiveRelocModel(RM),
getEffectiveCodeModel(CM), (HexagonNoOpt ? CodeGenOpt::None : OL)),
TLOF(make_unique<HexagonTargetObjectFile>()) {
- if (EnableTrapUnreachable)
- this->Options.TrapUnreachable = true;
initializeHexagonExpandCondsetsPass(*PassRegistry::getPassRegistry());
initAsmInfo();
}
@@ -299,6 +305,11 @@ void HexagonPassConfig::addIRPasses() {
TargetPassConfig::addIRPasses();
bool NoOpt = (getOptLevel() == CodeGenOpt::None);
+ if (!NoOpt) {
+ addPass(createConstantPropagationPass());
+ addPass(createDeadCodeEliminationPass());
+ }
+
addPass(createAtomicExpandPass());
if (!NoOpt) {
if (EnableLoopPrefetch)
@@ -321,6 +332,8 @@ bool HexagonPassConfig::addInstSelector() {
addPass(createHexagonISelDag(TM, getOptLevel()));
if (!NoOpt) {
+ if (EnableVExtractOpt)
+ addPass(createHexagonVExtract());
// Create logical operations on predicate registers.
if (EnableGenPred)
addPass(createHexagonGenPredicate());
diff --git a/lib/Target/Hexagon/HexagonTargetObjectFile.cpp b/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
index ea86c9c42f47..e771f383dffa 100644
--- a/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
+++ b/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
@@ -74,7 +74,7 @@ static cl::opt<bool>
if (TraceGVPlacement) { \
TRACE_TO(errs(), X); \
} else { \
- DEBUG(TRACE_TO(dbgs(), X)); \
+ LLVM_DEBUG(TRACE_TO(dbgs(), X)); \
} \
} while (false)
#endif
@@ -200,11 +200,11 @@ MCSection *HexagonTargetObjectFile::getExplicitSectionGlobal(
bool HexagonTargetObjectFile::isGlobalInSmallSection(const GlobalObject *GO,
const TargetMachine &TM) const {
// Only global variables, not functions.
- DEBUG(dbgs() << "Checking if value is in small-data, -G"
- << SmallDataThreshold << ": \"" << GO->getName() << "\": ");
+ LLVM_DEBUG(dbgs() << "Checking if value is in small-data, -G"
+ << SmallDataThreshold << ": \"" << GO->getName() << "\": ");
const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GO);
if (!GVar) {
- DEBUG(dbgs() << "no, not a global variable\n");
+ LLVM_DEBUG(dbgs() << "no, not a global variable\n");
return false;
}
@@ -213,19 +213,19 @@ bool HexagonTargetObjectFile::isGlobalInSmallSection(const GlobalObject *GO,
// small data or not. This is how we can support mixing -G0/-G8 in LTO.
if (GVar->hasSection()) {
bool IsSmall = isSmallDataSection(GVar->getSection());
- DEBUG(dbgs() << (IsSmall ? "yes" : "no") << ", has section: "
- << GVar->getSection() << '\n');
+ LLVM_DEBUG(dbgs() << (IsSmall ? "yes" : "no")
+ << ", has section: " << GVar->getSection() << '\n');
return IsSmall;
}
if (GVar->isConstant()) {
- DEBUG(dbgs() << "no, is a constant\n");
+ LLVM_DEBUG(dbgs() << "no, is a constant\n");
return false;
}
bool IsLocal = GVar->hasLocalLinkage();
if (!StaticsInSData && IsLocal) {
- DEBUG(dbgs() << "no, is static\n");
+ LLVM_DEBUG(dbgs() << "no, is static\n");
return false;
}
@@ -234,7 +234,7 @@ bool HexagonTargetObjectFile::isGlobalInSmallSection(const GlobalObject *GO,
GType = PT->getElementType();
if (isa<ArrayType>(GType)) {
- DEBUG(dbgs() << "no, is an array\n");
+ LLVM_DEBUG(dbgs() << "no, is an array\n");
return false;
}
@@ -244,22 +244,22 @@ bool HexagonTargetObjectFile::isGlobalInSmallSection(const GlobalObject *GO,
// these objects end up in the sdata, the references will still be valid.
if (StructType *ST = dyn_cast<StructType>(GType)) {
if (ST->isOpaque()) {
- DEBUG(dbgs() << "no, has opaque type\n");
+ LLVM_DEBUG(dbgs() << "no, has opaque type\n");
return false;
}
}
unsigned Size = GVar->getParent()->getDataLayout().getTypeAllocSize(GType);
if (Size == 0) {
- DEBUG(dbgs() << "no, has size 0\n");
+ LLVM_DEBUG(dbgs() << "no, has size 0\n");
return false;
}
if (Size > SmallDataThreshold) {
- DEBUG(dbgs() << "no, size exceeds sdata threshold: " << Size << '\n');
+ LLVM_DEBUG(dbgs() << "no, size exceeds sdata threshold: " << Size << '\n');
return false;
}
- DEBUG(dbgs() << "yes\n");
+ LLVM_DEBUG(dbgs() << "yes\n");
return true;
}
diff --git a/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp b/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
index d638503990ad..a496a17788d5 100644
--- a/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
+++ b/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
@@ -16,23 +16,59 @@
#include "HexagonTargetTransformInfo.h"
#include "HexagonSubtarget.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/User.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Transforms/Utils/UnrollLoop.h"
using namespace llvm;
#define DEBUG_TYPE "hexagontti"
+static cl::opt<bool> HexagonAutoHVX("hexagon-autohvx", cl::init(false),
+ cl::Hidden, cl::desc("Enable loop vectorizer for HVX"));
+
static cl::opt<bool> EmitLookupTables("hexagon-emit-lookup-tables",
cl::init(true), cl::Hidden,
cl::desc("Control lookup table emission on Hexagon target"));
+// Constant "cost factor" to make floating point operations more expensive
+// in terms of vectorization cost. This isn't the best way, but it should
+// do. Ultimately, the cost should use cycles.
+static const unsigned FloatFactor = 4;
+
+bool HexagonTTIImpl::useHVX() const {
+ return ST.useHVXOps() && HexagonAutoHVX;
+}
+
+bool HexagonTTIImpl::isTypeForHVX(Type *VecTy) const {
+ assert(VecTy->isVectorTy());
+ // Avoid types like <2 x i32*>.
+ if (!cast<VectorType>(VecTy)->getElementType()->isIntegerTy())
+ return false;
+ EVT VecVT = EVT::getEVT(VecTy);
+ if (!VecVT.isSimple() || VecVT.getSizeInBits() <= 64)
+ return false;
+ if (ST.isHVXVectorType(VecVT.getSimpleVT()))
+ return true;
+ auto Action = TLI.getPreferredVectorAction(VecVT);
+ return Action == TargetLoweringBase::TypeWidenVector;
+}
+
+unsigned HexagonTTIImpl::getTypeNumElements(Type *Ty) const {
+ if (Ty->isVectorTy())
+ return Ty->getVectorNumElements();
+ assert((Ty->isIntegerTy() || Ty->isFloatingPointTy()) &&
+ "Expecting scalar type");
+ return 1;
+}
+
TargetTransformInfo::PopcntSupportKind
HexagonTTIImpl::getPopcntSupport(unsigned IntTyWidthInBit) const {
- // Return Fast Hardware support as every input < 64 bits will be promoted
+ // Return fast hardware support as every input < 64 bits will be promoted
// to 64 bits.
return TargetTransformInfo::PSK_FastHardware;
}
@@ -41,37 +77,223 @@ HexagonTTIImpl::getPopcntSupport(unsigned IntTyWidthInBit) const {
void HexagonTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP) {
UP.Runtime = UP.Partial = true;
+ // Only peel innermost loops with an unknown but provably small trip count.
+ if (L && L->empty() && canPeel(L) &&
+ SE.getSmallConstantTripCount(L) == 0 &&
+ SE.getSmallConstantMaxTripCount(L) > 0 &&
+ SE.getSmallConstantMaxTripCount(L) <= 5) {
+ UP.PeelCount = 2;
+ }
+}
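A hypothetical example of the loop shape the peeling heuristic above targets: the exact trip count is not a compile-time constant (getSmallConstantTripCount returns 0), but, assuming SCEV can bound the expression, the maximum trip count is small (here 3, which is <= 5), so a peel count of 2 is requested:

    // Innermost loop, trip count unknown but bounded by 3.
    int sumPrefix(const int *A, unsigned n) {
      int S = 0;
      for (unsigned i = 0; i < (n & 3); ++i)
        S += A[i];
      return S;
    }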
+
+bool HexagonTTIImpl::shouldFavorPostInc() const {
+ return true;
+}
+
+/// --- Vector TTI begin ---
+
+unsigned HexagonTTIImpl::getNumberOfRegisters(bool Vector) const {
+ if (Vector)
+ return useHVX() ? 32 : 0;
+ return 32;
+}
+
+unsigned HexagonTTIImpl::getMaxInterleaveFactor(unsigned VF) {
+ return useHVX() ? 2 : 0;
+}
+
+unsigned HexagonTTIImpl::getRegisterBitWidth(bool Vector) const {
+ return Vector ? getMinVectorRegisterBitWidth() : 32;
+}
+
+unsigned HexagonTTIImpl::getMinVectorRegisterBitWidth() const {
+ return useHVX() ? ST.getVectorLength()*8 : 0;
+}
+
+unsigned HexagonTTIImpl::getMinimumVF(unsigned ElemWidth) const {
+ return (8 * ST.getVectorLength()) / ElemWidth;
+}
+
+unsigned HexagonTTIImpl::getScalarizationOverhead(Type *Ty, bool Insert,
+ bool Extract) {
+ return BaseT::getScalarizationOverhead(Ty, Insert, Extract);
+}
+
+unsigned HexagonTTIImpl::getOperandsScalarizationOverhead(
+ ArrayRef<const Value*> Args, unsigned VF) {
+ return BaseT::getOperandsScalarizationOverhead(Args, VF);
+}
+
+unsigned HexagonTTIImpl::getCallInstrCost(Function *F, Type *RetTy,
+ ArrayRef<Type*> Tys) {
+ return BaseT::getCallInstrCost(F, RetTy, Tys);
+}
+
+unsigned HexagonTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
+ ArrayRef<Value*> Args, FastMathFlags FMF, unsigned VF) {
+ return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF);
+}
+
+unsigned HexagonTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
+ ArrayRef<Type*> Tys, FastMathFlags FMF,
+ unsigned ScalarizationCostPassed) {
+ if (ID == Intrinsic::bswap) {
+ std::pair<int, MVT> LT = TLI.getTypeLegalizationCost(DL, RetTy);
+ return LT.first + 2;
+ }
+ return BaseT::getIntrinsicInstrCost(ID, RetTy, Tys, FMF,
+ ScalarizationCostPassed);
+}
+
+unsigned HexagonTTIImpl::getAddressComputationCost(Type *Tp,
+ ScalarEvolution *SE, const SCEV *S) {
+ return 0;
+}
+
+unsigned HexagonTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
+ unsigned Alignment, unsigned AddressSpace, const Instruction *I) {
+ assert(Opcode == Instruction::Load || Opcode == Instruction::Store);
+ if (Opcode == Instruction::Store)
+ return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I);
+
+ if (Src->isVectorTy()) {
+ VectorType *VecTy = cast<VectorType>(Src);
+ unsigned VecWidth = VecTy->getBitWidth();
+ if (useHVX() && isTypeForHVX(VecTy)) {
+ unsigned RegWidth = getRegisterBitWidth(true);
+ Alignment = std::min(Alignment, RegWidth/8);
+ // Cost of HVX loads.
+ if (VecWidth % RegWidth == 0)
+ return VecWidth / RegWidth;
+ // Cost of constructing HVX vector from scalar loads.
+ unsigned AlignWidth = 8 * std::max(1u, Alignment);
+ unsigned NumLoads = alignTo(VecWidth, AlignWidth) / AlignWidth;
+ return 3*NumLoads;
+ }
+
+ // Non-HVX vectors.
+ // Add extra cost for floating point types.
+ unsigned Cost = VecTy->getElementType()->isFloatingPointTy() ? FloatFactor
+ : 1;
+ Alignment = std::min(Alignment, 8u);
+ unsigned AlignWidth = 8 * std::max(1u, Alignment);
+ unsigned NumLoads = alignTo(VecWidth, AlignWidth) / AlignWidth;
+ if (Alignment == 4 || Alignment == 8)
+ return Cost * NumLoads;
+ // Loads of less than 32 bits will need extra inserts to compose a vector.
+ unsigned LogA = Log2_32(Alignment);
+ return (3 - LogA) * Cost * NumLoads;
+ }
+
+ return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I);
+}
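An illustrative restatement of the HVX branch of the load-cost computation above (the function name and parameters here are ours, not the patch's):

    // Mirrors the two HVX cases: whole-register loads vs. composing the
    // vector from smaller aligned loads.
    unsigned hvxLoadCost(unsigned VecWidth, unsigned RegWidth, unsigned Align) {
      if (VecWidth % RegWidth == 0)
        return VecWidth / RegWidth;              // whole-register vector loads
      unsigned AlignWidth = 8 * (Align ? Align : 1);
      unsigned NumLoads = (VecWidth + AlignWidth - 1) / AlignWidth;
      return 3 * NumLoads;                       // compose vector from pieces
    }
    // e.g. a 2048-bit load in 128-byte mode: hvxLoadCost(2048, 1024, 128) == 2.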
+
+unsigned HexagonTTIImpl::getMaskedMemoryOpCost(unsigned Opcode,
+ Type *Src, unsigned Alignment, unsigned AddressSpace) {
+ return BaseT::getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
+}
+
+unsigned HexagonTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp,
+ int Index, Type *SubTp) {
+ return 1;
+}
+
+unsigned HexagonTTIImpl::getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
+ Value *Ptr, bool VariableMask, unsigned Alignment) {
+ return BaseT::getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
+ Alignment);
+}
+
+unsigned HexagonTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode,
+ Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
+ unsigned Alignment, unsigned AddressSpace) {
+ return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
+ Alignment, AddressSpace);
+}
+
+unsigned HexagonTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
+ Type *CondTy, const Instruction *I) {
+ if (ValTy->isVectorTy()) {
+ std::pair<int, MVT> LT = TLI.getTypeLegalizationCost(DL, ValTy);
+ if (Opcode == Instruction::FCmp)
+ return LT.first + FloatFactor * getTypeNumElements(ValTy);
+ }
+ return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
}
-unsigned HexagonTTIImpl::getNumberOfRegisters(bool vector) const {
- return vector ? 0 : 32;
+unsigned HexagonTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
+ TTI::OperandValueKind Opd1Info, TTI::OperandValueKind Opd2Info,
+ TTI::OperandValueProperties Opd1PropInfo,
+ TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value*> Args) {
+ if (Ty->isVectorTy()) {
+ std::pair<int, MVT> LT = TLI.getTypeLegalizationCost(DL, Ty);
+ if (LT.second.isFloatingPoint())
+ return LT.first + FloatFactor * getTypeNumElements(Ty);
+ }
+ return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
+ Opd1PropInfo, Opd2PropInfo, Args);
}
+unsigned HexagonTTIImpl::getCastInstrCost(unsigned Opcode, Type *DstTy,
+ Type *SrcTy, const Instruction *I) {
+ if (SrcTy->isFPOrFPVectorTy() || DstTy->isFPOrFPVectorTy()) {
+ unsigned SrcN = SrcTy->isFPOrFPVectorTy() ? getTypeNumElements(SrcTy) : 0;
+ unsigned DstN = DstTy->isFPOrFPVectorTy() ? getTypeNumElements(DstTy) : 0;
+
+ std::pair<int, MVT> SrcLT = TLI.getTypeLegalizationCost(DL, SrcTy);
+ std::pair<int, MVT> DstLT = TLI.getTypeLegalizationCost(DL, DstTy);
+ return std::max(SrcLT.first, DstLT.first) + FloatFactor * (SrcN + DstN);
+ }
+ return 1;
+}
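Working through the FP cast formula above with assumed numbers (helper and values are illustrative, not from the patch):

    #include <algorithm>

    // max(legalization steps) plus FloatFactor per FP element on each side.
    unsigned fpCastCost(unsigned SrcSteps, unsigned DstSteps,
                        unsigned SrcFPElems, unsigned DstFPElems) {
      const unsigned FloatFactor = 4;
      return std::max(SrcSteps, DstSteps) +
             FloatFactor * (SrcFPElems + DstFPElems);
    }
    // e.g. <16 x float> -> <16 x i32>, assuming one legalization step per
    // side: fpCastCost(1, 1, 16, 0) == 65. Non-FP casts fall through to 1.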
+
+unsigned HexagonTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
+ unsigned Index) {
+ Type *ElemTy = Val->isVectorTy() ? cast<VectorType>(Val)->getElementType()
+ : Val;
+ if (Opcode == Instruction::InsertElement) {
+ // Need two rotations for non-zero index.
+ unsigned Cost = (Index != 0) ? 2 : 0;
+ if (ElemTy->isIntegerTy(32))
+ return Cost;
+ // If it's not a 32-bit value, there will need to be an extract.
+ return Cost + getVectorInstrCost(Instruction::ExtractElement, Val, Index);
+ }
+
+ if (Opcode == Instruction::ExtractElement)
+ return 2;
+
+ return 1;
+}
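Worked values for the insert/extract model above (illustrative):

    //   extractelement <N x T>   %v, i           -> 2
    //   insertelement  <N x i32> %v, %x, 0       -> 0   (no rotation needed)
    //   insertelement  <N x i32> %v, %x, i != 0  -> 2   (two rotations)
    //   insertelement  <N x f32> %v, %x, i != 0  -> 2 + 2 = 4 (adds an extract)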
+
+/// --- Vector TTI end ---
+
unsigned HexagonTTIImpl::getPrefetchDistance() const {
- return getST()->getL1PrefetchDistance();
+ return ST.getL1PrefetchDistance();
}
unsigned HexagonTTIImpl::getCacheLineSize() const {
- return getST()->getL1CacheLineSize();
+ return ST.getL1CacheLineSize();
}
int HexagonTTIImpl::getUserCost(const User *U,
ArrayRef<const Value *> Operands) {
- auto isCastFoldedIntoLoad = [](const CastInst *CI) -> bool {
+ auto isCastFoldedIntoLoad = [this](const CastInst *CI) -> bool {
if (!CI->isIntegerCast())
return false;
+ // Only extensions from an integer type shorter than 32-bit to i32
+ // can be folded into the load.
+ const DataLayout &DL = getDataLayout();
+ unsigned SBW = DL.getTypeSizeInBits(CI->getSrcTy());
+ unsigned DBW = DL.getTypeSizeInBits(CI->getDestTy());
+ if (DBW != 32 || SBW >= DBW)
+ return false;
+
const LoadInst *LI = dyn_cast<const LoadInst>(CI->getOperand(0));
// Technically, this code could allow multiple uses of the load, and
// check if all the uses are the same extension operation, but this
// should be sufficient for most cases.
- if (!LI || !LI->hasOneUse())
- return false;
-
- // Only extensions from an integer type shorter than 32-bit to i32
- // can be folded into the load.
- unsigned SBW = CI->getSrcTy()->getIntegerBitWidth();
- unsigned DBW = CI->getDestTy()->getIntegerBitWidth();
- return DBW == 32 && (SBW < DBW);
+ return LI && LI->hasOneUse();
};
if (const CastInst *CI = dyn_cast<const CastInst>(U))
@@ -81,5 +303,5 @@ int HexagonTTIImpl::getUserCost(const User *U,
}
bool HexagonTTIImpl::shouldBuildLookupTables() const {
- return EmitLookupTables;
+ return EmitLookupTables;
}
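A hypothetical source pattern that the isCastFoldedIntoLoad check in getUserCost accepts: a sub-32-bit integer load whose single use is an extension to i32. The extension is free because Hexagon's sub-word loads (e.g. memh/memub) extend to 32 bits as part of the load itself:

    // i16 load + sext to i32: the cast folds into the load, so it costs 0.
    int loadShort(const short *P) {
      return *P;
    }
    // Extending to i64 instead would not qualify: DBW must be exactly 32.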
diff --git a/lib/Target/Hexagon/HexagonTargetTransformInfo.h b/lib/Target/Hexagon/HexagonTargetTransformInfo.h
index d2cd05012afa..a232f99fc407 100644
--- a/lib/Target/Hexagon/HexagonTargetTransformInfo.h
+++ b/lib/Target/Hexagon/HexagonTargetTransformInfo.h
@@ -37,16 +37,24 @@ class HexagonTTIImpl : public BasicTTIImplBase<HexagonTTIImpl> {
friend BaseT;
- const HexagonSubtarget *ST;
- const HexagonTargetLowering *TLI;
+ const HexagonSubtarget &ST;
+ const HexagonTargetLowering &TLI;
- const HexagonSubtarget *getST() const { return ST; }
- const HexagonTargetLowering *getTLI() const { return TLI; }
+ const HexagonSubtarget *getST() const { return &ST; }
+ const HexagonTargetLowering *getTLI() const { return &TLI; }
+
+ bool useHVX() const;
+ bool isTypeForHVX(Type *VecTy) const;
+
+ // Returns the number of vector elements of Ty, if Ty is a vector type,
+ // or 1 if Ty is a scalar type. It is incorrect to call this function
+ // with any other type.
+ unsigned getTypeNumElements(Type *Ty) const;
public:
explicit HexagonTTIImpl(const HexagonTargetMachine *TM, const Function &F)
- : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
- TLI(ST->getTargetLowering()) {}
+ : BaseT(TM, F.getParent()->getDataLayout()),
+ ST(*TM->getSubtargetImpl(F)), TLI(*ST.getTargetLowering()) {}
/// \name Scalar TTI Implementations
/// @{
@@ -57,6 +65,9 @@ public:
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP);
+ /// Bias LSR towards creating post-increment opportunities.
+ bool shouldFavorPostInc() const;
+
// L1 cache prefetch.
unsigned getPrefetchDistance() const;
unsigned getCacheLineSize() const;
@@ -67,6 +78,64 @@ public:
/// @{
unsigned getNumberOfRegisters(bool vector) const;
+ unsigned getMaxInterleaveFactor(unsigned VF);
+ unsigned getRegisterBitWidth(bool Vector) const;
+ unsigned getMinVectorRegisterBitWidth() const;
+ unsigned getMinimumVF(unsigned ElemWidth) const;
+
+ bool shouldMaximizeVectorBandwidth(bool OptSize) const {
+ return true;
+ }
+ bool supportsEfficientVectorElementLoadStore() {
+ return false;
+ }
+ bool hasBranchDivergence() {
+ return false;
+ }
+ bool enableAggressiveInterleaving(bool LoopHasReductions) {
+ return false;
+ }
+ bool prefersVectorizedAddressing() {
+ return false;
+ }
+
+ unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract);
+ unsigned getOperandsScalarizationOverhead(ArrayRef<const Value*> Args,
+ unsigned VF);
+ unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type*> Tys);
+ unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
+ ArrayRef<Value*> Args, FastMathFlags FMF, unsigned VF);
+ unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
+ ArrayRef<Type*> Tys, FastMathFlags FMF,
+ unsigned ScalarizationCostPassed = UINT_MAX);
+ unsigned getAddressComputationCost(Type *Tp, ScalarEvolution *SE,
+ const SCEV *S);
+ unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+ unsigned AddressSpace, const Instruction *I = nullptr);
+ unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+ unsigned AddressSpace);
+ unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
+ Type *SubTp);
+ unsigned getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr,
+ bool VariableMask, unsigned Alignment);
+ unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
+ unsigned Factor, ArrayRef<unsigned> Indices, unsigned Alignment,
+ unsigned AddressSpace);
+ unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+ const Instruction *I);
+ unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
+ TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
+ TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
+ TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
+ TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
+ ArrayRef<const Value *> Args = ArrayRef<const Value *>());
+ unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+ const Instruction *I = nullptr);
+ unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
+
+ unsigned getCFInstrCost(unsigned Opcode) {
+ return 1;
+ }
/// @}
@@ -77,5 +146,4 @@ public:
};
} // end namespace llvm
-
#endif // LLVM_LIB_TARGET_HEXAGON_HEXAGONTARGETTRANSFORMINFO_H
diff --git a/lib/Target/Hexagon/HexagonVExtract.cpp b/lib/Target/Hexagon/HexagonVExtract.cpp
new file mode 100644
index 000000000000..929ac2bd0d93
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonVExtract.cpp
@@ -0,0 +1,166 @@
+//===- HexagonVExtract.cpp ------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This pass will replace multiple occurrences of V6_extractw from the same
+// vector register with a combination of a vector store and scalar loads.
+//===----------------------------------------------------------------------===//
+
+#include "Hexagon.h"
+#include "HexagonInstrInfo.h"
+#include "HexagonRegisterInfo.h"
+#include "HexagonSubtarget.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/PassSupport.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+
+#include <map>
+
+using namespace llvm;
+
+static cl::opt<unsigned> VExtractThreshold("hexagon-vextract-threshold",
+ cl::Hidden, cl::ZeroOrMore, cl::init(1),
+ cl::desc("Threshold for triggering vextract replacement"));
+
+namespace llvm {
+ void initializeHexagonVExtractPass(PassRegistry& Registry);
+ FunctionPass *createHexagonVExtract();
+}
+
+namespace {
+ class HexagonVExtract : public MachineFunctionPass {
+ public:
+ static char ID;
+ HexagonVExtract() : MachineFunctionPass(ID) {}
+
+ StringRef getPassName() const override {
+ return "Hexagon optimize vextract";
+ }
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ private:
+ const HexagonSubtarget *HST = nullptr;
+ const HexagonInstrInfo *HII = nullptr;
+
+ unsigned genElemLoad(MachineInstr *ExtI, unsigned BaseR,
+ MachineRegisterInfo &MRI);
+ };
+
+ char HexagonVExtract::ID = 0;
+}
+
+INITIALIZE_PASS(HexagonVExtract, "hexagon-vextract",
+ "Hexagon optimize vextract", false, false)
+
+unsigned HexagonVExtract::genElemLoad(MachineInstr *ExtI, unsigned BaseR,
+ MachineRegisterInfo &MRI) {
+ MachineBasicBlock &ExtB = *ExtI->getParent();
+ DebugLoc DL = ExtI->getDebugLoc();
+ unsigned ElemR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
+
+ unsigned ExtIdxR = ExtI->getOperand(2).getReg();
+ unsigned ExtIdxS = ExtI->getOperand(2).getSubReg();
+
+ // Simplified check for a compile-time constant value of ExtIdxR.
+ if (ExtIdxS == 0) {
+ MachineInstr *DI = MRI.getVRegDef(ExtIdxR);
+ if (DI->getOpcode() == Hexagon::A2_tfrsi) {
+ unsigned V = DI->getOperand(1).getImm();
+ V &= (HST->getVectorLength()-1) & -4u;
+
+ BuildMI(ExtB, ExtI, DL, HII->get(Hexagon::L2_loadri_io), ElemR)
+ .addReg(BaseR)
+ .addImm(V);
+ return ElemR;
+ }
+ }
+
+ unsigned IdxR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
+ BuildMI(ExtB, ExtI, DL, HII->get(Hexagon::A2_andir), IdxR)
+ .add(ExtI->getOperand(2))
+ .addImm(-4);
+ BuildMI(ExtB, ExtI, DL, HII->get(Hexagon::L4_loadri_rr), ElemR)
+ .addReg(BaseR)
+ .addReg(IdxR)
+ .addImm(0);
+ return ElemR;
+}
+
+bool HexagonVExtract::runOnMachineFunction(MachineFunction &MF) {
+ HST = &MF.getSubtarget<HexagonSubtarget>();
+ HII = HST->getInstrInfo();
+ const auto &HRI = *HST->getRegisterInfo();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ std::map<unsigned, SmallVector<MachineInstr*,4>> VExtractMap;
+ bool Changed = false;
+
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : MBB) {
+ unsigned Opc = MI.getOpcode();
+ if (Opc != Hexagon::V6_extractw)
+ continue;
+ unsigned VecR = MI.getOperand(1).getReg();
+ VExtractMap[VecR].push_back(&MI);
+ }
+ }
+
+ for (auto &P : VExtractMap) {
+ unsigned VecR = P.first;
+ if (P.second.size() <= VExtractThreshold)
+ continue;
+
+ const auto &VecRC = *MRI.getRegClass(VecR);
+ int FI = MFI.CreateSpillStackObject(HRI.getSpillSize(VecRC),
+ HRI.getSpillAlignment(VecRC));
+ MachineInstr *DefI = MRI.getVRegDef(VecR);
+ MachineBasicBlock::iterator At = std::next(DefI->getIterator());
+ MachineBasicBlock &DefB = *DefI->getParent();
+ unsigned StoreOpc = VecRC.getID() == Hexagon::HvxVRRegClassID
+ ? Hexagon::V6_vS32b_ai
+ : Hexagon::PS_vstorerw_ai;
+ BuildMI(DefB, At, DefI->getDebugLoc(), HII->get(StoreOpc))
+ .addFrameIndex(FI)
+ .addImm(0)
+ .addReg(VecR);
+
+ unsigned VecSize = HRI.getRegSizeInBits(VecRC) / 8;
+
+ for (MachineInstr *ExtI : P.second) {
+ assert(ExtI->getOpcode() == Hexagon::V6_extractw);
+ unsigned SR = ExtI->getOperand(1).getSubReg();
+ assert(ExtI->getOperand(1).getReg() == VecR);
+
+ MachineBasicBlock &ExtB = *ExtI->getParent();
+ DebugLoc DL = ExtI->getDebugLoc();
+ unsigned BaseR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
+ BuildMI(ExtB, ExtI, DL, HII->get(Hexagon::PS_fi), BaseR)
+ .addFrameIndex(FI)
+ .addImm(SR == 0 ? 0 : VecSize/2);
+
+ unsigned ElemR = genElemLoad(ExtI, BaseR, MRI);
+ unsigned ExtR = ExtI->getOperand(0).getReg();
+ MRI.replaceRegWith(ExtR, ElemR);
+ ExtB.erase(ExtI);
+ Changed = true;
+ }
+ }
+
+ return Changed;
+}
+
+FunctionPass *llvm::createHexagonVExtract() {
+ return new HexagonVExtract();
+}
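A rough before/after sketch of the transformation this new pass performs (register names, offsets, and pseudo-MIR syntax are illustrative only):

    // Before: each V6_extractw is a costly vector-to-scalar transfer.
    //   %r1 = V6_extractw %v0, %r10
    //   %r2 = V6_extractw %v0, %r11
    // After: one spill of %v0 to a stack slot, then plain word loads at
    // word-aligned (masked) offsets.
    //   vmem(fi#0 + 0) = %v0
    //   %r1 = memw(fi#0 + (%r10 & ~3))
    //   %r2 = memw(fi#0 + (%r11 & ~3))

The store-plus-loads form only pays off when a vector is extracted from more than VExtractThreshold times, hence the map from vector register to its extract instructions.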
diff --git a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
index c2404235091c..56ab69db9bd1 100644
--- a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
+++ b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
@@ -199,11 +199,12 @@ static MachineBasicBlock::iterator moveInstrOut(MachineInstr &MI,
}
bool HexagonPacketizer::runOnMachineFunction(MachineFunction &MF) {
- if (DisablePacketizer || skipFunction(MF.getFunction()))
+ auto &HST = MF.getSubtarget<HexagonSubtarget>();
+ if (DisablePacketizer || !HST.usePackets() || skipFunction(MF.getFunction()))
return false;
- HII = MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
- HRI = MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
+ HII = HST.getInstrInfo();
+ HRI = HST.getRegisterInfo();
auto &MLI = getAnalysis<MachineLoopInfo>();
auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
auto *MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
@@ -374,7 +375,7 @@ bool HexagonPacketizerList::promoteToDotCur(MachineInstr &MI,
void HexagonPacketizerList::cleanUpDotCur() {
MachineInstr *MI = nullptr;
for (auto BI : CurrentPacketMIs) {
- DEBUG(dbgs() << "Cleanup packet has "; BI->dump(););
+ LLVM_DEBUG(dbgs() << "Cleanup packet has "; BI->dump(););
if (HII->isDotCurInst(*BI)) {
MI = BI;
continue;
@@ -389,7 +390,7 @@ void HexagonPacketizerList::cleanUpDotCur() {
return;
// We did not find a use of the CUR, so de-cur it.
MI->setDesc(HII->get(HII->getNonDotCurOp(*MI)));
- DEBUG(dbgs() << "Demoted CUR "; MI->dump(););
+ LLVM_DEBUG(dbgs() << "Demoted CUR "; MI->dump(););
}
// Check to see if an instruction can be dot cur.
@@ -413,11 +414,10 @@ bool HexagonPacketizerList::canPromoteToDotCur(const MachineInstr &MI,
return false;
// Make sure candidate instruction uses cur.
- DEBUG(dbgs() << "Can we DOT Cur Vector MI\n";
- MI.dump();
- dbgs() << "in packet\n";);
+ LLVM_DEBUG(dbgs() << "Can we DOT Cur Vector MI\n"; MI.dump();
+ dbgs() << "in packet\n";);
MachineInstr &MJ = *MII;
- DEBUG({
+ LLVM_DEBUG({
dbgs() << "Checking CUR against ";
MJ.dump();
});
@@ -432,12 +432,12 @@ bool HexagonPacketizerList::canPromoteToDotCur(const MachineInstr &MI,
// Check for existing uses of a vector register within the packet which
// would be affected by converting a vector load into .cur format.
for (auto BI : CurrentPacketMIs) {
- DEBUG(dbgs() << "packet has "; BI->dump(););
+ LLVM_DEBUG(dbgs() << "packet has "; BI->dump(););
if (BI->readsRegister(DepReg, MF.getSubtarget().getRegisterInfo()))
return false;
}
- DEBUG(dbgs() << "Can Dot CUR MI\n"; MI.dump(););
+ LLVM_DEBUG(dbgs() << "Can Dot CUR MI\n"; MI.dump(););
// We can convert the opcode into a .cur.
return true;
}
@@ -529,6 +529,9 @@ bool HexagonPacketizerList::updateOffset(SUnit *SUI, SUnit *SUJ) {
return false;
int64_t Offset = MI.getOperand(OPI).getImm();
+ if (!HII->isValidOffset(MI.getOpcode(), Offset+Incr, HRI))
+ return false;
+
MI.getOperand(OPI).setImm(Offset + Incr);
ChangedOffset = Offset;
return true;
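The new guard above is the substantive change to updateOffset: before folding an increment into a neighboring memory op's immediate, the packetizer now asks HexagonInstrInfo::isValidOffset whether Offset+Incr is still encodable for that opcode. A hedged sketch of the same shape, with an explicit min/max range standing in for the real per-opcode check:

    #include <cstdint>

    // Only commit the new immediate when it still fits the instruction's
    // offset field; 'minOff'/'maxOff' are stand-ins for the per-opcode
    // range that isValidOffset consults.
    static bool updateOffset(int64_t &slot, int64_t incr,
                             int64_t minOff, int64_t maxOff) {
      int64_t next = slot + incr;
      if (next < minOff || next > maxOff)  // would not encode: bail out
        return false;
      slot = next;                         // safe to fold the increment
      return true;
    }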
@@ -1033,7 +1036,7 @@ void HexagonPacketizerList::initPacketizerState() {
// Ignore bundling of pseudo instructions.
bool HexagonPacketizerList::ignorePseudoInstruction(const MachineInstr &MI,
const MachineBasicBlock *) {
- if (MI.isDebugValue())
+ if (MI.isDebugInstr())
return true;
if (MI.isCFIInstruction())
@@ -1095,7 +1098,7 @@ bool HexagonPacketizerList::isSoloInstruction(const MachineInstr &MI) {
static bool cannotCoexistAsymm(const MachineInstr &MI, const MachineInstr &MJ,
const HexagonInstrInfo &HII) {
const MachineFunction *MF = MI.getParent()->getParent();
- if (MF->getSubtarget<HexagonSubtarget>().hasV60TOpsOnly() &&
+ if (MF->getSubtarget<HexagonSubtarget>().hasV60OpsOnly() &&
HII.isHVXMemWithAIndirect(MI, MJ))
return true;
@@ -1112,6 +1115,10 @@ static bool cannotCoexistAsymm(const MachineInstr &MI, const MachineInstr &MJ,
case Hexagon::S4_stored_locked:
case Hexagon::L2_loadw_locked:
case Hexagon::L4_loadd_locked:
+ case Hexagon::Y2_dccleana:
+ case Hexagon::Y2_dccleaninva:
+ case Hexagon::Y2_dcinva:
+ case Hexagon::Y2_dczeroa:
case Hexagon::Y4_l2fetch:
case Hexagon::Y5_l2fetch: {
// These instructions can only be grouped with ALU32 or non-floating-point
@@ -1513,7 +1520,7 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
bool IsVecJ = HII->isHVXVec(J);
bool IsVecI = HII->isHVXVec(I);
- if (Slot1Store && MF.getSubtarget<HexagonSubtarget>().hasV65TOps() &&
+ if (Slot1Store && MF.getSubtarget<HexagonSubtarget>().hasV65Ops() &&
((LoadJ && StoreI && !NVStoreI) ||
(StoreJ && LoadI && !NVStoreJ)) &&
(J.getOpcode() != Hexagon::S2_allocframe &&
@@ -1683,8 +1690,12 @@ HexagonPacketizerList::addToPacket(MachineInstr &MI) {
PacketStalls = false;
PacketStalls |= producesStall(MI);
- if (MI.isImplicitDef())
+ if (MI.isImplicitDef()) {
+ // Add to the packet to allow subsequent instructions to be checked
+ // properly.
+ CurrentPacketMIs.push_back(&MI);
return MII;
+ }
assert(ResourceTracker->canReserveResources(MI));
bool ExtMI = HII->isExtended(MI) || HII->isConstExtended(MI);
@@ -1754,7 +1765,7 @@ void HexagonPacketizerList::endPacket(MachineBasicBlock *MBB,
bool memShufDisabled = getmemShufDisabled();
if (memShufDisabled && !foundLSInPacket()) {
setmemShufDisabled(false);
- DEBUG(dbgs() << " Not added to NoShufPacket\n");
+ LLVM_DEBUG(dbgs() << " Not added to NoShufPacket\n");
}
memShufDisabled = getmemShufDisabled();
@@ -1773,7 +1784,7 @@ void HexagonPacketizerList::endPacket(MachineBasicBlock *MBB,
CurrentPacketMIs.clear();
ResourceTracker->clearResources();
- DEBUG(dbgs() << "End packet\n");
+ LLVM_DEBUG(dbgs() << "End packet\n");
}
bool HexagonPacketizerList::shouldAddToPacket(const MachineInstr &MI) {
@@ -1803,17 +1814,18 @@ bool HexagonPacketizerList::producesStall(const MachineInstr &I) {
SUnit *SUI = MIToSUnit[const_cast<MachineInstr *>(&I)];
- // Check if the latency is 0 between this instruction and any instruction
- // in the current packet. If so, we disregard any potential stalls due to
- // the instructions in the previous packet. Most of the instruction pairs
- // that can go together in the same packet have 0 latency between them.
- // Only exceptions are newValueJumps as they're generated much later and
- // the latencies can't be changed at that point. Another is .cur
- // instructions if its consumer has a 0 latency successor (such as .new).
- // In this case, the latency between .cur and the consumer stays non-zero
- // even though we can have both .cur and .new in the same packet. Changing
- // the latency to 0 is not an option as it causes software pipeliner to
- // not pipeline in some cases.
+ // If the latency is 0 and there is a data dependence between this
+ // instruction and any instruction in the current packet, we disregard any
+ // potential stalls due to the instructions in the previous packet. Most of
+ // the instruction pairs that can go together in the same packet have 0
+ // latency between them. The exceptions are
+ // 1. NewValueJumps as they're generated much later and the latencies can't
+ // be changed at that point.
+ // 2. .cur instructions, if its consumer has a 0 latency successor (such as
+ // .new). In this case, the latency between .cur and the consumer stays
+ // non-zero even though we can have both .cur and .new in the same packet.
+ // Changing the latency to 0 is not an option, as it causes the software
+ // pipeliner to not pipeline in some cases.
// For Example:
// {
@@ -1826,19 +1838,10 @@ bool HexagonPacketizerList::producesStall(const MachineInstr &I) {
for (auto J : CurrentPacketMIs) {
SUnit *SUJ = MIToSUnit[J];
for (auto &Pred : SUI->Preds)
- if (Pred.getSUnit() == SUJ &&
- (Pred.getLatency() == 0 || HII->isNewValueJump(I) ||
- HII->isToBeScheduledASAP(*J, I)))
- return false;
- }
-
- // Check if the latency is greater than one between this instruction and any
- // instruction in the previous packet.
- for (auto J : OldPacketMIs) {
- SUnit *SUJ = MIToSUnit[J];
- for (auto &Pred : SUI->Preds)
- if (Pred.getSUnit() == SUJ && Pred.getLatency() > 1)
- return true;
+ if (Pred.getSUnit() == SUJ)
+ if ((Pred.getLatency() == 0 && Pred.isAssignedRegDep()) ||
+ HII->isNewValueJump(I) || HII->isToBeScheduledASAP(*J, I))
+ return false;
}
// Check if the latency is greater than one between this instruction and any
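The restructured loop tightens the waiver: a zero-latency edge to an instruction already in the packet now suppresses the stall check only when it is an assigned register data dependence (Pred.isAssignedRegDep()), not an artificial or ordering edge. The predicate in isolation, with plain fields standing in for the SDep accessors:

    // Sketch of the rewritten condition; 'Edge' is a stand-in for SDep.
    struct Edge { int Latency; bool AssignedRegDep; };

    static bool waivesStall(const Edge &E, bool IsNewValueJump,
                            bool ScheduledASAP) {
      return (E.Latency == 0 && E.AssignedRegDep) ||
             IsNewValueJump || ScheduledASAP;
    }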
diff --git a/lib/Target/Hexagon/HexagonVLIWPacketizer.h b/lib/Target/Hexagon/HexagonVLIWPacketizer.h
index 764d9ae9059a..40dcee3441a2 100644
--- a/lib/Target/Hexagon/HexagonVLIWPacketizer.h
+++ b/lib/Target/Hexagon/HexagonVLIWPacketizer.h
@@ -59,7 +59,7 @@ class HexagonPacketizerList : public VLIWPacketizerList {
bool PacketStalls = false;
protected:
- /// \brief A handle to the branch probability pass.
+ /// A handle to the branch probability pass.
const MachineBranchProbabilityInfo *MBPI;
const MachineLoopInfo *MLI;
diff --git a/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp b/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp
index 39395dbd3aec..9d1073346c72 100644
--- a/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp
+++ b/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp
@@ -138,6 +138,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
@@ -363,17 +364,18 @@ bool HexagonVectorLoopCarriedReuse::canReplace(Instruction *I) {
if (II &&
(II->getIntrinsicID() == Intrinsic::hexagon_V6_hi ||
II->getIntrinsicID() == Intrinsic::hexagon_V6_lo)) {
- DEBUG(dbgs() << "Not considering for reuse: " << *II << "\n");
+ LLVM_DEBUG(dbgs() << "Not considering for reuse: " << *II << "\n");
return false;
}
return true;
}
void HexagonVectorLoopCarriedReuse::findValueToReuse() {
for (auto *D : Dependences) {
- DEBUG(dbgs() << "Processing dependence " << *(D->front()) << "\n");
+ LLVM_DEBUG(dbgs() << "Processing dependence " << *(D->front()) << "\n");
if (D->iterations() > HexagonVLCRIterationLim) {
- DEBUG(dbgs() <<
- ".. Skipping because number of iterations > than the limit\n");
+ LLVM_DEBUG(
+ dbgs()
+ << ".. Skipping because number of iterations > the limit\n");
continue;
}
@@ -381,7 +383,8 @@ void HexagonVectorLoopCarriedReuse::findValueToReuse() {
Instruction *BEInst = D->back();
int Iters = D->iterations();
BasicBlock *BB = PN->getParent();
- DEBUG(dbgs() << "Checking if any uses of " << *PN << " can be reused\n");
+ LLVM_DEBUG(dbgs() << "Checking if any uses of " << *PN
+ << " can be reused\n");
SmallVector<Instruction *, 4> PNUsers;
for (auto UI = PN->use_begin(), E = PN->use_end(); UI != E; ++UI) {
@@ -391,7 +394,8 @@ void HexagonVectorLoopCarriedReuse::findValueToReuse() {
if (User->getParent() != BB)
continue;
if (ReplacedInsts.count(User)) {
- DEBUG(dbgs() << *User << " has already been replaced. Skipping...\n");
+ LLVM_DEBUG(dbgs() << *User
+ << " has already been replaced. Skipping...\n");
continue;
}
if (isa<PHINode>(User))
@@ -403,7 +407,7 @@ void HexagonVectorLoopCarriedReuse::findValueToReuse() {
PNUsers.push_back(User);
}
- DEBUG(dbgs() << PNUsers.size() << " use(s) of the PHI in the block\n");
+ LLVM_DEBUG(dbgs() << PNUsers.size() << " use(s) of the PHI in the block\n");
// For each interesting use I of PN, find an Instruction BEUser that
// performs the same operation as I on BEInst and whose other operands,
@@ -439,7 +443,7 @@ void HexagonVectorLoopCarriedReuse::findValueToReuse() {
}
}
if (BEUser) {
- DEBUG(dbgs() << "Found Value for reuse.\n");
+ LLVM_DEBUG(dbgs() << "Found Value for reuse.\n");
ReuseCandidate.Inst2Replace = I;
ReuseCandidate.BackedgeInst = BEUser;
return;
@@ -460,7 +464,7 @@ Value *HexagonVectorLoopCarriedReuse::findValueInBlock(Value *Op,
}
void HexagonVectorLoopCarriedReuse::reuseValue() {
- DEBUG(dbgs() << ReuseCandidate);
+ LLVM_DEBUG(dbgs() << ReuseCandidate);
Instruction *Inst2Replace = ReuseCandidate.Inst2Replace;
Instruction *BEInst = ReuseCandidate.BackedgeInst;
int NumOperands = Inst2Replace->getNumOperands();
@@ -485,7 +489,7 @@ void HexagonVectorLoopCarriedReuse::reuseValue() {
}
}
- DEBUG(dbgs() << "reuseValue is making the following changes\n");
+ LLVM_DEBUG(dbgs() << "reuseValue is making the following changes\n");
SmallVector<Instruction *, 4> InstsInPreheader;
for (int i = 0; i < Iterations; ++i) {
@@ -506,8 +510,8 @@ void HexagonVectorLoopCarriedReuse::reuseValue() {
InstsInPreheader.push_back(InstInPreheader);
InstInPreheader->setName(Inst2Replace->getName() + ".hexagon.vlcr");
InstInPreheader->insertBefore(LoopPH->getTerminator());
- DEBUG(dbgs() << "Added " << *InstInPreheader << " to " << LoopPH->getName()
- << "\n");
+ LLVM_DEBUG(dbgs() << "Added " << *InstInPreheader << " to "
+ << LoopPH->getName() << "\n");
}
BasicBlock *BB = BEInst->getParent();
IRBuilder<> IRB(BB);
@@ -519,7 +523,8 @@ void HexagonVectorLoopCarriedReuse::reuseValue() {
NewPhi = IRB.CreatePHI(InstInPreheader->getType(), 2);
NewPhi->addIncoming(InstInPreheader, LoopPH);
NewPhi->addIncoming(BEVal, BB);
- DEBUG(dbgs() << "Adding " << *NewPhi << " to " << BB->getName() << "\n");
+ LLVM_DEBUG(dbgs() << "Adding " << *NewPhi << " to " << BB->getName()
+ << "\n");
BEVal = NewPhi;
}
// We are in LCSSA form. So, a value defined inside the Loop is used only
@@ -538,7 +543,7 @@ bool HexagonVectorLoopCarriedReuse::doVLCR() {
bool Changed = false;
bool Continue;
- DEBUG(dbgs() << "Working on Loop: " << *CurLoop->getHeader() << "\n");
+ LLVM_DEBUG(dbgs() << "Working on Loop: " << *CurLoop->getHeader() << "\n");
do {
// Reset datastructures.
Dependences.clear();
@@ -625,10 +630,9 @@ void HexagonVectorLoopCarriedReuse::findLoopCarriedDeps() {
else
delete D;
}
- DEBUG(dbgs() << "Found " << Dependences.size() << " dependences\n");
- DEBUG(for (size_t i = 0; i < Dependences.size(); ++i) {
- dbgs() << *Dependences[i] << "\n";
- });
+ LLVM_DEBUG(dbgs() << "Found " << Dependences.size() << " dependences\n");
+ LLVM_DEBUG(for (size_t i = 0; i < Dependences.size();
+ ++i) { dbgs() << *Dependences[i] << "\n"; });
}
Pass *llvm::createHexagonVectorLoopCarriedReusePass() {
diff --git a/lib/Target/Hexagon/HexagonVectorPrint.cpp b/lib/Target/Hexagon/HexagonVectorPrint.cpp
index ddd668b2cb1e..18d2f2f4acde 100644
--- a/lib/Target/Hexagon/HexagonVectorPrint.cpp
+++ b/lib/Target/Hexagon/HexagonVectorPrint.cpp
@@ -144,14 +144,15 @@ bool HexagonVectorPrint::runOnMachineFunction(MachineFunction &Fn) {
unsigned Reg = 0;
if (getInstrVecReg(*MII, Reg)) {
VecPrintList.push_back((&*MII));
- DEBUG(dbgs() << "Found vector reg inside bundle \n"; MII->dump());
+ LLVM_DEBUG(dbgs() << "Found vector reg inside bundle \n";
+ MII->dump());
}
}
} else {
unsigned Reg = 0;
if (getInstrVecReg(MI, Reg)) {
VecPrintList.push_back(&MI);
- DEBUG(dbgs() << "Found vector reg \n"; MI.dump());
+ LLVM_DEBUG(dbgs() << "Found vector reg \n"; MI.dump());
}
}
}
@@ -163,33 +164,33 @@ bool HexagonVectorPrint::runOnMachineFunction(MachineFunction &Fn) {
for (auto *I : VecPrintList) {
DebugLoc DL = I->getDebugLoc();
MachineBasicBlock *MBB = I->getParent();
- DEBUG(dbgs() << "Evaluating V MI\n"; I->dump());
+ LLVM_DEBUG(dbgs() << "Evaluating V MI\n"; I->dump());
unsigned Reg = 0;
if (!getInstrVecReg(*I, Reg))
llvm_unreachable("Need a vector reg");
MachineBasicBlock::instr_iterator MII = I->getIterator();
if (I->isInsideBundle()) {
- DEBUG(dbgs() << "add to end of bundle\n"; I->dump());
+ LLVM_DEBUG(dbgs() << "add to end of bundle\n"; I->dump());
while (MBB->instr_end() != MII && MII->isInsideBundle())
MII++;
} else {
- DEBUG(dbgs() << "add after instruction\n"; I->dump());
+ LLVM_DEBUG(dbgs() << "add after instruction\n"; I->dump());
MII++;
}
if (MBB->instr_end() == MII)
continue;
if (Reg >= Hexagon::V0 && Reg <= Hexagon::V31) {
- DEBUG(dbgs() << "adding dump for V" << Reg-Hexagon::V0 << '\n');
+ LLVM_DEBUG(dbgs() << "adding dump for V" << Reg - Hexagon::V0 << '\n');
addAsmInstr(MBB, Reg, MII, DL, QII, Fn);
} else if (Reg >= Hexagon::W0 && Reg <= Hexagon::W15) {
- DEBUG(dbgs() << "adding dump for W" << Reg-Hexagon::W0 << '\n');
+ LLVM_DEBUG(dbgs() << "adding dump for W" << Reg - Hexagon::W0 << '\n');
addAsmInstr(MBB, Hexagon::V0 + (Reg - Hexagon::W0) * 2 + 1,
MII, DL, QII, Fn);
addAsmInstr(MBB, Hexagon::V0 + (Reg - Hexagon::W0) * 2,
MII, DL, QII, Fn);
} else if (Reg >= Hexagon::Q0 && Reg <= Hexagon::Q3) {
- DEBUG(dbgs() << "adding dump for Q" << Reg-Hexagon::Q0 << '\n');
+ LLVM_DEBUG(dbgs() << "adding dump for Q" << Reg - Hexagon::Q0 << '\n');
addAsmInstr(MBB, Reg, MII, DL, QII, Fn);
} else
llvm_unreachable("Bad Vector reg");
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
index b3ab6763281c..af1e5429d0c2 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
@@ -51,7 +51,7 @@ class HexagonAsmBackend : public MCAsmBackend {
SmallVector<MCFixup, 4> Fixups;
SmallString<256> Code;
raw_svector_ostream VecOS(Code);
- E.encodeInstruction(HMB, VecOS, Fixups, RF.getSubtargetInfo());
+ E.encodeInstruction(HMB, VecOS, Fixups, *RF.getSubtargetInfo());
// Update the fragment.
RF.setInst(HMB);
@@ -61,13 +61,14 @@ class HexagonAsmBackend : public MCAsmBackend {
public:
HexagonAsmBackend(const Target &T, const Triple &TT, uint8_t OSABI,
- StringRef CPU) :
- OSABI(OSABI), CPU(CPU), MCII(T.createMCInstrInfo()),
- RelaxTarget(new MCInst *), Extender(nullptr) {}
-
- std::unique_ptr<MCObjectWriter>
- createObjectWriter(raw_pwrite_stream &OS) const override {
- return createHexagonELFObjectWriter(OS, OSABI, CPU);
+ StringRef CPU)
+ : MCAsmBackend(support::little), OSABI(OSABI), CPU(CPU),
+ MCII(T.createMCInstrInfo()), RelaxTarget(new MCInst *),
+ Extender(nullptr) {}
+
+ std::unique_ptr<MCObjectTargetWriter>
+ createObjectTargetWriter() const override {
+ return createHexagonELFObjectWriter(OSABI, CPU);
}
void setExtender(MCContext &Context) const {
@@ -413,7 +414,8 @@ public:
/// fixup kind as appropriate.
void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
const MCValue &Target, MutableArrayRef<char> Data,
- uint64_t FixupValue, bool IsResolved) const override {
+ uint64_t FixupValue, bool IsResolved,
+ const MCSubtargetInfo *STI) const override {
// When FixupValue is 0 the relocation is external and there
// is nothing for us to do.
@@ -510,17 +512,15 @@ public:
break;
}
- DEBUG(dbgs() << "Name=" << getFixupKindInfo(Kind).Name << "(" <<
- (unsigned)Kind << ")\n");
- DEBUG(uint32_t OldData = 0;
- for (unsigned i = 0; i < NumBytes; i++)
- OldData |= (InstAddr[i] << (i * 8)) & (0xff << (i * 8));
- dbgs() << "\tBValue=0x"; dbgs().write_hex(Value) <<
- ": AValue=0x"; dbgs().write_hex(FixupValue) <<
- ": Offset=" << Offset <<
- ": Size=" << Data.size() <<
- ": OInst=0x"; dbgs().write_hex(OldData) <<
- ": Reloc=0x"; dbgs().write_hex(Reloc););
+ LLVM_DEBUG(dbgs() << "Name=" << getFixupKindInfo(Kind).Name << "("
+ << (unsigned)Kind << ")\n");
+ LLVM_DEBUG(
+ uint32_t OldData = 0; for (unsigned i = 0; i < NumBytes; i++) OldData |=
+ (InstAddr[i] << (i * 8)) & (0xff << (i * 8));
+ dbgs() << "\tBValue=0x"; dbgs().write_hex(Value) << ": AValue=0x";
+ dbgs().write_hex(FixupValue)
+ << ": Offset=" << Offset << ": Size=" << Data.size() << ": OInst=0x";
+ dbgs().write_hex(OldData) << ": Reloc=0x"; dbgs().write_hex(Reloc););
// For each byte of the fragment that the fixup touches, mask in the
// bits from the fixup value. The Value has been "split up" into the
@@ -530,10 +530,10 @@ public:
InstAddr[i] |= uint8_t(Reloc >> (i * 8)) & 0xff; // Apply new reloc
}
- DEBUG(uint32_t NewData = 0;
- for (unsigned i = 0; i < NumBytes; i++)
- NewData |= (InstAddr[i] << (i * 8)) & (0xff << (i * 8));
- dbgs() << ": NInst=0x"; dbgs().write_hex(NewData) << "\n";);
+ LLVM_DEBUG(uint32_t NewData = 0;
+ for (unsigned i = 0; i < NumBytes; i++) NewData |=
+ (InstAddr[i] << (i * 8)) & (0xff << (i * 8));
+ dbgs() << ": NInst=0x"; dbgs().write_hex(NewData) << "\n";);
}
bool isInstRelaxable(MCInst const &HMI) const {
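For context, the loop whose debug output the hunk above reformats ORs the relocation value into the fragment one byte at a time, least-significant byte first (Hexagon instruction words are little-endian). As a standalone model, not the MC API:

    #include <cstdint>

    // OR a 32-bit relocation value into 'NumBytes' instruction bytes,
    // little-endian, exactly as InstAddr[i] |= uint8_t(Reloc >> (i * 8))
    // does in applyFixup.
    static void applyReloc(uint8_t *InstAddr, uint32_t Reloc,
                           unsigned NumBytes) {
      for (unsigned i = 0; i < NumBytes; ++i)
        InstAddr[i] |= uint8_t(Reloc >> (i * 8));
    }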
@@ -562,7 +562,8 @@ public:
/// relaxation.
///
/// \param Inst - The instruction to test.
- bool mayNeedRelaxation(MCInst const &Inst) const override {
+ bool mayNeedRelaxation(MCInst const &Inst,
+ const MCSubtargetInfo &STI) const override {
return true;
}
@@ -571,7 +572,8 @@ public:
bool fixupNeedsRelaxationAdvanced(const MCFixup &Fixup, bool Resolved,
uint64_t Value,
const MCRelaxableFragment *DF,
- const MCAsmLayout &Layout) const override {
+ const MCAsmLayout &Layout,
+ const bool WasForced) const override {
MCInst const &MCB = DF->getInst();
assert(HexagonMCInstrInfo::isBundle(MCB));
@@ -682,17 +684,17 @@ public:
assert(Update && "Didn't find relaxation target");
}
- bool writeNopData(uint64_t Count,
- MCObjectWriter * OW) const override {
+ bool writeNopData(raw_ostream &OS, uint64_t Count) const override {
static const uint32_t Nopcode = 0x7f000000, // Hard-coded NOP.
ParseIn = 0x00004000, // In packet parse-bits.
ParseEnd = 0x0000c000; // End of packet parse-bits.
while(Count % HEXAGON_INSTR_SIZE) {
- DEBUG(dbgs() << "Alignment not a multiple of the instruction size:" <<
- Count % HEXAGON_INSTR_SIZE << "/" << HEXAGON_INSTR_SIZE << "\n");
+ LLVM_DEBUG(dbgs() << "Alignment not a multiple of the instruction size:"
+ << Count % HEXAGON_INSTR_SIZE << "/"
+ << HEXAGON_INSTR_SIZE << "\n");
--Count;
- OW->write8(0);
+ OS << '\0';
}
while(Count) {
@@ -700,7 +702,7 @@ public:
// Close the packet whenever a multiple of the maximum packet size remains
uint32_t ParseBits = (Count % (HEXAGON_PACKET_SIZE * HEXAGON_INSTR_SIZE))?
ParseIn: ParseEnd;
- OW->write32(Nopcode | ParseBits);
+ support::endian::write<uint32_t>(OS, Nopcode | ParseBits, Endian);
}
return true;
}
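The padding algorithm in the rewritten writeNopData, for reference: byte-pad with zeros down to a 4-byte boundary, then emit NOP words (0x7f000000) whose parse bits close a packet whenever a multiple of the maximum packet size remains. An illustrative re-implementation that writes to a byte vector instead of a raw_ostream; the constants match the patch, the helper name is made up:

    #include <cstdint>
    #include <vector>

    static void writeNops(std::vector<uint8_t> &Out, uint64_t Count) {
      const uint32_t Nopcode = 0x7f000000, ParseIn = 0x00004000,
                     ParseEnd = 0x0000c000;
      while (Count % 4) {        // byte-pad down to a word boundary
        Out.push_back(0);
        --Count;
      }
      while (Count) {
        Count -= 4;
        // End the packet whenever a multiple of the maximum packet size
        // (4 instructions of 4 bytes) remains to be written.
        uint32_t Parse = (Count % (4 * 4)) ? ParseIn : ParseEnd;
        uint32_t W = Nopcode | Parse;
        for (unsigned i = 0; i < 4; ++i)  // little-endian word
          Out.push_back(uint8_t(W >> (i * 8)));
      }
    }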
@@ -736,7 +738,7 @@ public:
Inst.addOperand(MCOperand::createInst(Nop));
Size -= 4;
if (!HexagonMCChecker(
- Context, *MCII, RF.getSubtargetInfo(), Inst,
+ Context, *MCII, *RF.getSubtargetInfo(), Inst,
*Context.getRegisterInfo(), false)
.check()) {
Inst.erase(Inst.end() - 1);
@@ -744,7 +746,7 @@ public:
}
}
bool Error = HexagonMCShuffle(Context, true, *MCII,
- RF.getSubtargetInfo(), Inst);
+ *RF.getSubtargetInfo(), Inst);
//assert(!Error);
(void)Error;
ReplaceInstruction(Asm.getEmitter(), RF, Inst);
@@ -765,11 +767,12 @@ public:
// MCAsmBackend
MCAsmBackend *llvm::createHexagonAsmBackend(Target const &T,
- MCRegisterInfo const & /*MRI*/,
- const Triple &TT, StringRef CPU,
- const MCTargetOptions &Options) {
+ const MCSubtargetInfo &STI,
+ MCRegisterInfo const & /*MRI*/,
+ const MCTargetOptions &Options) {
+ const Triple &TT = STI.getTargetTriple();
uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TT.getOS());
- StringRef CPUString = Hexagon_MC::selectHexagonCPU(CPU);
+ StringRef CPUString = Hexagon_MC::selectHexagonCPU(STI.getCPU());
return new HexagonAsmBackend(T, TT, OSABI, CPUString);
}
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h b/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
index f5a376033757..cb504b5c3d5d 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
@@ -25,7 +25,7 @@ namespace llvm {
/// HexagonII - This namespace holds all of the target specific flags that
/// instruction info tracks.
namespace HexagonII {
- unsigned const TypeCVI_FIRST = TypeCVI_HIST;
+ unsigned const TypeCVI_FIRST = TypeCVI_4SLOT_MPY;
unsigned const TypeCVI_LAST = TypeCVI_VX_LATE;
enum SubTarget {
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp
index 12aa1bd9b2a0..e82e6b559f62 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp
@@ -298,9 +298,7 @@ unsigned HexagonELFObjectWriter::getRelocType(MCContext &Ctx,
}
}
-std::unique_ptr<MCObjectWriter>
-llvm::createHexagonELFObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI,
- StringRef CPU) {
- auto MOTW = llvm::make_unique<HexagonELFObjectWriter>(OSABI, CPU);
- return createELFObjectWriter(std::move(MOTW), OS, /*IsLittleEndian*/ true);
+std::unique_ptr<MCObjectTargetWriter>
+llvm::createHexagonELFObjectWriter(uint8_t OSABI, StringRef CPU) {
+ return llvm::make_unique<HexagonELFObjectWriter>(OSABI, CPU);
}
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp
index 1929152129fa..3b3a15b990f1 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp
@@ -69,19 +69,12 @@ void HexagonInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
OS << "\n";
}
- auto Separator = "";
- if (HexagonMCInstrInfo::isInnerLoop(*MI)) {
- OS << Separator;
- Separator = " ";
- MCInst ME;
- ME.setOpcode(Hexagon::ENDLOOP0);
- printInstruction(&ME, OS);
- }
- if (HexagonMCInstrInfo::isOuterLoop(*MI)) {
- OS << Separator;
- MCInst ME;
- ME.setOpcode(Hexagon::ENDLOOP1);
- printInstruction(&ME, OS);
+ bool IsLoop0 = HexagonMCInstrInfo::isInnerLoop(*MI);
+ bool IsLoop1 = HexagonMCInstrInfo::isOuterLoop(*MI);
+ if (IsLoop0) {
+ OS << (IsLoop1 ? " :endloop01" : " :endloop0");
+ } else if (IsLoop1) {
+ OS << " :endloop1";
}
}
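The printer change drops the synthesized ENDLOOP pseudo-instructions in favor of emitting the suffix text directly, folding the inner+outer case into a single ":endloop01". The mapping as a plain function (illustrative only):

    #include <string>

    static std::string endloopSuffix(bool IsLoop0, bool IsLoop1) {
      if (IsLoop0)
        return IsLoop1 ? " :endloop01" : " :endloop0";
      return IsLoop1 ? " :endloop1" : "";
    }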
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
index 631c38c2734f..3382684803aa 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
@@ -33,7 +33,9 @@
#include <cassert>
#include <cstddef>
#include <cstdint>
+#include <map>
#include <string>
+#include <vector>
#define DEBUG_TYPE "mccodeemitter"
@@ -42,62 +44,350 @@ using namespace Hexagon;
STATISTIC(MCNumEmitted, "Number of MC instructions emitted");
-HexagonMCCodeEmitter::HexagonMCCodeEmitter(MCInstrInfo const &aMII,
- MCContext &aMCT)
- : MCT(aMCT), MCII(aMII), Addend(new unsigned(0)),
- Extended(new bool(false)), CurrentBundle(new MCInst const *),
- CurrentIndex(new size_t(0)) {}
+static const unsigned fixup_Invalid = ~0u;
+
+#define _ fixup_Invalid
+#define P(x) Hexagon::fixup_Hexagon##x
+static const std::map<unsigned, std::vector<unsigned>> ExtFixups = {
+ { MCSymbolRefExpr::VK_DTPREL,
+ { _, _, _, _,
+ _, _, P(_DTPREL_16_X), P(_DTPREL_11_X),
+ P(_DTPREL_11_X), P(_9_X), _, P(_DTPREL_11_X),
+ P(_DTPREL_16_X), _, _, _,
+ P(_DTPREL_16_X), _, _, _,
+ _, _, _, _,
+ _, _, _, _,
+ _, _, _, _,
+ P(_DTPREL_32_6_X) }},
+ { MCSymbolRefExpr::VK_GOT,
+ { _, _, _, _,
+ _, _, P(_GOT_11_X), _ /* [1] */,
+ _ /* [1] */, P(_9_X), _, P(_GOT_11_X),
+ P(_GOT_16_X), _, _, _,
+ P(_GOT_16_X), _, _, _,
+ _, _, _, _,
+ _, _, _, _,
+ _, _, _, _,
+ P(_GOT_32_6_X) }},
+ { MCSymbolRefExpr::VK_GOTREL,
+ { _, _, _, _,
+ _, _, P(_GOTREL_11_X), P(_GOTREL_11_X),
+ P(_GOTREL_11_X), P(_9_X), _, P(_GOTREL_11_X),
+ P(_GOTREL_16_X), _, _, _,
+ P(_GOTREL_16_X), _, _, _,
+ _, _, _, _,
+ _, _, _, _,
+ _, _, _, _,
+ P(_GOTREL_32_6_X) }},
+ { MCSymbolRefExpr::VK_TPREL,
+ { _, _, _, _,
+ _, _, P(_TPREL_16_X), P(_TPREL_11_X),
+ P(_TPREL_11_X), P(_9_X), _, P(_TPREL_11_X),
+ P(_TPREL_16_X), _, _, _,
+ P(_TPREL_16_X), _, _, _,
+ _, _, _, _,
+ _, _, _, _,
+ _, _, _, _,
+ P(_TPREL_32_6_X) }},
+ { MCSymbolRefExpr::VK_Hexagon_GD_GOT,
+ { _, _, _, _,
+ _, _, P(_GD_GOT_16_X), P(_GD_GOT_11_X),
+ P(_GD_GOT_11_X), P(_9_X), _, P(_GD_GOT_11_X),
+ P(_GD_GOT_16_X), _, _, _,
+ P(_GD_GOT_16_X), _, _, _,
+ _, _, _, _,
+ _, _, _, _,
+ _, _, _, _,
+ P(_GD_GOT_32_6_X) }},
+ { MCSymbolRefExpr::VK_Hexagon_GD_PLT,
+ { _, _, _, _,
+ _, _, _, _,
+ _, P(_9_X), _, P(_GD_PLT_B22_PCREL_X),
+ _, _, _, _,
+ _, _, _, _,
+ _, _, P(_GD_PLT_B22_PCREL_X), _,
+ _, _, _, _,
+ _, _, _, _,
+ _ }},
+ { MCSymbolRefExpr::VK_Hexagon_IE,
+ { _, _, _, _,
+ _, _, P(_IE_16_X), _,
+ _, P(_9_X), _, _,
+ P(_IE_16_X), _, _, _,
+ P(_IE_16_X), _, _, _,
+ _, _, _, _,
+ _, _, _, _,
+ _, _, _, _,
+ P(_IE_32_6_X) }},
+ { MCSymbolRefExpr::VK_Hexagon_IE_GOT,
+ { _, _, _, _,
+ _, _, P(_IE_GOT_11_X), P(_IE_GOT_11_X),
+ P(_IE_GOT_11_X), P(_9_X), _, P(_IE_GOT_11_X),
+ P(_IE_GOT_16_X), _, _, _,
+ P(_IE_GOT_16_X), _, _, _,
+ _, _, _, _,
+ _, _, _, _,
+ _, _, _, _,
+ P(_IE_GOT_32_6_X) }},
+ { MCSymbolRefExpr::VK_Hexagon_LD_GOT,
+ { _, _, _, _,
+ _, _, P(_LD_GOT_11_X), P(_LD_GOT_11_X),
+ P(_LD_GOT_11_X), P(_9_X), _, P(_LD_GOT_11_X),
+ P(_LD_GOT_16_X), _, _, _,
+ P(_LD_GOT_16_X), _, _, _,
+ _, _, _, _,
+ _, _, _, _,
+ _, _, _, _,
+ P(_LD_GOT_32_6_X) }},
+ { MCSymbolRefExpr::VK_Hexagon_LD_PLT,
+ { _, _, _, _,
+ _, _, _, _,
+ _, P(_9_X), _, P(_LD_PLT_B22_PCREL_X),
+ _, _, _, _,
+ _, _, _, _,
+ _, _, P(_LD_PLT_B22_PCREL_X), _,
+ _, _, _, _,
+ _, _, _, _,
+ _ }},
+ { MCSymbolRefExpr::VK_Hexagon_PCREL,
+ { _, _, _, _,
+ _, _, P(_6_PCREL_X), _,
+ _, P(_9_X), _, _,
+ _, _, _, _,
+ _, _, _, _,
+ _, _, _, _,
+ _, _, _, _,
+ _, _, _, _,
+ P(_32_PCREL) }},
+ { MCSymbolRefExpr::VK_None,
+ { _, _, _, _,
+ _, _, P(_6_X), P(_8_X),
+ P(_8_X), P(_9_X), P(_10_X), P(_11_X),
+ P(_12_X), P(_B13_PCREL), _, P(_B15_PCREL_X),
+ P(_16_X), _, _, _,
+ _, _, P(_B22_PCREL_X), _,
+ _, _, _, _,
+ _, _, _, _,
+ P(_32_6_X) }},
+};
+// [1] The fixup is GOT_16_X for signed values and GOT_11_X for unsigned.
+
+static const std::map<unsigned, std::vector<unsigned>> StdFixups = {
+ { MCSymbolRefExpr::VK_DTPREL,
+ { _, _, _, _,
+ _, _, _, _,
+ _, _, _, _,
+ _, _, _, _,
+ P(_DTPREL_16), _, _, _,
+ _, _, _, _,
+ _, _, _, _,
+ _, _, _, _,
+ P(_DTPREL_32) }},
+ { MCSymbolRefExpr::VK_GOT,
+ { _, _, _, _,
+ _, _, _, _,
+ _, _, _, _,
+ _, _, _, _,
+ _, _, _, _,
+ _, _, _, _,
+ _, _, _, _,
+ _, _, _, _,
+ P(_GOT_32) }},
+ { MCSymbolRefExpr::VK_GOTREL,
+ { _, _, _, _,
+ _, _, _, _,
+ _, _, _, _,
+ _, _, _, _,
+ _ /* [2] */, _, _, _,
+ _, _, _, _,
+ _, _, _, _,
+ _, _, _, _,
+ P(_GOTREL_32) }},
+ { MCSymbolRefExpr::VK_PLT,
+ { _, _, _, _,
+ _, _, _, _,
+ _, _, _, _,
+ _, _, _, _,
+ _, _, _, _,
+ _, _, P(_PLT_B22_PCREL), _,
+ _, _, _, _,
+ _, _, _, _,
+ _ }},
+ { MCSymbolRefExpr::VK_TPREL,
+ { _, _, _, _,
+ _, _, _, _,
+ _, _, _, P(_TPREL_11_X),
+ _, _, _, _,
+ P(_TPREL_16), _, _, _,
+ _, _, _, _,
+ _, _, _, _,
+ _, _, _, _,
+ P(_TPREL_32) }},
+ { MCSymbolRefExpr::VK_Hexagon_GD_GOT,
+ { _, _, _, _,
+ _, _, _, _,
+ _, _, _, _,
+ _, _, _, _,
+ P(_GD_GOT_16), _, _, _,
+ _, _, _, _,
+ _, _, _, _,
+ _, _, _, _,
+ P(_GD_GOT_32) }},
+ { MCSymbolRefExpr::VK_Hexagon_GD_PLT,
+ { _, _, _, _,
+ _, _, _, _,
+ _, _, _, _,
+ _, _, _, _,
+ _, _, _, _,
+ _, _, P(_GD_PLT_B22_PCREL), _,
+ _, _, _, _,
+ _, _, _, _,
+ _ }},
+ { MCSymbolRefExpr::VK_Hexagon_GPREL,
+ { _, _, _, _,
+ _, _, _, _,
+ _, _, _, _,
+ _, _, _, _,
+ P(_GPREL16_0), _, _, _,
+ _, _, _, _,
+ _, _, _, _,
+ _, _, _, _,
+ _ }},
+ { MCSymbolRefExpr::VK_Hexagon_HI16,
+ { _, _, _, _,
+ _, _, _, _,
+ _, _, _, _,
+ _, _, _, _,
+ P(_HI16), _, _, _,
+ _, _, _, _,
+ _, _, _, _,
+ _, _, _, _,
+ _ }},
+ { MCSymbolRefExpr::VK_Hexagon_IE,
+ { _, _, _, _,
+ _, _, _, _,
+ _, _, _, _,
+ _, _, _, _,
+ _, _, _, _,
+ _, _, _, _,
+ _, _, _, _,
+ _, _, _, _,
+ P(_IE_32) }},
+ { MCSymbolRefExpr::VK_Hexagon_IE_GOT,
+ { _, _, _, _,
+ _, _, _, _,
+ _, _, _, _,
+ _, _, _, _,
+ P(_IE_GOT_16), _, _, _,
+ _, _, _, _,
+ _, _, _, _,
+ _, _, _, _,
+ P(_IE_GOT_32) }},
+ { MCSymbolRefExpr::VK_Hexagon_LD_GOT,
+ { _, _, _, _,
+ _, _, _, _,
+ _, _, _, _,
+ _, _, _, _,
+ P(_LD_GOT_16), _, _, _,
+ _, _, _, _,
+ _, _, _, _,
+ _, _, _, _,
+ P(_LD_GOT_32) }},
+ { MCSymbolRefExpr::VK_Hexagon_LD_PLT,
+ { _, _, _, _,
+ _, _, _, _,
+ _, _, _, _,
+ _, _, _, _,
+ _, _, _, _,
+ _, _, P(_LD_PLT_B22_PCREL), _,
+ _, _, _, _,
+ _, _, _, _,
+ _ }},
+ { MCSymbolRefExpr::VK_Hexagon_LO16,
+ { _, _, _, _,
+ _, _, _, _,
+ _, _, _, _,
+ _, _, _, _,
+ P(_LO16), _, _, _,
+ _, _, _, _,
+ _, _, _, _,
+ _, _, _, _,
+ _ }},
+ { MCSymbolRefExpr::VK_Hexagon_PCREL,
+ { _, _, _, _,
+ _, _, _, _,
+ _, _, _, _,
+ _, _, _, _,
+ _, _, _, _,
+ _, _, _, _,
+ _, _, _, _,
+ _, _, _, _,
+ P(_32_PCREL) }},
+ { MCSymbolRefExpr::VK_None,
+ { _, _, _, _,
+ _, _, _, _,
+ _, _, _, _,
+ _, P(_B13_PCREL), _, P(_B15_PCREL),
+ _, _, _, _,
+ _, _, P(_B22_PCREL), _,
+ _, _, _, _,
+ _, _, _, _,
+ P(_32) }},
+};
+// [2] The actual fixup is LO16 or HI16, depending on the instruction.
+#undef P
+#undef _
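Both tables are keyed by the symbol's variant kind, and each row is a vector indexed by the fixup width in bits (0 through 32, hence 33 entries per row), with fixup_Invalid marking unsupported combinations. A sketch of the lookup the tables are built for, matching how getExprOpValue consumes them later in this patch (the helper itself is illustrative):

    #include <map>
    #include <vector>

    static unsigned lookupFixup(
        const std::map<unsigned, std::vector<unsigned>> &Table,
        unsigned VarKind, unsigned Width, unsigned Invalid = ~0u) {
      auto It = Table.find(VarKind);
      if (It == Table.end() || Width >= It->second.size())
        return Invalid;
      return It->second[Width];  // may itself be fixup_Invalid
    }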
-uint32_t HexagonMCCodeEmitter::parseBits(size_t Last,
- MCInst const &MCB,
+uint32_t HexagonMCCodeEmitter::parseBits(size_t Last, MCInst const &MCB,
MCInst const &MCI) const {
bool Duplex = HexagonMCInstrInfo::isDuplex(MCII, MCI);
- if (*CurrentIndex == 0) {
+ if (State.Index == 0) {
if (HexagonMCInstrInfo::isInnerLoop(MCB)) {
assert(!Duplex);
- assert(*CurrentIndex != Last);
+ assert(State.Index != Last);
return HexagonII::INST_PARSE_LOOP_END;
}
}
- if (*CurrentIndex == 1) {
+ if (State.Index == 1) {
if (HexagonMCInstrInfo::isOuterLoop(MCB)) {
assert(!Duplex);
- assert(*CurrentIndex != Last);
+ assert(State.Index != Last);
return HexagonII::INST_PARSE_LOOP_END;
}
}
if (Duplex) {
- assert(*CurrentIndex == Last);
+ assert(State.Index == Last);
return HexagonII::INST_PARSE_DUPLEX;
}
- if(*CurrentIndex == Last)
+ if (State.Index == Last)
return HexagonII::INST_PARSE_PACKET_END;
return HexagonII::INST_PARSE_NOT_END;
}
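With the per-emitter State replacing the old heap-allocated slots, parseBits reduces to a small decision table: bundle positions 0 and 1 may carry the inner/outer loop-end markers, a duplex is always the last word and takes the duplex marker, and otherwise the last instruction closes the packet. The same table as a standalone function, flags passed explicitly and the asserts elided:

    #include <cstddef>

    enum Parse { LOOP_END, DUPLEX, PACKET_END, NOT_END };

    static Parse parseBits(size_t Index, size_t Last, bool Duplex,
                           bool InnerLoop, bool OuterLoop) {
      if (Index == 0 && InnerLoop)
        return LOOP_END;
      if (Index == 1 && OuterLoop)
        return LOOP_END;
      if (Duplex)
        return DUPLEX;  // a duplex always occupies the last slot
      return Index == Last ? PACKET_END : NOT_END;
    }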
-/// EncodeInstruction - Emit the bundle
+/// Emit the bundle.
void HexagonMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
MCInst &HMB = const_cast<MCInst &>(MI);
assert(HexagonMCInstrInfo::isBundle(HMB));
- DEBUG(dbgs() << "Encoding bundle\n";);
- *Addend = 0;
- *Extended = false;
- *CurrentBundle = &MI;
- *CurrentIndex = 0;
+ LLVM_DEBUG(dbgs() << "Encoding bundle\n";);
+ State.Addend = 0;
+ State.Extended = false;
+ State.Bundle = &MI;
+ State.Index = 0;
size_t Last = HexagonMCInstrInfo::bundleSize(HMB) - 1;
+ uint64_t Features = computeAvailableFeatures(STI.getFeatureBits());
+
for (auto &I : HexagonMCInstrInfo::bundleInstructions(HMB)) {
MCInst &HMI = const_cast<MCInst &>(*I.getInst());
- verifyInstructionPredicates(HMI,
- computeAvailableFeatures(STI.getFeatureBits()));
-
- EncodeSingleInstruction(HMI, OS, Fixups, STI,
- parseBits(Last, HMB, HMI));
- *Extended = HexagonMCInstrInfo::isImmext(HMI);
- *Addend += HEXAGON_INSTR_SIZE;
- ++*CurrentIndex;
+ verifyInstructionPredicates(HMI, Features);
+
+ EncodeSingleInstruction(HMI, OS, Fixups, STI, parseBits(Last, HMB, HMI));
+ State.Extended = HexagonMCInstrInfo::isImmext(HMI);
+ State.Addend += HEXAGON_INSTR_SIZE;
+ ++State.Index;
}
}
@@ -115,9 +405,9 @@ static bool RegisterMatches(unsigned Consumer, unsigned Producer,
}
/// EncodeSingleInstruction - Emit a single instruction.
-void HexagonMCCodeEmitter::EncodeSingleInstruction(
- const MCInst &MI, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI, uint32_t Parse) const {
+void HexagonMCCodeEmitter::EncodeSingleInstruction(const MCInst &MI,
+ raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI, uint32_t Parse) const {
assert(!HexagonMCInstrInfo::isBundle(MI));
uint64_t Binary;
@@ -125,198 +415,150 @@ void HexagonMCCodeEmitter::EncodeSingleInstruction(
// in the first place!
assert(!HexagonMCInstrInfo::getDesc(MCII, MI).isPseudo() &&
"pseudo-instruction found");
- DEBUG(dbgs() << "Encoding insn"
- " `" << HexagonMCInstrInfo::getName(MCII, MI) << "'"
- "\n");
+ LLVM_DEBUG(dbgs() << "Encoding insn `"
+ << HexagonMCInstrInfo::getName(MCII, MI) << "'\n");
Binary = getBinaryCodeForInstr(MI, Fixups, STI);
+ unsigned Opc = MI.getOpcode();
+
// Check for unimplemented instructions. Immediate extenders
// are encoded as zero, so they need to be accounted for.
- if (!Binary &&
- MI.getOpcode() != DuplexIClass0 &&
- MI.getOpcode() != A4_ext) {
- DEBUG(dbgs() << "Unimplemented inst: "
- " `" << HexagonMCInstrInfo::getName(MCII, MI) << "'"
- "\n");
+ if (!Binary && Opc != DuplexIClass0 && Opc != A4_ext) {
+ LLVM_DEBUG(dbgs() << "Unimplemented inst `"
+ << HexagonMCInstrInfo::getName(MCII, MI) << "'\n");
llvm_unreachable("Unimplemented Instruction");
}
Binary |= Parse;
// if we need to emit a duplexed instruction
- if (MI.getOpcode() >= Hexagon::DuplexIClass0 &&
- MI.getOpcode() <= Hexagon::DuplexIClassF) {
+ if (Opc >= Hexagon::DuplexIClass0 && Opc <= Hexagon::DuplexIClassF) {
assert(Parse == HexagonII::INST_PARSE_DUPLEX &&
"Emitting duplex without duplex parse bits");
- unsigned dupIClass = MI.getOpcode() - Hexagon::DuplexIClass0;
+ unsigned DupIClass = MI.getOpcode() - Hexagon::DuplexIClass0;
// The duplex iclass bits 3:1 (mask 0b1110 = 0xE) are shifted down by one
// and placed at bit 29, landing in bits 31:29 of the word; iclass bit 0
// is moved to bit position 13.
- Binary = ((dupIClass & 0xE) << (29 - 1)) | ((dupIClass & 0x1) << 13);
+ Binary = ((DupIClass & 0xE) << (29 - 1)) | ((DupIClass & 0x1) << 13);
- const MCInst *subInst0 = MI.getOperand(0).getInst();
- const MCInst *subInst1 = MI.getOperand(1).getInst();
+ const MCInst *Sub0 = MI.getOperand(0).getInst();
+ const MCInst *Sub1 = MI.getOperand(1).getInst();
- // get subinstruction slot 0
- unsigned subInstSlot0Bits = getBinaryCodeForInstr(*subInst0, Fixups, STI);
- // get subinstruction slot 1
- unsigned subInstSlot1Bits = getBinaryCodeForInstr(*subInst1, Fixups, STI);
+ // Get subinstruction slot 0.
+ unsigned SubBits0 = getBinaryCodeForInstr(*Sub0, Fixups, STI);
+ // Get subinstruction slot 1.
+ State.SubInst1 = true;
+ unsigned SubBits1 = getBinaryCodeForInstr(*Sub1, Fixups, STI);
+ State.SubInst1 = false;
- Binary |= subInstSlot0Bits | (subInstSlot1Bits << 16);
+ Binary |= SubBits0 | (SubBits1 << 16);
}
- support::endian::Writer<support::little>(OS).write<uint32_t>(Binary);
+ support::endian::write<uint32_t>(OS, Binary, support::little);
++MCNumEmitted;
}
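A worked example of the duplex packing above: iclass bits 3:1 land in bits 31:29 of the word, iclass bit 0 goes to bit 13, the slot-0 sub-instruction fills the low half and slot-1 the high half. As a standalone helper under those assumptions (names illustrative):

    #include <cstdint>

    // 'IClass' is the duplex iclass (0..15); 'Sub0'/'Sub1' are the 13-bit
    // sub-instruction encodings for slots 0 and 1.
    static uint32_t packDuplex(unsigned IClass, uint32_t Sub0,
                               uint32_t Sub1) {
      uint32_t W = ((IClass & 0xE) << (29 - 1))  // iclass bits 3:1 -> 31:29
                 | ((IClass & 0x1) << 13);       // iclass bit 0   -> 13
      return W | Sub0 | (Sub1 << 16);
    }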
LLVM_ATTRIBUTE_NORETURN
-static void raise_relocation_error(unsigned bits, unsigned kind) {
+static void raise_relocation_error(unsigned Width, unsigned Kind) {
std::string Text;
- {
- raw_string_ostream Stream(Text);
- Stream << "Unrecognized relocation combination bits: " << bits
- << " kind: " << kind;
- }
- report_fatal_error(Text);
+ raw_string_ostream Stream(Text);
+ Stream << "Unrecognized relocation combination: width=" << Width
+ << " kind=" << Kind;
+ report_fatal_error(Stream.str());
}
-/// getFixupNoBits - Some insns are not extended and thus have no
-/// bits. These cases require a more brute force method for determining
-/// the correct relocation.
+/// Some insns are not extended and thus have no bits. These cases require
+/// a more brute force method for determining the correct relocation.
Hexagon::Fixups HexagonMCCodeEmitter::getFixupNoBits(
- MCInstrInfo const &MCII, const MCInst &MI, const MCOperand &MO,
- const MCSymbolRefExpr::VariantKind kind) const {
+ MCInstrInfo const &MCII, const MCInst &MI, const MCOperand &MO,
+ const MCSymbolRefExpr::VariantKind VarKind) const {
const MCInstrDesc &MCID = HexagonMCInstrInfo::getDesc(MCII, MI);
- unsigned insnType = HexagonMCInstrInfo::getType(MCII, MI);
-
- if (insnType == HexagonII::TypeEXTENDER) {
- switch (kind) {
- case MCSymbolRefExpr::VK_GOTREL:
- return Hexagon::fixup_Hexagon_GOTREL_32_6_X;
- case MCSymbolRefExpr::VK_GOT:
- return Hexagon::fixup_Hexagon_GOT_32_6_X;
- case MCSymbolRefExpr::VK_TPREL:
- return Hexagon::fixup_Hexagon_TPREL_32_6_X;
- case MCSymbolRefExpr::VK_DTPREL:
- return Hexagon::fixup_Hexagon_DTPREL_32_6_X;
- case MCSymbolRefExpr::VK_Hexagon_GD_GOT:
- return Hexagon::fixup_Hexagon_GD_GOT_32_6_X;
- case MCSymbolRefExpr::VK_Hexagon_LD_GOT:
- return Hexagon::fixup_Hexagon_LD_GOT_32_6_X;
- case MCSymbolRefExpr::VK_Hexagon_IE:
- return Hexagon::fixup_Hexagon_IE_32_6_X;
- case MCSymbolRefExpr::VK_Hexagon_IE_GOT:
- return Hexagon::fixup_Hexagon_IE_GOT_32_6_X;
- case MCSymbolRefExpr::VK_Hexagon_PCREL:
- return Hexagon::fixup_Hexagon_B32_PCREL_X;
- case MCSymbolRefExpr::VK_Hexagon_GD_PLT:
- return Hexagon::fixup_Hexagon_GD_PLT_B32_PCREL_X;
- case MCSymbolRefExpr::VK_Hexagon_LD_PLT:
- return Hexagon::fixup_Hexagon_LD_PLT_B32_PCREL_X;
-
- case MCSymbolRefExpr::VK_None: {
- auto Insts = HexagonMCInstrInfo::bundleInstructions(**CurrentBundle);
- for (auto I = Insts.begin(), N = Insts.end(); I != N; ++I) {
- if (I->getInst() == &MI) {
- const MCInst &NextI = *(I+1)->getInst();
- const MCInstrDesc &D = HexagonMCInstrInfo::getDesc(MCII, NextI);
- if (D.isBranch() || D.isCall() ||
- HexagonMCInstrInfo::getType(MCII, NextI) == HexagonII::TypeCR)
- return Hexagon::fixup_Hexagon_B32_PCREL_X;
- return Hexagon::fixup_Hexagon_32_6_X;
- }
+ unsigned InsnType = HexagonMCInstrInfo::getType(MCII, MI);
+ using namespace Hexagon;
+
+ if (InsnType == HexagonII::TypeEXTENDER) {
+ if (VarKind == MCSymbolRefExpr::VK_None) {
+ auto Instrs = HexagonMCInstrInfo::bundleInstructions(*State.Bundle);
+ for (auto I = Instrs.begin(), N = Instrs.end(); I != N; ++I) {
+ if (I->getInst() != &MI)
+ continue;
+ assert(I+1 != N && "Extender cannot be last in packet");
+ const MCInst &NextI = *(I+1)->getInst();
+ const MCInstrDesc &NextD = HexagonMCInstrInfo::getDesc(MCII, NextI);
+ if (NextD.isBranch() || NextD.isCall() ||
+ HexagonMCInstrInfo::getType(MCII, NextI) == HexagonII::TypeCR)
+ return fixup_Hexagon_B32_PCREL_X;
+ return fixup_Hexagon_32_6_X;
}
- raise_relocation_error(0, kind);
- }
- default:
- raise_relocation_error(0, kind);
}
- } else if (MCID.isBranch())
- return Hexagon::fixup_Hexagon_B13_PCREL;
- switch (MCID.getOpcode()) {
- case Hexagon::HI:
- case Hexagon::A2_tfrih:
- switch (kind) {
- case MCSymbolRefExpr::VK_GOT:
- return Hexagon::fixup_Hexagon_GOT_HI16;
- case MCSymbolRefExpr::VK_GOTREL:
- return Hexagon::fixup_Hexagon_GOTREL_HI16;
- case MCSymbolRefExpr::VK_Hexagon_GD_GOT:
- return Hexagon::fixup_Hexagon_GD_GOT_HI16;
- case MCSymbolRefExpr::VK_Hexagon_LD_GOT:
- return Hexagon::fixup_Hexagon_LD_GOT_HI16;
- case MCSymbolRefExpr::VK_Hexagon_IE:
- return Hexagon::fixup_Hexagon_IE_HI16;
- case MCSymbolRefExpr::VK_Hexagon_IE_GOT:
- return Hexagon::fixup_Hexagon_IE_GOT_HI16;
- case MCSymbolRefExpr::VK_TPREL:
- return Hexagon::fixup_Hexagon_TPREL_HI16;
- case MCSymbolRefExpr::VK_DTPREL:
- return Hexagon::fixup_Hexagon_DTPREL_HI16;
- case MCSymbolRefExpr::VK_None:
- return Hexagon::fixup_Hexagon_HI16;
- default:
- raise_relocation_error(0, kind);
- }
+ static const std::map<unsigned,unsigned> Relocs = {
+ { MCSymbolRefExpr::VK_GOTREL, fixup_Hexagon_GOTREL_32_6_X },
+ { MCSymbolRefExpr::VK_GOT, fixup_Hexagon_GOT_32_6_X },
+ { MCSymbolRefExpr::VK_TPREL, fixup_Hexagon_TPREL_32_6_X },
+ { MCSymbolRefExpr::VK_DTPREL, fixup_Hexagon_DTPREL_32_6_X },
+ { MCSymbolRefExpr::VK_Hexagon_GD_GOT, fixup_Hexagon_GD_GOT_32_6_X },
+ { MCSymbolRefExpr::VK_Hexagon_LD_GOT, fixup_Hexagon_LD_GOT_32_6_X },
+ { MCSymbolRefExpr::VK_Hexagon_IE, fixup_Hexagon_IE_32_6_X },
+ { MCSymbolRefExpr::VK_Hexagon_IE_GOT, fixup_Hexagon_IE_GOT_32_6_X },
+ { MCSymbolRefExpr::VK_Hexagon_PCREL, fixup_Hexagon_B32_PCREL_X },
+ { MCSymbolRefExpr::VK_Hexagon_GD_PLT, fixup_Hexagon_GD_PLT_B32_PCREL_X },
+ { MCSymbolRefExpr::VK_Hexagon_LD_PLT, fixup_Hexagon_LD_PLT_B32_PCREL_X },
+ };
+
+ auto F = Relocs.find(VarKind);
+ if (F != Relocs.end())
+ return Hexagon::Fixups(F->second);
+ raise_relocation_error(0, VarKind);
+ }
- case Hexagon::LO:
- case Hexagon::A2_tfril:
- switch (kind) {
- case MCSymbolRefExpr::VK_GOT:
- return Hexagon::fixup_Hexagon_GOT_LO16;
- case MCSymbolRefExpr::VK_GOTREL:
- return Hexagon::fixup_Hexagon_GOTREL_LO16;
- case MCSymbolRefExpr::VK_Hexagon_GD_GOT:
- return Hexagon::fixup_Hexagon_GD_GOT_LO16;
- case MCSymbolRefExpr::VK_Hexagon_LD_GOT:
- return Hexagon::fixup_Hexagon_LD_GOT_LO16;
- case MCSymbolRefExpr::VK_Hexagon_IE:
- return Hexagon::fixup_Hexagon_IE_LO16;
- case MCSymbolRefExpr::VK_Hexagon_IE_GOT:
- return Hexagon::fixup_Hexagon_IE_GOT_LO16;
- case MCSymbolRefExpr::VK_TPREL:
- return Hexagon::fixup_Hexagon_TPREL_LO16;
- case MCSymbolRefExpr::VK_DTPREL:
- return Hexagon::fixup_Hexagon_DTPREL_LO16;
- case MCSymbolRefExpr::VK_None:
- return Hexagon::fixup_Hexagon_LO16;
- default:
- raise_relocation_error(0, kind);
- }
+ if (MCID.isBranch())
+ return fixup_Hexagon_B13_PCREL;
+
+ static const std::map<unsigned,unsigned> RelocsLo = {
+ { MCSymbolRefExpr::VK_GOT, fixup_Hexagon_GOT_LO16 },
+ { MCSymbolRefExpr::VK_GOTREL, fixup_Hexagon_GOTREL_LO16 },
+ { MCSymbolRefExpr::VK_Hexagon_GD_GOT, fixup_Hexagon_GD_GOT_LO16 },
+ { MCSymbolRefExpr::VK_Hexagon_LD_GOT, fixup_Hexagon_LD_GOT_LO16 },
+ { MCSymbolRefExpr::VK_Hexagon_IE, fixup_Hexagon_IE_LO16 },
+ { MCSymbolRefExpr::VK_Hexagon_IE_GOT, fixup_Hexagon_IE_GOT_LO16 },
+ { MCSymbolRefExpr::VK_TPREL, fixup_Hexagon_TPREL_LO16 },
+ { MCSymbolRefExpr::VK_DTPREL, fixup_Hexagon_DTPREL_LO16 },
+ { MCSymbolRefExpr::VK_None, fixup_Hexagon_LO16 },
+ };
+
+ static const std::map<unsigned,unsigned> RelocsHi = {
+ { MCSymbolRefExpr::VK_GOT, fixup_Hexagon_GOT_HI16 },
+ { MCSymbolRefExpr::VK_GOTREL, fixup_Hexagon_GOTREL_HI16 },
+ { MCSymbolRefExpr::VK_Hexagon_GD_GOT, fixup_Hexagon_GD_GOT_HI16 },
+ { MCSymbolRefExpr::VK_Hexagon_LD_GOT, fixup_Hexagon_LD_GOT_HI16 },
+ { MCSymbolRefExpr::VK_Hexagon_IE, fixup_Hexagon_IE_HI16 },
+ { MCSymbolRefExpr::VK_Hexagon_IE_GOT, fixup_Hexagon_IE_GOT_HI16 },
+ { MCSymbolRefExpr::VK_TPREL, fixup_Hexagon_TPREL_HI16 },
+ { MCSymbolRefExpr::VK_DTPREL, fixup_Hexagon_DTPREL_HI16 },
+ { MCSymbolRefExpr::VK_None, fixup_Hexagon_HI16 },
+ };
- // The only relocs left should be GP relative:
- default:
- if (MCID.mayStore() || MCID.mayLoad()) {
- for (const MCPhysReg *ImpUses = MCID.getImplicitUses(); *ImpUses;
- ++ImpUses) {
- if (*ImpUses != Hexagon::GP)
- continue;
- switch (HexagonMCInstrInfo::getMemAccessSize(MCII, MI)) {
- case 1:
- return fixup_Hexagon_GPREL16_0;
- case 2:
- return fixup_Hexagon_GPREL16_1;
- case 4:
- return fixup_Hexagon_GPREL16_2;
- case 8:
- return fixup_Hexagon_GPREL16_3;
- default:
- raise_relocation_error(0, kind);
- }
- }
+ switch (MCID.getOpcode()) {
+ case Hexagon::LO:
+ case Hexagon::A2_tfril: {
+ auto F = RelocsLo.find(VarKind);
+ if (F != RelocsLo.end())
+ return Hexagon::Fixups(F->second);
+ break;
+ }
+ case Hexagon::HI:
+ case Hexagon::A2_tfrih: {
+ auto F = RelocsHi.find(VarKind);
+ if (F != RelocsHi.end())
+ return Hexagon::Fixups(F->second);
+ break;
}
- raise_relocation_error(0, kind);
}
- llvm_unreachable("Relocation exit not taken");
-}
-
-namespace llvm {
-
-extern const MCInstrDesc HexagonInsts[];
-} // end namespace llvm
+ raise_relocation_error(0, VarKind);
+}
-static bool isPCRel (unsigned Kind) {
- switch(Kind){
+static bool isPCRel(unsigned Kind) {
+ switch (Kind) {
case fixup_Hexagon_B22_PCREL:
case fixup_Hexagon_B15_PCREL:
case fixup_Hexagon_B7_PCREL:
@@ -342,16 +584,34 @@ static bool isPCRel (unsigned Kind) {
}
unsigned HexagonMCCodeEmitter::getExprOpValue(const MCInst &MI,
- const MCOperand &MO,
- const MCExpr *ME,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const
-{
+ const MCOperand &MO, const MCExpr *ME, SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
if (isa<HexagonMCExpr>(ME))
ME = &HexagonMCInstrInfo::getExpr(*ME);
int64_t Value;
- if (ME->evaluateAsAbsolute(Value))
+ if (ME->evaluateAsAbsolute(Value)) {
+ bool InstExtendable = HexagonMCInstrInfo::isExtendable(MCII, MI) ||
+ HexagonMCInstrInfo::isExtended(MCII, MI);
+ // Only sub-instruction #1 can be extended in a duplex. If MI is a
+ // sub-instruction #0, it is not extended even if Extended is true
+ // (it can be true for the duplex as a whole).
+ bool IsSub0 = HexagonMCInstrInfo::isSubInstruction(MI) && !State.SubInst1;
+ if (State.Extended && InstExtendable && !IsSub0) {
+ unsigned OpIdx = ~0u;
+ for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
+ if (&MO != &MI.getOperand(I))
+ continue;
+ OpIdx = I;
+ break;
+ }
+ assert(OpIdx != ~0u);
+ if (OpIdx == HexagonMCInstrInfo::getExtendableOp(MCII, MI)) {
+ unsigned Shift = HexagonMCInstrInfo::getExtentAlignment(MCII, MI);
+ Value = (Value & 0x3f) << Shift;
+ }
+ }
return Value;
+ }
assert(ME->getKind() == MCExpr::SymbolRef ||
ME->getKind() == MCExpr::Binary);
if (ME->getKind() == MCExpr::Binary) {
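The masking added for absolute values reflects how Hexagon splits an extended immediate: the constant-extender word (A4_ext) carries the upper 26 bits, and the extended instruction itself keeps only the low 6 bits, shifted by the extent alignment. In arithmetic form, a sketch under that assumption rather than LLVM API:

    #include <cstdint>

    // The instruction encodes (Value & 0x3f) << Shift, where Shift is the
    // extent alignment; the A4_ext word supplies the remaining bits.
    static int64_t inInstructionBits(int64_t Value, unsigned Shift) {
      return (Value & 0x3f) << Shift;
    }

    static int64_t extenderBits(int64_t Value) {
      return Value >> 6;  // upper bits carried by the immediate extender
    }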
@@ -360,366 +620,99 @@ unsigned HexagonMCCodeEmitter::getExprOpValue(const MCInst &MI,
getExprOpValue(MI, MO, Binary->getRHS(), Fixups, STI);
return 0;
}
- Hexagon::Fixups FixupKind =
- Hexagon::Fixups(Hexagon::fixup_Hexagon_TPREL_LO16);
+
+ unsigned FixupKind = fixup_Invalid;
const MCSymbolRefExpr *MCSRE = static_cast<const MCSymbolRefExpr *>(ME);
const MCInstrDesc &MCID = HexagonMCInstrInfo::getDesc(MCII, MI);
- unsigned bits = HexagonMCInstrInfo::getExtentBits(MCII, MI) -
- HexagonMCInstrInfo::getExtentAlignment(MCII, MI);
- const MCSymbolRefExpr::VariantKind kind = MCSRE->getKind();
-
- DEBUG(dbgs() << "----------------------------------------\n");
- DEBUG(dbgs() << "Opcode Name: " << HexagonMCInstrInfo::getName(MCII, MI)
- << "\n");
- DEBUG(dbgs() << "Opcode: " << MCID.getOpcode() << "\n");
- DEBUG(dbgs() << "Relocation bits: " << bits << "\n");
- DEBUG(dbgs() << "Addend: " << *Addend << "\n");
- DEBUG(dbgs() << "----------------------------------------\n");
-
- switch (bits) {
- default:
- raise_relocation_error(bits, kind);
- case 32:
- switch (kind) {
- case MCSymbolRefExpr::VK_DTPREL:
- FixupKind = *Extended ? Hexagon::fixup_Hexagon_DTPREL_32_6_X
- : Hexagon::fixup_Hexagon_DTPREL_32;
- break;
- case MCSymbolRefExpr::VK_GOT:
- FixupKind = *Extended ? Hexagon::fixup_Hexagon_GOT_32_6_X
- : Hexagon::fixup_Hexagon_GOT_32;
- break;
- case MCSymbolRefExpr::VK_GOTREL:
- FixupKind = *Extended ? Hexagon::fixup_Hexagon_GOTREL_32_6_X
- : Hexagon::fixup_Hexagon_GOTREL_32;
- break;
- case MCSymbolRefExpr::VK_Hexagon_GD_GOT:
- FixupKind = *Extended ? Hexagon::fixup_Hexagon_GD_GOT_32_6_X
- : Hexagon::fixup_Hexagon_GD_GOT_32;
- break;
- case MCSymbolRefExpr::VK_Hexagon_IE:
- FixupKind = *Extended ? Hexagon::fixup_Hexagon_IE_32_6_X
- : Hexagon::fixup_Hexagon_IE_32;
- break;
- case MCSymbolRefExpr::VK_Hexagon_IE_GOT:
- FixupKind = *Extended ? Hexagon::fixup_Hexagon_IE_GOT_32_6_X
- : Hexagon::fixup_Hexagon_IE_GOT_32;
- break;
- case MCSymbolRefExpr::VK_Hexagon_LD_GOT:
- FixupKind = *Extended ? Hexagon::fixup_Hexagon_LD_GOT_32_6_X
- : Hexagon::fixup_Hexagon_LD_GOT_32;
- break;
- case MCSymbolRefExpr::VK_Hexagon_PCREL:
- FixupKind = Hexagon::fixup_Hexagon_32_PCREL;
- break;
- case MCSymbolRefExpr::VK_None:
- FixupKind =
- *Extended ? Hexagon::fixup_Hexagon_32_6_X : Hexagon::fixup_Hexagon_32;
- break;
- case MCSymbolRefExpr::VK_TPREL:
- FixupKind = *Extended ? Hexagon::fixup_Hexagon_TPREL_32_6_X
- : Hexagon::fixup_Hexagon_TPREL_32;
- break;
- default:
- raise_relocation_error(bits, kind);
- }
- break;
-
- case 22:
- switch (kind) {
- case MCSymbolRefExpr::VK_Hexagon_GD_PLT:
- FixupKind = *Extended ? Hexagon::fixup_Hexagon_GD_PLT_B22_PCREL_X
- : Hexagon::fixup_Hexagon_GD_PLT_B22_PCREL;
- break;
- case MCSymbolRefExpr::VK_Hexagon_LD_PLT:
- FixupKind = *Extended ? Hexagon::fixup_Hexagon_LD_PLT_B22_PCREL_X
- : Hexagon::fixup_Hexagon_LD_PLT_B22_PCREL;
- break;
- case MCSymbolRefExpr::VK_None:
- FixupKind = *Extended ? Hexagon::fixup_Hexagon_B22_PCREL_X
- : Hexagon::fixup_Hexagon_B22_PCREL;
- break;
- case MCSymbolRefExpr::VK_PLT:
- FixupKind = Hexagon::fixup_Hexagon_PLT_B22_PCREL;
- break;
- default:
- raise_relocation_error(bits, kind);
- }
- break;
-
- case 16:
- if (*Extended) {
- switch (kind) {
- case MCSymbolRefExpr::VK_DTPREL:
- FixupKind = Hexagon::fixup_Hexagon_DTPREL_16_X;
- break;
- case MCSymbolRefExpr::VK_GOT:
- FixupKind = Hexagon::fixup_Hexagon_GOT_16_X;
- break;
- case MCSymbolRefExpr::VK_GOTREL:
- FixupKind = Hexagon::fixup_Hexagon_GOTREL_16_X;
- break;
- case MCSymbolRefExpr::VK_Hexagon_GD_GOT:
- FixupKind = Hexagon::fixup_Hexagon_GD_GOT_16_X;
- break;
- case MCSymbolRefExpr::VK_Hexagon_IE:
- FixupKind = Hexagon::fixup_Hexagon_IE_16_X;
- break;
- case MCSymbolRefExpr::VK_Hexagon_IE_GOT:
- FixupKind = Hexagon::fixup_Hexagon_IE_GOT_16_X;
- break;
- case MCSymbolRefExpr::VK_Hexagon_LD_GOT:
- FixupKind = Hexagon::fixup_Hexagon_LD_GOT_16_X;
- break;
- case MCSymbolRefExpr::VK_None:
- FixupKind = Hexagon::fixup_Hexagon_16_X;
- break;
- case MCSymbolRefExpr::VK_TPREL:
- FixupKind = Hexagon::fixup_Hexagon_TPREL_16_X;
- break;
- default:
- raise_relocation_error(bits, kind);
- }
- } else
- switch (kind) {
- case MCSymbolRefExpr::VK_None:
- if (HexagonMCInstrInfo::s27_2_reloc(*MO.getExpr()))
- FixupKind = Hexagon::fixup_Hexagon_27_REG;
- else
- if (MCID.mayStore() || MCID.mayLoad()) {
- for (const MCPhysReg *ImpUses = MCID.getImplicitUses(); *ImpUses;
- ++ImpUses) {
- if (*ImpUses != Hexagon::GP)
- continue;
- switch (HexagonMCInstrInfo::getMemAccessSize(MCII, MI)) {
- case 1:
- FixupKind = fixup_Hexagon_GPREL16_0;
- break;
- case 2:
- FixupKind = fixup_Hexagon_GPREL16_1;
- break;
- case 4:
- FixupKind = fixup_Hexagon_GPREL16_2;
- break;
- case 8:
- FixupKind = fixup_Hexagon_GPREL16_3;
- break;
- default:
- raise_relocation_error(bits, kind);
- }
- }
- } else
- raise_relocation_error(bits, kind);
- break;
- case MCSymbolRefExpr::VK_DTPREL:
- FixupKind = Hexagon::fixup_Hexagon_DTPREL_16;
- break;
- case MCSymbolRefExpr::VK_GOTREL:
- if (MCID.getOpcode() == Hexagon::HI)
- FixupKind = Hexagon::fixup_Hexagon_GOTREL_HI16;
- else
- FixupKind = Hexagon::fixup_Hexagon_GOTREL_LO16;
- break;
- case MCSymbolRefExpr::VK_Hexagon_GD_GOT:
- FixupKind = Hexagon::fixup_Hexagon_GD_GOT_16;
- break;
- case MCSymbolRefExpr::VK_Hexagon_GPREL:
- FixupKind = Hexagon::fixup_Hexagon_GPREL16_0;
- break;
- case MCSymbolRefExpr::VK_Hexagon_HI16:
- FixupKind = Hexagon::fixup_Hexagon_HI16;
- break;
- case MCSymbolRefExpr::VK_Hexagon_IE_GOT:
- FixupKind = Hexagon::fixup_Hexagon_IE_GOT_16;
- break;
- case MCSymbolRefExpr::VK_Hexagon_LD_GOT:
- FixupKind = Hexagon::fixup_Hexagon_LD_GOT_16;
- break;
- case MCSymbolRefExpr::VK_Hexagon_LO16:
- FixupKind = Hexagon::fixup_Hexagon_LO16;
- break;
- case MCSymbolRefExpr::VK_TPREL:
- FixupKind = Hexagon::fixup_Hexagon_TPREL_16;
- break;
- default:
- raise_relocation_error(bits, kind);
+ unsigned FixupWidth = HexagonMCInstrInfo::getExtentBits(MCII, MI) -
+ HexagonMCInstrInfo::getExtentAlignment(MCII, MI);
+ MCSymbolRefExpr::VariantKind VarKind = MCSRE->getKind();
+ unsigned Opc = MCID.getOpcode();
+ unsigned IType = HexagonMCInstrInfo::getType(MCII, MI);
+
+ LLVM_DEBUG(dbgs() << "----------------------------------------\n"
+ << "Opcode Name: " << HexagonMCInstrInfo::getName(MCII, MI)
+ << "\nOpcode: " << Opc << "\nRelocation bits: "
+ << FixupWidth << "\nAddend: " << State.Addend
+ << "\nVariant: " << unsigned(VarKind)
+ << "\n----------------------------------------\n");
+
+ // Pick the applicable fixup kind for the symbol.
+ // Handle special cases first, the rest will be looked up in the tables.
+
+ if (FixupWidth == 16 && !State.Extended) {
+ if (VarKind == MCSymbolRefExpr::VK_None) {
+ if (HexagonMCInstrInfo::s27_2_reloc(*MO.getExpr())) {
+ // A2_iconst.
+ FixupKind = Hexagon::fixup_Hexagon_27_REG;
+ } else {
+ // Look for GP-relative fixups.
+ unsigned Shift = HexagonMCInstrInfo::getExtentAlignment(MCII, MI);
+ static const Hexagon::Fixups GPRelFixups[] = {
+ Hexagon::fixup_Hexagon_GPREL16_0, Hexagon::fixup_Hexagon_GPREL16_1,
+ Hexagon::fixup_Hexagon_GPREL16_2, Hexagon::fixup_Hexagon_GPREL16_3
+ };
+ assert(Shift < array_lengthof(GPRelFixups));
+ auto UsesGP = [] (const MCInstrDesc &D) {
+ for (const MCPhysReg *U = D.getImplicitUses(); U && *U; ++U)
+ if (*U == Hexagon::GP)
+ return true;
+ return false;
+ };
+ if (UsesGP(MCID))
+ FixupKind = GPRelFixups[Shift];
}
- break;
-
- case 15:
- switch (kind) {
- case MCSymbolRefExpr::VK_None:
- FixupKind = *Extended ? Hexagon::fixup_Hexagon_B15_PCREL_X
- : Hexagon::fixup_Hexagon_B15_PCREL;
- break;
- default:
- raise_relocation_error(bits, kind);
+ } else if (VarKind == MCSymbolRefExpr::VK_GOTREL) {
+ // Select between LO/HI.
+ if (Opc == Hexagon::LO)
+ FixupKind = Hexagon::fixup_Hexagon_GOTREL_LO16;
+ else if (Opc == Hexagon::HI)
+ FixupKind = Hexagon::fixup_Hexagon_GOTREL_HI16;
}
- break;
-
- case 13:
- switch (kind) {
- case MCSymbolRefExpr::VK_None:
- FixupKind = Hexagon::fixup_Hexagon_B13_PCREL;
- break;
- default:
- raise_relocation_error(bits, kind);
- }
- break;
-
- case 12:
- if (*Extended)
- switch (kind) {
- // There isn't a GOT_12_X, both 11_X and 16_X resolve to 6/26
- case MCSymbolRefExpr::VK_GOT:
- FixupKind = Hexagon::fixup_Hexagon_GOT_16_X;
- break;
- case MCSymbolRefExpr::VK_GOTREL:
- FixupKind = Hexagon::fixup_Hexagon_GOTREL_16_X;
- break;
- case MCSymbolRefExpr::VK_None:
- FixupKind = Hexagon::fixup_Hexagon_12_X;
- break;
- default:
- raise_relocation_error(bits, kind);
- }
- else
- raise_relocation_error(bits, kind);
- break;
-
- case 11:
- if (*Extended)
- switch (kind) {
- case MCSymbolRefExpr::VK_DTPREL:
- FixupKind = Hexagon::fixup_Hexagon_DTPREL_11_X;
- break;
- case MCSymbolRefExpr::VK_GOT:
- FixupKind = Hexagon::fixup_Hexagon_GOT_11_X;
- break;
- case MCSymbolRefExpr::VK_GOTREL:
- FixupKind = Hexagon::fixup_Hexagon_GOTREL_11_X;
- break;
- case MCSymbolRefExpr::VK_Hexagon_GD_GOT:
- FixupKind = Hexagon::fixup_Hexagon_GD_GOT_11_X;
- break;
- case MCSymbolRefExpr::VK_Hexagon_IE_GOT:
- FixupKind = Hexagon::fixup_Hexagon_IE_GOT_11_X;
+ } else {
+ bool BranchOrCR = MCID.isBranch() || IType == HexagonII::TypeCR;
+ switch (FixupWidth) {
+ case 9:
+ if (BranchOrCR)
+ FixupKind = State.Extended ? Hexagon::fixup_Hexagon_B9_PCREL_X
+ : Hexagon::fixup_Hexagon_B9_PCREL;
+ break;
+ case 8:
+ case 7:
+ if (State.Extended && VarKind == MCSymbolRefExpr::VK_GOT)
+ FixupKind = HexagonMCInstrInfo::isExtentSigned(MCII, MI)
+ ? Hexagon::fixup_Hexagon_GOT_16_X
+ : Hexagon::fixup_Hexagon_GOT_11_X;
+ else if (FixupWidth == 7 && BranchOrCR)
+ FixupKind = State.Extended ? Hexagon::fixup_Hexagon_B7_PCREL_X
+ : Hexagon::fixup_Hexagon_B7_PCREL;
+ break;
+ case 0:
+ FixupKind = getFixupNoBits(MCII, MI, MO, VarKind);
break;
- case MCSymbolRefExpr::VK_Hexagon_LD_GOT:
- FixupKind = Hexagon::fixup_Hexagon_LD_GOT_11_X;
- break;
- case MCSymbolRefExpr::VK_Hexagon_GD_PLT:
- FixupKind = Hexagon::fixup_Hexagon_GD_PLT_B22_PCREL_X;
- break;
- case MCSymbolRefExpr::VK_Hexagon_LD_PLT:
- FixupKind = Hexagon::fixup_Hexagon_LD_PLT_B22_PCREL_X;
- break;
- case MCSymbolRefExpr::VK_None:
- FixupKind = Hexagon::fixup_Hexagon_11_X;
- break;
- case MCSymbolRefExpr::VK_TPREL:
- FixupKind = Hexagon::fixup_Hexagon_TPREL_11_X;
- break;
- default:
- raise_relocation_error(bits, kind);
- }
- else {
- switch (kind) {
- case MCSymbolRefExpr::VK_TPREL:
- FixupKind = Hexagon::fixup_Hexagon_TPREL_11_X;
- break;
- default:
- raise_relocation_error(bits, kind);
- }
}
- break;
+ }
- case 10:
- if (*Extended) {
- switch (kind) {
- case MCSymbolRefExpr::VK_None:
- FixupKind = Hexagon::fixup_Hexagon_10_X;
- break;
- default:
- raise_relocation_error(bits, kind);
- }
- } else
- raise_relocation_error(bits, kind);
- break;
-
- case 9:
- if (MCID.isBranch() ||
- (HexagonMCInstrInfo::getType(MCII, MI) == HexagonII::TypeCR))
- FixupKind = *Extended ? Hexagon::fixup_Hexagon_B9_PCREL_X
- : Hexagon::fixup_Hexagon_B9_PCREL;
- else if (*Extended)
- FixupKind = Hexagon::fixup_Hexagon_9_X;
- else
- raise_relocation_error(bits, kind);
- break;
-
- case 8:
- if (*Extended)
- FixupKind = Hexagon::fixup_Hexagon_8_X;
- else
- raise_relocation_error(bits, kind);
- break;
-
- case 7:
- if (MCID.isBranch() ||
- (HexagonMCInstrInfo::getType(MCII, MI) == HexagonII::TypeCR))
- FixupKind = *Extended ? Hexagon::fixup_Hexagon_B7_PCREL_X
- : Hexagon::fixup_Hexagon_B7_PCREL;
- else if (*Extended)
- FixupKind = Hexagon::fixup_Hexagon_7_X;
- else
- raise_relocation_error(bits, kind);
- break;
-
- case 6:
- if (*Extended) {
- switch (kind) {
- case MCSymbolRefExpr::VK_DTPREL:
- FixupKind = Hexagon::fixup_Hexagon_DTPREL_16_X;
- break;
- // This is part of an extender, GOT_11 is a
- // Word32_U6 unsigned/truncated reloc.
- case MCSymbolRefExpr::VK_GOT:
- FixupKind = Hexagon::fixup_Hexagon_GOT_11_X;
- break;
- case MCSymbolRefExpr::VK_GOTREL:
- FixupKind = Hexagon::fixup_Hexagon_GOTREL_11_X;
- break;
- case MCSymbolRefExpr::VK_Hexagon_PCREL:
- FixupKind = Hexagon::fixup_Hexagon_6_PCREL_X;
- break;
- case MCSymbolRefExpr::VK_TPREL:
- FixupKind = Hexagon::fixup_Hexagon_TPREL_16_X;
- break;
- case MCSymbolRefExpr::VK_None:
- FixupKind = Hexagon::fixup_Hexagon_6_X;
- break;
- default:
- raise_relocation_error(bits, kind);
- }
- } else
- raise_relocation_error(bits, kind);
- break;
+ if (FixupKind == fixup_Invalid) {
+ const auto &FixupTable = State.Extended ? ExtFixups : StdFixups;
- case 0:
- FixupKind = getFixupNoBits(MCII, MI, MO, kind);
- break;
+ auto FindVK = FixupTable.find(VarKind);
+ if (FindVK != FixupTable.end())
+ FixupKind = FindVK->second[FixupWidth];
}
- MCExpr const *FixupExpression =
- (*Addend > 0 && isPCRel(FixupKind))
- ? MCBinaryExpr::createAdd(MO.getExpr(),
- MCConstantExpr::create(*Addend, MCT), MCT)
- : MO.getExpr();
+ if (FixupKind == fixup_Invalid)
+ raise_relocation_error(FixupWidth, VarKind);
- MCFixup fixup = MCFixup::create(*Addend, FixupExpression,
+ const MCExpr *FixupExpr = MO.getExpr();
+ if (State.Addend != 0 && isPCRel(FixupKind)) {
+ const MCExpr *C = MCConstantExpr::create(State.Addend, MCT);
+ FixupExpr = MCBinaryExpr::createAdd(FixupExpr, C, MCT);
+ }
+
+ MCFixup Fixup = MCFixup::create(State.Addend, FixupExpr,
MCFixupKind(FixupKind), MI.getLoc());
- Fixups.push_back(fixup);
+ Fixups.push_back(Fixup);
// All of the information is in the fixup.
return 0;
}
@@ -739,55 +732,55 @@ HexagonMCCodeEmitter::getMachineOpValue(MCInst const &MI, MCOperand const &MO,
#endif
if (HexagonMCInstrInfo::isNewValue(MCII, MI) &&
- &MO == &MI.getOperand(HexagonMCInstrInfo::getNewValueOp(MCII, MI))) {
+ &MO == &HexagonMCInstrInfo::getNewValueOperand(MCII, MI)) {
// Calculate the new value distance to the associated producer
- MCOperand const &MCO =
- MI.getOperand(HexagonMCInstrInfo::getNewValueOp(MCII, MI));
unsigned SOffset = 0;
unsigned VOffset = 0;
- unsigned Register = MCO.getReg();
- unsigned Register1;
- unsigned Register2;
- auto Instructions = HexagonMCInstrInfo::bundleInstructions(**CurrentBundle);
- auto i = Instructions.begin() + *CurrentIndex - 1;
- for (;; --i) {
- assert(i != Instructions.begin() - 1 && "Couldn't find producer");
- MCInst const &Inst = *i->getInst();
+ unsigned UseReg = MO.getReg();
+ unsigned DefReg1, DefReg2;
+
+ auto Instrs = HexagonMCInstrInfo::bundleInstructions(*State.Bundle);
+ const MCOperand *I = Instrs.begin() + State.Index - 1;
+
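+ // Walk backwards from the slot just before the consumer to find the
+ // producer of UseReg within the same bundle.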
+ for (;; --I) {
+ assert(I != Instrs.begin() - 1 && "Couldn't find producer");
+ MCInst const &Inst = *I->getInst();
if (HexagonMCInstrInfo::isImmext(Inst))
continue;
+
+ DefReg1 = DefReg2 = 0;
++SOffset;
- if (HexagonMCInstrInfo::isVector(MCII, Inst))
- // Vector instructions don't count scalars
+ if (HexagonMCInstrInfo::isVector(MCII, Inst)) {
+ // Vector instructions don't count scalars.
++VOffset;
- Register1 =
- HexagonMCInstrInfo::hasNewValue(MCII, Inst)
- ? HexagonMCInstrInfo::getNewValueOperand(MCII, Inst).getReg()
- : static_cast<unsigned>(Hexagon::NoRegister);
- Register2 =
- HexagonMCInstrInfo::hasNewValue2(MCII, Inst)
- ? HexagonMCInstrInfo::getNewValueOperand2(MCII, Inst).getReg()
- : static_cast<unsigned>(Hexagon::NoRegister);
- if (!RegisterMatches(Register, Register1, Register2))
+ }
+ if (HexagonMCInstrInfo::hasNewValue(MCII, Inst))
+ DefReg1 = HexagonMCInstrInfo::getNewValueOperand(MCII, Inst).getReg();
+ if (HexagonMCInstrInfo::hasNewValue2(MCII, Inst))
+ DefReg2 = HexagonMCInstrInfo::getNewValueOperand2(MCII, Inst).getReg();
+ if (!RegisterMatches(UseReg, DefReg1, DefReg2)) {
// This isn't the register we're looking for
continue;
- if (!HexagonMCInstrInfo::isPredicated(MCII, Inst))
+ }
+ if (!HexagonMCInstrInfo::isPredicated(MCII, Inst)) {
// Producer is unpredicated
break;
+ }
assert(HexagonMCInstrInfo::isPredicated(MCII, MI) &&
- "Unpredicated consumer depending on predicated producer");
+ "Unpredicated consumer depending on predicated producer");
if (HexagonMCInstrInfo::isPredicatedTrue(MCII, Inst) ==
- HexagonMCInstrInfo::isPredicatedTrue(MCII, MI))
- // Producer predicate sense matched ours
+ HexagonMCInstrInfo::isPredicatedTrue(MCII, MI))
+ // Producer predicate sense matched ours.
break;
}
// Hexagon PRM 10.11 Construct Nt from distance
- unsigned Offset =
- HexagonMCInstrInfo::isVector(MCII, MI) ? VOffset : SOffset;
+ unsigned Offset = HexagonMCInstrInfo::isVector(MCII, MI) ? VOffset
+ : SOffset;
Offset <<= 1;
- Offset |=
- HexagonMCInstrInfo::SubregisterBit(Register, Register1, Register2);
+ Offset |= HexagonMCInstrInfo::SubregisterBit(UseReg, DefReg1, DefReg2);
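+ // For example, a producer found two instructions back with a clear
+ // subregister bit encodes as Nt = (2 << 1) | 0 = 4.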
return Offset;
}
+
assert(!MO.isImm());
if (MO.isReg()) {
unsigned Reg = MO.getReg();
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h
index 14cabf1534a5..fcea63db23a3 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h
@@ -8,7 +8,7 @@
//===----------------------------------------------------------------------===//
///
/// \file
-/// \brief Definition for classes that emit Hexagon machine code from MCInsts
+/// Definition for classes that emit Hexagon machine code from MCInsts
///
//===----------------------------------------------------------------------===//
@@ -35,25 +35,20 @@ class raw_ostream;
class HexagonMCCodeEmitter : public MCCodeEmitter {
MCContext &MCT;
MCInstrInfo const &MCII;
- std::unique_ptr<unsigned> Addend;
- std::unique_ptr<bool> Extended;
- std::unique_ptr<MCInst const *> CurrentBundle;
- std::unique_ptr<size_t> CurrentIndex;
- // helper routine for getMachineOpValue()
- unsigned getExprOpValue(const MCInst &MI, const MCOperand &MO,
- const MCExpr *ME, SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
-
- Hexagon::Fixups getFixupNoBits(MCInstrInfo const &MCII, const MCInst &MI,
- const MCOperand &MO,
- const MCSymbolRefExpr::VariantKind kind) const;
+ // Mutable emitter state used while encoding bundles and duplexes.
+ struct EmitterState {
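+ // Addend tracks the offset of the instruction within its bundle and
+ // is folded into PC-relative fixups; Extended is set when the previous
+ // instruction in the bundle was an immediate extender.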
+ unsigned Addend = 0;
+ bool Extended = false;
+ bool SubInst1 = false;
+ const MCInst *Bundle = nullptr;
+ size_t Index = 0;
+ };
+ mutable EmitterState State;
public:
- HexagonMCCodeEmitter(MCInstrInfo const &aMII, MCContext &aMCT);
-
- // Return parse bits for instruction `MCI' inside bundle `MCB'
- uint32_t parseBits(size_t Last, MCInst const &MCB, MCInst const &MCI) const;
+ HexagonMCCodeEmitter(MCInstrInfo const &MII, MCContext &MCT)
+ : MCT(MCT), MCII(MII) {}
void encodeInstruction(MCInst const &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
@@ -64,18 +59,30 @@ public:
const MCSubtargetInfo &STI,
uint32_t Parse) const;
- // \brief TableGen'erated function for getting the
+ // TableGen'erated function for getting the
// binary encoding for an instruction.
uint64_t getBinaryCodeForInstr(MCInst const &MI,
SmallVectorImpl<MCFixup> &Fixups,
MCSubtargetInfo const &STI) const;
- /// \brief Return binary encoding of operand.
+ /// Return binary encoding of operand.
unsigned getMachineOpValue(MCInst const &MI, MCOperand const &MO,
SmallVectorImpl<MCFixup> &Fixups,
MCSubtargetInfo const &STI) const;
private:
+ // Helper routine for getMachineOpValue().
+ unsigned getExprOpValue(const MCInst &MI, const MCOperand &MO,
+ const MCExpr *ME, SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ Hexagon::Fixups getFixupNoBits(MCInstrInfo const &MCII, const MCInst &MI,
+ const MCOperand &MO,
+ const MCSymbolRefExpr::VariantKind Kind) const;
+
+ // Return parse bits for instruction `MCI' inside bundle `MCB'
+ uint32_t parseBits(size_t Last, MCInst const &MCB, MCInst const &MCI) const;
+
uint64_t computeAvailableFeatures(const FeatureBitset &FB) const;
void verifyInstructionPredicates(const MCInst &MI,
uint64_t AvailableFeatures) const;
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp
index 127c97e342dc..3eaef9ac7410 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp
@@ -205,7 +205,7 @@ static MCInst *getCompoundInsn(MCContext &Context, MCInst const &L,
switch (L.getOpcode()) {
default:
- DEBUG(dbgs() << "Possible compound ignored\n");
+ LLVM_DEBUG(dbgs() << "Possible compound ignored\n");
return CompoundInsn;
case Hexagon::A2_tfrsi:
@@ -233,7 +233,7 @@ static MCInst *getCompoundInsn(MCContext &Context, MCInst const &L,
break;
case Hexagon::C2_cmpeq:
- DEBUG(dbgs() << "CX: C2_cmpeq\n");
+ LLVM_DEBUG(dbgs() << "CX: C2_cmpeq\n");
Rs = L.getOperand(1);
Rt = L.getOperand(2);
@@ -246,7 +246,7 @@ static MCInst *getCompoundInsn(MCContext &Context, MCInst const &L,
break;
case Hexagon::C2_cmpgt:
- DEBUG(dbgs() << "CX: C2_cmpgt\n");
+ LLVM_DEBUG(dbgs() << "CX: C2_cmpgt\n");
Rs = L.getOperand(1);
Rt = L.getOperand(2);
@@ -259,7 +259,7 @@ static MCInst *getCompoundInsn(MCContext &Context, MCInst const &L,
break;
case Hexagon::C2_cmpgtu:
- DEBUG(dbgs() << "CX: C2_cmpgtu\n");
+ LLVM_DEBUG(dbgs() << "CX: C2_cmpgtu\n");
Rs = L.getOperand(1);
Rt = L.getOperand(2);
@@ -272,7 +272,7 @@ static MCInst *getCompoundInsn(MCContext &Context, MCInst const &L,
break;
case Hexagon::C2_cmpeqi:
- DEBUG(dbgs() << "CX: C2_cmpeqi\n");
+ LLVM_DEBUG(dbgs() << "CX: C2_cmpeqi\n");
Success = L.getOperand(2).getExpr()->evaluateAsAbsolute(Value);
(void)Success;
assert(Success);
@@ -290,7 +290,7 @@ static MCInst *getCompoundInsn(MCContext &Context, MCInst const &L,
break;
case Hexagon::C2_cmpgti:
- DEBUG(dbgs() << "CX: C2_cmpgti\n");
+ LLVM_DEBUG(dbgs() << "CX: C2_cmpgti\n");
Success = L.getOperand(2).getExpr()->evaluateAsAbsolute(Value);
(void)Success;
assert(Success);
@@ -308,7 +308,7 @@ static MCInst *getCompoundInsn(MCContext &Context, MCInst const &L,
break;
case Hexagon::C2_cmpgtui:
- DEBUG(dbgs() << "CX: C2_cmpgtui\n");
+ LLVM_DEBUG(dbgs() << "CX: C2_cmpgtui\n");
Rs = L.getOperand(1);
compoundOpcode = cmpgtuiBitOpcode[getCompoundOp(R)];
CompoundInsn = new (Context) MCInst;
@@ -319,7 +319,7 @@ static MCInst *getCompoundInsn(MCContext &Context, MCInst const &L,
break;
case Hexagon::S2_tstbit_i:
- DEBUG(dbgs() << "CX: S2_tstbit_i\n");
+ LLVM_DEBUG(dbgs() << "CX: S2_tstbit_i\n");
Rs = L.getOperand(1);
compoundOpcode = tstBitOpcode[getCompoundOp(R)];
CompoundInsn = new (Context) MCInst;
@@ -372,14 +372,14 @@ static bool lookForCompound(MCInstrInfo const &MCII, MCContext &Context,
BExtended = true;
continue;
}
- DEBUG(dbgs() << "J,B: " << JumpInst->getOpcode() << ","
- << Inst->getOpcode() << "\n");
+ LLVM_DEBUG(dbgs() << "J,B: " << JumpInst->getOpcode() << ","
+ << Inst->getOpcode() << "\n");
if (isOrderedCompoundPair(*Inst, BExtended, *JumpInst, JExtended)) {
MCInst *CompoundInsn = getCompoundInsn(Context, *Inst, *JumpInst);
if (CompoundInsn) {
- DEBUG(dbgs() << "B: " << Inst->getOpcode() << ","
- << JumpInst->getOpcode() << " Compounds to "
- << CompoundInsn->getOpcode() << "\n");
+ LLVM_DEBUG(dbgs() << "B: " << Inst->getOpcode() << ","
+ << JumpInst->getOpcode() << " Compounds to "
+ << CompoundInsn->getOpcode() << "\n");
J->setInst(CompoundInsn);
MCI.erase(B);
return true;
@@ -422,7 +422,7 @@ void HexagonMCInstrInfo::tryCompound(MCInstrInfo const &MCII, MCSubtargetInfo co
if (StartedValid &&
!llvm::HexagonMCShuffle(Context, false, MCII, STI, MCI)) {
- DEBUG(dbgs() << "Found ERROR\n");
+ LLVM_DEBUG(dbgs() << "Found ERROR\n");
MCI = OriginalBundle;
}
}
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp
index 4c18af60efd1..b208a3668124 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp
@@ -263,12 +263,10 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) {
break;
case Hexagon::L4_return:
-
case Hexagon::L2_deallocframe:
-
return HexagonII::HSIG_L2;
- case Hexagon::EH_RETURN_JMPR:
+ case Hexagon::EH_RETURN_JMPR:
case Hexagon::J2_jumpr:
case Hexagon::PS_jmpret:
// jumpr r31
@@ -789,12 +787,12 @@ MCInst HexagonMCInstrInfo::deriveSubInst(MCInst const &Inst) {
addOps(Result, Inst, 2);
break; // 1,3 SUBInst $Rdd = combine(#2, #$u2)
}
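+ // Do not fall through into the A4_combineir case.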
+ break;
case Hexagon::A4_combineir:
Result.setOpcode(Hexagon::SA1_combinezr);
addOps(Result, Inst, 0);
addOps(Result, Inst, 2);
break; // 1,3 SUBInst $Rdd = combine(#0, $Rs)
-
case Hexagon::A4_combineri:
Result.setOpcode(Hexagon::SA1_combinerz);
addOps(Result, Inst, 0);
@@ -901,6 +899,7 @@ MCInst HexagonMCInstrInfo::deriveSubInst(MCInst const &Inst) {
addOps(Result, Inst, 1);
break; // 2 1,2 SUBInst memb($Rs + #$u4_0)=#1
}
+ break;
case Hexagon::S2_storerb_io:
Result.setOpcode(Hexagon::SS1_storeb_io);
addOps(Result, Inst, 0);
@@ -937,6 +936,7 @@ MCInst HexagonMCInstrInfo::deriveSubInst(MCInst const &Inst) {
addOps(Result, Inst, 2);
break; // 1 2,3 SUBInst memw(r29 + #$u5_2) = $Rt
}
+ break;
case Hexagon::S2_storeri_io:
if (Inst.getOperand(0).getReg() == Hexagon::R29) {
Result.setOpcode(Hexagon::SS2_storew_sp);
@@ -1045,8 +1045,8 @@ HexagonMCInstrInfo::getDuplexPossibilties(MCInstrInfo const &MCII,
bool bisReversable = true;
if (isStoreInst(MCB.getOperand(j).getInst()->getOpcode()) &&
isStoreInst(MCB.getOperand(k).getInst()->getOpcode())) {
- DEBUG(dbgs() << "skip out of order write pair: " << k << "," << j
- << "\n");
+ LLVM_DEBUG(dbgs() << "skip out of order write pair: " << k << "," << j
+ << "\n");
bisReversable = false;
}
if (HexagonMCInstrInfo::isMemReorderDisabled(MCB)) // }:mem_noshuf
@@ -1066,14 +1066,14 @@ HexagonMCInstrInfo::getDuplexPossibilties(MCInstrInfo const &MCII,
// Save off pairs for duplex checking.
duplexToTry.push_back(DuplexCandidate(j, k, iClass));
- DEBUG(dbgs() << "adding pair: " << j << "," << k << ":"
- << MCB.getOperand(j).getInst()->getOpcode() << ","
- << MCB.getOperand(k).getInst()->getOpcode() << "\n");
+ LLVM_DEBUG(dbgs() << "adding pair: " << j << "," << k << ":"
+ << MCB.getOperand(j).getInst()->getOpcode() << ","
+ << MCB.getOperand(k).getInst()->getOpcode() << "\n");
continue;
} else {
- DEBUG(dbgs() << "skipping pair: " << j << "," << k << ":"
- << MCB.getOperand(j).getInst()->getOpcode() << ","
- << MCB.getOperand(k).getInst()->getOpcode() << "\n");
+ LLVM_DEBUG(dbgs() << "skipping pair: " << j << "," << k << ":"
+ << MCB.getOperand(j).getInst()->getOpcode() << ","
+ << MCB.getOperand(k).getInst()->getOpcode() << "\n");
}
// Try reverse.
@@ -1091,13 +1091,15 @@ HexagonMCInstrInfo::getDuplexPossibilties(MCInstrInfo const &MCII,
// Save off pairs for duplex checking.
duplexToTry.push_back(DuplexCandidate(k, j, iClass));
- DEBUG(dbgs() << "adding pair:" << k << "," << j << ":"
- << MCB.getOperand(j).getInst()->getOpcode() << ","
- << MCB.getOperand(k).getInst()->getOpcode() << "\n");
+ LLVM_DEBUG(dbgs()
+ << "adding pair:" << k << "," << j << ":"
+ << MCB.getOperand(j).getInst()->getOpcode() << ","
+ << MCB.getOperand(k).getInst()->getOpcode() << "\n");
} else {
- DEBUG(dbgs() << "skipping pair: " << k << "," << j << ":"
- << MCB.getOperand(j).getInst()->getOpcode() << ","
- << MCB.getOperand(k).getInst()->getOpcode() << "\n");
+ LLVM_DEBUG(dbgs()
+ << "skipping pair: " << k << "," << j << ":"
+ << MCB.getOperand(j).getInst()->getOpcode() << ","
+ << MCB.getOperand(k).getInst()->getOpcode() << "\n");
}
}
}
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp
index 691e269cb91f..f304bc50530f 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp
@@ -25,6 +25,7 @@
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCObjectStreamer.h"
+#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCStreamer.h"
@@ -47,15 +48,15 @@ static cl::opt<unsigned> GPSize
HexagonMCELFStreamer::HexagonMCELFStreamer(
MCContext &Context, std::unique_ptr<MCAsmBackend> TAB,
- raw_pwrite_stream &OS, std::unique_ptr<MCCodeEmitter> Emitter)
- : MCELFStreamer(Context, std::move(TAB), OS, std::move(Emitter)),
+ std::unique_ptr<MCObjectWriter> OW, std::unique_ptr<MCCodeEmitter> Emitter)
+ : MCELFStreamer(Context, std::move(TAB), std::move(OW), std::move(Emitter)),
MCII(createHexagonMCInstrInfo()) {}
HexagonMCELFStreamer::HexagonMCELFStreamer(
MCContext &Context, std::unique_ptr<MCAsmBackend> TAB,
- raw_pwrite_stream &OS, std::unique_ptr<MCCodeEmitter> Emitter,
+ std::unique_ptr<MCObjectWriter> OW, std::unique_ptr<MCCodeEmitter> Emitter,
MCAssembler *Assembler)
- : MCELFStreamer(Context, std::move(TAB), OS, std::move(Emitter)),
+ : MCELFStreamer(Context, std::move(TAB), std::move(OW), std::move(Emitter)),
MCII(createHexagonMCInstrInfo()) {}
void HexagonMCELFStreamer::EmitInstruction(const MCInst &MCB,
@@ -63,21 +64,6 @@ void HexagonMCELFStreamer::EmitInstruction(const MCInst &MCB,
assert(MCB.getOpcode() == Hexagon::BUNDLE);
assert(HexagonMCInstrInfo::bundleSize(MCB) <= HEXAGON_PACKET_SIZE);
assert(HexagonMCInstrInfo::bundleSize(MCB) > 0);
- bool Extended = false;
- for (auto &I : HexagonMCInstrInfo::bundleInstructions(MCB)) {
- MCInst *MCI = const_cast<MCInst *>(I.getInst());
- if (Extended) {
- if (HexagonMCInstrInfo::isDuplex(*MCII, *MCI)) {
- MCInst *SubInst = const_cast<MCInst *>(MCI->getOperand(1).getInst());
- HexagonMCInstrInfo::clampExtended(*MCII, getContext(), *SubInst);
- } else {
- HexagonMCInstrInfo::clampExtended(*MCII, getContext(), *MCI);
- }
- Extended = false;
- } else {
- Extended = HexagonMCInstrInfo::isImmext(*MCI);
- }
- }
// At this point, MCB is a bundle
// Iterate through the bundle and assign addends for the instructions
@@ -124,7 +110,7 @@ void HexagonMCELFStreamer::HexagonMCEmitCommonSymbol(MCSymbol *Symbol,
MCSectionSubPair P = getCurrentSection();
SwitchSection(&Section);
- if (ELFSymbol->isUndefined(false)) {
+ if (ELFSymbol->isUndefined()) {
EmitValueToAlignment(ByteAlignment, 0, 1, 0);
EmitLabel(Symbol);
EmitZeros(Size);
@@ -166,9 +152,10 @@ void HexagonMCELFStreamer::HexagonMCEmitLocalCommonSymbol(MCSymbol *Symbol,
namespace llvm {
MCStreamer *createHexagonELFStreamer(Triple const &TT, MCContext &Context,
std::unique_ptr<MCAsmBackend> MAB,
- raw_pwrite_stream &OS,
+ std::unique_ptr<MCObjectWriter> OW,
std::unique_ptr<MCCodeEmitter> CE) {
- return new HexagonMCELFStreamer(Context, std::move(MAB), OS, std::move(CE));
+ return new HexagonMCELFStreamer(Context, std::move(MAB), std::move(OW),
+ std::move(CE));
}
} // end namespace llvm
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.h
index c6fa0021d86b..c02bef8f06f7 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.h
@@ -23,11 +23,11 @@ class HexagonMCELFStreamer : public MCELFStreamer {
public:
HexagonMCELFStreamer(MCContext &Context, std::unique_ptr<MCAsmBackend> TAB,
- raw_pwrite_stream &OS,
+ std::unique_ptr<MCObjectWriter> OW,
std::unique_ptr<MCCodeEmitter> Emitter);
HexagonMCELFStreamer(MCContext &Context, std::unique_ptr<MCAsmBackend> TAB,
- raw_pwrite_stream &OS,
+ std::unique_ptr<MCObjectWriter> OW,
std::unique_ptr<MCCodeEmitter> Emitter,
MCAssembler *Assembler);
@@ -43,7 +43,7 @@ public:
MCStreamer *createHexagonELFStreamer(Triple const &TT, MCContext &Context,
std::unique_ptr<MCAsmBackend> MAB,
- raw_pwrite_stream &OS,
+ std::unique_ptr<MCObjectWriter> OW,
std::unique_ptr<MCCodeEmitter> CE);
} // end namespace llvm
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp
index 19308cd425e8..a11aa92ccbe1 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp
@@ -158,23 +158,6 @@ bool HexagonMCInstrInfo::canonicalizePacket(MCInstrInfo const &MCII,
return true;
}
-void HexagonMCInstrInfo::clampExtended(MCInstrInfo const &MCII,
- MCContext &Context, MCInst &MCI) {
- assert(HexagonMCInstrInfo::isExtendable(MCII, MCI) ||
- HexagonMCInstrInfo::isExtended(MCII, MCI));
- MCOperand &exOp =
- MCI.getOperand(HexagonMCInstrInfo::getExtendableOp(MCII, MCI));
- // If the extended value is a constant, then use it for the extended and
- // for the extender instructions, masking off the lower 6 bits and
- // including the assumed bits.
- int64_t Value;
- if (exOp.getExpr()->evaluateAsAbsolute(Value)) {
- unsigned Shift = HexagonMCInstrInfo::getExtentAlignment(MCII, MCI);
- exOp.setExpr(HexagonMCExpr::create(
- MCConstantExpr::create((Value & 0x3f) << Shift, Context), Context));
- }
-}
-
MCInst HexagonMCInstrInfo::deriveExtender(MCInstrInfo const &MCII,
MCInst const &Inst,
MCOperand const &MO) {
@@ -330,16 +313,19 @@ unsigned HexagonMCInstrInfo::getExtentBits(MCInstrInfo const &MCII,
return ((F >> HexagonII::ExtentBitsPos) & HexagonII::ExtentBitsMask);
}
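+/// Check whether the extendable operand of MCI is signed.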
+bool HexagonMCInstrInfo::isExtentSigned(MCInstrInfo const &MCII,
+ MCInst const &MCI) {
+ const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+ return (F >> HexagonII::ExtentSignedPos) & HexagonII::ExtentSignedMask;
+}
+
/// Return the maximum value of an extendable operand.
int HexagonMCInstrInfo::getMaxValue(MCInstrInfo const &MCII,
MCInst const &MCI) {
- const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
- bool S = (F >> HexagonII::ExtentSignedPos) & HexagonII::ExtentSignedMask;
-
assert(HexagonMCInstrInfo::isExtendable(MCII, MCI) ||
HexagonMCInstrInfo::isExtended(MCII, MCI));
- if (S) // if value is signed
+ if (HexagonMCInstrInfo::isExtentSigned(MCII, MCI)) // if value is signed
return (1 << (HexagonMCInstrInfo::getExtentBits(MCII, MCI) - 1)) - 1;
return (1 << HexagonMCInstrInfo::getExtentBits(MCII, MCI)) - 1;
}
@@ -347,13 +333,10 @@ int HexagonMCInstrInfo::getMaxValue(MCInstrInfo const &MCII,
/// Return the minimum value of an extendable operand.
int HexagonMCInstrInfo::getMinValue(MCInstrInfo const &MCII,
MCInst const &MCI) {
- const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
- bool S = (F >> HexagonII::ExtentSignedPos) & HexagonII::ExtentSignedMask;
-
assert(HexagonMCInstrInfo::isExtendable(MCII, MCI) ||
HexagonMCInstrInfo::isExtended(MCII, MCI));
- if (S) // if value is signed
+ if (HexagonMCInstrInfo::isExtentSigned(MCII, MCI)) // if value is signed
return -(1 << (HexagonMCInstrInfo::getExtentBits(MCII, MCI) - 1));
return 0;
}
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h
index 28d89429266b..d040bea23b6d 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h
@@ -103,9 +103,6 @@ MCInst deriveExtender(MCInstrInfo const &MCII, MCInst const &Inst,
// Convert this instruction in to a duplex subinst
MCInst deriveSubInst(MCInst const &Inst);
-// Clamp off upper 26 bits of extendable operand for emission
-void clampExtended(MCInstrInfo const &MCII, MCContext &Context, MCInst &MCI);
-
// Return the extender for instruction at Index or nullptr if none
MCInst const *extenderForIndex(MCInst const &MCB, size_t Index);
void extendIfNeeded(MCContext &Context, MCInstrInfo const &MCII, MCInst &MCB,
@@ -143,6 +140,9 @@ unsigned getExtentAlignment(MCInstrInfo const &MCII, MCInst const &MCI);
// Return the number of logical bits of the extendable operand
unsigned getExtentBits(MCInstrInfo const &MCII, MCInst const &MCI);
+// Check if the extendable operand is signed.
+bool isExtentSigned(MCInstrInfo const &MCII, MCInst const &MCI);
+
// Return the max value that a constant extendable operand can have
// without being extended.
int getMaxValue(MCInstrInfo const &MCII, MCInst const &MCI);
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp
index 7bd54fdfa3d5..4281144acaee 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp
@@ -38,7 +38,8 @@ void HexagonMCShuffler::init(MCInst &MCB) {
// Copy the bundle for the shuffling.
for (const auto &I : HexagonMCInstrInfo::bundleInstructions(MCB)) {
MCInst &MI = *const_cast<MCInst *>(I.getInst());
- DEBUG(dbgs() << "Shuffling: " << MCII.getName(MI.getOpcode()) << '\n');
+ LLVM_DEBUG(dbgs() << "Shuffling: " << MCII.getName(MI.getOpcode())
+ << '\n');
assert(!HexagonMCInstrInfo::getDesc(MCII, MI).isPseudo());
if (!HexagonMCInstrInfo::isImmext(MI)) {
@@ -98,7 +99,7 @@ bool HexagonMCShuffler::reshuffleTo(MCInst &MCB) {
copyTo(MCB);
return true;
}
- DEBUG(MCB.dump());
+ LLVM_DEBUG(MCB.dump());
return false;
}
@@ -119,10 +120,10 @@ bool llvm::HexagonMCShuffle(MCContext &Context, bool Fatal,
// * %d7 = IMPLICIT_DEF; flags:
// After the IMPLICIT_DEFs were removed by the asm printer, the bundle
// became empty.
- DEBUG(dbgs() << "Skipping empty bundle");
+ LLVM_DEBUG(dbgs() << "Skipping empty bundle");
return false;
} else if (!HexagonMCInstrInfo::isBundle(MCB)) {
- DEBUG(dbgs() << "Skipping stand-alone insn");
+ LLVM_DEBUG(dbgs() << "Skipping stand-alone insn");
return false;
}
@@ -144,10 +145,10 @@ llvm::HexagonMCShuffle(MCContext &Context, MCInstrInfo const &MCII,
// * %d7 = IMPLICIT_DEF; flags:
// After the IMPLICIT_DEFs were removed by the asm printer, the bundle
// became empty.
- DEBUG(dbgs() << "Skipping empty bundle");
+ LLVM_DEBUG(dbgs() << "Skipping empty bundle");
return false;
} else if (!HexagonMCInstrInfo::isBundle(MCB)) {
- DEBUG(dbgs() << "Skipping stand-alone insn");
+ LLVM_DEBUG(dbgs() << "Skipping stand-alone insn");
return false;
}
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
index 3fbe2197f937..b211a81524fb 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
@@ -29,6 +29,7 @@
#include "llvm/MC/MCELFStreamer.h"
#include "llvm/MC/MCInstrAnalysis.h"
#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
@@ -147,7 +148,7 @@ public:
auto PacketBundle = Contents.rsplit('\n');
auto HeadTail = PacketBundle.first.split('\n');
StringRef Separator = "\n";
- StringRef Indent = "\t\t";
+ StringRef Indent = "\t";
OS << "\t{\n";
while (!HeadTail.first.empty()) {
StringRef InstTxt;
@@ -164,7 +165,7 @@ public:
}
if (HexagonMCInstrInfo::isMemReorderDisabled(Inst))
- OS << "\n\t}:mem_noshuf" << PacketBundle.second;
+ OS << "\n\t} :mem_noshuf" << PacketBundle.second;
else
OS << "\t}" << PacketBundle.second;
}
@@ -248,10 +249,10 @@ createMCAsmTargetStreamer(MCStreamer &S, formatted_raw_ostream &OS,
static MCStreamer *createMCStreamer(Triple const &T, MCContext &Context,
std::unique_ptr<MCAsmBackend> &&MAB,
- raw_pwrite_stream &OS,
+ std::unique_ptr<MCObjectWriter> &&OW,
std::unique_ptr<MCCodeEmitter> &&Emitter,
bool RelaxAll) {
- return createHexagonELFStreamer(T, Context, std::move(MAB), OS,
+ return createHexagonELFStreamer(T, Context, std::move(MAB), std::move(OW),
std::move(Emitter));
}
@@ -308,6 +309,7 @@ static bool isCPUValid(std::string CPU)
{
std::vector<std::string> table
{
+ "generic",
"hexagonv4",
"hexagonv5",
"hexagonv55",
@@ -342,8 +344,7 @@ FeatureBitset Hexagon_MC::completeHVXFeatures(const FeatureBitset &S) {
break;
}
bool UseHvx = false;
- for (unsigned F : {ExtensionHVX, ExtensionHVX64B, ExtensionHVX128B,
- ExtensionHVXDbl}) {
+ for (unsigned F : {ExtensionHVX, ExtensionHVX64B, ExtensionHVX128B}) {
if (!FB.test(F))
continue;
UseHvx = true;
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
index 05d17c368dcc..6cd1b3a4691f 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
@@ -27,7 +27,7 @@ class MCAsmBackend;
class MCCodeEmitter;
class MCContext;
class MCInstrInfo;
-class MCObjectWriter;
+class MCObjectTargetWriter;
class MCRegisterInfo;
class MCSubtargetInfo;
class MCTargetOptions;
@@ -61,13 +61,12 @@ MCCodeEmitter *createHexagonMCCodeEmitter(const MCInstrInfo &MCII,
MCContext &MCT);
MCAsmBackend *createHexagonAsmBackend(const Target &T,
+ const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
- const Triple &TT, StringRef CPU,
const MCTargetOptions &Options);
-std::unique_ptr<MCObjectWriter>
-createHexagonELFObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI,
- StringRef CPU);
+std::unique_ptr<MCObjectTargetWriter>
+createHexagonELFObjectWriter(uint8_t OSABI, StringRef CPU);
unsigned HexagonGetLastSlot();
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp
index 7709a0f61624..59f3caa6af94 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp
@@ -641,14 +641,14 @@ bool HexagonShuffler::shuffle() {
}
for (iterator ISJ = begin(); ISJ != end(); ++ISJ)
- DEBUG(dbgs().write_hex(ISJ->Core.getUnits()); if (ISJ->CVI.isValid()) {
+ LLVM_DEBUG(dbgs().write_hex(ISJ->Core.getUnits()); if (ISJ->CVI.isValid()) {
dbgs() << '/';
dbgs().write_hex(ISJ->CVI.getUnits()) << '|';
dbgs() << ISJ->CVI.getLanes();
} dbgs() << ':'
<< HexagonMCInstrInfo::getDesc(MCII, ISJ->getDesc()).getOpcode();
- dbgs() << '\n');
- DEBUG(dbgs() << '\n');
+ dbgs() << '\n');
+ LLVM_DEBUG(dbgs() << '\n');
return Ok;
}
diff --git a/lib/Target/Hexagon/RDFCopy.cpp b/lib/Target/Hexagon/RDFCopy.cpp
index f8c766ac972c..4339fa2089d9 100644
--- a/lib/Target/Hexagon/RDFCopy.cpp
+++ b/lib/Target/Hexagon/RDFCopy.cpp
@@ -18,6 +18,7 @@
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
@@ -103,7 +104,7 @@ bool CopyPropagation::run() {
if (trace()) {
dbgs() << "Copies:\n";
- for (auto I : Copies) {
+ for (NodeId I : Copies) {
dbgs() << "Instr: " << *DFG.addr<StmtNode*>(I).Addr->getCode();
dbgs() << " eq: {";
for (auto J : CopyMap[I])
@@ -130,7 +131,7 @@ bool CopyPropagation::run() {
return 0;
};
- for (auto C : Copies) {
+ for (NodeId C : Copies) {
#ifndef NDEBUG
if (HasLimit && CpCount >= CpLimit)
break;
diff --git a/lib/Target/Hexagon/RDFDeadCode.cpp b/lib/Target/Hexagon/RDFDeadCode.cpp
index 240d7c355bc7..da339bfd3ff4 100644
--- a/lib/Target/Hexagon/RDFDeadCode.cpp
+++ b/lib/Target/Hexagon/RDFDeadCode.cpp
@@ -214,7 +214,7 @@ bool DeadCodeElimination::erase(const SetVector<NodeId> &Nodes) {
return false;
return A.Id < B.Id;
};
- std::sort(DRNs.begin(), DRNs.end(), UsesFirst);
+ llvm::sort(DRNs.begin(), DRNs.end(), UsesFirst);
if (trace())
dbgs() << "Removing dead ref nodes:\n";
diff --git a/lib/Target/Hexagon/RDFGraph.cpp b/lib/Target/Hexagon/RDFGraph.cpp
index d1f6e5a4c8ef..3d1ec31dada7 100644
--- a/lib/Target/Hexagon/RDFGraph.cpp
+++ b/lib/Target/Hexagon/RDFGraph.cpp
@@ -893,7 +893,7 @@ void DataFlowGraph::build(unsigned Options) {
NodeAddr<BlockNode*> BA = newBlock(Func, &B);
BlockNodes.insert(std::make_pair(&B, BA));
for (MachineInstr &I : B) {
- if (I.isDebugValue())
+ if (I.isDebugInstr())
continue;
buildStmt(BA, I);
}
@@ -1471,7 +1471,7 @@ void DataFlowGraph::buildPhis(BlockRefsMap &PhiM, RegisterSet &AllRefs,
// and add a def for each S in the closure.
// Sort the refs so that the phis will be created in a deterministic order.
- std::sort(MaxRefs.begin(), MaxRefs.end());
+ llvm::sort(MaxRefs.begin(), MaxRefs.end());
// Remove duplicates.
auto NewEnd = std::unique(MaxRefs.begin(), MaxRefs.end());
MaxRefs.erase(NewEnd, MaxRefs.end());
diff --git a/lib/Target/Hexagon/RDFLiveness.cpp b/lib/Target/Hexagon/RDFLiveness.cpp
index 13d9a1741978..c257d754ddf9 100644
--- a/lib/Target/Hexagon/RDFLiveness.cpp
+++ b/lib/Target/Hexagon/RDFLiveness.cpp
@@ -207,7 +207,7 @@ NodeList Liveness::getAllReachingDefs(RegisterRef RefRR,
};
std::vector<NodeId> Tmp(Owners.begin(), Owners.end());
- std::sort(Tmp.begin(), Tmp.end(), Less);
+ llvm::sort(Tmp.begin(), Tmp.end(), Less);
// The vector is a list of instructions, so that defs coming from
// the same instruction don't need to be artificially ordered.
@@ -628,7 +628,7 @@ void Liveness::computePhiInfo() {
// Collect the set PropUp of uses that are reached by the current
// phi PA, and are not covered by any intervening def between the
- // currently visited use UA and the the upward phi P.
+ // currently visited use UA and the upward phi P.
if (MidDefs.hasCoverOf(UR))
continue;
@@ -813,7 +813,7 @@ void Liveness::computeLiveIns() {
std::vector<RegisterRef> LV;
for (auto I = B.livein_begin(), E = B.livein_end(); I != E; ++I)
LV.push_back(RegisterRef(I->PhysReg, I->LaneMask));
- std::sort(LV.begin(), LV.end());
+ llvm::sort(LV.begin(), LV.end());
dbgs() << printMBBReference(B) << "\t rec = {";
for (auto I : LV)
dbgs() << ' ' << Print<RegisterRef>(I, DFG);
@@ -824,7 +824,7 @@ void Liveness::computeLiveIns() {
const RegisterAggr &LG = LiveMap[&B];
for (auto I = LG.rr_begin(), E = LG.rr_end(); I != E; ++I)
LV.push_back(*I);
- std::sort(LV.begin(), LV.end());
+ llvm::sort(LV.begin(), LV.end());
dbgs() << "\tcomp = {";
for (auto I : LV)
dbgs() << ' ' << Print<RegisterRef>(I, DFG);
@@ -880,7 +880,7 @@ void Liveness::resetKills(MachineBasicBlock *B) {
for (auto I = B->rbegin(), E = B->rend(); I != E; ++I) {
MachineInstr *MI = &*I;
- if (MI->isDebugValue())
+ if (MI->isDebugInstr())
continue;
MI->clearKillInfo();
diff --git a/lib/Target/Hexagon/RDFLiveness.h b/lib/Target/Hexagon/RDFLiveness.h
index 8cfb6a1e9554..eaeb4ea115b3 100644
--- a/lib/Target/Hexagon/RDFLiveness.h
+++ b/lib/Target/Hexagon/RDFLiveness.h
@@ -53,8 +53,8 @@ namespace rdf {
using RefMap = std::map<RegisterId, NodeRefSet>;
Liveness(MachineRegisterInfo &mri, const DataFlowGraph &g)
- : DFG(g), TRI(g.getTRI()), PRI(g.getPRI()), MDT(g.getDT()),
- MDF(g.getDF()), LiveMap(g.getPRI()), NoRegs(g.getPRI()) {}
+ : DFG(g), TRI(g.getTRI()), PRI(g.getPRI()), MDT(g.getDT()),
+ MDF(g.getDF()), LiveMap(g.getPRI()), Empty(), NoRegs(g.getPRI()) {}
NodeList getAllReachingDefs(RegisterRef RefRR, NodeAddr<RefNode*> RefA,
bool TopShadows, bool FullChain, const RegisterAggr &DefRRs);
diff --git a/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp b/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp
index a330f27ed300..78e2f2b2ddb3 100644
--- a/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp
+++ b/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp
@@ -18,6 +18,6 @@ Target &llvm::getTheHexagonTarget() {
}
extern "C" void LLVMInitializeHexagonTargetInfo() {
- RegisterTarget<Triple::hexagon, /*HasJIT=*/false> X(
+ RegisterTarget<Triple::hexagon, /*HasJIT=*/true> X(
getTheHexagonTarget(), "hexagon", "Hexagon", "Hexagon");
}