author     Dimitry Andric <dim@FreeBSD.org>    2018-07-28 10:51:19 +0000
committer  Dimitry Andric <dim@FreeBSD.org>    2018-07-28 10:51:19 +0000
commit     eb11fae6d08f479c0799db45860a98af528fa6e7 (patch)
tree       44d492a50c8c1a7eb8e2d17ea3360ec4d066f042 /lib/Target/Hexagon
parent     b8a2042aa938069e862750553db0e4d82d25822c (diff)
Diffstat (limited to 'lib/Target/Hexagon')
93 files changed, 7457 insertions(+), 5352 deletions(-)
diff --git a/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp b/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp index 387296c69c39..92bda224f3dc 100644 --- a/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp +++ b/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp @@ -118,7 +118,6 @@ class HexagonAsmParser : public MCTargetAsmParser { bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; bool ParseDirectiveSubsection(SMLoc L); - bool ParseDirectiveValue(unsigned Size, SMLoc L); bool ParseDirectiveComm(bool IsLocal, SMLoc L); bool RegisterMatchesArch(unsigned MatchNum) const; @@ -165,6 +164,10 @@ public: MCB.setOpcode(Hexagon::BUNDLE); setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits())); + Parser.addAliasForDirective(".half", ".2byte"); + Parser.addAliasForDirective(".hword", ".2byte"); + Parser.addAliasForDirective(".word", ".4byte"); + MCAsmParserExtension::Initialize(_Parser); } @@ -462,9 +465,9 @@ void HexagonOperand::print(raw_ostream &OS) const { } bool HexagonAsmParser::finishBundle(SMLoc IDLoc, MCStreamer &Out) { - DEBUG(dbgs() << "Bundle:"); - DEBUG(MCB.dump_pretty(dbgs())); - DEBUG(dbgs() << "--\n"); + LLVM_DEBUG(dbgs() << "Bundle:"); + LLVM_DEBUG(MCB.dump_pretty(dbgs())); + LLVM_DEBUG(dbgs() << "--\n"); MCB.setLoc(IDLoc); // Check the bundle for errors. @@ -506,16 +509,19 @@ bool HexagonAsmParser::matchBundleOptions() { "supported with this architecture"; StringRef Option = Parser.getTok().getString(); auto IDLoc = Parser.getTok().getLoc(); - if (Option.compare_lower("endloop0") == 0) + if (Option.compare_lower("endloop01") == 0) { + HexagonMCInstrInfo::setInnerLoop(MCB); + HexagonMCInstrInfo::setOuterLoop(MCB); + } else if (Option.compare_lower("endloop0") == 0) { HexagonMCInstrInfo::setInnerLoop(MCB); - else if (Option.compare_lower("endloop1") == 0) + } else if (Option.compare_lower("endloop1") == 0) { HexagonMCInstrInfo::setOuterLoop(MCB); - else if (Option.compare_lower("mem_noshuf") == 0) + } else if (Option.compare_lower("mem_noshuf") == 0) { if (getSTI().getFeatureBits()[Hexagon::FeatureMemNoShuf]) HexagonMCInstrInfo::setMemReorderDisabled(MCB); else return getParser().Error(IDLoc, MemNoShuffMsg); - else + } else return getParser().Error(IDLoc, llvm::Twine("'") + Option + "' is not a valid bundle option"); Lex(); @@ -554,9 +560,9 @@ bool HexagonAsmParser::matchOneInstruction(MCInst &MCI, SMLoc IDLoc, canonicalizeImmediates(MCI); result = processInstruction(MCI, InstOperands, IDLoc); - DEBUG(dbgs() << "Insn:"); - DEBUG(MCI.dump_pretty(dbgs())); - DEBUG(dbgs() << "\n\n"); + LLVM_DEBUG(dbgs() << "Insn:"); + LLVM_DEBUG(MCI.dump_pretty(dbgs())); + LLVM_DEBUG(dbgs() << "\n\n"); MCI.setLoc(IDLoc); } @@ -648,11 +654,6 @@ bool HexagonAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, /// ParseDirective parses the Hexagon specific directives bool HexagonAsmParser::ParseDirective(AsmToken DirectiveID) { StringRef IDVal = DirectiveID.getIdentifier(); - if ((IDVal.lower() == ".word") || (IDVal.lower() == ".4byte")) - return ParseDirectiveValue(4, DirectiveID.getLoc()); - if (IDVal.lower() == ".short" || IDVal.lower() == ".hword" || - IDVal.lower() == ".half") - return ParseDirectiveValue(2, DirectiveID.getLoc()); if (IDVal.lower() == ".falign") return ParseDirectiveFalign(256, DirectiveID.getLoc()); if ((IDVal.lower() == ".lcomm") || (IDVal.lower() == ".lcommon")) @@ -720,39 +721,6 @@ bool HexagonAsmParser::ParseDirectiveFalign(unsigned Size, SMLoc L) { return false; } -/// ::= .word [ expression (, expression)* ] -bool 
HexagonAsmParser::ParseDirectiveValue(unsigned Size, SMLoc L) { - if (getLexer().isNot(AsmToken::EndOfStatement)) { - while (true) { - const MCExpr *Value; - SMLoc ExprLoc = L; - if (getParser().parseExpression(Value)) - return true; - - // Special case constant expressions to match code generator. - if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) { - assert(Size <= 8 && "Invalid size"); - uint64_t IntValue = MCE->getValue(); - if (!isUIntN(8 * Size, IntValue) && !isIntN(8 * Size, IntValue)) - return Error(ExprLoc, "literal value out of range for directive"); - getStreamer().EmitIntValue(IntValue, Size); - } else - getStreamer().EmitValue(Value, Size); - - if (getLexer().is(AsmToken::EndOfStatement)) - break; - - // FIXME: Improve diagnostic. - if (getLexer().isNot(AsmToken::Comma)) - return TokError("unexpected token in directive"); - Lex(); - } - } - - Lex(); - return false; -} - // This is largely a copy of AsmParser's ParseDirectiveComm extended to // accept a 3rd argument, AccessAlignment which indicates the smallest // memory access made to the symbol, expressed in bytes. If no @@ -1293,9 +1261,9 @@ unsigned HexagonAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp, return Match_Success; } - DEBUG(dbgs() << "Unmatched Operand:"); - DEBUG(Op->dump()); - DEBUG(dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "Unmatched Operand:"); + LLVM_DEBUG(Op->dump()); + LLVM_DEBUG(dbgs() << "\n"); return Match_InvalidOperand; } @@ -1333,6 +1301,17 @@ int HexagonAsmParser::processInstruction(MCInst &Inst, } break; + case Hexagon::J2_trap1: + if (!getSTI().getFeatureBits()[Hexagon::ArchV65]) { + MCOperand &Rx = Inst.getOperand(0); + MCOperand &Ry = Inst.getOperand(1); + if (Rx.getReg() != Hexagon::R0 || Ry.getReg() != Hexagon::R0) { + Error(IDLoc, "trap1 can only have register r0 as operand"); + return Match_InvalidOperand; + } + } + break; + case Hexagon::A2_iconst: { Inst.setOpcode(Hexagon::A2_addi); MCOperand Reg = Inst.getOperand(0); diff --git a/lib/Target/Hexagon/BitTracker.cpp b/lib/Target/Hexagon/BitTracker.cpp index 15d6a05a0078..69529b0d1162 100644 --- a/lib/Target/Hexagon/BitTracker.cpp +++ b/lib/Target/Hexagon/BitTracker.cpp @@ -779,15 +779,18 @@ bool BT::UseQueueType::Cmp::operator()(const MachineInstr *InstA, return BA->getNumber() > BB->getNumber(); } - MachineBasicBlock::const_iterator ItA = InstA->getIterator(); - MachineBasicBlock::const_iterator ItB = InstB->getIterator(); - MachineBasicBlock::const_iterator End = BA->end(); - while (ItA != End) { - if (ItA == ItB) - return false; // ItA was before ItB. - ++ItA; - } - return true; + auto getDist = [this] (const MachineInstr *MI) { + auto F = Dist.find(MI); + if (F != Dist.end()) + return F->second; + MachineBasicBlock::const_iterator I = MI->getParent()->begin(); + MachineBasicBlock::const_iterator E = MI->getIterator(); + unsigned D = std::distance(I, E); + Dist.insert(std::make_pair(MI, D)); + return D; + }; + + return getDist(InstA) > getDist(InstB); } // Main W-Z implementation. 
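The rewritten comparator above trades a per-comparison walk of the basic block for a lazily filled position cache: the first query for an instruction scans the block once, every later query is a map lookup. A minimal standalone sketch of that idea, with plain ints standing in for MachineInstr and a std::list for the block (all names here are illustrative, not LLVM API):

  #include <cassert>
  #include <list>
  #include <map>

  using Instr = int;
  using Block = std::list<Instr>;

  struct Cmp {
    std::map<const Instr*, unsigned> &Dist; // shared cache: instr -> position
    const Block &B;
    Cmp(std::map<const Instr*, unsigned> &M, const Block &B) : Dist(M), B(B) {}

    unsigned getDist(const Instr *MI) const {
      auto F = Dist.find(MI);
      if (F != Dist.end())
        return F->second;          // repeat queries are a map lookup
      unsigned D = 0;
      for (const Instr &I : B) {   // first query walks the block once
        if (&I == MI)
          break;
        ++D;
      }
      Dist.emplace(MI, D);
      return D;
    }
    // Later position => lower priority, as in the hunk above.
    bool operator()(const Instr *A, const Instr *C) const {
      return getDist(A) > getDist(C);
    }
  };

  int main() {
    Block B{1, 2, 3};
    std::map<const Instr*, unsigned> Dist;
    Cmp C(Dist, B);
    assert(C(&B.back(), &B.front()) && !C(&B.front(), &B.back()));
  }

The header hunk that follows wires the same cache into the priority queue (UseQueueType() : Uses(Dist)) and clears it between runs via reset(), so stale positions never leak across invocations.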
@@ -840,7 +843,7 @@ void BT::visitPHI(const MachineInstr &PI) { void BT::visitNonBranch(const MachineInstr &MI) { if (Trace) dbgs() << "Visit MI(" << printMBBReference(*MI.getParent()) << "): " << MI; - if (MI.isDebugValue()) + if (MI.isDebugInstr()) return; assert(!MI.isBranch() && "Unexpected branch instruction"); @@ -1138,6 +1141,7 @@ void BT::run() { runEdgeQueue(BlockScanned); runUseQueue(); } + UseQ.reset(); if (Trace) print_cells(dbgs() << "Cells after propagation:\n"); diff --git a/lib/Target/Hexagon/BitTracker.h b/lib/Target/Hexagon/BitTracker.h index 5df6b61710f6..058225c0d812 100644 --- a/lib/Target/Hexagon/BitTracker.h +++ b/lib/Target/Hexagon/BitTracker.h @@ -13,6 +13,7 @@ #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineOperand.h" #include <cassert> #include <cstdint> @@ -28,7 +29,6 @@ class ConstantInt; class MachineRegisterInfo; class MachineBasicBlock; class MachineFunction; -class MachineInstr; class raw_ostream; class TargetRegisterClass; class TargetRegisterInfo; @@ -73,6 +73,8 @@ private: // Priority queue of instructions using modified registers, ordered by // their relative position in a basic block. struct UseQueueType { + UseQueueType() : Uses(Dist) {} + unsigned size() const { return Uses.size(); } @@ -90,12 +92,18 @@ private: Set.erase(front()); Uses.pop(); } + void reset() { + Dist.clear(); + } private: struct Cmp { + Cmp(DenseMap<const MachineInstr*,unsigned> &Map) : Dist(Map) {} bool operator()(const MachineInstr *MI, const MachineInstr *MJ) const; + DenseMap<const MachineInstr*,unsigned> &Dist; }; std::priority_queue<MachineInstr*, std::vector<MachineInstr*>, Cmp> Uses; - DenseSet<MachineInstr*> Set; // Set to avoid adding duplicate entries. + DenseSet<const MachineInstr*> Set; // Set to avoid adding duplicate entries. 
+ DenseMap<const MachineInstr*,unsigned> Dist; }; void reset(); diff --git a/lib/Target/Hexagon/CMakeLists.txt b/lib/Target/Hexagon/CMakeLists.txt index 1c36093923ac..a9f606c54eb1 100644 --- a/lib/Target/Hexagon/CMakeLists.txt +++ b/lib/Target/Hexagon/CMakeLists.txt @@ -2,6 +2,7 @@ set(LLVM_TARGET_DEFINITIONS Hexagon.td) tablegen(LLVM HexagonGenAsmMatcher.inc -gen-asm-matcher) tablegen(LLVM HexagonGenAsmWriter.inc -gen-asm-writer) +tablegen(LLVM HexagonGenCallingConv.inc -gen-callingconv) tablegen(LLVM HexagonGenDAGISel.inc -gen-dag-isel) tablegen(LLVM HexagonGenDFAPacketizer.inc -gen-dfa-packetizer) tablegen(LLVM HexagonGenDisassemblerTables.inc -gen-disassembler) @@ -9,6 +10,7 @@ tablegen(LLVM HexagonGenInstrInfo.inc -gen-instr-info) tablegen(LLVM HexagonGenMCCodeEmitter.inc -gen-emitter) tablegen(LLVM HexagonGenRegisterInfo.inc -gen-register-info) tablegen(LLVM HexagonGenSubtargetInfo.inc -gen-subtarget) + add_public_tablegen_target(HexagonCommonTableGen) add_llvm_target(HexagonCodeGen @@ -59,6 +61,7 @@ add_llvm_target(HexagonCodeGen HexagonTargetTransformInfo.cpp HexagonVectorLoopCarriedReuse.cpp HexagonVectorPrint.cpp + HexagonVExtract.cpp HexagonVLIWPacketizer.cpp RDFCopy.cpp RDFDeadCode.cpp @@ -68,7 +71,7 @@ add_llvm_target(HexagonCodeGen ) add_subdirectory(AsmParser) -add_subdirectory(TargetInfo) -add_subdirectory(MCTargetDesc) add_subdirectory(Disassembler) +add_subdirectory(MCTargetDesc) +add_subdirectory(TargetInfo) diff --git a/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp b/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp index 481b692ae8bf..1a619ebda84e 100644 --- a/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp +++ b/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp @@ -40,7 +40,7 @@ using DecodeStatus = MCDisassembler::DecodeStatus; namespace { -/// \brief Hexagon disassembler for all Hexagon platforms. +/// Hexagon disassembler for all Hexagon platforms. 
class HexagonDisassembler : public MCDisassembler { public: std::unique_ptr<MCInstrInfo const> const MCII; @@ -127,12 +127,18 @@ static DecodeStatus DecodeHvxQRRegisterClass(MCInst &Inst, unsigned RegNo, static DecodeStatus DecodeCtrRegsRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeGuestRegsRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder); static DecodeStatus DecodeModRegsRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); static DecodeStatus DecodeCtrRegs64RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeGuestRegs64RegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder); static DecodeStatus unsignedImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address, const void *Decoder); @@ -783,3 +789,55 @@ static DecodeStatus brtargetDecoder(MCInst &MI, unsigned tmp, uint64_t Address, HexagonMCInstrInfo::addConstant(MI, Extended, Disassembler.getContext()); return MCDisassembler::Success; } + +static DecodeStatus DecodeGuestRegsRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t /*Address*/, + const void *Decoder) { + using namespace Hexagon; + + static const MCPhysReg GuestRegDecoderTable[] = { + /* 0 */ GELR, GSR, GOSP, G3, + /* 4 */ G4, G5, G6, G7, + /* 8 */ G8, G9, G10, G11, + /* 12 */ G12, G13, G14, G15, + /* 16 */ GPMUCNT4, GPMUCNT5, GPMUCNT6, GPMUCNT7, + /* 20 */ G20, G21, G22, G23, + /* 24 */ GPCYCLELO, GPCYCLEHI, GPMUCNT0, GPMUCNT1, + /* 28 */ GPMUCNT2, GPMUCNT3, G30, G31 + }; + + if (RegNo >= array_lengthof(GuestRegDecoderTable)) + return MCDisassembler::Fail; + if (GuestRegDecoderTable[RegNo] == Hexagon::NoRegister) + return MCDisassembler::Fail; + + unsigned Register = GuestRegDecoderTable[RegNo]; + Inst.addOperand(MCOperand::createReg(Register)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeGuestRegs64RegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t /*Address*/, + const void *Decoder) { + using namespace Hexagon; + + static const MCPhysReg GuestReg64DecoderTable[] = { + /* 0 */ G1_0, 0, G3_2, 0, + /* 4 */ G5_4, 0, G7_6, 0, + /* 8 */ G9_8, 0, G11_10, 0, + /* 12 */ G13_12, 0, G15_14, 0, + /* 16 */ G17_16, 0, G19_18, 0, + /* 20 */ G21_20, 0, G23_22, 0, + /* 24 */ G25_24, 0, G27_26, 0, + /* 28 */ G29_28, 0, G31_30, 0 + }; + + if (RegNo >= array_lengthof(GuestReg64DecoderTable)) + return MCDisassembler::Fail; + if (GuestReg64DecoderTable[RegNo] == Hexagon::NoRegister) + return MCDisassembler::Fail; + + unsigned Register = GuestReg64DecoderTable[RegNo]; + Inst.addOperand(MCOperand::createReg(Register)); + return MCDisassembler::Success; +} diff --git a/lib/Target/Hexagon/Hexagon.h b/lib/Target/Hexagon/Hexagon.h index 66b387b62c6c..6ec52d18cdc4 100644 --- a/lib/Target/Hexagon/Hexagon.h +++ b/lib/Target/Hexagon/Hexagon.h @@ -49,7 +49,7 @@ namespace llvm { class HexagonTargetMachine; - /// \brief Creates a Hexagon-specific Target Transformation Info pass. + /// Creates a Hexagon-specific Target Transformation Info pass. 
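The guest-register decoders added above follow the usual table-driven pattern: index a fixed table with the encoded register number, and fail both on out-of-range encodings and on holes (entries left as NoRegister/0, as in GuestReg64DecoderTable). A compact sketch of that shape, with made-up register ids rather than the real Hexagon enums:

  #include <cstdint>
  #include <cstdio>

  enum Reg : uint16_t { NoReg = 0, GELR = 1, GSR = 2, GOSP = 3 };

  // 0 marks a reserved encoding, like the 0 entries in the 64-bit table.
  static const Reg Table[] = { GELR, GSR, NoReg, GOSP };

  static bool decode(unsigned RegNo, Reg &Out) {
    if (RegNo >= sizeof(Table) / sizeof(Table[0]))
      return false;                // encoding out of range
    if (Table[RegNo] == NoReg)
      return false;                // hole in the table
    Out = Table[RegNo];
    return true;
  }

  int main() {
    Reg R;
    std::printf("%d %d\n", decode(1, R), decode(2, R)); // prints: 1 0
  }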
ImmutablePass *createHexagonTargetTransformInfoPass(const HexagonTargetMachine *TM); } // end namespace llvm; diff --git a/lib/Target/Hexagon/Hexagon.td b/lib/Target/Hexagon/Hexagon.td index 6292e2a7a4ea..69e263a425f8 100644 --- a/lib/Target/Hexagon/Hexagon.td +++ b/lib/Target/Hexagon/Hexagon.td @@ -36,32 +36,36 @@ def ExtensionHVXV62: SubtargetFeature<"hvxv62", "HexagonHVXVersion", def ExtensionHVXV65: SubtargetFeature<"hvxv65", "HexagonHVXVersion", "Hexagon::ArchEnum::V65", "Hexagon HVX instructions", [ExtensionHVX,ExtensionHVXV60, ExtensionHVXV62]>; -def ExtensionHVX64B - : SubtargetFeature<"hvx-length64b", "UseHVX64BOps", "true", - "Hexagon HVX 64B instructions", [ExtensionHVX]>; -def ExtensionHVX128B - : SubtargetFeature<"hvx-length128b", "UseHVX128BOps", "true", - "Hexagon HVX 128B instructions", [ExtensionHVX]>; - -// This is an alias to ExtensionHVX128B to accept the hvx-double as -// an acceptable subtarget feature. -def ExtensionHVXDbl - : SubtargetFeature<"hvx-double", "UseHVX128BOps", "true", - "Hexagon HVX 128B instructions", [ExtensionHVX128B]>; +def ExtensionHVX64B: SubtargetFeature<"hvx-length64b", "UseHVX64BOps", + "true", "Hexagon HVX 64B instructions", [ExtensionHVX]>; +def ExtensionHVX128B: SubtargetFeature<"hvx-length128b", "UseHVX128BOps", + "true", "Hexagon HVX 128B instructions", [ExtensionHVX]>; + +def FeaturePackets: SubtargetFeature<"packets", "UsePackets", "true", + "Support for instruction packets">; def FeatureLongCalls: SubtargetFeature<"long-calls", "UseLongCalls", "true", "Use constant-extended calls">; def FeatureMemNoShuf: SubtargetFeature<"mem_noshuf", "HasMemNoShuf", "false", "Supports mem_noshuf feature">; -def FeatureDuplex : SubtargetFeature<"duplex", "EnableDuplex", "true", +def FeatureMemops: SubtargetFeature<"memops", "UseMemops", "true", + "Use memop instructions">; +def FeatureNVJ: SubtargetFeature<"nvj", "UseNewValueJumps", "true", + "Support for new-value jumps", [FeaturePackets]>; +def FeatureNVS: SubtargetFeature<"nvs", "UseNewValueStores", "true", + "Support for new-value stores", [FeaturePackets]>; +def FeatureSmallData: SubtargetFeature<"small-data", "UseSmallData", "true", + "Allow GP-relative addressing of global variables">; +def FeatureDuplex: SubtargetFeature<"duplex", "EnableDuplex", "true", "Enable generation of duplex instruction">; +def FeatureReservedR19: SubtargetFeature<"reserved-r19", "ReservedR19", + "true", "Reserve register R19">; //===----------------------------------------------------------------------===// // Hexagon Instruction Predicate Definitions. //===----------------------------------------------------------------------===// -def UseMEMOP : Predicate<"HST->useMemOps()">; -def IEEERndNearV5T : Predicate<"HST->modeIEEERndNear()">; +def UseMEMOPS : Predicate<"HST->useMemops()">; def UseHVX64B : Predicate<"HST->useHVX64BOps()">, AssemblerPredicate<"ExtensionHVX64B">; def UseHVX128B : Predicate<"HST->useHVX128BOps()">, @@ -75,10 +79,8 @@ def UseHVXV62 : Predicate<"HST->useHVXOps()">, def UseHVXV65 : Predicate<"HST->useHVXOps()">, AssemblerPredicate<"ExtensionHVXV65">; -def Hvx64 : HwMode<"+hvx-length64b">; -def Hvx64old : HwMode<"-hvx-double">; -def Hvx128 : HwMode<"+hvx-length128b">; -def Hvx128old : HwMode<"+hvx-double">; +def Hvx64: HwMode<"+hvx-length64b">; +def Hvx128: HwMode<"+hvx-length128b">; //===----------------------------------------------------------------------===// // Classes used for relation maps. 
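The new features above, such as FeatureNVJ and FeatureNVS, list FeaturePackets as an implied feature, so enabling either of them also enables packet support. A toy sketch of that observable behavior (hand-written for illustration; TableGen generates the real wiring from the defs above):

  #include <cstdio>

  struct Features {
    bool UsePackets = false;
    bool UseNewValueJumps = false;
    void enableNVJ() {
      UseNewValueJumps = true;
      UsePackets = true; // implied via [FeaturePackets] in the def above
    }
  };

  int main() {
    Features F;
    F.enableNVJ();
    std::printf("packets=%d nvj=%d\n", F.UsePackets, F.UseNewValueJumps);
  }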
@@ -300,8 +302,10 @@ include "HexagonDepITypes.td" include "HexagonInstrFormats.td" include "HexagonDepInstrFormats.td" include "HexagonDepInstrInfo.td" +include "HexagonCallingConv.td" include "HexagonPseudo.td" include "HexagonPatterns.td" +include "HexagonPatternsHVX.td" include "HexagonPatternsV65.td" include "HexagonDepMappings.td" include "HexagonIntrinsics.td" @@ -318,19 +322,34 @@ class Proc<string Name, SchedMachineModel Model, list<SubtargetFeature> Features> : ProcessorModel<Name, Model, Features>; +def : Proc<"generic", HexagonModelV60, + [ArchV4, ArchV5, ArchV55, ArchV60, + FeatureDuplex, FeatureMemops, FeatureNVJ, FeatureNVS, + FeaturePackets, FeatureSmallData]>; def : Proc<"hexagonv4", HexagonModelV4, - [ArchV4, FeatureDuplex]>; + [ArchV4, + FeatureDuplex, FeatureMemops, FeatureNVJ, FeatureNVS, + FeaturePackets, FeatureSmallData]>; def : Proc<"hexagonv5", HexagonModelV4, - [ArchV4, ArchV5, FeatureDuplex]>; + [ArchV4, ArchV5, + FeatureDuplex, FeatureMemops, FeatureNVJ, FeatureNVS, + FeaturePackets, FeatureSmallData]>; def : Proc<"hexagonv55", HexagonModelV55, - [ArchV4, ArchV5, ArchV55, FeatureDuplex]>; + [ArchV4, ArchV5, ArchV55, + FeatureDuplex, FeatureMemops, FeatureNVJ, FeatureNVS, + FeaturePackets, FeatureSmallData]>; def : Proc<"hexagonv60", HexagonModelV60, - [ArchV4, ArchV5, ArchV55, ArchV60, FeatureDuplex]>; + [ArchV4, ArchV5, ArchV55, ArchV60, + FeatureDuplex, FeatureMemops, FeatureNVJ, FeatureNVS, + FeaturePackets, FeatureSmallData]>; def : Proc<"hexagonv62", HexagonModelV62, - [ArchV4, ArchV5, ArchV55, ArchV60, ArchV62, FeatureDuplex]>; + [ArchV4, ArchV5, ArchV55, ArchV60, ArchV62, + FeatureDuplex, FeatureMemops, FeatureNVJ, FeatureNVS, + FeaturePackets, FeatureSmallData]>; def : Proc<"hexagonv65", HexagonModelV65, [ArchV4, ArchV5, ArchV55, ArchV60, ArchV62, ArchV65, - FeatureMemNoShuf, FeatureDuplex]>; + FeatureDuplex, FeatureMemNoShuf, FeatureMemops, FeatureNVJ, + FeatureNVS, FeaturePackets, FeatureSmallData]>; //===----------------------------------------------------------------------===// // Declare the target which we are implementing @@ -357,4 +376,5 @@ def Hexagon : Target { let AssemblyParsers = [HexagonAsmParser]; let AssemblyParserVariants = [HexagonAsmParserVariant]; let AssemblyWriters = [HexagonAsmWriter]; + let AllowRegisterRenaming = 1; } diff --git a/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/lib/Target/Hexagon/HexagonAsmPrinter.cpp index 68b1fe6bf4b1..0ac83ea7c5fc 100644 --- a/lib/Target/Hexagon/HexagonAsmPrinter.cpp +++ b/lib/Target/Hexagon/HexagonAsmPrinter.cpp @@ -62,10 +62,6 @@ void HexagonLowerToMC(const MCInstrInfo &MCII, const MachineInstr *MI, #define DEBUG_TYPE "asm-printer" -static cl::opt<bool> AlignCalls( - "hexagon-align-calls", cl::Hidden, cl::init(true), - cl::desc("Insert falign after call instruction for Hexagon target")); - // Given a scalar register return its pair. 
inline static unsigned getHexagonRegisterPair(unsigned Reg, const MCRegisterInfo *RI) { @@ -76,16 +72,13 @@ inline static unsigned getHexagonRegisterPair(unsigned Reg, return Pair; } -HexagonAsmPrinter::HexagonAsmPrinter(TargetMachine &TM, - std::unique_ptr<MCStreamer> Streamer) - : AsmPrinter(TM, std::move(Streamer)) {} - void HexagonAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) { const MachineOperand &MO = MI->getOperand(OpNo); switch (MO.getType()) { - default: llvm_unreachable ("<unknown operand type>"); + default: + llvm_unreachable ("<unknown operand type>"); case MachineOperand::MO_Register: O << HexagonInstPrinter::getRegisterName(MO.getReg()); return; @@ -112,8 +105,8 @@ void HexagonAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, // for the case in which the basic block is reachable by a fall through but // through an indirect from a jump table. In this case, the jump table // will contain a label not defined by AsmPrinter. -bool HexagonAsmPrinter:: -isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const { +bool HexagonAsmPrinter::isBlockOnlyReachableByFallthrough( + const MachineBasicBlock *MBB) const { if (MBB->hasAddressTaken()) return false; return AsmPrinter::isBlockOnlyReachableByFallthrough(MBB); @@ -167,7 +160,8 @@ bool HexagonAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, } bool HexagonAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, - unsigned OpNo, unsigned AsmVariant, + unsigned OpNo, + unsigned AsmVariant, const char *ExtraCode, raw_ostream &O) { if (ExtraCode && ExtraCode[0]) @@ -183,10 +177,10 @@ bool HexagonAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, if (Offset.isImm()) { if (Offset.getImm()) - O << " + #" << Offset.getImm(); - } - else + O << "+#" << Offset.getImm(); + } else { llvm_unreachable("Unimplemented"); + } return false; } @@ -285,7 +279,8 @@ void HexagonAsmPrinter::HexagonProcessInstruction(MCInst &Inst, unsigned VectorSize = HRI.getRegSizeInBits(Hexagon::HvxVRRegClass) / 8; switch (Inst.getOpcode()) { - default: return; + default: + return; case Hexagon::A2_iconst: { Inst.setOpcode(Hexagon::A2_addi); @@ -300,30 +295,40 @@ void HexagonAsmPrinter::HexagonProcessInstruction(MCInst &Inst, break; } - case Hexagon::A2_tfrf: + case Hexagon::A2_tfrf: { + const MCConstantExpr *Zero = MCConstantExpr::create(0, OutContext); Inst.setOpcode(Hexagon::A2_paddif); - Inst.addOperand(MCOperand::createExpr(MCConstantExpr::create(0, OutContext))); + Inst.addOperand(MCOperand::createExpr(Zero)); break; + } - case Hexagon::A2_tfrt: + case Hexagon::A2_tfrt: { + const MCConstantExpr *Zero = MCConstantExpr::create(0, OutContext); Inst.setOpcode(Hexagon::A2_paddit); - Inst.addOperand(MCOperand::createExpr(MCConstantExpr::create(0, OutContext))); + Inst.addOperand(MCOperand::createExpr(Zero)); break; + } - case Hexagon::A2_tfrfnew: + case Hexagon::A2_tfrfnew: { + const MCConstantExpr *Zero = MCConstantExpr::create(0, OutContext); Inst.setOpcode(Hexagon::A2_paddifnew); - Inst.addOperand(MCOperand::createExpr(MCConstantExpr::create(0, OutContext))); + Inst.addOperand(MCOperand::createExpr(Zero)); break; + } - case Hexagon::A2_tfrtnew: + case Hexagon::A2_tfrtnew: { + const MCConstantExpr *Zero = MCConstantExpr::create(0, OutContext); Inst.setOpcode(Hexagon::A2_padditnew); - Inst.addOperand(MCOperand::createExpr(MCConstantExpr::create(0, OutContext))); + Inst.addOperand(MCOperand::createExpr(Zero)); break; + } - case Hexagon::A2_zxtb: + case Hexagon::A2_zxtb: { + const 
MCConstantExpr *C255 = MCConstantExpr::create(255, OutContext); Inst.setOpcode(Hexagon::A2_andir); - Inst.addOperand(MCOperand::createExpr(MCConstantExpr::create(255, OutContext))); + Inst.addOperand(MCOperand::createExpr(C255)); break; + } // "$dst = CONST64(#$src1)", case Hexagon::CONST64: @@ -525,10 +530,12 @@ void HexagonAsmPrinter::HexagonProcessInstruction(MCInst &Inst, bool Success = MO.getExpr()->evaluateAsAbsolute(Imm); if (Success && Imm < 0) { const MCExpr *MOne = MCConstantExpr::create(-1, OutContext); - TmpInst.addOperand(MCOperand::createExpr(HexagonMCExpr::create(MOne, OutContext))); + const HexagonMCExpr *E = HexagonMCExpr::create(MOne, OutContext); + TmpInst.addOperand(MCOperand::createExpr(E)); } else { const MCExpr *Zero = MCConstantExpr::create(0, OutContext); - TmpInst.addOperand(MCOperand::createExpr(HexagonMCExpr::create(Zero, OutContext))); + const HexagonMCExpr *E = HexagonMCExpr::create(Zero, OutContext); + TmpInst.addOperand(MCOperand::createExpr(E)); } TmpInst.addOperand(MO); MappedInst = TmpInst; @@ -569,9 +576,9 @@ void HexagonAsmPrinter::HexagonProcessInstruction(MCInst &Inst, MO.setReg(High); // Add a new operand for the second register in the pair. MappedInst.addOperand(MCOperand::createReg(Low)); - MappedInst.setOpcode((Inst.getOpcode() == Hexagon::A2_tfrptnew) - ? Hexagon::C2_ccombinewnewt - : Hexagon::C2_ccombinewnewf); + MappedInst.setOpcode(Inst.getOpcode() == Hexagon::A2_tfrptnew + ? Hexagon::C2_ccombinewnewt + : Hexagon::C2_ccombinewnewf); return; } @@ -615,6 +622,7 @@ void HexagonAsmPrinter::HexagonProcessInstruction(MCInst &Inst, MappedInst = TmpInst; return; } + case Hexagon::V6_vdd0: { MCInst TmpInst; assert (Inst.getOperand(0).isReg() && @@ -627,6 +635,7 @@ void HexagonAsmPrinter::HexagonProcessInstruction(MCInst &Inst, MappedInst = TmpInst; return; } + case Hexagon::V6_vL32Ub_pi: case Hexagon::V6_vL32b_cur_pi: case Hexagon::V6_vL32b_nt_cur_pi: @@ -735,12 +744,10 @@ void HexagonAsmPrinter::HexagonProcessInstruction(MCInst &Inst, case Hexagon::V6_vS32b_srls_pi: MappedInst = ScaleVectorOffset(Inst, 2, VectorSize, OutContext); return; - } } -/// printMachineInstruction -- Print out a single Hexagon MI in Darwin syntax to -/// the current output stream. +/// Print out a single Hexagon MI to the current output stream. 
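The A2_zxtb to A2_andir rewrite just above relies on zero-extending the low byte being the same operation as AND with 255, which is why the printer can substitute the immediate form. A quick exhaustive check of that identity:

  #include <cassert>
  #include <cstdint>

  int main() {
    for (uint32_t r = 0; r <= 0xFFFF; ++r)
      assert(uint32_t(uint8_t(r)) == (r & 255u)); // zxtb(r) == r & #255
  }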
void HexagonAsmPrinter::EmitInstruction(const MachineInstr *MI) { MCInst MCB; MCB.setOpcode(Hexagon::BUNDLE); @@ -748,21 +755,27 @@ void HexagonAsmPrinter::EmitInstruction(const MachineInstr *MI) { const MCInstrInfo &MCII = *Subtarget->getInstrInfo(); if (MI->isBundle()) { + assert(Subtarget->usePackets() && "Support for packets is disabled"); const MachineBasicBlock* MBB = MI->getParent(); MachineBasicBlock::const_instr_iterator MII = MI->getIterator(); for (++MII; MII != MBB->instr_end() && MII->isInsideBundle(); ++MII) - if (!MII->isDebugValue() && !MII->isImplicitDef()) + if (!MII->isDebugInstr() && !MII->isImplicitDef()) HexagonLowerToMC(MCII, &*MII, MCB, *this); - } - else + } else { HexagonLowerToMC(MCII, MI, MCB, *this); + } + + const MachineFunction &MF = *MI->getParent()->getParent(); + const auto &HII = *MF.getSubtarget<HexagonSubtarget>().getInstrInfo(); + if (MI->isBundle() && HII.getBundleNoShuf(*MI)) + HexagonMCInstrInfo::setMemReorderDisabled(MCB); - bool Ok = HexagonMCInstrInfo::canonicalizePacket( - MCII, *Subtarget, OutStreamer->getContext(), MCB, nullptr); - assert(Ok); - (void)Ok; - if(HexagonMCInstrInfo::bundleSize(MCB) == 0) + MCContext &Ctx = OutStreamer->getContext(); + bool Ok = HexagonMCInstrInfo::canonicalizePacket(MCII, *Subtarget, Ctx, + MCB, nullptr); + assert(Ok); (void)Ok; + if (HexagonMCInstrInfo::bundleSize(MCB) == 0) return; OutStreamer->EmitInstruction(MCB, getSubtargetInfo()); } diff --git a/lib/Target/Hexagon/HexagonAsmPrinter.h b/lib/Target/Hexagon/HexagonAsmPrinter.h index 4b8865672cf4..d0629d173a65 100755 --- a/lib/Target/Hexagon/HexagonAsmPrinter.h +++ b/lib/Target/Hexagon/HexagonAsmPrinter.h @@ -18,7 +18,8 @@ #include "HexagonSubtarget.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineFunction.h" -#include <memory> +#include "llvm/MC/MCStreamer.h" +#include <utility> namespace llvm { @@ -32,7 +33,8 @@ class TargetMachine; public: explicit HexagonAsmPrinter(TargetMachine &TM, - std::unique_ptr<MCStreamer> Streamer); + std::unique_ptr<MCStreamer> Streamer) + : AsmPrinter(TM, std::move(Streamer)) {} bool runOnMachineFunction(MachineFunction &Fn) override { Subtarget = &Fn.getSubtarget<HexagonSubtarget>(); @@ -43,13 +45,11 @@ class TargetMachine; return "Hexagon Assembly Printer"; } - bool isBlockOnlyReachableByFallthrough( - const MachineBasicBlock *MBB) const override; + bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) + const override; void EmitInstruction(const MachineInstr *MI) override; - - void HexagonProcessInstruction(MCInst &Inst, - const MachineInstr &MBB); + void HexagonProcessInstruction(MCInst &Inst, const MachineInstr &MBB); void printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O); bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, @@ -58,8 +58,6 @@ class TargetMachine; bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, const char *ExtraCode, raw_ostream &OS) override; - - static const char *getRegisterName(unsigned RegNo); }; } // end namespace llvm diff --git a/lib/Target/Hexagon/HexagonBitSimplify.cpp b/lib/Target/Hexagon/HexagonBitSimplify.cpp index 9e73766b6fdc..4791b067aa8d 100644 --- a/lib/Target/Hexagon/HexagonBitSimplify.cpp +++ b/lib/Target/Hexagon/HexagonBitSimplify.cpp @@ -184,9 +184,7 @@ namespace { public: static char ID; - HexagonBitSimplify() : MachineFunctionPass(ID) { - initializeHexagonBitSimplifyPass(*PassRegistry::getPassRegistry()); - } + HexagonBitSimplify() : MachineFunctionPass(ID) {} StringRef getPassName() 
const override { return "Hexagon bit simplification"; @@ -257,10 +255,10 @@ namespace { char HexagonBitSimplify::ID = 0; -INITIALIZE_PASS_BEGIN(HexagonBitSimplify, "hexbit", +INITIALIZE_PASS_BEGIN(HexagonBitSimplify, "hexagon-bit-simplify", "Hexagon bit simplification", false, false) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) -INITIALIZE_PASS_END(HexagonBitSimplify, "hexbit", +INITIALIZE_PASS_END(HexagonBitSimplify, "hexagon-bit-simplify", "Hexagon bit simplification", false, false) bool HexagonBitSimplify::visitBlock(MachineBasicBlock &B, Transformation &T, @@ -622,7 +620,7 @@ bool HexagonBitSimplify::getUsedBitsInStore(unsigned Opc, BitVector &Bits, // operand may be a subregister of a larger register, while Bits would // correspond to the larger register in its entirety. Because of that, // the parameter Begin can be used to indicate which bit of Bits should be -// considered the LSB of of the operand. +// considered the LSB of the operand. bool HexagonBitSimplify::getUsedBits(unsigned Opc, unsigned OpN, BitVector &Bits, uint16_t Begin, const HexagonInstrInfo &HII) { using namespace Hexagon; @@ -2452,7 +2450,7 @@ bool BitSimplification::simplifyExtractLow(MachineInstr *MI, if (Len == RW) return false; - DEBUG({ + LLVM_DEBUG({ dbgs() << __func__ << " on reg: " << printReg(RD.Reg, &HRI, RD.Sub) << ", MI: " << *MI; dbgs() << "Cell: " << RC << '\n'; @@ -2646,7 +2644,7 @@ bool HexagonBitSimplify::runOnMachineFunction(MachineFunction &MF) { const HexagonEvaluator HE(HRI, MRI, HII, MF); BitTracker BT(HE, MF); - DEBUG(BT.trace(true)); + LLVM_DEBUG(BT.trace(true)); BT.run(); MachineBasicBlock &Entry = MF.front(); @@ -2977,7 +2975,8 @@ void HexagonLoopRescheduling::moveGroup(InstrGroup &G, MachineBasicBlock &LB, } bool HexagonLoopRescheduling::processLoop(LoopCand &C) { - DEBUG(dbgs() << "Processing loop in " << printMBBReference(*C.LB) << "\n"); + LLVM_DEBUG(dbgs() << "Processing loop in " << printMBBReference(*C.LB) + << "\n"); std::vector<PhiInfo> Phis; for (auto &I : *C.LB) { if (!I.isPHI()) @@ -3001,7 +3000,7 @@ bool HexagonLoopRescheduling::processLoop(LoopCand &C) { Phis.push_back(PhiInfo(I, *C.LB)); } - DEBUG({ + LLVM_DEBUG({ dbgs() << "Phis: {"; for (auto &I : Phis) { dbgs() << ' ' << printReg(I.DefR, HRI) << "=phi(" @@ -3122,7 +3121,7 @@ bool HexagonLoopRescheduling::processLoop(LoopCand &C) { Groups.push_back(G); } - DEBUG({ + LLVM_DEBUG({ for (unsigned i = 0, n = Groups.size(); i < n; ++i) { InstrGroup &G = Groups[i]; dbgs() << "Group[" << i << "] inp: " @@ -3190,7 +3189,7 @@ bool HexagonLoopRescheduling::runOnMachineFunction(MachineFunction &MF) { MRI = &MF.getRegInfo(); const HexagonEvaluator HE(*HRI, *MRI, *HII, MF); BitTracker BT(HE, MF); - DEBUG(BT.trace(true)); + LLVM_DEBUG(BT.trace(true)); BT.run(); BTP = &BT; diff --git a/lib/Target/Hexagon/HexagonBitTracker.cpp b/lib/Target/Hexagon/HexagonBitTracker.cpp index b6e220beb0c6..e13cfd3f655a 100644 --- a/lib/Target/Hexagon/HexagonBitTracker.cpp +++ b/lib/Target/Hexagon/HexagonBitTracker.cpp @@ -325,7 +325,7 @@ bool HexagonEvaluator::evaluate(const MachineInstr &MI, int FI = op(1).getIndex(); int Off = op(2).getImm(); unsigned A = MFI.getObjectAlignment(FI) + std::abs(Off); - unsigned L = Log2_32(A); + unsigned L = countTrailingZeros(A); RegisterCell RC = RegisterCell::self(Reg[0].Reg, W0); RC.fill(0, L, BT::BitValue::Zero); return rr0(RC, Outputs); diff --git a/lib/Target/Hexagon/HexagonBlockRanges.cpp b/lib/Target/Hexagon/HexagonBlockRanges.cpp index ff915ca59dae..48a4505458ae 100644 --- 
a/lib/Target/Hexagon/HexagonBlockRanges.cpp +++ b/lib/Target/Hexagon/HexagonBlockRanges.cpp @@ -85,7 +85,7 @@ void HexagonBlockRanges::RangeList::unionize(bool MergeAdjacent) { if (empty()) return; - std::sort(begin(), end()); + llvm::sort(begin(), end()); iterator Iter = begin(); while (Iter != end()-1) { @@ -160,7 +160,7 @@ HexagonBlockRanges::InstrIndexMap::InstrIndexMap(MachineBasicBlock &B) IndexType Idx = IndexType::First; First = Idx; for (auto &In : B) { - if (In.isDebugValue()) + if (In.isDebugInstr()) continue; assert(getIndex(&In) == IndexType::None && "Instruction already in map"); Map.insert(std::make_pair(Idx, &In)); @@ -314,7 +314,7 @@ void HexagonBlockRanges::computeInitialLiveRanges(InstrIndexMap &IndexMap, RegisterSet Defs, Clobbers; for (auto &In : B) { - if (In.isDebugValue()) + if (In.isDebugInstr()) continue; IndexType Index = IndexMap.getIndex(&In); // Process uses first. @@ -422,10 +422,10 @@ void HexagonBlockRanges::computeInitialLiveRanges(InstrIndexMap &IndexMap, HexagonBlockRanges::RegToRangeMap HexagonBlockRanges::computeLiveMap( InstrIndexMap &IndexMap) { RegToRangeMap LiveMap; - DEBUG(dbgs() << __func__ << ": index map\n" << IndexMap << '\n'); + LLVM_DEBUG(dbgs() << __func__ << ": index map\n" << IndexMap << '\n'); computeInitialLiveRanges(IndexMap, LiveMap); - DEBUG(dbgs() << __func__ << ": live map\n" - << PrintRangeMap(LiveMap, TRI) << '\n'); + LLVM_DEBUG(dbgs() << __func__ << ": live map\n" + << PrintRangeMap(LiveMap, TRI) << '\n'); return LiveMap; } @@ -486,8 +486,8 @@ HexagonBlockRanges::RegToRangeMap HexagonBlockRanges::computeDeadMap( if (TargetRegisterInfo::isVirtualRegister(P.first.Reg)) addDeadRanges(P.first); - DEBUG(dbgs() << __func__ << ": dead map\n" - << PrintRangeMap(DeadMap, TRI) << '\n'); + LLVM_DEBUG(dbgs() << __func__ << ": dead map\n" + << PrintRangeMap(DeadMap, TRI) << '\n'); return DeadMap; } diff --git a/lib/Target/Hexagon/HexagonBranchRelaxation.cpp b/lib/Target/Hexagon/HexagonBranchRelaxation.cpp index 84af4b14b9f7..2fa7888dd02b 100644 --- a/lib/Target/Hexagon/HexagonBranchRelaxation.cpp +++ b/lib/Target/Hexagon/HexagonBranchRelaxation.cpp @@ -90,7 +90,7 @@ FunctionPass *llvm::createHexagonBranchRelaxation() { } bool HexagonBranchRelaxation::runOnMachineFunction(MachineFunction &MF) { - DEBUG(dbgs() << "****** Hexagon Branch Relaxation ******\n"); + LLVM_DEBUG(dbgs() << "****** Hexagon Branch Relaxation ******\n"); auto &HST = MF.getSubtarget<HexagonSubtarget>(); HII = HST.getInstrInfo(); @@ -114,8 +114,12 @@ void HexagonBranchRelaxation::computeOffset(MachineFunction &MF, InstOffset = (InstOffset + ByteAlign) & ~(ByteAlign); } OffsetMap[&B] = InstOffset; - for (auto &MI : B.instrs()) + for (auto &MI : B.instrs()) { InstOffset += HII->getSize(MI); + // Assume that all extendable branches will be extended. + if (MI.isBranch() && HII->isExtendable(MI)) + InstOffset += HEXAGON_INSTR_SIZE; + } } } @@ -145,6 +149,9 @@ bool HexagonBranchRelaxation::isJumpOutOfRange(MachineInstr &MI, if (FirstTerm == B.instr_end()) return false; + if (HII->isExtended(MI)) + return false; + unsigned InstOffset = BlockToInstOffset[&B]; unsigned Distance = 0; @@ -193,14 +200,14 @@ bool HexagonBranchRelaxation::reGenerateBranch(MachineFunction &MF, for (auto &MI : B) { if (!MI.isBranch() || !isJumpOutOfRange(MI, BlockToInstOffset)) continue; - DEBUG(dbgs() << "Long distance jump. isExtendable(" - << HII->isExtendable(MI) << ") isConstExtended(" - << HII->isConstExtended(MI) << ") " << MI); + LLVM_DEBUG(dbgs() << "Long distance jump. 
isExtendable(" + << HII->isExtendable(MI) << ") isConstExtended(" + << HII->isConstExtended(MI) << ") " << MI); // Since we have not merged HW loops relaxation into // this code (yet), soften our approach for the moment. if (!HII->isExtendable(MI) && !HII->isExtended(MI)) { - DEBUG(dbgs() << "\tUnderimplemented relax branch instruction.\n"); + LLVM_DEBUG(dbgs() << "\tUnderimplemented relax branch instruction.\n"); } else { // Find which operand is expandable. int ExtOpNum = HII->getCExtOpNum(MI); diff --git a/lib/Target/Hexagon/HexagonCallingConv.td b/lib/Target/Hexagon/HexagonCallingConv.td new file mode 100644 index 000000000000..ed2f87570d6b --- /dev/null +++ b/lib/Target/Hexagon/HexagonCallingConv.td @@ -0,0 +1,134 @@ +//===- HexagonCallingConv.td ----------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +class CCIfArgIsVarArg<CCAction A> + : CCIf<"State.isVarArg() && " + "ValNo >= static_cast<HexagonCCState&>(State)" + ".getNumNamedVarArgParams()", A>; + +def CC_HexagonStack: CallingConv<[ + CCIfType<[i32,v2i16,v4i8], + CCAssignToStack<4,4>>, + CCIfType<[i64,v2i32,v4i16,v8i8], + CCAssignToStack<8,8>> +]>; + +def CC_Hexagon: CallingConv<[ + CCIfType<[i1,i8,i16], + CCPromoteToType<i32>>, + CCIfType<[f32], + CCBitConvertToType<i32>>, + CCIfType<[f64], + CCBitConvertToType<i64>>, + + CCIfByVal< + CCPassByVal<8,8>>, + CCIfArgIsVarArg< + CCDelegateTo<CC_HexagonStack>>, + + // Pass split values in pairs, allocate odd register if necessary. + CCIfType<[i32], + CCIfSplit< + CCCustom<"CC_SkipOdd">>>, + + CCIfType<[i32,v2i16,v4i8], + CCAssignToReg<[R0,R1,R2,R3,R4,R5]>>, + // Make sure to allocate any skipped 32-bit register, so it does not get + // allocated to a subsequent 32-bit value. + CCIfType<[i64,v2i32,v4i16,v8i8], + CCCustom<"CC_SkipOdd">>, + CCIfType<[i64,v2i32,v4i16,v8i8], + CCAssignToReg<[D0,D1,D2]>>, + + CCDelegateTo<CC_HexagonStack> +]>; + +def RetCC_Hexagon: CallingConv<[ + CCIfType<[i1,i8,i16], + CCPromoteToType<i32>>, + CCIfType<[f32], + CCBitConvertToType<i32>>, + CCIfType<[f64], + CCBitConvertToType<i64>>, + + // Small structures are returned in a pair of registers, (which is + // always r1:0). In such case, what is returned are two i32 values + // without any additional information (in ArgFlags) stating that + // they are parts of a structure. Because of that there is no way + // to differentiate that situation from an attempt to return two + // values, so always assign R0 and R1. 
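// As an illustration (not from the commit itself): under the rule below,
// a function returning struct { int x; int y; } comes back as two split
// i32 parts, x in R0 and y in R1 (the r1:0 pair), indistinguishable at
// this level from a function returning two unrelated ints.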
+ CCIfSplit< + CCAssignToReg<[R0,R1]>>, + CCIfType<[i32,v2i16,v4i8], + CCAssignToReg<[R0,R1]>>, + CCIfType<[i64,v2i32,v4i16,v8i8], + CCAssignToReg<[D0]>> +]>; + + +class CCIfHvx64<CCAction A> + : CCIf<"State.getMachineFunction().getSubtarget<HexagonSubtarget>()" + ".useHVX64BOps()", A>; + +class CCIfHvx128<CCAction A> + : CCIf<"State.getMachineFunction().getSubtarget<HexagonSubtarget>()" + ".useHVX128BOps()", A>; + +def CC_Hexagon_HVX: CallingConv<[ + // HVX 64-byte mode + CCIfHvx64< + CCIfType<[v16i32,v32i16,v64i8], + CCAssignToReg<[V0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15]>>>, + CCIfHvx64< + CCIfType<[v32i32,v64i16,v128i8], + CCAssignToReg<[W0,W1,W2,W3,W4,W5,W6,W7]>>>, + CCIfHvx64< + CCIfType<[v16i32,v32i16,v64i8], + CCAssignToStack<64,64>>>, + CCIfHvx64< + CCIfType<[v32i32,v64i16,v128i8], + CCAssignToStack<128,64>>>, + + // HVX 128-byte mode + CCIfHvx128< + CCIfType<[v32i32,v64i16,v128i8], + CCAssignToReg<[V0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15]>>>, + CCIfHvx128< + CCIfType<[v64i32,v128i16,v256i8], + CCAssignToReg<[W0,W1,W2,W3,W4,W5,W6,W7]>>>, + CCIfHvx128< + CCIfType<[v32i32,v64i16,v128i8], + CCAssignToStack<128,128>>>, + CCIfHvx128< + CCIfType<[v64i32,v128i16,v256i8], + CCAssignToStack<256,128>>>, + + CCDelegateTo<CC_Hexagon> +]>; + +def RetCC_Hexagon_HVX: CallingConv<[ + // HVX 64-byte mode + CCIfHvx64< + CCIfType<[v16i32,v32i16,v64i8], + CCAssignToReg<[V0]>>>, + CCIfHvx64< + CCIfType<[v32i32,v64i16,v128i8], + CCAssignToReg<[W0]>>>, + + // HVX 128-byte mode + CCIfHvx128< + CCIfType<[v32i32,v64i16,v128i8], + CCAssignToReg<[V0]>>>, + CCIfHvx128< + CCIfType<[v64i32,v128i16,v256i8], + CCAssignToReg<[W0]>>>, + + CCDelegateTo<RetCC_Hexagon> +]>; + diff --git a/lib/Target/Hexagon/HexagonCommonGEP.cpp b/lib/Target/Hexagon/HexagonCommonGEP.cpp index 7e3d049d337f..f315e24eba62 100644 --- a/lib/Target/Hexagon/HexagonCommonGEP.cpp +++ b/lib/Target/Hexagon/HexagonCommonGEP.cpp @@ -16,6 +16,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/PostDominators.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" @@ -36,7 +37,6 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/Utils/Local.h" #include <algorithm> #include <cassert> #include <cstddef> @@ -342,7 +342,7 @@ bool HexagonCommonGEP::isHandledGepForm(GetElementPtrInst *GepI) { void HexagonCommonGEP::processGepInst(GetElementPtrInst *GepI, ValueToNodeMap &NM) { - DEBUG(dbgs() << "Visiting GEP: " << *GepI << '\n'); + LLVM_DEBUG(dbgs() << "Visiting GEP: " << *GepI << '\n'); GepNode *N = new (*Mem) GepNode; Value *PtrOp = GepI->getPointerOperand(); uint32_t InBounds = GepI->isInBounds() ? 
GepNode::InBounds : 0; @@ -426,7 +426,7 @@ void HexagonCommonGEP::collect() { } } - DEBUG(dbgs() << "Gep nodes after initial collection:\n" << Nodes); + LLVM_DEBUG(dbgs() << "Gep nodes after initial collection:\n" << Nodes); } static void invert_find_roots(const NodeVect &Nodes, NodeChildrenMap &NCM, @@ -575,7 +575,7 @@ void HexagonCommonGEP::common() { } } - DEBUG({ + LLVM_DEBUG({ dbgs() << "Gep node equality:\n"; for (NodePairSet::iterator I = Eq.begin(), E = Eq.end(); I != E; ++I) dbgs() << "{ " << I->first << ", " << I->second << " }\n"; @@ -642,7 +642,7 @@ void HexagonCommonGEP::common() { N->Parent = Rep; } - DEBUG(dbgs() << "Gep nodes after commoning:\n" << Nodes); + LLVM_DEBUG(dbgs() << "Gep nodes after commoning:\n" << Nodes); // Finally, erase the nodes that are no longer used. NodeSet Erase; @@ -662,35 +662,35 @@ void HexagonCommonGEP::common() { NodeVect::iterator NewE = remove_if(Nodes, in_set(Erase)); Nodes.resize(std::distance(Nodes.begin(), NewE)); - DEBUG(dbgs() << "Gep nodes after post-commoning cleanup:\n" << Nodes); + LLVM_DEBUG(dbgs() << "Gep nodes after post-commoning cleanup:\n" << Nodes); } template <typename T> static BasicBlock *nearest_common_dominator(DominatorTree *DT, T &Blocks) { - DEBUG({ - dbgs() << "NCD of {"; - for (typename T::iterator I = Blocks.begin(), E = Blocks.end(); - I != E; ++I) { - if (!*I) - continue; - BasicBlock *B = cast<BasicBlock>(*I); - dbgs() << ' ' << B->getName(); - } - dbgs() << " }\n"; - }); + LLVM_DEBUG({ + dbgs() << "NCD of {"; + for (typename T::iterator I = Blocks.begin(), E = Blocks.end(); I != E; + ++I) { + if (!*I) + continue; + BasicBlock *B = cast<BasicBlock>(*I); + dbgs() << ' ' << B->getName(); + } + dbgs() << " }\n"; + }); - // Allow null basic blocks in Blocks. In such cases, return nullptr. - typename T::iterator I = Blocks.begin(), E = Blocks.end(); - if (I == E || !*I) + // Allow null basic blocks in Blocks. In such cases, return nullptr. + typename T::iterator I = Blocks.begin(), E = Blocks.end(); + if (I == E || !*I) + return nullptr; + BasicBlock *Dom = cast<BasicBlock>(*I); + while (++I != E) { + BasicBlock *B = cast_or_null<BasicBlock>(*I); + Dom = B ? DT->findNearestCommonDominator(Dom, B) : nullptr; + if (!Dom) return nullptr; - BasicBlock *Dom = cast<BasicBlock>(*I); - while (++I != E) { - BasicBlock *B = cast_or_null<BasicBlock>(*I); - Dom = B ? DT->findNearestCommonDominator(Dom, B) : nullptr; - if (!Dom) - return nullptr; } - DEBUG(dbgs() << "computed:" << Dom->getName() << '\n'); + LLVM_DEBUG(dbgs() << "computed:" << Dom->getName() << '\n'); return Dom; } @@ -753,7 +753,7 @@ static bool is_empty(const BasicBlock *B) { BasicBlock *HexagonCommonGEP::recalculatePlacement(GepNode *Node, NodeChildrenMap &NCM, NodeToValueMap &Loc) { - DEBUG(dbgs() << "Loc for node:" << Node << '\n'); + LLVM_DEBUG(dbgs() << "Loc for node:" << Node << '\n'); // Recalculate the placement for Node, assuming that the locations of // its children in Loc are valid. // Return nullptr if there is no valid placement for Node (for example, it @@ -820,7 +820,7 @@ BasicBlock *HexagonCommonGEP::recalculatePlacement(GepNode *Node, BasicBlock *HexagonCommonGEP::recalculatePlacementRec(GepNode *Node, NodeChildrenMap &NCM, NodeToValueMap &Loc) { - DEBUG(dbgs() << "LocRec begin for node:" << Node << '\n'); + LLVM_DEBUG(dbgs() << "LocRec begin for node:" << Node << '\n'); // Recalculate the placement of Node, after recursively recalculating the // placements of all its children. 
NodeChildrenMap::iterator CF = NCM.find(Node); @@ -830,7 +830,7 @@ BasicBlock *HexagonCommonGEP::recalculatePlacementRec(GepNode *Node, recalculatePlacementRec(*I, NCM, Loc); } BasicBlock *LB = recalculatePlacement(Node, NCM, Loc); - DEBUG(dbgs() << "LocRec end for node:" << Node << '\n'); + LLVM_DEBUG(dbgs() << "LocRec end for node:" << Node << '\n'); return LB; } @@ -952,8 +952,8 @@ namespace { void HexagonCommonGEP::separateChainForNode(GepNode *Node, Use *U, NodeToValueMap &Loc) { User *R = U->getUser(); - DEBUG(dbgs() << "Separating chain for node (" << Node << ") user: " - << *R << '\n'); + LLVM_DEBUG(dbgs() << "Separating chain for node (" << Node << ") user: " << *R + << '\n'); BasicBlock *PB = cast<Instruction>(R)->getParent(); GepNode *N = Node; @@ -996,7 +996,7 @@ void HexagonCommonGEP::separateChainForNode(GepNode *Node, Use *U, // Should at least have U in NewUs. NewNode->Flags |= GepNode::Used; - DEBUG(dbgs() << "new node: " << NewNode << " " << *NewNode << '\n'); + LLVM_DEBUG(dbgs() << "new node: " << NewNode << " " << *NewNode << '\n'); assert(!NewUs.empty()); Uses[NewNode] = NewUs; } @@ -1007,7 +1007,7 @@ void HexagonCommonGEP::separateConstantChains(GepNode *Node, NodeSet Ns; nodes_for_root(Node, NCM, Ns); - DEBUG(dbgs() << "Separating constant chains for node: " << Node << '\n'); + LLVM_DEBUG(dbgs() << "Separating constant chains for node: " << Node << '\n'); // Collect all used nodes together with the uses from loads and stores, // where the GEP node could be folded into the load/store instruction. NodeToUsesMap FNs; // Foldable nodes. @@ -1044,7 +1044,7 @@ void HexagonCommonGEP::separateConstantChains(GepNode *Node, FNs.insert(std::make_pair(N, LSs)); } - DEBUG(dbgs() << "Nodes with foldable users:\n" << FNs); + LLVM_DEBUG(dbgs() << "Nodes with foldable users:\n" << FNs); for (NodeToUsesMap::iterator I = FNs.begin(), E = FNs.end(); I != E; ++I) { GepNode *N = I->first; @@ -1066,32 +1066,33 @@ void HexagonCommonGEP::computeNodePlacement(NodeToValueMap &Loc) { for (NodeVect::iterator I = Roots.begin(), E = Roots.end(); I != E; ++I) recalculatePlacementRec(*I, NCM, Loc); - DEBUG(dbgs() << "Initial node placement:\n" << LocationAsBlock(Loc)); + LLVM_DEBUG(dbgs() << "Initial node placement:\n" << LocationAsBlock(Loc)); if (OptEnableInv) { for (NodeVect::iterator I = Roots.begin(), E = Roots.end(); I != E; ++I) adjustForInvariance(*I, NCM, Loc); - DEBUG(dbgs() << "Node placement after adjustment for invariance:\n" - << LocationAsBlock(Loc)); + LLVM_DEBUG(dbgs() << "Node placement after adjustment for invariance:\n" + << LocationAsBlock(Loc)); } if (OptEnableConst) { for (NodeVect::iterator I = Roots.begin(), E = Roots.end(); I != E; ++I) separateConstantChains(*I, NCM, Loc); } - DEBUG(dbgs() << "Node use information:\n" << Uses); + LLVM_DEBUG(dbgs() << "Node use information:\n" << Uses); // At the moment, there is no further refinement of the initial placement. // Such a refinement could include splitting the nodes if they are placed // too far from some of its users. 
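The placement logic above repeatedly collapses a set of candidate blocks to their nearest common dominator, giving up as soon as any block is null. A generic sketch of that fold (DomTreeT stands in for llvm::DominatorTree; findNearestCommonDominator is the actual LLVM call used in the hunk above):

  #include <vector>

  template <typename DomTreeT, typename BlockT>
  BlockT *nearestCommonDom(DomTreeT &DT, const std::vector<BlockT*> &Blocks) {
    BlockT *Dom = nullptr;
    for (BlockT *B : Blocks) {
      if (!B)
        return nullptr;   // a null entry poisons the whole query
      Dom = Dom ? DT.findNearestCommonDominator(Dom, B) : B;
      if (!Dom)
        return nullptr;   // blocks lie in unrelated regions
    }
    return Dom;           // nullptr for an empty input, like the original
  }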
- DEBUG(dbgs() << "Final node placement:\n" << LocationAsBlock(Loc)); + LLVM_DEBUG(dbgs() << "Final node placement:\n" << LocationAsBlock(Loc)); } Value *HexagonCommonGEP::fabricateGEP(NodeVect &NA, BasicBlock::iterator At, BasicBlock *LocB) { - DEBUG(dbgs() << "Fabricating GEP in " << LocB->getName() - << " for nodes:\n" << NA); + LLVM_DEBUG(dbgs() << "Fabricating GEP in " << LocB->getName() + << " for nodes:\n" + << NA); unsigned Num = NA.size(); GepNode *RN = NA[0]; assert((RN->Flags & GepNode::Root) && "Creating GEP for non-root"); @@ -1128,7 +1129,7 @@ Value *HexagonCommonGEP::fabricateGEP(NodeVect &NA, BasicBlock::iterator At, Type *ElTy = cast<PointerType>(InpTy->getScalarType())->getElementType(); NewInst = GetElementPtrInst::Create(ElTy, Input, A, "cgep", &*At); NewInst->setIsInBounds(RN->Flags & GepNode::InBounds); - DEBUG(dbgs() << "new GEP: " << *NewInst << '\n'); + LLVM_DEBUG(dbgs() << "new GEP: " << *NewInst << '\n'); Input = NewInst; } while (nax <= Num); @@ -1161,7 +1162,7 @@ void HexagonCommonGEP::getAllUsersForNode(GepNode *Node, ValueVect &Values, } void HexagonCommonGEP::materialize(NodeToValueMap &Loc) { - DEBUG(dbgs() << "Nodes before materialization:\n" << Nodes << '\n'); + LLVM_DEBUG(dbgs() << "Nodes before materialization:\n" << Nodes << '\n'); NodeChildrenMap NCM; NodeVect Roots; // Compute the inversion again, since computing placement could alter diff --git a/lib/Target/Hexagon/HexagonConstExtenders.cpp b/lib/Target/Hexagon/HexagonConstExtenders.cpp index 294a6da69f51..cbce61bc63c9 100644 --- a/lib/Target/Hexagon/HexagonConstExtenders.cpp +++ b/lib/Target/Hexagon/HexagonConstExtenders.cpp @@ -39,31 +39,57 @@ namespace llvm { FunctionPass *createHexagonConstExtenders(); } +static int32_t adjustUp(int32_t V, uint8_t A, uint8_t O) { + assert(isPowerOf2_32(A)); + int32_t U = (V & -A) + O; + return U >= V ? U : U+A; +} + +static int32_t adjustDown(int32_t V, uint8_t A, uint8_t O) { + assert(isPowerOf2_32(A)); + int32_t U = (V & -A) + O; + return U <= V ? U : U-A; +} + namespace { struct OffsetRange { + // The range of values between Min and Max that are of form Align*N+Offset, + // for some integer N. Min and Max are required to be of that form as well, + // except in the case of an empty range. int32_t Min = INT_MIN, Max = INT_MAX; uint8_t Align = 1; + uint8_t Offset = 0; OffsetRange() = default; - OffsetRange(int32_t L, int32_t H, uint8_t A) - : Min(L), Max(H), Align(A) {} + OffsetRange(int32_t L, int32_t H, uint8_t A, uint8_t O = 0) + : Min(L), Max(H), Align(A), Offset(O) {} OffsetRange &intersect(OffsetRange A) { - Align = std::max(Align, A.Align); - Min = std::max(Min, A.Min); - Max = std::min(Max, A.Max); + if (Align < A.Align) + std::swap(*this, A); + + // Align >= A.Align. + if (Offset >= A.Offset && (Offset - A.Offset) % A.Align == 0) { + Min = adjustUp(std::max(Min, A.Min), Align, Offset); + Max = adjustDown(std::min(Max, A.Max), Align, Offset); + } else { + // Make an empty range. + Min = 0; + Max = -1; + } // Canonicalize empty ranges. if (Min > Max) std::tie(Min, Max, Align) = std::make_tuple(0, -1, 1); return *this; } OffsetRange &shift(int32_t S) { - assert(alignTo(std::abs(S), Align) == uint64_t(std::abs(S))); Min += S; Max += S; + Offset = (Offset+S) % Align; return *this; } OffsetRange &extendBy(int32_t D) { // If D < 0, extend Min, otherwise extend Max. + assert(D % Align == 0); if (D < 0) Min = (INT_MIN-D < Min) ? 
Min+D : INT_MIN; else @@ -74,7 +100,7 @@ namespace { return Min > Max; } bool contains(int32_t V) const { - return Min <= V && V <= Max && (V % Align) == 0; + return Min <= V && V <= Max && (V-Offset) % Align == 0; } bool operator==(const OffsetRange &R) const { return Min == R.Min && Max == R.Max && Align == R.Align; @@ -408,7 +434,8 @@ namespace { raw_ostream &operator<< (raw_ostream &OS, const OffsetRange &OR) { if (OR.Min > OR.Max) OS << '!'; - OS << '[' << OR.Min << ',' << OR.Max << "]a" << unsigned(OR.Align); + OS << '[' << OR.Min << ',' << OR.Max << "]a" << unsigned(OR.Align) + << '+' << unsigned(OR.Offset); return OS; } @@ -703,9 +730,21 @@ bool HCE::ExtRoot::operator< (const HCE::ExtRoot &ER) const { } case MachineOperand::MO_ExternalSymbol: return StringRef(V.SymbolName) < StringRef(ER.V.SymbolName); - case MachineOperand::MO_GlobalAddress: - assert(V.GV->hasName() && ER.V.GV->hasName()); - return V.GV->getName() < ER.V.GV->getName(); + case MachineOperand::MO_GlobalAddress: { + // Global values may not have names, so compare their positions + // in the parent module. + const Module &M = *V.GV->getParent(); + auto FindPos = [&M] (const GlobalValue &V) { + unsigned P = 0; + for (const GlobalValue &T : M.global_values()) { + if (&T == &V) + return P; + P++; + } + llvm_unreachable("Global value not found in module"); + }; + return FindPos(*V.GV) < FindPos(*ER.V.GV); + } case MachineOperand::MO_BlockAddress: { const BasicBlock *ThisB = V.BA->getBasicBlock(); const BasicBlock *OtherB = ER.V.BA->getBasicBlock(); @@ -999,15 +1038,19 @@ unsigned HCE::getDirectRegReplacement(unsigned ExtOpc) const { return 0; } -// Return the allowable deviation from the current value of Rb which the +// Return the allowable deviation from the current value of Rb (i.e. the +// range of values that can be added to the current value) which the // instruction MI can accommodate. // The instruction MI is a user of register Rb, which is defined via an // extender. It may be possible for MI to be tweaked to work for a register // defined with a slightly different value. For example -// ... = L2_loadrub_io Rb, 0 +// ... = L2_loadrub_io Rb, 1 // can be modifed to be -// ... = L2_loadrub_io Rb', 1 -// if Rb' = Rb-1. +// ... = L2_loadrub_io Rb', 0 +// if Rb' = Rb+1. +// The range for Rb would be [Min+1, Max+1], where [Min, Max] is a range +// for L2_loadrub with offset 0. That means that Rb could be replaced with +// Rc, where Rc-Rb belongs to [Min+1, Max+1]. OffsetRange HCE::getOffsetRange(Register Rb, const MachineInstr &MI) const { unsigned Opc = MI.getOpcode(); // Instructions that are constant-extended may be replaced with something @@ -1109,6 +1152,13 @@ void HCE::recordExtender(MachineInstr &MI, unsigned OpNum) { bool IsLoad = MI.mayLoad(); bool IsStore = MI.mayStore(); + // Fixed stack slots have negative indexes, and they cannot be used + // with TRI::stackSlot2Index and TRI::index2StackSlot. This is somewhat + // unfortunate, but should not be a frequent thing. + for (MachineOperand &Op : MI.operands()) + if (Op.isFI() && Op.getIndex() < 0) + return; + if (IsLoad || IsStore) { unsigned AM = HII->getAddrMode(MI); switch (AM) { @@ -1220,7 +1270,7 @@ void HCE::assignInits(const ExtRoot &ER, unsigned Begin, unsigned End, if (!ED.IsDef) continue; ExtValue EV(ED); - DEBUG(dbgs() << " =" << I << ". " << EV << " " << ED << '\n'); + LLVM_DEBUG(dbgs() << " =" << I << ". 
" << EV << " " << ED << '\n'); assert(ED.Rd.Reg != 0); Ranges[I-Begin] = getOffsetRange(ED.Rd).shift(EV.Offset); // A2_tfrsi is a special case: it will be replaced with A2_addi, which @@ -1240,7 +1290,7 @@ void HCE::assignInits(const ExtRoot &ER, unsigned Begin, unsigned End, if (ED.IsDef) continue; ExtValue EV(ED); - DEBUG(dbgs() << " " << I << ". " << EV << " " << ED << '\n'); + LLVM_DEBUG(dbgs() << " " << I << ". " << EV << " " << ED << '\n'); OffsetRange Dev = getOffsetRange(ED); Ranges[I-Begin].intersect(Dev.shift(EV.Offset)); } @@ -1252,7 +1302,7 @@ void HCE::assignInits(const ExtRoot &ER, unsigned Begin, unsigned End, for (unsigned I = Begin; I != End; ++I) RangeMap[Ranges[I-Begin]].insert(I); - DEBUG({ + LLVM_DEBUG({ dbgs() << "Ranges\n"; for (unsigned I = Begin; I != End; ++I) dbgs() << " " << I << ". " << Ranges[I-Begin] << '\n'; @@ -1280,11 +1330,17 @@ void HCE::assignInits(const ExtRoot &ER, unsigned Begin, unsigned End, SmallVector<RangeTree::Node*,8> Nodes; Tree.order(Nodes); - auto MaxAlign = [](const SmallVectorImpl<RangeTree::Node*> &Nodes) { - uint8_t Align = 1; - for (RangeTree::Node *N : Nodes) - Align = std::max(Align, N->Range.Align); - return Align; + auto MaxAlign = [](const SmallVectorImpl<RangeTree::Node*> &Nodes, + uint8_t Align, uint8_t Offset) { + for (RangeTree::Node *N : Nodes) { + if (N->Range.Align <= Align || N->Range.Offset < Offset) + continue; + if ((N->Range.Offset - Offset) % Align != 0) + continue; + Align = N->Range.Align; + Offset = N->Range.Offset; + } + return std::make_pair(Align, Offset); }; // Construct the set of all potential definition points from the endpoints @@ -1294,14 +1350,14 @@ void HCE::assignInits(const ExtRoot &ER, unsigned Begin, unsigned End, std::set<int32_t> CandSet; for (RangeTree::Node *N : Nodes) { const OffsetRange &R = N->Range; - uint8_t A0 = MaxAlign(Tree.nodesWith(R.Min, false)); + auto P0 = MaxAlign(Tree.nodesWith(R.Min, false), R.Align, R.Offset); CandSet.insert(R.Min); - if (R.Align < A0) - CandSet.insert(R.Min < 0 ? -alignDown(-R.Min, A0) : alignTo(R.Min, A0)); - uint8_t A1 = MaxAlign(Tree.nodesWith(R.Max, false)); + if (R.Align < P0.first) + CandSet.insert(adjustUp(R.Min, P0.first, P0.second)); + auto P1 = MaxAlign(Tree.nodesWith(R.Max, false), R.Align, R.Offset); CandSet.insert(R.Max); - if (R.Align < A1) - CandSet.insert(R.Max < 0 ? -alignTo(-R.Max, A1) : alignDown(R.Max, A1)); + if (R.Align < P1.first) + CandSet.insert(adjustDown(R.Max, P1.first, P1.second)); } // Build the assignment map: candidate C -> { list of extender indexes }. @@ -1340,7 +1396,7 @@ void HCE::assignInits(const ExtRoot &ER, unsigned Begin, unsigned End, } } - DEBUG(dbgs() << "IMap (before fixup) = " << PrintIMap(IMap, *HRI)); + LLVM_DEBUG(dbgs() << "IMap (before fixup) = " << PrintIMap(IMap, *HRI)); // There is some ambiguity in what initializer should be used, if the // descriptor's subexpression is non-trivial: it can be the entire @@ -1359,10 +1415,50 @@ void HCE::assignInits(const ExtRoot &ER, unsigned Begin, unsigned End, AssignmentMap::iterator F = IMap.find({EV, ExtExpr()}); if (F == IMap.end()) continue; + // Finally, check if all extenders have the same value as the initializer. - auto SameValue = [&EV,this](unsigned I) { + // Make sure that extenders that are a part of a stack address are not + // merged with those that aren't. Stack addresses need an offset field + // (to be used by frame index elimination), while non-stack expressions + // can be replaced with forms (such as rr) that do not have such a field. 
+ // Example: + // + // Collected 3 extenders + // =2. imm:0 off:32968 bb#2: %7 = ## + __ << 0, def + // 0. imm:0 off:267 bb#0: __ = ## + SS#1 << 0 + // 1. imm:0 off:267 bb#1: __ = ## + SS#1 << 0 + // Ranges + // 0. [-756,267]a1+0 + // 1. [-756,267]a1+0 + // 2. [201,65735]a1+0 + // RangeMap + // [-756,267]a1+0 -> 0 1 + // [201,65735]a1+0 -> 2 + // IMap (before fixup) = { + // [imm:0 off:267, ## + __ << 0] -> { 2 } + // [imm:0 off:267, ## + SS#1 << 0] -> { 0 1 } + // } + // IMap (after fixup) = { + // [imm:0 off:267, ## + __ << 0] -> { 2 0 1 } + // [imm:0 off:267, ## + SS#1 << 0] -> { } + // } + // Inserted def in bb#0 for initializer: [imm:0 off:267, ## + __ << 0] + // %12:intregs = A2_tfrsi 267 + // + // The result was + // %12:intregs = A2_tfrsi 267 + // S4_pstorerbt_rr %3, %12, %stack.1, 0, killed %4 + // Which became + // r0 = #267 + // if (p0.new) memb(r0+r29<<#4) = r2 + + bool IsStack = any_of(F->second, [this](unsigned I) { + return Extenders[I].Expr.Rs.isSlot(); + }); + auto SameValue = [&EV,this,IsStack](unsigned I) { const ExtDesc &ED = Extenders[I]; - return ExtValue(ED).Offset == EV.Offset; + return ED.Expr.Rs.isSlot() == IsStack && + ExtValue(ED).Offset == EV.Offset; }; if (all_of(P.second, SameValue)) { F->second.insert(P.second.begin(), P.second.end()); @@ -1370,7 +1466,7 @@ void HCE::assignInits(const ExtRoot &ER, unsigned Begin, unsigned End, } } - DEBUG(dbgs() << "IMap (after fixup) = " << PrintIMap(IMap, *HRI)); + LLVM_DEBUG(dbgs() << "IMap (after fixup) = " << PrintIMap(IMap, *HRI)); } void HCE::calculatePlacement(const ExtenderInit &ExtI, const IndexList &Refs, @@ -1473,9 +1569,9 @@ HCE::Register HCE::insertInitializer(Loc DefL, const ExtenderInit &ExtI) { assert(InitI); (void)InitI; - DEBUG(dbgs() << "Inserted def in bb#" << MBB.getNumber() - << " for initializer: " << PrintInit(ExtI, *HRI) - << "\n " << *InitI); + LLVM_DEBUG(dbgs() << "Inserted def in bb#" << MBB.getNumber() + << " for initializer: " << PrintInit(ExtI, *HRI) << "\n " + << *InitI); return { DefR, 0 }; } @@ -1618,7 +1714,7 @@ bool HCE::replaceInstrExpr(const ExtDesc &ED, const ExtenderInit &ExtI, assert(IdxOpc == Hexagon::A2_addi); // Clamp Diff to the 16 bit range. - int32_t D = isInt<16>(Diff) ? Diff : (Diff > 32767 ? 32767 : -32767); + int32_t D = isInt<16>(Diff) ? Diff : (Diff > 0 ? 32767 : -32768); BuildMI(MBB, At, dl, HII->get(IdxOpc)) .add(MI.getOperand(0)) .add(MachineOperand(ExtR)) @@ -1626,11 +1722,13 @@ bool HCE::replaceInstrExpr(const ExtDesc &ED, const ExtenderInit &ExtI, Diff -= D; #ifndef NDEBUG // Make sure the output is within allowable range for uses. + // "Diff" is a difference in the "opposite direction", i.e. Ext - DefV, + // not DefV - Ext, as the getOffsetRange would calculate. 
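
An aside on the A2_addi clamp in the hunk above: the old expression (Diff > 32767 ? 32767 : -32767) could never produce -32768, since every difference below the signed 16-bit range was forced to -32767. A minimal standalone sketch of the corrected saturation; clampToS16 is an illustrative name, not part of the pass:

#include <cassert>
#include <cstdint>

// Saturate a 32-bit difference to the signed 16-bit immediate range
// [-32768, 32767] accepted by an add-immediate; the caller carries the
// remainder (Diff - D) over into the instruction's offset field.
static int32_t clampToS16(int32_t Diff) {
  if (Diff >= INT16_MIN && Diff <= INT16_MAX) // same test as isInt<16>(Diff)
    return Diff;
  return Diff > 0 ? INT16_MAX : INT16_MIN;    // old code yielded -32767 here
}

int main() {
  assert(clampToS16(-40000) == -32768);  // previously clamped to -32767
  assert(clampToS16(100000) == 32767);
  assert(clampToS16(-32768) == -32768);  // boundary now representable
}
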
OffsetRange Uses = getOffsetRange(MI.getOperand(0)); - if (!Uses.contains(Diff)) - dbgs() << "Diff: " << Diff << " out of range " << Uses + if (!Uses.contains(-Diff)) + dbgs() << "Diff: " << -Diff << " out of range " << Uses << " for " << MI; - assert(Uses.contains(Diff)); + assert(Uses.contains(-Diff)); #endif MBB.erase(MI); return true; @@ -1726,8 +1824,8 @@ bool HCE::replaceInstr(unsigned Idx, Register ExtR, const ExtenderInit &ExtI) { ExtValue EV(ED); int32_t Diff = EV.Offset - DefV.Offset; const MachineInstr &MI = *ED.UseMI; - DEBUG(dbgs() << __func__ << " Idx:" << Idx << " ExtR:" - << PrintRegister(ExtR, *HRI) << " Diff:" << Diff << '\n'); + LLVM_DEBUG(dbgs() << __func__ << " Idx:" << Idx << " ExtR:" + << PrintRegister(ExtR, *HRI) << " Diff:" << Diff << '\n'); // These two addressing modes must be converted into indexed forms // regardless of what the initializer looks like. @@ -1833,7 +1931,7 @@ const MachineOperand &HCE::getStoredValueOp(const MachineInstr &MI) const { bool HCE::runOnMachineFunction(MachineFunction &MF) { if (skipFunction(MF.getFunction())) return false; - DEBUG(MF.print(dbgs() << "Before " << getPassName() << '\n', nullptr)); + LLVM_DEBUG(MF.print(dbgs() << "Before " << getPassName() << '\n', nullptr)); HII = MF.getSubtarget<HexagonSubtarget>().getInstrInfo(); HRI = MF.getSubtarget<HexagonSubtarget>().getRegisterInfo(); @@ -1842,13 +1940,13 @@ bool HCE::runOnMachineFunction(MachineFunction &MF) { AssignmentMap IMap; collect(MF); - std::sort(Extenders.begin(), Extenders.end(), + llvm::sort(Extenders.begin(), Extenders.end(), [](const ExtDesc &A, const ExtDesc &B) { return ExtValue(A) < ExtValue(B); }); bool Changed = false; - DEBUG(dbgs() << "Collected " << Extenders.size() << " extenders\n"); + LLVM_DEBUG(dbgs() << "Collected " << Extenders.size() << " extenders\n"); for (unsigned I = 0, E = Extenders.size(); I != E; ) { unsigned B = I; const ExtRoot &T = Extenders[B].getOp(); @@ -1860,7 +1958,7 @@ bool HCE::runOnMachineFunction(MachineFunction &MF) { Changed |= replaceExtenders(IMap); } - DEBUG({ + LLVM_DEBUG({ if (Changed) MF.print(dbgs() << "After " << getPassName() << '\n', nullptr); else diff --git a/lib/Target/Hexagon/HexagonConstPropagation.cpp b/lib/Target/Hexagon/HexagonConstPropagation.cpp index 8ac96f3a4bfa..8f22a71dc1f3 100644 --- a/lib/Target/Hexagon/HexagonConstPropagation.cpp +++ b/lib/Target/Hexagon/HexagonConstPropagation.cpp @@ -617,7 +617,7 @@ void MachineConstPropagator::CellMap::print(raw_ostream &os, void MachineConstPropagator::visitPHI(const MachineInstr &PN) { const MachineBasicBlock *MB = PN.getParent(); unsigned MBN = MB->getNumber(); - DEBUG(dbgs() << "Visiting FI(" << printMBBReference(*MB) << "): " << PN); + LLVM_DEBUG(dbgs() << "Visiting FI(" << printMBBReference(*MB) << "): " << PN); const MachineOperand &MD = PN.getOperand(0); Register DefR(MD); @@ -642,8 +642,8 @@ Bottomize: const MachineBasicBlock *PB = PN.getOperand(i+1).getMBB(); unsigned PBN = PB->getNumber(); if (!EdgeExec.count(CFGEdge(PBN, MBN))) { - DEBUG(dbgs() << " edge " << printMBBReference(*PB) << "->" - << printMBBReference(*MB) << " not executable\n"); + LLVM_DEBUG(dbgs() << " edge " << printMBBReference(*PB) << "->" + << printMBBReference(*MB) << " not executable\n"); continue; } const MachineOperand &SO = PN.getOperand(i); @@ -658,8 +658,9 @@ Bottomize: LatticeCell SrcC; bool Eval = MCE.evaluate(UseR, Cells.get(UseR.Reg), SrcC); - DEBUG(dbgs() << " edge from " << printMBBReference(*PB) << ": " - << printReg(UseR.Reg, &MCE.TRI, UseR.SubReg) << SrcC << '\n'); + 
LLVM_DEBUG(dbgs() << " edge from " << printMBBReference(*PB) << ": " + << printReg(UseR.Reg, &MCE.TRI, UseR.SubReg) << SrcC + << '\n'); Changed |= Eval ? DefC.meet(SrcC) : DefC.setBottom(); Cells.update(DefR.Reg, DefC); @@ -671,11 +672,11 @@ Bottomize: } void MachineConstPropagator::visitNonBranch(const MachineInstr &MI) { - DEBUG(dbgs() << "Visiting MI(" << printMBBReference(*MI.getParent()) - << "): " << MI); + LLVM_DEBUG(dbgs() << "Visiting MI(" << printMBBReference(*MI.getParent()) + << "): " << MI); CellMap Outputs; bool Eval = MCE.evaluate(MI, Cells, Outputs); - DEBUG({ + LLVM_DEBUG({ if (Eval) { dbgs() << " outputs:"; for (auto &I : Outputs) @@ -713,7 +714,7 @@ void MachineConstPropagator::visitNonBranch(const MachineInstr &MI) { } } -// \brief Starting at a given branch, visit remaining branches in the block. +// Starting at a given branch, visit remaining branches in the block. // Traverse over the subsequent branches for as long as the preceding one // can fall through. Add all the possible targets to the flow work queue, // including the potential fall-through to the layout-successor block. @@ -728,8 +729,8 @@ void MachineConstPropagator::visitBranchesFrom(const MachineInstr &BrI) { while (It != End) { const MachineInstr &MI = *It; InstrExec.insert(&MI); - DEBUG(dbgs() << "Visiting " << (EvalOk ? "BR" : "br") << "(" - << printMBBReference(B) << "): " << MI); + LLVM_DEBUG(dbgs() << "Visiting " << (EvalOk ? "BR" : "br") << "(" + << printMBBReference(B) << "): " << MI); // Do not evaluate subsequent branches if the evaluation of any of the // previous branches failed. Keep iterating over the branches only // to mark them as executable. @@ -763,23 +764,23 @@ void MachineConstPropagator::visitBranchesFrom(const MachineInstr &BrI) { // last one set "FallsThru", then add an edge to the layout successor // to the targets. Targets.clear(); - DEBUG(dbgs() << " failed to evaluate a branch...adding all CFG " - "successors\n"); + LLVM_DEBUG(dbgs() << " failed to evaluate a branch...adding all CFG " + "successors\n"); for (const MachineBasicBlock *SB : B.successors()) Targets.insert(SB); } for (const MachineBasicBlock *TB : Targets) { unsigned TBN = TB->getNumber(); - DEBUG(dbgs() << " pushing edge " << printMBBReference(B) << " -> " - << printMBBReference(*TB) << "\n"); + LLVM_DEBUG(dbgs() << " pushing edge " << printMBBReference(B) << " -> " + << printMBBReference(*TB) << "\n"); FlowQ.push(CFGEdge(MBN, TBN)); } } void MachineConstPropagator::visitUsesOf(unsigned Reg) { - DEBUG(dbgs() << "Visiting uses of " << printReg(Reg, &MCE.TRI) - << Cells.get(Reg) << '\n'); + LLVM_DEBUG(dbgs() << "Visiting uses of " << printReg(Reg, &MCE.TRI) + << Cells.get(Reg) << '\n'); for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) { // Do not process non-executable instructions. They can become exceutable // later (via a flow-edge in the work queue). In such case, the instruc- @@ -799,7 +800,7 @@ bool MachineConstPropagator::computeBlockSuccessors(const MachineBasicBlock *MB, SetVector<const MachineBasicBlock*> &Targets) { MachineBasicBlock::const_iterator FirstBr = MB->end(); for (const MachineInstr &MI : *MB) { - if (MI.isDebugValue()) + if (MI.isDebugInstr()) continue; if (MI.isBranch()) { FirstBr = MI.getIterator(); @@ -814,7 +815,7 @@ bool MachineConstPropagator::computeBlockSuccessors(const MachineBasicBlock *MB, for (MachineBasicBlock::const_iterator I = FirstBr; I != End; ++I) { const MachineInstr &MI = *I; // Can there be debug instructions between branches? 
- if (MI.isDebugValue()) + if (MI.isDebugInstr()) continue; if (!InstrExec.count(&MI)) continue; @@ -870,10 +871,10 @@ void MachineConstPropagator::propagate(MachineFunction &MF) { CFGEdge Edge = FlowQ.front(); FlowQ.pop(); - DEBUG(dbgs() << "Picked edge " - << printMBBReference(*MF.getBlockNumbered(Edge.first)) << "->" - << printMBBReference(*MF.getBlockNumbered(Edge.second)) - << '\n'); + LLVM_DEBUG( + dbgs() << "Picked edge " + << printMBBReference(*MF.getBlockNumbered(Edge.first)) << "->" + << printMBBReference(*MF.getBlockNumbered(Edge.second)) << '\n'); if (Edge.first != EntryNum) if (EdgeExec.count(Edge)) continue; @@ -896,7 +897,7 @@ void MachineConstPropagator::propagate(MachineFunction &MF) { // If the successor block just became executable, visit all instructions. // To see if this is the first time we're visiting it, check the first // non-debug instruction to see if it is executable. - while (It != End && It->isDebugValue()) + while (It != End && It->isDebugInstr()) ++It; assert(It == End || !It->isPHI()); // If this block has been visited, go on to the next one. @@ -905,7 +906,7 @@ void MachineConstPropagator::propagate(MachineFunction &MF) { // For now, scan all non-branch instructions. Branches require different // processing. while (It != End && !It->isBranch()) { - if (!It->isDebugValue()) { + if (!It->isDebugInstr()) { InstrExec.insert(&*It); visitNonBranch(*It); } @@ -927,7 +928,7 @@ void MachineConstPropagator::propagate(MachineFunction &MF) { } } // while (FlowQ) - DEBUG({ + LLVM_DEBUG({ dbgs() << "Cells after propagation:\n"; Cells.print(dbgs(), MCE.TRI); dbgs() << "Dead CFG edges:\n"; @@ -1042,7 +1043,7 @@ bool MachineConstPropagator::rewrite(MachineFunction &MF) { // This is the constant propagation algorithm as described by Wegman-Zadeck. // Most of the terminology comes from there. 
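
For orientation, the Wegman-Zadeck scheme the comment cites drives everything off a queue of CFG edges: an edge is marked executable at most once, PHIs in the target block are refined per incoming edge, and a block body is scanned only the first time it becomes reachable. A compressed, compilable outline of that driver loop, structure only, with illustrative names rather than the pass's own:

#include <queue>
#include <set>
#include <utility>

using CFGEdge = std::pair<unsigned, unsigned>; // (pred BB#, succ BB#)

// Skeleton of the SCCP-style driver loop: executable edges are deduplicated,
// so each block body is visited once and PHIs are refined edge by edge.
void propagate(unsigned EntryBB) {
  std::queue<CFGEdge> FlowQ;
  std::set<CFGEdge> EdgeExec;
  FlowQ.push({EntryBB, EntryBB}); // pseudo-edge into the entry block

  while (!FlowQ.empty()) {
    CFGEdge E = FlowQ.front();
    FlowQ.pop();
    if (!EdgeExec.insert(E).second)
      continue; // edge already marked executable: nothing new can flow
    // 1. Meet the incoming values into the PHIs of block E.second.
    // 2. If E.second was not executable before, evaluate its non-branch
    //    instructions; each changed cell queues that register's uses.
    // 3. Evaluate the branches and push every feasible outgoing edge.
  }
}

int main() { propagate(0); }
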
bool MachineConstPropagator::run(MachineFunction &MF) { - DEBUG(MF.print(dbgs() << "Starting MachineConstPropagator\n", nullptr)); + LLVM_DEBUG(MF.print(dbgs() << "Starting MachineConstPropagator\n", nullptr)); MRI = &MF.getRegInfo(); @@ -1054,7 +1055,7 @@ bool MachineConstPropagator::run(MachineFunction &MF) { propagate(MF); bool Changed = rewrite(MF); - DEBUG({ + LLVM_DEBUG({ dbgs() << "End of MachineConstPropagator (Changed=" << Changed << ")\n"; if (Changed) MF.print(dbgs(), nullptr); @@ -1880,10 +1881,7 @@ namespace { public: static char ID; - HexagonConstPropagation() : MachineFunctionPass(ID) { - PassRegistry &Registry = *PassRegistry::getPassRegistry(); - initializeHexagonConstPropagationPass(Registry); - } + HexagonConstPropagation() : MachineFunctionPass(ID) {} StringRef getPassName() const override { return "Hexagon Constant Propagation"; @@ -1903,8 +1901,8 @@ namespace { char HexagonConstPropagation::ID = 0; -INITIALIZE_PASS(HexagonConstPropagation, "hcp", "Hexagon Constant Propagation", - false, false) +INITIALIZE_PASS(HexagonConstPropagation, "hexagon-constp", + "Hexagon Constant Propagation", false, false) HexagonConstEvaluator::HexagonConstEvaluator(MachineFunction &Fn) : MachineConstEvaluator(Fn), @@ -2022,6 +2020,8 @@ bool HexagonConstEvaluator::evaluate(const MachineInstr &MI, case Hexagon::A2_combineii: // combine(#s8Ext, #s8) case Hexagon::A4_combineii: // combine(#s8, #u6Ext) { + if (!MI.getOperand(1).isImm() || !MI.getOperand(2).isImm()) + return false; uint64_t Hi = MI.getOperand(1).getImm(); uint64_t Lo = MI.getOperand(2).getImm(); uint64_t Res = (Hi << 32) | (Lo & 0xFFFFFFFF); @@ -2631,6 +2631,8 @@ bool HexagonConstEvaluator::evaluateHexLogical(const MachineInstr &MI, Eval = evaluateANDrr(R1, Register(Src2), Inputs, RC); break; case Hexagon::A2_andir: { + if (!Src2.isImm()) + return false; APInt A(32, Src2.getImm(), true); Eval = evaluateANDri(R1, A, Inputs, RC); break; @@ -2640,6 +2642,8 @@ bool HexagonConstEvaluator::evaluateHexLogical(const MachineInstr &MI, Eval = evaluateORrr(R1, Register(Src2), Inputs, RC); break; case Hexagon::A2_orir: { + if (!Src2.isImm()) + return false; APInt A(32, Src2.getImm(), true); Eval = evaluateORri(R1, A, Inputs, RC); break; @@ -2775,7 +2779,7 @@ bool HexagonConstEvaluator::rewriteHexConstDefs(MachineInstr &MI, AllDefs = false; // Some diagnostics. - // DEBUG({...}) gets confused with all this code as an argument. + // LLVM_DEBUG({...}) gets confused with all this code as an argument. 
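
The early-outs on isImm() added to the A2_combineii/A4_combineii and A2_andir/A2_orir cases deserve a remark: at this point an operand can still be symbolic (a global address or a constant-extended expression), and MachineOperand::getImm() asserts when isImm() is false. Roughly this defensive pattern, written against LLVM's public API; the helper name is invented for illustration:

#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include <utility>

using namespace llvm;

// Fetch operands 1 and 2 of MI as immediates, or None if either one is
// still symbolic, mirroring the "return false" bail-outs in the patch.
static Optional<std::pair<int64_t, int64_t>>
getImmOperandPair(const MachineInstr &MI) {
  const MachineOperand &Op1 = MI.getOperand(1);
  const MachineOperand &Op2 = MI.getOperand(2);
  if (!Op1.isImm() || !Op2.isImm())
    return None; // calling getImm() here would assert
  return std::make_pair(Op1.getImm(), Op2.getImm());
}
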
#ifndef NDEBUG bool Debugging = DebugFlag && isCurrentDebugType(DEBUG_TYPE); if (Debugging) { @@ -2920,7 +2924,7 @@ bool HexagonConstEvaluator::rewriteHexConstDefs(MachineInstr &MI, ChangedNum++; } - DEBUG({ + LLVM_DEBUG({ if (!NewInstrs.empty()) { MachineFunction &MF = *MI.getParent()->getParent(); dbgs() << "In function: " << MF.getName() << "\n"; @@ -3087,7 +3091,7 @@ bool HexagonConstEvaluator::rewriteHexConstUses(MachineInstr &MI, MO.setIsKill(false); } - DEBUG({ + LLVM_DEBUG({ if (NewMI) { dbgs() << "Rewrite: for " << MI; if (NewMI != &MI) @@ -3127,7 +3131,7 @@ bool HexagonConstEvaluator::rewriteHexBranch(MachineInstr &BrI, if (BrI.getOpcode() == Hexagon::J2_jump) return false; - DEBUG(dbgs() << "Rewrite(" << printMBBReference(B) << "):" << BrI); + LLVM_DEBUG(dbgs() << "Rewrite(" << printMBBReference(B) << "):" << BrI); bool Rewritten = false; if (NumTargets > 0) { assert(!FallsThru && "This should have been checked before"); diff --git a/lib/Target/Hexagon/HexagonCopyToCombine.cpp b/lib/Target/Hexagon/HexagonCopyToCombine.cpp index 087a77203fcb..fccde96d8a32 100644 --- a/lib/Target/Hexagon/HexagonCopyToCombine.cpp +++ b/lib/Target/Hexagon/HexagonCopyToCombine.cpp @@ -300,7 +300,7 @@ bool HexagonCopyToCombine::isSafeToMoveTogether(MachineInstr &I1, // * reads I2's def reg // * or has unmodelled side effects // we can't move I2 across it. - if (I->isDebugValue()) + if (I->isDebugInstr()) continue; if (isUnsafeToMoveAcross(*I, I2UseReg, I2DestReg, TRI)) { @@ -358,7 +358,7 @@ bool HexagonCopyToCombine::isSafeToMoveTogether(MachineInstr &I1, // to remove the implicit killed %d4 operand. For now, we are // conservative and disallow the move. // we can't move I1 across it. - if (MI.isDebugValue()) { + if (MI.isDebugInstr()) { if (MI.readsRegister(I1DestReg, TRI)) // Move this instruction after I2. DbgMItoMove.push_back(&MI); continue; @@ -396,7 +396,7 @@ void HexagonCopyToCombine::findPotentialNewifiableTFRs(MachineBasicBlock &BB) { DenseMap<unsigned, MachineInstr *> LastDef; for (MachineInstr &MI : BB) { - if (MI.isDebugValue()) + if (MI.isDebugInstr()) continue; // Mark TFRs that feed a potential new value store as such. 
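
On the isDebugValue() to isDebugInstr() sweep running through this file and the previous one: isDebugInstr() also covers the newer debug pseudo-instructions such as DBG_LABEL, and the reason scans like the one above skip them is that codegen decisions must not differ between builds with and without debug info. The distance-counting idiom in isolation (assumes LLVM headers; simplified from the findPotentialNewifiableTFRs loop):

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"

using namespace llvm;

// Count real instructions from Begin up to (not including) MI, ignoring
// debug pseudo-instructions so the count is stable with and without -g.
static unsigned countRealInstrs(MachineBasicBlock::iterator Begin,
                                const MachineInstr &MI) {
  unsigned N = 0;
  for (MachineBasicBlock::iterator It = Begin; &*It != &MI; ++It)
    if (!It->isDebugInstr())
      ++N;
  return N;
}
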
@@ -423,7 +423,7 @@ HexagonCopyToCombine::findPotentialNewifiableTFRs(MachineBasicBlock &BB) { MachineBasicBlock::iterator It(DefInst); unsigned NumInstsToDef = 0; while (&*It != &MI) { - if (!It->isDebugValue()) + if (!It->isDebugInstr()) ++NumInstsToDef; ++It; } @@ -489,7 +489,7 @@ bool HexagonCopyToCombine::runOnMachineFunction(MachineFunction &MF) { MI != End;) { MachineInstr &I1 = *MI++; - if (I1.isDebugValue()) + if (I1.isDebugInstr()) continue; // Don't combine a TFR whose user could be newified (instructions that @@ -526,7 +526,7 @@ MachineInstr *HexagonCopyToCombine::findPairable(MachineInstr &I1, bool &DoInsertAtI1, bool AllowC64) { MachineBasicBlock::iterator I2 = std::next(MachineBasicBlock::iterator(I1)); - while (I2 != I1.getParent()->end() && I2->isDebugValue()) + while (I2 != I1.getParent()->end() && I2->isDebugInstr()) ++I2; unsigned I1DestReg = I1.getOperand(0).getReg(); @@ -649,7 +649,7 @@ void HexagonCopyToCombine::emitConst64(MachineBasicBlock::iterator &InsertPt, unsigned DoubleDestReg, MachineOperand &HiOperand, MachineOperand &LoOperand) { - DEBUG(dbgs() << "Found a CONST64\n"); + LLVM_DEBUG(dbgs() << "Found a CONST64\n"); DebugLoc DL = InsertPt->getDebugLoc(); MachineBasicBlock *BB = InsertPt->getParent(); diff --git a/lib/Target/Hexagon/HexagonDepArch.td b/lib/Target/Hexagon/HexagonDepArch.td index 87dcd966f2ed..3594379aa841 100644 --- a/lib/Target/Hexagon/HexagonDepArch.td +++ b/lib/Target/Hexagon/HexagonDepArch.td @@ -11,14 +11,14 @@ def ArchV65: SubtargetFeature<"v65", "HexagonArchVersion", "Hexagon::ArchEnum::V65", "Enable Hexagon V65 architecture">; -def HasV65T : Predicate<"HST->hasV65TOps()">, AssemblerPredicate<"ArchV65">; +def HasV65 : Predicate<"HST->hasV65Ops()">, AssemblerPredicate<"ArchV65">; def ArchV62: SubtargetFeature<"v62", "HexagonArchVersion", "Hexagon::ArchEnum::V62", "Enable Hexagon V62 architecture">; -def HasV62T : Predicate<"HST->hasV62TOps()">, AssemblerPredicate<"ArchV62">; +def HasV62 : Predicate<"HST->hasV62Ops()">, AssemblerPredicate<"ArchV62">; def ArchV60: SubtargetFeature<"v60", "HexagonArchVersion", "Hexagon::ArchEnum::V60", "Enable Hexagon V60 architecture">; -def HasV60T : Predicate<"HST->hasV60TOps()">, AssemblerPredicate<"ArchV60">; +def HasV60 : Predicate<"HST->hasV60Ops()">, AssemblerPredicate<"ArchV60">; def ArchV55: SubtargetFeature<"v55", "HexagonArchVersion", "Hexagon::ArchEnum::V55", "Enable Hexagon V55 architecture">; -def HasV55T : Predicate<"HST->hasV55TOps()">, AssemblerPredicate<"ArchV55">; +def HasV55 : Predicate<"HST->hasV55Ops()">, AssemblerPredicate<"ArchV55">; def ArchV4: SubtargetFeature<"v4", "HexagonArchVersion", "Hexagon::ArchEnum::V4", "Enable Hexagon V4 architecture">; -def HasV4T : Predicate<"HST->hasV4TOps()">, AssemblerPredicate<"ArchV4">; +def HasV4 : Predicate<"HST->hasV4Ops()">, AssemblerPredicate<"ArchV4">; def ArchV5: SubtargetFeature<"v5", "HexagonArchVersion", "Hexagon::ArchEnum::V5", "Enable Hexagon V5 architecture">; -def HasV5T : Predicate<"HST->hasV5TOps()">, AssemblerPredicate<"ArchV5">; +def HasV5 : Predicate<"HST->hasV5Ops()">, AssemblerPredicate<"ArchV5">; diff --git a/lib/Target/Hexagon/HexagonDepDecoders.h b/lib/Target/Hexagon/HexagonDepDecoders.h deleted file mode 100644 index 020362a95909..000000000000 --- a/lib/Target/Hexagon/HexagonDepDecoders.h +++ /dev/null @@ -1,13 +0,0 @@ -//===- HexagonDepDecoders.h -----------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// 
License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// Automatically generated file, please consult code owner before editing. -//===----------------------------------------------------------------------===// - - - diff --git a/lib/Target/Hexagon/HexagonDepIICScalar.td b/lib/Target/Hexagon/HexagonDepIICScalar.td index 083ec7753e04..931504b56ccb 100644 --- a/lib/Target/Hexagon/HexagonDepIICScalar.td +++ b/lib/Target/Hexagon/HexagonDepIICScalar.td @@ -10,21 +10,17 @@ //===----------------------------------------------------------------------===// -def tc_0077f68c : InstrItinClass; def tc_00afc57e : InstrItinClass; def tc_00e7c26e : InstrItinClass; def tc_03220ffa : InstrItinClass; def tc_038a1342 : InstrItinClass; def tc_04c9decc : InstrItinClass; def tc_05b6c987 : InstrItinClass; -def tc_0a2b8c7c : InstrItinClass; def tc_0cd51c76 : InstrItinClass; def tc_0dc560de : InstrItinClass; def tc_0fc1ae07 : InstrItinClass; def tc_10b97e27 : InstrItinClass; -def tc_128f96e3 : InstrItinClass; def tc_1372bca1 : InstrItinClass; -def tc_1432937d : InstrItinClass; def tc_14cd4cfa : InstrItinClass; def tc_15411484 : InstrItinClass; def tc_16d0d8d5 : InstrItinClass; @@ -32,18 +28,14 @@ def tc_181af5d0 : InstrItinClass; def tc_1853ea6d : InstrItinClass; def tc_1b82a277 : InstrItinClass; def tc_1b9c9ee5 : InstrItinClass; -def tc_1c0005f9 : InstrItinClass; def tc_1d5a38a8 : InstrItinClass; def tc_1e856f58 : InstrItinClass; -def tc_20280784 : InstrItinClass; def tc_234a11a5 : InstrItinClass; def tc_238d91d2 : InstrItinClass; def tc_29175780 : InstrItinClass; -def tc_29641329 : InstrItinClass; def tc_2a160009 : InstrItinClass; def tc_2b2f4060 : InstrItinClass; def tc_2b6f77c6 : InstrItinClass; -def tc_2e00db30 : InstrItinClass; def tc_2f185f5c : InstrItinClass; def tc_2fc0c436 : InstrItinClass; def tc_351fed2d : InstrItinClass; @@ -71,22 +63,19 @@ def tc_51b866be : InstrItinClass; def tc_523fcf30 : InstrItinClass; def tc_5274e61a : InstrItinClass; def tc_52d7bbea : InstrItinClass; -def tc_53173427 : InstrItinClass; def tc_53bc8a6a : InstrItinClass; def tc_53bdb2f6 : InstrItinClass; def tc_540fdfbc : InstrItinClass; def tc_55050d58 : InstrItinClass; -def tc_56d25411 : InstrItinClass; def tc_57288781 : InstrItinClass; def tc_594ab548 : InstrItinClass; +def tc_59a01ead : InstrItinClass; def tc_5acef64a : InstrItinClass; def tc_5ba5997d : InstrItinClass; def tc_5eb851fc : InstrItinClass; def tc_5f6847a1 : InstrItinClass; def tc_60571023 : InstrItinClass; def tc_609d2efe : InstrItinClass; -def tc_60d76817 : InstrItinClass; -def tc_60f5738d : InstrItinClass; def tc_63fe3df7 : InstrItinClass; def tc_66888ded : InstrItinClass; def tc_6792d5ff : InstrItinClass; @@ -96,6 +85,7 @@ def tc_6aa5711a : InstrItinClass; def tc_6ac37025 : InstrItinClass; def tc_6ebb4a12 : InstrItinClass; def tc_6efc556e : InstrItinClass; +def tc_6fa4db47 : InstrItinClass; def tc_73043bf4 : InstrItinClass; def tc_746baa8e : InstrItinClass; def tc_74e47fd9 : InstrItinClass; @@ -103,18 +93,16 @@ def tc_7934b9df : InstrItinClass; def tc_7a830544 : InstrItinClass; def tc_7f881c76 : InstrItinClass; def tc_84df2cd3 : InstrItinClass; -def tc_85523bcb : InstrItinClass; def tc_855b0b61 : InstrItinClass; def tc_87735c3b : InstrItinClass; -def tc_88fa1a78 : InstrItinClass; def tc_897d1a9d : InstrItinClass; def tc_8b15472a : InstrItinClass; -def tc_8bb285ec : InstrItinClass; def tc_8fd5f294 : InstrItinClass; def tc_8fe6b782 : InstrItinClass; def tc_90f3e30c : InstrItinClass; def 
tc_976ddc4f : InstrItinClass; def tc_97743097 : InstrItinClass; +def tc_994333cd : InstrItinClass; def tc_999d32db : InstrItinClass; def tc_99be14ca : InstrItinClass; def tc_9c00ce8d : InstrItinClass; @@ -133,7 +121,6 @@ def tc_adb14c66 : InstrItinClass; def tc_b13761ae : InstrItinClass; def tc_b166348b : InstrItinClass; def tc_b44c6e2a : InstrItinClass; -def tc_b5a33b22 : InstrItinClass; def tc_b77c481f : InstrItinClass; def tc_b7dd427e : InstrItinClass; def tc_b9488031 : InstrItinClass; @@ -141,7 +128,6 @@ def tc_b9c0b731 : InstrItinClass; def tc_b9c4623f : InstrItinClass; def tc_bad2bcaf : InstrItinClass; def tc_bcc96cee : InstrItinClass; -def tc_bd90564c : InstrItinClass; def tc_bde7aaf4 : InstrItinClass; def tc_be706f30 : InstrItinClass; def tc_c2f7d806 : InstrItinClass; @@ -166,24 +152,20 @@ def tc_d9f95eef : InstrItinClass; def tc_daa058fa : InstrItinClass; def tc_dbdffe3d : InstrItinClass; def tc_e0739b8c : InstrItinClass; -def tc_e1e0a2dc : InstrItinClass; def tc_e1e99bfa : InstrItinClass; def tc_e216a5db : InstrItinClass; def tc_e421e012 : InstrItinClass; -def tc_e6b38e01 : InstrItinClass; def tc_e7624c08 : InstrItinClass; def tc_e7d02c66 : InstrItinClass; def tc_e913dc32 : InstrItinClass; def tc_e9c822f7 : InstrItinClass; def tc_e9fae2d6 : InstrItinClass; -def tc_ef20db1c : InstrItinClass; def tc_ef52ed71 : InstrItinClass; def tc_ef84f62f : InstrItinClass; def tc_f2704b9a : InstrItinClass; def tc_f3eaa14b : InstrItinClass; def tc_f47d212f : InstrItinClass; def tc_f49e76f4 : InstrItinClass; -def tc_f4f43fb5 : InstrItinClass; def tc_f7dd9c9f : InstrItinClass; def tc_f86c328a : InstrItinClass; def tc_f8eeed7a : InstrItinClass; @@ -192,21 +174,17 @@ def tc_ff9ee76e : InstrItinClass; class DepScalarItinV4 { list<InstrItinData> DepScalarItinV4_list = [ - InstrItinData <tc_0077f68c, [InstrStage<1, [SLOT0]>]>, InstrItinData <tc_00afc57e, [InstrStage<1, [SLOT2, SLOT3]>]>, InstrItinData <tc_00e7c26e, [InstrStage<1, [SLOT0]>]>, InstrItinData <tc_03220ffa, [InstrStage<1, [SLOT0, SLOT1]>]>, InstrItinData <tc_038a1342, [InstrStage<1, [SLOT2, SLOT3]>]>, InstrItinData <tc_04c9decc, [InstrStage<1, [SLOT2, SLOT3]>]>, InstrItinData <tc_05b6c987, [InstrStage<1, [SLOT0, SLOT1]>]>, - InstrItinData <tc_0a2b8c7c, [InstrStage<1, [SLOT0]>]>, InstrItinData <tc_0cd51c76, [InstrStage<1, [SLOT0, SLOT1]>]>, InstrItinData <tc_0dc560de, [InstrStage<1, [SLOT0, SLOT1]>]>, InstrItinData <tc_0fc1ae07, [InstrStage<1, [SLOT0]>]>, InstrItinData <tc_10b97e27, [InstrStage<1, [SLOT2]>]>, - InstrItinData <tc_128f96e3, [InstrStage<1, [SLOT0]>]>, InstrItinData <tc_1372bca1, [InstrStage<1, [SLOT0]>]>, - InstrItinData <tc_1432937d, [InstrStage<1, [SLOT2]>]>, InstrItinData <tc_14cd4cfa, [InstrStage<1, [SLOT2]>]>, InstrItinData <tc_15411484, [InstrStage<1, [SLOT2]>]>, InstrItinData <tc_16d0d8d5, [InstrStage<1, [SLOT2, SLOT3]>]>, @@ -214,18 +192,14 @@ class DepScalarItinV4 { InstrItinData <tc_1853ea6d, [InstrStage<1, [SLOT2, SLOT3]>]>, InstrItinData <tc_1b82a277, [InstrStage<1, [SLOT0, SLOT1]>]>, InstrItinData <tc_1b9c9ee5, [InstrStage<1, [SLOT2, SLOT3]>]>, - InstrItinData <tc_1c0005f9, [InstrStage<1, [SLOT3]>]>, InstrItinData <tc_1d5a38a8, [InstrStage<1, [SLOT0, SLOT1]>]>, InstrItinData <tc_1e856f58, [InstrStage<1, [SLOT2, SLOT3]>]>, - InstrItinData <tc_20280784, [InstrStage<1, [SLOT0]>]>, InstrItinData <tc_234a11a5, [InstrStage<1, [SLOT2, SLOT3]>]>, InstrItinData <tc_238d91d2, [InstrStage<1, [SLOT0, SLOT1]>]>, InstrItinData <tc_29175780, [InstrStage<1, [SLOT3]>]>, - InstrItinData <tc_29641329, [InstrStage<1, [SLOT3]>]>, 
InstrItinData <tc_2a160009, [InstrStage<1, [SLOT0]>]>, InstrItinData <tc_2b2f4060, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>, InstrItinData <tc_2b6f77c6, [InstrStage<1, [SLOT2, SLOT3]>]>, - InstrItinData <tc_2e00db30, [InstrStage<1, [SLOT0]>]>, InstrItinData <tc_2f185f5c, [InstrStage<1, [SLOT2, SLOT3]>]>, InstrItinData <tc_2fc0c436, [InstrStage<1, [SLOT0, SLOT1]>]>, InstrItinData <tc_351fed2d, [InstrStage<1, [SLOT2, SLOT3]>]>, @@ -253,22 +227,19 @@ class DepScalarItinV4 { InstrItinData <tc_523fcf30, [InstrStage<1, [SLOT2, SLOT3]>]>, InstrItinData <tc_5274e61a, [InstrStage<1, [SLOT0, SLOT1]>]>, InstrItinData <tc_52d7bbea, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>, - InstrItinData <tc_53173427, [InstrStage<1, [SLOT3]>]>, InstrItinData <tc_53bc8a6a, [InstrStage<1, [SLOT2, SLOT3]>]>, InstrItinData <tc_53bdb2f6, [InstrStage<1, [SLOT0]>]>, InstrItinData <tc_540fdfbc, [InstrStage<1, [SLOT2, SLOT3]>]>, InstrItinData <tc_55050d58, [InstrStage<1, [SLOT2, SLOT3]>]>, - InstrItinData <tc_56d25411, [InstrStage<1, [SLOT2]>]>, InstrItinData <tc_57288781, [InstrStage<1, [SLOT0, SLOT1]>]>, InstrItinData <tc_594ab548, [InstrStage<1, [SLOT0]>]>, + InstrItinData <tc_59a01ead, [InstrStage<1, [SLOT2]>]>, InstrItinData <tc_5acef64a, [InstrStage<1, [SLOT0, SLOT1]>]>, InstrItinData <tc_5ba5997d, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>, InstrItinData <tc_5eb851fc, [InstrStage<1, [SLOT0]>]>, InstrItinData <tc_5f6847a1, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>, InstrItinData <tc_60571023, [InstrStage<1, [SLOT2, SLOT3]>]>, InstrItinData <tc_609d2efe, [InstrStage<1, [SLOT0, SLOT1]>]>, - InstrItinData <tc_60d76817, [InstrStage<1, [SLOT3]>]>, - InstrItinData <tc_60f5738d, [InstrStage<1, [SLOT3]>]>, InstrItinData <tc_63fe3df7, [InstrStage<1, [SLOT0, SLOT1]>]>, InstrItinData <tc_66888ded, [InstrStage<1, [SLOT0, SLOT1]>]>, InstrItinData <tc_6792d5ff, [InstrStage<1, [SLOT2, SLOT3]>]>, @@ -278,6 +249,7 @@ class DepScalarItinV4 { InstrItinData <tc_6ac37025, [InstrStage<1, [SLOT0]>]>, InstrItinData <tc_6ebb4a12, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>, InstrItinData <tc_6efc556e, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>, + InstrItinData <tc_6fa4db47, [InstrStage<1, [SLOT3]>]>, InstrItinData <tc_73043bf4, [InstrStage<1, [SLOT3]>]>, InstrItinData <tc_746baa8e, [InstrStage<1, [SLOT0]>]>, InstrItinData <tc_74e47fd9, [InstrStage<1, [SLOT0, SLOT1]>]>, @@ -285,18 +257,16 @@ class DepScalarItinV4 { InstrItinData <tc_7a830544, [InstrStage<1, [SLOT2, SLOT3]>]>, InstrItinData <tc_7f881c76, [InstrStage<1, [SLOT0, SLOT1]>]>, InstrItinData <tc_84df2cd3, [InstrStage<1, [SLOT2, SLOT3]>]>, - InstrItinData <tc_85523bcb, [InstrStage<1, [SLOT3]>]>, InstrItinData <tc_855b0b61, [InstrStage<1, [SLOT2, SLOT3]>]>, InstrItinData <tc_87735c3b, [InstrStage<1, [SLOT2, SLOT3]>]>, - InstrItinData <tc_88fa1a78, [InstrStage<1, [SLOT3]>]>, InstrItinData <tc_897d1a9d, [InstrStage<1, [SLOT2, SLOT3]>]>, InstrItinData <tc_8b15472a, [InstrStage<1, [SLOT0, SLOT1]>]>, - InstrItinData <tc_8bb285ec, [InstrStage<1, [SLOT0]>]>, InstrItinData <tc_8fd5f294, [InstrStage<1, [SLOT2, SLOT3]>]>, InstrItinData <tc_8fe6b782, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>, InstrItinData <tc_90f3e30c, [InstrStage<1, [SLOT0, SLOT1]>]>, InstrItinData <tc_976ddc4f, [InstrStage<1, [SLOT2, SLOT3]>]>, InstrItinData <tc_97743097, [InstrStage<1, [SLOT2]>]>, + InstrItinData <tc_994333cd, [InstrStage<1, [SLOT3]>]>, InstrItinData <tc_999d32db, [InstrStage<1, [SLOT2]>]>, InstrItinData <tc_99be14ca, [InstrStage<1, [SLOT2, SLOT3]>]>, InstrItinData <tc_9c00ce8d, 
[InstrStage<1, [SLOT2, SLOT3]>]>, @@ -315,7 +285,6 @@ class DepScalarItinV4 { InstrItinData <tc_b13761ae, [InstrStage<1, [SLOT2]>]>, InstrItinData <tc_b166348b, [InstrStage<1, [SLOT0]>]>, InstrItinData <tc_b44c6e2a, [InstrStage<1, [SLOT2, SLOT3]>]>, - InstrItinData <tc_b5a33b22, [InstrStage<1, [SLOT2]>]>, InstrItinData <tc_b77c481f, [InstrStage<1, [SLOT0, SLOT1]>]>, InstrItinData <tc_b7dd427e, [InstrStage<1, [SLOT0, SLOT1]>]>, InstrItinData <tc_b9488031, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>, @@ -323,7 +292,6 @@ class DepScalarItinV4 { InstrItinData <tc_b9c4623f, [InstrStage<1, [SLOT3]>]>, InstrItinData <tc_bad2bcaf, [InstrStage<1, [SLOT0, SLOT1]>]>, InstrItinData <tc_bcc96cee, [InstrStage<1, [SLOT2, SLOT3]>]>, - InstrItinData <tc_bd90564c, [InstrStage<1, [SLOT0]>]>, InstrItinData <tc_bde7aaf4, [InstrStage<1, [SLOT0]>]>, InstrItinData <tc_be706f30, [InstrStage<1, [SLOT2, SLOT3]>]>, InstrItinData <tc_c2f7d806, [InstrStage<1, [SLOT2, SLOT3]>]>, @@ -348,24 +316,20 @@ class DepScalarItinV4 { InstrItinData <tc_daa058fa, [InstrStage<1, [SLOT0]>]>, InstrItinData <tc_dbdffe3d, [InstrStage<1, [SLOT2, SLOT3]>]>, InstrItinData <tc_e0739b8c, [InstrStage<1, [SLOT2]>]>, - InstrItinData <tc_e1e0a2dc, [InstrStage<1, [SLOT2]>]>, InstrItinData <tc_e1e99bfa, [InstrStage<1, [SLOT2, SLOT3]>]>, InstrItinData <tc_e216a5db, [InstrStage<1, [SLOT0]>]>, InstrItinData <tc_e421e012, [InstrStage<1, [SLOT0]>]>, - InstrItinData <tc_e6b38e01, [InstrStage<1, [SLOT3]>]>, InstrItinData <tc_e7624c08, [InstrStage<1, [SLOT0]>]>, InstrItinData <tc_e7d02c66, [InstrStage<1, [SLOT0]>]>, InstrItinData <tc_e913dc32, [InstrStage<1, [SLOT2, SLOT3]>]>, InstrItinData <tc_e9c822f7, [InstrStage<1, [SLOT0, SLOT1]>]>, InstrItinData <tc_e9fae2d6, [InstrStage<1, [SLOT2, SLOT3]>]>, - InstrItinData <tc_ef20db1c, [InstrStage<1, [SLOT3]>]>, InstrItinData <tc_ef52ed71, [InstrStage<1, [SLOT0, SLOT1]>]>, InstrItinData <tc_ef84f62f, [InstrStage<1, [SLOT2, SLOT3]>]>, InstrItinData <tc_f2704b9a, [InstrStage<1, [SLOT2, SLOT3]>]>, InstrItinData <tc_f3eaa14b, [InstrStage<1, [SLOT2, SLOT3]>]>, InstrItinData <tc_f47d212f, [InstrStage<1, [SLOT0, SLOT1]>]>, InstrItinData <tc_f49e76f4, [InstrStage<1, [SLOT2, SLOT3]>]>, - InstrItinData <tc_f4f43fb5, [InstrStage<1, [SLOT0]>]>, InstrItinData <tc_f7dd9c9f, [InstrStage<1, [SLOT0]>]>, InstrItinData <tc_f86c328a, [InstrStage<1, [SLOT0, SLOT1]>]>, InstrItinData <tc_f8eeed7a, [InstrStage<1, [SLOT2, SLOT3]>]>, @@ -375,21 +339,17 @@ class DepScalarItinV4 { class DepScalarItinV5 { list<InstrItinData> DepScalarItinV5_list = [ - InstrItinData <tc_0077f68c, [InstrStage<1, [SLOT0]>]>, InstrItinData <tc_00afc57e, [InstrStage<1, [SLOT2, SLOT3]>]>, InstrItinData <tc_00e7c26e, [InstrStage<1, [SLOT0]>]>, InstrItinData <tc_03220ffa, [InstrStage<1, [SLOT0, SLOT1]>]>, InstrItinData <tc_038a1342, [InstrStage<1, [SLOT2, SLOT3]>]>, InstrItinData <tc_04c9decc, [InstrStage<1, [SLOT2, SLOT3]>]>, InstrItinData <tc_05b6c987, [InstrStage<1, [SLOT0, SLOT1]>]>, - InstrItinData <tc_0a2b8c7c, [InstrStage<1, [SLOT0]>]>, InstrItinData <tc_0cd51c76, [InstrStage<1, [SLOT0, SLOT1]>]>, InstrItinData <tc_0dc560de, [InstrStage<1, [SLOT0, SLOT1]>]>, InstrItinData <tc_0fc1ae07, [InstrStage<1, [SLOT0]>]>, InstrItinData <tc_10b97e27, [InstrStage<1, [SLOT2]>]>, - InstrItinData <tc_128f96e3, [InstrStage<1, [SLOT0]>]>, InstrItinData <tc_1372bca1, [InstrStage<1, [SLOT0]>]>, - InstrItinData <tc_1432937d, [InstrStage<1, [SLOT2]>]>, InstrItinData <tc_14cd4cfa, [InstrStage<1, [SLOT2]>]>, InstrItinData <tc_15411484, [InstrStage<1, [SLOT2]>]>, InstrItinData 
<tc_16d0d8d5, [InstrStage<1, [SLOT2, SLOT3]>]>, @@ -397,18 +357,14 @@ class DepScalarItinV5 { InstrItinData <tc_1853ea6d, [InstrStage<1, [SLOT2, SLOT3]>]>, InstrItinData <tc_1b82a277, [InstrStage<1, [SLOT0, SLOT1]>]>, InstrItinData <tc_1b9c9ee5, [InstrStage<1, [SLOT2, SLOT3]>]>, - InstrItinData <tc_1c0005f9, [InstrStage<1, [SLOT3]>]>, InstrItinData <tc_1d5a38a8, [InstrStage<1, [SLOT0, SLOT1]>]>, InstrItinData <tc_1e856f58, [InstrStage<1, [SLOT2, SLOT3]>]>, - InstrItinData <tc_20280784, [InstrStage<1, [SLOT0]>]>, InstrItinData <tc_234a11a5, [InstrStage<1, [SLOT2, SLOT3]>]>, InstrItinData <tc_238d91d2, [InstrStage<1, [SLOT0, SLOT1]>]>, InstrItinData <tc_29175780, [InstrStage<1, [SLOT3]>]>, - InstrItinData <tc_29641329, [InstrStage<1, [SLOT3]>]>, InstrItinData <tc_2a160009, [InstrStage<1, [SLOT0]>]>, InstrItinData <tc_2b2f4060, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>, InstrItinData <tc_2b6f77c6, [InstrStage<1, [SLOT2, SLOT3]>]>, - InstrItinData <tc_2e00db30, [InstrStage<1, [SLOT0]>]>, InstrItinData <tc_2f185f5c, [InstrStage<1, [SLOT2, SLOT3]>]>, InstrItinData <tc_2fc0c436, [InstrStage<1, [SLOT0, SLOT1]>]>, InstrItinData <tc_351fed2d, [InstrStage<1, [SLOT2, SLOT3]>]>, @@ -436,22 +392,19 @@ class DepScalarItinV5 { InstrItinData <tc_523fcf30, [InstrStage<1, [SLOT2, SLOT3]>]>, InstrItinData <tc_5274e61a, [InstrStage<1, [SLOT0, SLOT1]>]>, InstrItinData <tc_52d7bbea, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>, - InstrItinData <tc_53173427, [InstrStage<1, [SLOT3]>]>, InstrItinData <tc_53bc8a6a, [InstrStage<1, [SLOT2, SLOT3]>]>, InstrItinData <tc_53bdb2f6, [InstrStage<1, [SLOT0]>]>, InstrItinData <tc_540fdfbc, [InstrStage<1, [SLOT2, SLOT3]>]>, InstrItinData <tc_55050d58, [InstrStage<1, [SLOT2, SLOT3]>]>, - InstrItinData <tc_56d25411, [InstrStage<1, [SLOT2]>]>, InstrItinData <tc_57288781, [InstrStage<1, [SLOT0, SLOT1]>]>, InstrItinData <tc_594ab548, [InstrStage<1, [SLOT0]>]>, + InstrItinData <tc_59a01ead, [InstrStage<1, [SLOT2]>]>, InstrItinData <tc_5acef64a, [InstrStage<1, [SLOT0, SLOT1]>]>, InstrItinData <tc_5ba5997d, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>, InstrItinData <tc_5eb851fc, [InstrStage<1, [SLOT0]>]>, InstrItinData <tc_5f6847a1, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>, InstrItinData <tc_60571023, [InstrStage<1, [SLOT2, SLOT3]>]>, InstrItinData <tc_609d2efe, [InstrStage<1, [SLOT0, SLOT1]>]>, - InstrItinData <tc_60d76817, [InstrStage<1, [SLOT3]>]>, - InstrItinData <tc_60f5738d, [InstrStage<1, [SLOT3]>]>, InstrItinData <tc_63fe3df7, [InstrStage<1, [SLOT0, SLOT1]>]>, InstrItinData <tc_66888ded, [InstrStage<1, [SLOT0, SLOT1]>]>, InstrItinData <tc_6792d5ff, [InstrStage<1, [SLOT2, SLOT3]>]>, @@ -461,6 +414,7 @@ class DepScalarItinV5 { InstrItinData <tc_6ac37025, [InstrStage<1, [SLOT0]>]>, InstrItinData <tc_6ebb4a12, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>, InstrItinData <tc_6efc556e, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>, + InstrItinData <tc_6fa4db47, [InstrStage<1, [SLOT3]>]>, InstrItinData <tc_73043bf4, [InstrStage<1, [SLOT3]>]>, InstrItinData <tc_746baa8e, [InstrStage<1, [SLOT0]>]>, InstrItinData <tc_74e47fd9, [InstrStage<1, [SLOT0, SLOT1]>]>, @@ -468,18 +422,16 @@ class DepScalarItinV5 { InstrItinData <tc_7a830544, [InstrStage<1, [SLOT2, SLOT3]>]>, InstrItinData <tc_7f881c76, [InstrStage<1, [SLOT0, SLOT1]>]>, InstrItinData <tc_84df2cd3, [InstrStage<1, [SLOT2, SLOT3]>]>, - InstrItinData <tc_85523bcb, [InstrStage<1, [SLOT3]>]>, InstrItinData <tc_855b0b61, [InstrStage<1, [SLOT2, SLOT3]>]>, InstrItinData <tc_87735c3b, [InstrStage<1, [SLOT2, SLOT3]>]>, - 
InstrItinData <tc_88fa1a78, [InstrStage<1, [SLOT3]>]>, InstrItinData <tc_897d1a9d, [InstrStage<1, [SLOT2, SLOT3]>]>, InstrItinData <tc_8b15472a, [InstrStage<1, [SLOT0, SLOT1]>]>, - InstrItinData <tc_8bb285ec, [InstrStage<1, [SLOT0]>]>, InstrItinData <tc_8fd5f294, [InstrStage<1, [SLOT2, SLOT3]>]>, InstrItinData <tc_8fe6b782, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>, InstrItinData <tc_90f3e30c, [InstrStage<1, [SLOT0, SLOT1]>]>, InstrItinData <tc_976ddc4f, [InstrStage<1, [SLOT2, SLOT3]>]>, InstrItinData <tc_97743097, [InstrStage<1, [SLOT2]>]>, + InstrItinData <tc_994333cd, [InstrStage<1, [SLOT3]>]>, InstrItinData <tc_999d32db, [InstrStage<1, [SLOT2]>]>, InstrItinData <tc_99be14ca, [InstrStage<1, [SLOT2, SLOT3]>]>, InstrItinData <tc_9c00ce8d, [InstrStage<1, [SLOT2, SLOT3]>]>, @@ -498,7 +450,6 @@ class DepScalarItinV5 { InstrItinData <tc_b13761ae, [InstrStage<1, [SLOT2]>]>, InstrItinData <tc_b166348b, [InstrStage<1, [SLOT0]>]>, InstrItinData <tc_b44c6e2a, [InstrStage<1, [SLOT2, SLOT3]>]>, - InstrItinData <tc_b5a33b22, [InstrStage<1, [SLOT2]>]>, InstrItinData <tc_b77c481f, [InstrStage<1, [SLOT0, SLOT1]>]>, InstrItinData <tc_b7dd427e, [InstrStage<1, [SLOT0, SLOT1]>]>, InstrItinData <tc_b9488031, [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>, @@ -506,7 +457,6 @@ class DepScalarItinV5 { InstrItinData <tc_b9c4623f, [InstrStage<1, [SLOT3]>]>, InstrItinData <tc_bad2bcaf, [InstrStage<1, [SLOT0, SLOT1]>]>, InstrItinData <tc_bcc96cee, [InstrStage<1, [SLOT2, SLOT3]>]>, - InstrItinData <tc_bd90564c, [InstrStage<1, [SLOT0]>]>, InstrItinData <tc_bde7aaf4, [InstrStage<1, [SLOT0]>]>, InstrItinData <tc_be706f30, [InstrStage<1, [SLOT2, SLOT3]>]>, InstrItinData <tc_c2f7d806, [InstrStage<1, [SLOT2, SLOT3]>]>, @@ -531,24 +481,20 @@ class DepScalarItinV5 { InstrItinData <tc_daa058fa, [InstrStage<1, [SLOT0]>]>, InstrItinData <tc_dbdffe3d, [InstrStage<1, [SLOT2, SLOT3]>]>, InstrItinData <tc_e0739b8c, [InstrStage<1, [SLOT2]>]>, - InstrItinData <tc_e1e0a2dc, [InstrStage<1, [SLOT2]>]>, InstrItinData <tc_e1e99bfa, [InstrStage<1, [SLOT2, SLOT3]>]>, InstrItinData <tc_e216a5db, [InstrStage<1, [SLOT0]>]>, InstrItinData <tc_e421e012, [InstrStage<1, [SLOT0]>]>, - InstrItinData <tc_e6b38e01, [InstrStage<1, [SLOT3]>]>, InstrItinData <tc_e7624c08, [InstrStage<1, [SLOT0]>]>, InstrItinData <tc_e7d02c66, [InstrStage<1, [SLOT0]>]>, InstrItinData <tc_e913dc32, [InstrStage<1, [SLOT2, SLOT3]>]>, InstrItinData <tc_e9c822f7, [InstrStage<1, [SLOT0, SLOT1]>]>, InstrItinData <tc_e9fae2d6, [InstrStage<1, [SLOT2, SLOT3]>]>, - InstrItinData <tc_ef20db1c, [InstrStage<1, [SLOT3]>]>, InstrItinData <tc_ef52ed71, [InstrStage<1, [SLOT0, SLOT1]>]>, InstrItinData <tc_ef84f62f, [InstrStage<1, [SLOT2, SLOT3]>]>, InstrItinData <tc_f2704b9a, [InstrStage<1, [SLOT2, SLOT3]>]>, InstrItinData <tc_f3eaa14b, [InstrStage<1, [SLOT2, SLOT3]>]>, InstrItinData <tc_f47d212f, [InstrStage<1, [SLOT0, SLOT1]>]>, InstrItinData <tc_f49e76f4, [InstrStage<1, [SLOT2, SLOT3]>]>, - InstrItinData <tc_f4f43fb5, [InstrStage<1, [SLOT0]>]>, InstrItinData <tc_f7dd9c9f, [InstrStage<1, [SLOT0]>]>, InstrItinData <tc_f86c328a, [InstrStage<1, [SLOT0, SLOT1]>]>, InstrItinData <tc_f8eeed7a, [InstrStage<1, [SLOT2, SLOT3]>]>, @@ -558,10 +504,6 @@ class DepScalarItinV5 { class DepScalarItinV55 { list<InstrItinData> DepScalarItinV55_list = [ - InstrItinData <tc_0077f68c, /*tc_st*/ - [InstrStage<1, [SLOT0]>], [2], - [Hex_FWD]>, - InstrItinData <tc_00afc57e, /*tc_2*/ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2], [Hex_FWD, Hex_FWD]>, @@ -586,10 +528,6 @@ class DepScalarItinV55 { [InstrStage<1, 
[SLOT0, SLOT1]>], [1, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData <tc_0a2b8c7c, /*tc_3stall*/ - [InstrStage<1, [SLOT0]>], [4, 1], - [Hex_FWD, Hex_FWD]>, - InstrItinData <tc_0cd51c76, /*tc_ld*/ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -606,18 +544,10 @@ class DepScalarItinV55 { [InstrStage<1, [SLOT2]>], [2, 1], [Hex_FWD, Hex_FWD]>, - InstrItinData <tc_128f96e3, /*tc_st*/ - [InstrStage<1, [SLOT0]>], [1, 2], - [Hex_FWD, Hex_FWD]>, - InstrItinData <tc_1372bca1, /*tc_3stall*/ [InstrStage<1, [SLOT0]>], [4, 1, 1], [Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData <tc_1432937d, /*tc_2early*/ - [InstrStage<1, [SLOT2]>], [1, 1], - [Hex_FWD, Hex_FWD]>, - InstrItinData <tc_14cd4cfa, /*tc_2early*/ [InstrStage<1, [SLOT2]>], [2], [Hex_FWD]>, @@ -646,10 +576,6 @@ class DepScalarItinV55 { [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData <tc_1c0005f9, /*tc_3x*/ - [InstrStage<1, [SLOT3]>], [4, 1], - [Hex_FWD, Hex_FWD]>, - InstrItinData <tc_1d5a38a8, /*tc_ld*/ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -658,10 +584,6 @@ class DepScalarItinV55 { [InstrStage<1, [SLOT2, SLOT3]>], [3, 1, 1], [Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData <tc_20280784, /*tc_3stall*/ - [InstrStage<1, [SLOT0]>], [4, 1], - [Hex_FWD, Hex_FWD]>, - InstrItinData <tc_234a11a5, /*tc_3x*/ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2], [Hex_FWD, Hex_FWD]>, @@ -674,10 +596,6 @@ class DepScalarItinV55 { [InstrStage<1, [SLOT3]>], [4, 2], [Hex_FWD, Hex_FWD]>, - InstrItinData <tc_29641329, /*tc_3x*/ - [InstrStage<1, [SLOT3]>], [4, 1, 1], - [Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData <tc_2a160009, /*tc_2early*/ [InstrStage<1, [SLOT0]>], [], []>, @@ -690,10 +608,6 @@ class DepScalarItinV55 { [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData <tc_2e00db30, /*tc_st*/ - [InstrStage<1, [SLOT0]>], [], - []>, - InstrItinData <tc_2f185f5c, /*tc_2early*/ [InstrStage<1, [SLOT2, SLOT3]>], [2, 2], [Hex_FWD, Hex_FWD]>, @@ -802,10 +716,6 @@ class DepScalarItinV55 { [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [], []>, - InstrItinData <tc_53173427, /*tc_3x*/ - [InstrStage<1, [SLOT3]>], [1, 1], - [Hex_FWD, Hex_FWD]>, - InstrItinData <tc_53bc8a6a, /*tc_2early*/ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -822,10 +732,6 @@ class DepScalarItinV55 { [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData <tc_56d25411, /*tc_3stall*/ - [InstrStage<1, [SLOT2]>], [4, 1], - [Hex_FWD, Hex_FWD]>, - InstrItinData <tc_57288781, /*tc_st*/ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2], [Hex_FWD, Hex_FWD]>, @@ -834,6 +740,10 @@ class DepScalarItinV55 { [InstrStage<1, [SLOT0]>], [2, 1, 2, 3], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData <tc_59a01ead, /*tc_2early*/ + [InstrStage<1, [SLOT2]>], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData <tc_5acef64a, /*tc_ld*/ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -858,14 +768,6 @@ class DepScalarItinV55 { [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData <tc_60d76817, /*tc_3x*/ - [InstrStage<1, [SLOT3]>], [], - []>, - - InstrItinData <tc_60f5738d, /*tc_3x*/ - [InstrStage<1, [SLOT3]>], [1], - [Hex_FWD]>, - InstrItinData <tc_63fe3df7, /*tc_ld*/ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 3, 1, 2], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -902,6 +804,10 @@ class DepScalarItinV55 { [InstrStage<1, 
[SLOT0, SLOT1, SLOT2, SLOT3]>], [], []>, + InstrItinData <tc_6fa4db47, /*tc_3x*/ + [InstrStage<1, [SLOT3]>], [4, 2], + [Hex_FWD, Hex_FWD]>, + InstrItinData <tc_73043bf4, /*tc_2early*/ [InstrStage<1, [SLOT3]>], [1, 2], [Hex_FWD, Hex_FWD]>, @@ -930,10 +836,6 @@ class DepScalarItinV55 { [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData <tc_85523bcb, /*tc_3x*/ - [InstrStage<1, [SLOT3]>], [4, 2, 2], - [Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData <tc_855b0b61, /*tc_2early*/ [InstrStage<1, [SLOT2, SLOT3]>], [1, 1, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -942,10 +844,6 @@ class DepScalarItinV55 { [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData <tc_88fa1a78, /*tc_3x*/ - [InstrStage<1, [SLOT3]>], [4, 1], - [Hex_FWD, Hex_FWD]>, - InstrItinData <tc_897d1a9d, /*tc_1*/ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -954,10 +852,6 @@ class DepScalarItinV55 { [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData <tc_8bb285ec, /*tc_st*/ - [InstrStage<1, [SLOT0]>], [1], - [Hex_FWD]>, - InstrItinData <tc_8fd5f294, /*tc_3x*/ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1], [Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -978,6 +872,10 @@ class DepScalarItinV55 { [InstrStage<1, [SLOT2]>], [2, 1], [Hex_FWD, Hex_FWD]>, + InstrItinData <tc_994333cd, /*tc_3x*/ + [InstrStage<1, [SLOT3]>], [4, 1], + [Hex_FWD, Hex_FWD]>, + InstrItinData <tc_999d32db, /*tc_2early*/ [InstrStage<1, [SLOT2]>], [1], [Hex_FWD]>, @@ -1050,10 +948,6 @@ class DepScalarItinV55 { [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData <tc_b5a33b22, /*tc_2early*/ - [InstrStage<1, [SLOT2]>], [3, 2, 2], - [Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData <tc_b77c481f, /*tc_ld*/ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -1082,10 +976,6 @@ class DepScalarItinV55 { [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 1], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData <tc_bd90564c, /*tc_st*/ - [InstrStage<1, [SLOT0]>], [1, 2], - [Hex_FWD, Hex_FWD]>, - InstrItinData <tc_bde7aaf4, /*tc_3stall*/ [InstrStage<1, [SLOT0]>], [3, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -1182,10 +1072,6 @@ class DepScalarItinV55 { [InstrStage<1, [SLOT2]>], [2, 1], [Hex_FWD, Hex_FWD]>, - InstrItinData <tc_e1e0a2dc, /*tc_2early*/ - [InstrStage<1, [SLOT2]>], [], - []>, - InstrItinData <tc_e1e99bfa, /*tc_2early*/ [InstrStage<1, [SLOT2, SLOT3]>], [2, 2], [Hex_FWD, Hex_FWD]>, @@ -1198,10 +1084,6 @@ class DepScalarItinV55 { [InstrStage<1, [SLOT0]>], [3, 1, 1, 2, 3], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData <tc_e6b38e01, /*tc_3x*/ - [InstrStage<1, [SLOT3]>], [4, 2], - [Hex_FWD, Hex_FWD]>, - InstrItinData <tc_e7624c08, /*tc_3stall*/ [InstrStage<1, [SLOT0]>], [3], [Hex_FWD]>, @@ -1222,10 +1104,6 @@ class DepScalarItinV55 { [InstrStage<1, [SLOT2, SLOT3]>], [2, 2], [Hex_FWD, Hex_FWD]>, - InstrItinData <tc_ef20db1c, /*tc_3x*/ - [InstrStage<1, [SLOT3]>], [4, 2], - [Hex_FWD, Hex_FWD]>, - InstrItinData <tc_ef52ed71, /*tc_ld*/ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -1250,10 +1128,6 @@ class DepScalarItinV55 { [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData <tc_f4f43fb5, /*tc_ld*/ - [InstrStage<1, [SLOT0]>], [4, 1, 1], - [Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData <tc_f7dd9c9f, /*tc_st*/ [InstrStage<1, [SLOT0]>], [1, 2, 3], [Hex_FWD, Hex_FWD, 
Hex_FWD]>, @@ -1278,10 +1152,6 @@ class DepScalarItinV55 { class DepScalarItinV60 { list<InstrItinData> DepScalarItinV60_list = [ - InstrItinData <tc_0077f68c, /*tc_st*/ - [InstrStage<1, [SLOT0]>], [2], - [Hex_FWD]>, - InstrItinData <tc_00afc57e, /*tc_2*/ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2], [Hex_FWD, Hex_FWD]>, @@ -1306,10 +1176,6 @@ class DepScalarItinV60 { [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData <tc_0a2b8c7c, /*tc_3stall*/ - [InstrStage<1, [SLOT0]>], [4, 1], - [Hex_FWD, Hex_FWD]>, - InstrItinData <tc_0cd51c76, /*tc_ld*/ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -1326,18 +1192,10 @@ class DepScalarItinV60 { [InstrStage<1, [SLOT2]>], [2, 1], [Hex_FWD, Hex_FWD]>, - InstrItinData <tc_128f96e3, /*tc_3stall*/ - [InstrStage<1, [SLOT0]>], [1, 1], - [Hex_FWD, Hex_FWD]>, - InstrItinData <tc_1372bca1, /*tc_3stall*/ [InstrStage<1, [SLOT0]>], [4, 1, 1], [Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData <tc_1432937d, /*tc_2early*/ - [InstrStage<1, [SLOT2]>], [1, 1], - [Hex_FWD, Hex_FWD]>, - InstrItinData <tc_14cd4cfa, /*tc_2early*/ [InstrStage<1, [SLOT2]>], [2], [Hex_FWD]>, @@ -1366,10 +1224,6 @@ class DepScalarItinV60 { [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData <tc_1c0005f9, /*tc_3stall*/ - [InstrStage<1, [SLOT3]>], [4, 1], - [Hex_FWD, Hex_FWD]>, - InstrItinData <tc_1d5a38a8, /*tc_ld*/ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -1378,10 +1232,6 @@ class DepScalarItinV60 { [InstrStage<1, [SLOT2, SLOT3]>], [3, 1, 1], [Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData <tc_20280784, /*tc_3stall*/ - [InstrStage<1, [SLOT0]>], [4, 1], - [Hex_FWD, Hex_FWD]>, - InstrItinData <tc_234a11a5, /*tc_2*/ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2], [Hex_FWD, Hex_FWD]>, @@ -1394,10 +1244,6 @@ class DepScalarItinV60 { [InstrStage<1, [SLOT3]>], [4, 2], [Hex_FWD, Hex_FWD]>, - InstrItinData <tc_29641329, /*tc_3stall*/ - [InstrStage<1, [SLOT3]>], [4, 1, 1], - [Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData <tc_2a160009, /*tc_2early*/ [InstrStage<1, [SLOT0]>], [], []>, @@ -1410,10 +1256,6 @@ class DepScalarItinV60 { [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData <tc_2e00db30, /*tc_3stall*/ - [InstrStage<1, [SLOT0]>], [], - []>, - InstrItinData <tc_2f185f5c, /*tc_2early*/ [InstrStage<1, [SLOT2, SLOT3]>], [2, 2], [Hex_FWD, Hex_FWD]>, @@ -1522,10 +1364,6 @@ class DepScalarItinV60 { [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [], []>, - InstrItinData <tc_53173427, /*tc_3stall*/ - [InstrStage<1, [SLOT3]>], [1, 1], - [Hex_FWD, Hex_FWD]>, - InstrItinData <tc_53bc8a6a, /*tc_2early*/ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -1542,10 +1380,6 @@ class DepScalarItinV60 { [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData <tc_56d25411, /*tc_3stall*/ - [InstrStage<1, [SLOT2]>], [4, 1], - [Hex_FWD, Hex_FWD]>, - InstrItinData <tc_57288781, /*tc_st*/ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2], [Hex_FWD, Hex_FWD]>, @@ -1554,6 +1388,10 @@ class DepScalarItinV60 { [InstrStage<1, [SLOT0]>], [2, 1, 2, 3], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData <tc_59a01ead, /*tc_2early*/ + [InstrStage<1, [SLOT2]>], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData <tc_5acef64a, /*tc_ld*/ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -1578,14 +1416,6 @@ class DepScalarItinV60 { [InstrStage<1, [SLOT0, 
SLOT1]>], [3, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData <tc_60d76817, /*tc_3stall*/ - [InstrStage<1, [SLOT3]>], [], - []>, - - InstrItinData <tc_60f5738d, /*tc_3stall*/ - [InstrStage<1, [SLOT3]>], [1], - [Hex_FWD]>, - InstrItinData <tc_63fe3df7, /*tc_ld*/ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 3, 1, 2], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -1622,6 +1452,10 @@ class DepScalarItinV60 { [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [], []>, + InstrItinData <tc_6fa4db47, /*tc_3x*/ + [InstrStage<1, [SLOT3]>], [4, 2], + [Hex_FWD, Hex_FWD]>, + InstrItinData <tc_73043bf4, /*tc_2early*/ [InstrStage<1, [SLOT3]>], [1, 2], [Hex_FWD, Hex_FWD]>, @@ -1650,10 +1484,6 @@ class DepScalarItinV60 { [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData <tc_85523bcb, /*tc_3x*/ - [InstrStage<1, [SLOT3]>], [4, 1, 2], - [Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData <tc_855b0b61, /*tc_2early*/ [InstrStage<1, [SLOT2, SLOT3]>], [1, 1, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -1662,10 +1492,6 @@ class DepScalarItinV60 { [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData <tc_88fa1a78, /*tc_3x*/ - [InstrStage<1, [SLOT3]>], [4, 1], - [Hex_FWD, Hex_FWD]>, - InstrItinData <tc_897d1a9d, /*tc_1*/ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -1674,10 +1500,6 @@ class DepScalarItinV60 { [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData <tc_8bb285ec, /*tc_st*/ - [InstrStage<1, [SLOT0]>], [1], - [Hex_FWD]>, - InstrItinData <tc_8fd5f294, /*tc_3x*/ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1], [Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -1698,6 +1520,10 @@ class DepScalarItinV60 { [InstrStage<1, [SLOT2]>], [2, 1], [Hex_FWD, Hex_FWD]>, + InstrItinData <tc_994333cd, /*tc_3x*/ + [InstrStage<1, [SLOT3]>], [4, 1], + [Hex_FWD, Hex_FWD]>, + InstrItinData <tc_999d32db, /*tc_2early*/ [InstrStage<1, [SLOT2]>], [1], [Hex_FWD]>, @@ -1770,10 +1596,6 @@ class DepScalarItinV60 { [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData <tc_b5a33b22, /*tc_2early*/ - [InstrStage<1, [SLOT2]>], [3, 2, 2], - [Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData <tc_b77c481f, /*tc_ld*/ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -1802,10 +1624,6 @@ class DepScalarItinV60 { [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 1], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData <tc_bd90564c, /*tc_st*/ - [InstrStage<1, [SLOT0]>], [1, 2], - [Hex_FWD, Hex_FWD]>, - InstrItinData <tc_bde7aaf4, /*tc_newvjump*/ [InstrStage<1, [SLOT0]>], [3, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -1902,10 +1720,6 @@ class DepScalarItinV60 { [InstrStage<1, [SLOT2]>], [2, 1], [Hex_FWD, Hex_FWD]>, - InstrItinData <tc_e1e0a2dc, /*tc_3stall*/ - [InstrStage<1, [SLOT2]>], [], - []>, - InstrItinData <tc_e1e99bfa, /*tc_2early*/ [InstrStage<1, [SLOT2, SLOT3]>], [2, 2], [Hex_FWD, Hex_FWD]>, @@ -1918,10 +1732,6 @@ class DepScalarItinV60 { [InstrStage<1, [SLOT0]>], [3, 1, 1, 2, 3], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData <tc_e6b38e01, /*tc_3x*/ - [InstrStage<1, [SLOT3]>], [4, 2], - [Hex_FWD, Hex_FWD]>, - InstrItinData <tc_e7624c08, /*tc_newvjump*/ [InstrStage<1, [SLOT0]>], [3], [Hex_FWD]>, @@ -1942,10 +1752,6 @@ class DepScalarItinV60 { [InstrStage<1, [SLOT2, SLOT3]>], [2, 2], [Hex_FWD, Hex_FWD]>, - InstrItinData <tc_ef20db1c, /*tc_3x*/ - [InstrStage<1, [SLOT3]>], [4, 1], - [Hex_FWD, Hex_FWD]>, - InstrItinData 
<tc_ef52ed71, /*tc_ld*/ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -1970,10 +1776,6 @@ class DepScalarItinV60 { [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData <tc_f4f43fb5, /*tc_ld*/ - [InstrStage<1, [SLOT0]>], [4, 1, 1], - [Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData <tc_f7dd9c9f, /*tc_st*/ [InstrStage<1, [SLOT0]>], [1, 2, 3], [Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -1996,765 +1798,8 @@ class DepScalarItinV60 { ]; } -class DepScalarItinV60se { - list<InstrItinData> DepScalarItinV60se_list = [ - InstrItinData <tc_0077f68c, /*tc_st*/ - [InstrStage<1, [SLOT0]>], [2], - [Hex_FWD]>, - - InstrItinData <tc_00afc57e, /*tc_2*/ - [InstrStage<1, [SLOT2, SLOT3]>], [4, 2], - [Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_00e7c26e, /*tc_st*/ - [InstrStage<1, [SLOT0]>], [1], - [Hex_FWD]>, - - InstrItinData <tc_03220ffa, /*tc_ld*/ - [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2, 2], - [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_038a1342, /*tc_4x*/ - [InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1, 2], - [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_04c9decc, /*tc_3stall*/ - [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1], - [Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_05b6c987, /*tc_st*/ - [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2], - [Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_0a2b8c7c, /*tc_3stall*/ - [InstrStage<1, [SLOT0]>], [4, 1], - [Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_0cd51c76, /*tc_ld*/ - [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2, 2], - [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_0dc560de, /*tc_st*/ - [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2, 2], - [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_0fc1ae07, /*tc_ld*/ - [InstrStage<1, [SLOT0]>], [2], - [Hex_FWD]>, - - InstrItinData <tc_10b97e27, /*tc_2early*/ - [InstrStage<1, [SLOT2], 0>, - InstrStage<1, [CVI_ST]>], [2, 1], - [Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_128f96e3, /*tc_3stall*/ - [InstrStage<1, [SLOT0]>], [1, 1], - [Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_1372bca1, /*tc_3stall*/ - [InstrStage<1, [SLOT0]>], [4, 1, 1], - [Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_1432937d, /*tc_2early*/ - [InstrStage<1, [SLOT2]>], [1, 1], - [Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_14cd4cfa, /*tc_2early*/ - [InstrStage<1, [SLOT2], 0>, - InstrStage<1, [CVI_ST]>], [2], - [Hex_FWD]>, - - InstrItinData <tc_15411484, /*tc_2early*/ - [InstrStage<1, [SLOT2], 0>, - InstrStage<1, [CVI_ST]>], [1], - [Hex_FWD]>, - - InstrItinData <tc_16d0d8d5, /*tc_3x*/ - [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 2], - [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_181af5d0, /*tc_2early*/ - [InstrStage<1, [SLOT2], 0>, - InstrStage<1, [CVI_ST]>], [3, 1], - [Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_1853ea6d, /*tc_3x*/ - [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 2], - [Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_1b82a277, /*tc_1*/ - [InstrStage<1, [SLOT0, SLOT1]>], [3], - [Hex_FWD]>, - - InstrItinData <tc_1b9c9ee5, /*tc_2*/ - [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2], - [Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_1c0005f9, /*tc_3stall*/ - [InstrStage<1, [SLOT3]>], [4, 1], - [Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_1d5a38a8, /*tc_ld*/ - [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 2], - [Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_1e856f58, /*tc_2early*/ - [InstrStage<1, [SLOT2, SLOT3]>], [3, 1, 1], - [Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData 
<tc_20280784, /*tc_3stall*/ - [InstrStage<1, [SLOT0]>], [4, 1], - [Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_234a11a5, /*tc_2*/ - [InstrStage<1, [SLOT2, SLOT3]>], [4, 2], - [Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_238d91d2, /*tc_st*/ - [InstrStage<1, [SLOT0, SLOT1]>], [2, 2, 2], - [Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_29175780, /*tc_3x*/ - [InstrStage<1, [SLOT3]>], [4, 2], - [Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_29641329, /*tc_3stall*/ - [InstrStage<1, [SLOT3]>], [4, 1, 1], - [Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_2a160009, /*tc_2early*/ - [InstrStage<1, [SLOT0], 0>, - InstrStage<1, [CVI_ST]>], [], - []>, - - InstrItinData <tc_2b2f4060, /*tc_1*/ - [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 3, 2, 2], - [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_2b6f77c6, /*tc_2*/ - [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2], - [Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_2e00db30, /*tc_3stall*/ - [InstrStage<1, [SLOT0]>], [], - []>, - - InstrItinData <tc_2f185f5c, /*tc_2early*/ - [InstrStage<1, [SLOT2, SLOT3], 0>, - InstrStage<1, [CVI_ST]>], [2, 2], - [Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_2fc0c436, /*tc_ld*/ - [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2], - [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_351fed2d, /*tc_2early*/ - [InstrStage<1, [SLOT2, SLOT3]>], [3, 1], - [Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_3669266a, /*tc_2early*/ - [InstrStage<1, [SLOT2, SLOT3], 0>, - InstrStage<1, [CVI_ST]>], [2], - [Hex_FWD]>, - - InstrItinData <tc_367f7f3d, /*tc_st*/ - [InstrStage<1, [SLOT0]>], [], - []>, - - InstrItinData <tc_36c68ad1, /*tc_ld*/ - [InstrStage<1, [SLOT0, SLOT1]>], [], - []>, - - InstrItinData <tc_395dc00f, /*tc_newvjump*/ - [InstrStage<1, [SLOT0], 0>, - InstrStage<1, [CVI_ST]>], [3, 3, 2], - [Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_3bc2c5d3, /*tc_newvjump*/ - [InstrStage<1, [SLOT0], 0>, - InstrStage<1, [CVI_ST]>], [2], - [Hex_FWD]>, - - InstrItinData <tc_3cb8ea06, /*tc_2early*/ - [InstrStage<1, [SLOT2, SLOT3], 0>, - InstrStage<1, [CVI_ST]>], [1, 2], - [Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_3d04548d, /*tc_newvjump*/ - [InstrStage<1, [SLOT0], 0>, - InstrStage<1, [CVI_ST]>], [3, 2], - [Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_3da80ba5, /*tc_ld*/ - [InstrStage<1, [SLOT0]>], [1], - [Hex_FWD]>, - - InstrItinData <tc_3e07fb90, /*tc_st*/ - [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 1, 2, 2], - [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_41d5298e, /*tc_2*/ - [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2], - [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_4403ca65, /*tc_ld*/ - [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 2, 2], - [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_44126683, /*tc_st*/ - [InstrStage<1, [SLOT0]>], [1, 2, 2], - [Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_452f85af, /*tc_1*/ - [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2], - [Hex_FWD]>, - - InstrItinData <tc_481e5e5c, /*tc_2early*/ - [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2], - [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_49eb22c8, /*tc_1*/ - [InstrStage<1, [SLOT2, SLOT3], 0>, - InstrStage<1, [CVI_ST]>], [3, 2, 2], - [Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_4ca572d4, /*tc_3stall*/ - [InstrStage<1, [SLOT3]>], [], - []>, - - InstrItinData <tc_4d9914c9, /*tc_ld*/ - [InstrStage<1, [SLOT0]>], [1, 2], - [Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_4d99bca9, /*tc_4x*/ - [InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1], - [Hex_FWD, Hex_FWD, Hex_FWD]>, 
- - InstrItinData <tc_4f7cd700, /*tc_3stall*/ - [InstrStage<1, [SLOT3]>], [2, 1], - [Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_513bef45, /*tc_newvjump*/ - [InstrStage<1, [SLOT0], 0>, - InstrStage<1, [CVI_ST]>], [3, 2, 2], - [Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_51b866be, /*tc_newvjump*/ - [InstrStage<1, [SLOT0], 0>, - InstrStage<1, [CVI_ST]>], [3, 2, 2], - [Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_523fcf30, /*tc_3stall*/ - [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 1, 1, 2], - [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_5274e61a, /*tc_st*/ - [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 1, 2, 2], - [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_52d7bbea, /*tc_2early*/ - [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3], 0>, - InstrStage<1, [CVI_ST]>], [], - []>, - - InstrItinData <tc_53173427, /*tc_3stall*/ - [InstrStage<1, [SLOT3]>], [1, 1], - [Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_53bc8a6a, /*tc_2early*/ - [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2], - [Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_53bdb2f6, /*tc_st*/ - [InstrStage<1, [SLOT0]>], [3, 2, 3], - [Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_540fdfbc, /*tc_1*/ - [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2], - [Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_55050d58, /*tc_1*/ - [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2], - [Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_56d25411, /*tc_3stall*/ - [InstrStage<1, [SLOT2]>], [4, 1], - [Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_57288781, /*tc_st*/ - [InstrStage<1, [SLOT0, SLOT1]>], [1, 2], - [Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_594ab548, /*tc_st*/ - [InstrStage<1, [SLOT0]>], [2, 1, 2, 3], - [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_5acef64a, /*tc_ld*/ - [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 2], - [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_5ba5997d, /*tc_2*/ - [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2], - [Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_5eb851fc, /*tc_newvjump*/ - [InstrStage<1, [SLOT0], 0>, - InstrStage<1, [CVI_ST]>], [2, 3, 2], - [Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_5f6847a1, /*tc_1*/ - [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 3, 2], - [Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_60571023, /*tc_3x*/ - [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1, 2], - [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_609d2efe, /*tc_1*/ - [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 2], - [Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_60d76817, /*tc_3stall*/ - [InstrStage<1, [SLOT3]>], [], - []>, - - InstrItinData <tc_60f5738d, /*tc_3stall*/ - [InstrStage<1, [SLOT3]>], [1], - [Hex_FWD]>, - - InstrItinData <tc_63fe3df7, /*tc_ld*/ - [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 3, 1, 2], - [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_66888ded, /*tc_st*/ - [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 2], - [Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_6792d5ff, /*tc_4x*/ - [InstrStage<1, [SLOT2, SLOT3]>], [5, 1, 1], - [Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_681a2300, /*tc_3stall*/ - [InstrStage<1, [SLOT2], 0>, - InstrStage<1, [CVI_ST]>], [2], - [Hex_FWD]>, - - InstrItinData <tc_68cb12ce, /*tc_1*/ - [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2], - [Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_6aa5711a, /*tc_ld*/ - [InstrStage<1, [SLOT0]>], [4, 1], - [Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_6ac37025, /*tc_st*/ - [InstrStage<1, [SLOT0]>], [2, 2, 3], - 
[Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_6ebb4a12, /*tc_2early*/ - [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 1, 2], - [Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_6efc556e, /*tc_1*/ - [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [], - []>, - - InstrItinData <tc_73043bf4, /*tc_2early*/ - [InstrStage<1, [SLOT3], 0>, - InstrStage<1, [CVI_ST]>], [1, 2], - [Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_746baa8e, /*tc_newvjump*/ - [InstrStage<1, [SLOT0], 0>, - InstrStage<1, [CVI_ST]>], [3, 2], - [Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_74e47fd9, /*tc_st*/ - [InstrStage<1, [SLOT0, SLOT1]>], [3, 3, 1, 2, 2], - [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_7934b9df, /*tc_3x*/ - [InstrStage<1, [SLOT3]>], [2, 1], - [Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_7a830544, /*tc_2early*/ - [InstrStage<1, [SLOT2, SLOT3]>], [3, 1, 2], - [Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_7f881c76, /*tc_ld*/ - [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2], - [Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_84df2cd3, /*tc_2*/ - [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2], - [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_85523bcb, /*tc_3x*/ - [InstrStage<1, [SLOT3]>], [4, 1, 2], - [Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_855b0b61, /*tc_2early*/ - [InstrStage<1, [SLOT2, SLOT3], 0>, - InstrStage<1, [CVI_ST]>], [1, 1, 2], - [Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_87735c3b, /*tc_2*/ - [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2, 2], - [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_88fa1a78, /*tc_3x*/ - [InstrStage<1, [SLOT3]>], [4, 1], - [Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_897d1a9d, /*tc_1*/ - [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2], - [Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_8b15472a, /*tc_st*/ - [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 2], - [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_8bb285ec, /*tc_st*/ - [InstrStage<1, [SLOT0]>], [1], - [Hex_FWD]>, - - InstrItinData <tc_8fd5f294, /*tc_3x*/ - [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1], - [Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_8fe6b782, /*tc_1*/ - [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2], - [Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_90f3e30c, /*tc_2early*/ - [InstrStage<1, [SLOT0, SLOT1]>], [1, 2], - [Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_976ddc4f, /*tc_2*/ - [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2], - [Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_97743097, /*tc_2early*/ - [InstrStage<1, [SLOT2], 0>, - InstrStage<1, [CVI_ST]>], [2, 1], - [Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_999d32db, /*tc_2early*/ - [InstrStage<1, [SLOT2]>], [1], - [Hex_FWD]>, - - InstrItinData <tc_99be14ca, /*tc_2early*/ - [InstrStage<1, [SLOT2, SLOT3], 0>, - InstrStage<1, [CVI_ST]>], [1, 2, 2], - [Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_9c00ce8d, /*tc_4x*/ - [InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1, 1], - [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_9c98e8af, /*tc_ld*/ - [InstrStage<1, [SLOT0, SLOT1]>], [4, 2], - [Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_9d5941c7, /*tc_st*/ - [InstrStage<1, [SLOT0]>], [3, 1, 2, 2, 3], - [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_9ef61e5c, /*tc_ld*/ - [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 1, 2], - [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_9faf76ae, /*tc_2early*/ - [InstrStage<1, [SLOT2], 0>, - InstrStage<1, [CVI_ST]>], [1], - [Hex_FWD]>, - - InstrItinData <tc_9fdb5406, 
/*tc_st*/ - [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2, 2], - [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_a21dc435, /*tc_3x*/ - [InstrStage<1, [SLOT3]>], [4, 1], - [Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_a27582fa, /*tc_2early*/ - [InstrStage<1, [SLOT2, SLOT3], 0>, - InstrStage<1, [CVI_ST]>], [2], - [Hex_FWD]>, - - InstrItinData <tc_a46f0df5, /*tc_2early*/ - [InstrStage<1, [SLOT2, SLOT3], 0>, - InstrStage<1, [CVI_ST]>], [3, 2], - [Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_a788683e, /*tc_st*/ - [InstrStage<1, [SLOT0, SLOT1]>], [2, 2], - [Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_a8acdac0, /*tc_st*/ - [InstrStage<1, [SLOT0]>], [1, 2, 2, 3], - [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_a904d137, /*tc_1*/ - [InstrStage<1, [SLOT0, SLOT1]>], [3, 2], - [Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_adb14c66, /*tc_st*/ - [InstrStage<1, [SLOT0]>], [2, 1, 1, 2, 3], - [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_b13761ae, /*tc_2early*/ - [InstrStage<1, [SLOT2]>], [], - []>, - - InstrItinData <tc_b166348b, /*tc_st*/ - [InstrStage<1, [SLOT0]>], [1, 1, 2, 3], - [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_b44c6e2a, /*tc_2*/ - [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2], - [Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_b5a33b22, /*tc_2early*/ - [InstrStage<1, [SLOT2], 0>, - InstrStage<1, [CVI_ST]>], [3, 2, 2], - [Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_b77c481f, /*tc_ld*/ - [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2], - [Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_b7dd427e, /*tc_ld*/ - [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 1, 1, 2], - [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_b9488031, /*tc_1*/ - [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2], - [Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_b9c0b731, /*tc_3x*/ - [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 2], - [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_b9c4623f, /*tc_2*/ - [InstrStage<1, [SLOT3]>], [4, 2], - [Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_bad2bcaf, /*tc_ld*/ - [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 1, 2], - [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_bcc96cee, /*tc_3x*/ - [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 1], - [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_bd90564c, /*tc_st*/ - [InstrStage<1, [SLOT0]>], [1, 2], - [Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_bde7aaf4, /*tc_newvjump*/ - [InstrStage<1, [SLOT0], 0>, - InstrStage<1, [CVI_ST]>], [3, 2, 2], - [Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_be706f30, /*tc_1*/ - [InstrStage<1, [SLOT2, SLOT3]>], [3, 2], - [Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_c2f7d806, /*tc_2*/ - [InstrStage<1, [SLOT2, SLOT3]>], [4, 2], - [Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_c5e2426d, /*tc_3stall*/ - [InstrStage<1, [SLOT3]>], [2, 2], - [Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_c6aa82f7, /*tc_2early*/ - [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 1, 1], - [Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_c6ce9b3f, /*tc_3x*/ - [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1, 2], - [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_c6ebf8dd, /*tc_3stall*/ - [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1], - [Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_c74f796f, /*tc_2*/ - [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2], - [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_c82dc1ff, /*tc_3x*/ - [InstrStage<1, [SLOT3]>], [1], - [Hex_FWD]>, - - InstrItinData 
<tc_caaebcba, /*tc_3stall*/ - [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 1, 1, 1], - [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_cd7374a0, /*tc_st*/ - [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 1, 2, 2], - [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_cde8b071, /*tc_1*/ - [InstrStage<1, [SLOT2, SLOT3]>], [3, 2], - [Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_cf47a43f, /*tc_ld*/ - [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 2, 2], - [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_cf59f215, /*tc_3x*/ - [InstrStage<1, [SLOT3]>], [2, 2], - [Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_d088982c, /*tc_2*/ - [InstrStage<1, [SLOT2, SLOT3]>], [4, 2], - [Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_d1090e34, /*tc_ld*/ - [InstrStage<1, [SLOT0, SLOT1]>], [4, 2], - [Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_d24b2d85, /*tc_st*/ - [InstrStage<1, [SLOT0]>], [3, 3, 1, 2, 3], - [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_d580173f, /*tc_4x*/ - [InstrStage<1, [SLOT2, SLOT3]>], [5, 2, 1, 1], - [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_d6bf0472, /*tc_1*/ - [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [3, 2, 2, 2], - [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_d9709180, /*tc_st*/ - [InstrStage<1, [SLOT0, SLOT1]>], [1, 1, 2, 2], - [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_d9f95eef, /*tc_st*/ - [InstrStage<1, [SLOT0]>], [3, 2, 1, 2, 3], - [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_daa058fa, /*tc_3stall*/ - [InstrStage<1, [SLOT0]>], [1, 1], - [Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_dbdffe3d, /*tc_1*/ - [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2], - [Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_e0739b8c, /*tc_2early*/ - [InstrStage<1, [SLOT2], 0>, - InstrStage<1, [CVI_ST]>], [2, 1], - [Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_e1e0a2dc, /*tc_3stall*/ - [InstrStage<1, [SLOT2], 0>, - InstrStage<1, [CVI_ST]>], [], - []>, - - InstrItinData <tc_e1e99bfa, /*tc_2early*/ - [InstrStage<1, [SLOT2, SLOT3], 0>, - InstrStage<1, [CVI_ST]>], [2, 2], - [Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_e216a5db, /*tc_ld*/ - [InstrStage<1, [SLOT0]>], [4, 2, 2], - [Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_e421e012, /*tc_st*/ - [InstrStage<1, [SLOT0]>], [3, 1, 1, 2, 3], - [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_e6b38e01, /*tc_3x*/ - [InstrStage<1, [SLOT3]>], [4, 2], - [Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_e7624c08, /*tc_newvjump*/ - [InstrStage<1, [SLOT0], 0>, - InstrStage<1, [CVI_ST]>], [3], - [Hex_FWD]>, - - InstrItinData <tc_e7d02c66, /*tc_st*/ - [InstrStage<1, [SLOT0]>], [3, 1, 2, 3], - [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_e913dc32, /*tc_3x*/ - [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 1, 1], - [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_e9c822f7, /*tc_1*/ - [InstrStage<1, [SLOT0, SLOT1]>], [3], - [Hex_FWD]>, - - InstrItinData <tc_e9fae2d6, /*tc_2early*/ - [InstrStage<1, [SLOT2, SLOT3], 0>, - InstrStage<1, [CVI_ST]>], [2, 2], - [Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_ef20db1c, /*tc_3x*/ - [InstrStage<1, [SLOT3]>], [4, 1], - [Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_ef52ed71, /*tc_ld*/ - [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2], - [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_ef84f62f, /*tc_2*/ - [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 2, 2], - [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_f2704b9a, /*tc_2early*/ - [InstrStage<1, [SLOT2, SLOT3]>], [3, 2], - 
[Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_f3eaa14b, /*tc_4x*/ - [InstrStage<1, [SLOT2, SLOT3]>], [5, 1], - [Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_f47d212f, /*tc_ld*/ - [InstrStage<1, [SLOT0, SLOT1]>], [4, 1, 1, 2], - [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_f49e76f4, /*tc_2*/ - [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2], - [Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_f4f43fb5, /*tc_ld*/ - [InstrStage<1, [SLOT0]>], [4, 1, 1], - [Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_f7dd9c9f, /*tc_st*/ - [InstrStage<1, [SLOT0]>], [1, 2, 3], - [Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_f86c328a, /*tc_st*/ - [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 2, 2], - [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_f8eeed7a, /*tc_1*/ - [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2, 2], - [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, - - InstrItinData <tc_fcab4871, /*tc_newvjump*/ - [InstrStage<1, [SLOT0], 0>, - InstrStage<1, [CVI_ST]>], [], - []>, - - InstrItinData <tc_ff9ee76e, /*tc_st*/ - [InstrStage<1, [SLOT0]>], [2, 3], - [Hex_FWD, Hex_FWD]> - ]; -} - class DepScalarItinV62 { list<InstrItinData> DepScalarItinV62_list = [ - InstrItinData <tc_0077f68c, /*tc_st*/ - [InstrStage<1, [SLOT0]>], [2], - [Hex_FWD]>, - InstrItinData <tc_00afc57e, /*tc_2*/ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2], [Hex_FWD, Hex_FWD]>, @@ -2779,10 +1824,6 @@ class DepScalarItinV62 { [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData <tc_0a2b8c7c, /*tc_3stall*/ - [InstrStage<1, [SLOT0]>], [4, 1], - [Hex_FWD, Hex_FWD]>, - InstrItinData <tc_0cd51c76, /*tc_ld*/ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -2799,18 +1840,10 @@ class DepScalarItinV62 { [InstrStage<1, [SLOT2]>], [2, 1], [Hex_FWD, Hex_FWD]>, - InstrItinData <tc_128f96e3, /*tc_3stall*/ - [InstrStage<1, [SLOT0]>], [1, 1], - [Hex_FWD, Hex_FWD]>, - InstrItinData <tc_1372bca1, /*tc_3stall*/ [InstrStage<1, [SLOT0]>], [4, 1, 1], [Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData <tc_1432937d, /*tc_2early*/ - [InstrStage<1, [SLOT2]>], [1, 1], - [Hex_FWD, Hex_FWD]>, - InstrItinData <tc_14cd4cfa, /*tc_2early*/ [InstrStage<1, [SLOT2]>], [2], [Hex_FWD]>, @@ -2839,10 +1872,6 @@ class DepScalarItinV62 { [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData <tc_1c0005f9, /*tc_3stall*/ - [InstrStage<1, [SLOT3]>], [4, 1], - [Hex_FWD, Hex_FWD]>, - InstrItinData <tc_1d5a38a8, /*tc_ld*/ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -2851,10 +1880,6 @@ class DepScalarItinV62 { [InstrStage<1, [SLOT2, SLOT3]>], [3, 1, 1], [Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData <tc_20280784, /*tc_3stall*/ - [InstrStage<1, [SLOT0]>], [4, 1], - [Hex_FWD, Hex_FWD]>, - InstrItinData <tc_234a11a5, /*tc_2*/ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2], [Hex_FWD, Hex_FWD]>, @@ -2867,10 +1892,6 @@ class DepScalarItinV62 { [InstrStage<1, [SLOT3]>], [4, 2], [Hex_FWD, Hex_FWD]>, - InstrItinData <tc_29641329, /*tc_3stall*/ - [InstrStage<1, [SLOT3]>], [4, 1, 1], - [Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData <tc_2a160009, /*tc_2early*/ [InstrStage<1, [SLOT0]>], [], []>, @@ -2883,10 +1904,6 @@ class DepScalarItinV62 { [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData <tc_2e00db30, /*tc_3stall*/ - [InstrStage<1, [SLOT0]>], [], - []>, - InstrItinData <tc_2f185f5c, /*tc_3*/ [InstrStage<1, [SLOT2, SLOT3]>], [2, 2], [Hex_FWD, Hex_FWD]>, @@ -2995,10 +2012,6 @@ class DepScalarItinV62 { 
[InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [], []>, - InstrItinData <tc_53173427, /*tc_3stall*/ - [InstrStage<1, [SLOT3]>], [1, 1], - [Hex_FWD, Hex_FWD]>, - InstrItinData <tc_53bc8a6a, /*tc_2early*/ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -3015,10 +2028,6 @@ class DepScalarItinV62 { [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData <tc_56d25411, /*tc_3stall*/ - [InstrStage<1, [SLOT2]>], [4, 1], - [Hex_FWD, Hex_FWD]>, - InstrItinData <tc_57288781, /*tc_st*/ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2], [Hex_FWD, Hex_FWD]>, @@ -3027,6 +2036,10 @@ class DepScalarItinV62 { [InstrStage<1, [SLOT0]>], [2, 1, 2, 3], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData <tc_59a01ead, /*tc_2early*/ + [InstrStage<1, [SLOT2]>], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData <tc_5acef64a, /*tc_ld*/ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -3051,14 +2064,6 @@ class DepScalarItinV62 { [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData <tc_60d76817, /*tc_3stall*/ - [InstrStage<1, [SLOT3]>], [], - []>, - - InstrItinData <tc_60f5738d, /*tc_3stall*/ - [InstrStage<1, [SLOT3]>], [1], - [Hex_FWD]>, - InstrItinData <tc_63fe3df7, /*tc_ld*/ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 3, 1, 2], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -3095,6 +2100,10 @@ class DepScalarItinV62 { [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [], []>, + InstrItinData <tc_6fa4db47, /*tc_3x*/ + [InstrStage<1, [SLOT3]>], [4, 2], + [Hex_FWD, Hex_FWD]>, + InstrItinData <tc_73043bf4, /*tc_2early*/ [InstrStage<1, [SLOT3]>], [1, 2], [Hex_FWD, Hex_FWD]>, @@ -3123,10 +2132,6 @@ class DepScalarItinV62 { [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData <tc_85523bcb, /*tc_3x*/ - [InstrStage<1, [SLOT3]>], [4, 1, 2], - [Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData <tc_855b0b61, /*tc_2early*/ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [1, 1, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -3135,10 +2140,6 @@ class DepScalarItinV62 { [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData <tc_88fa1a78, /*tc_3x*/ - [InstrStage<1, [SLOT3]>], [4, 1], - [Hex_FWD, Hex_FWD]>, - InstrItinData <tc_897d1a9d, /*tc_2*/ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -3147,10 +2148,6 @@ class DepScalarItinV62 { [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData <tc_8bb285ec, /*tc_st*/ - [InstrStage<1, [SLOT0]>], [1], - [Hex_FWD]>, - InstrItinData <tc_8fd5f294, /*tc_3x*/ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1], [Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -3167,6 +2164,10 @@ class DepScalarItinV62 { [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData <tc_994333cd, /*tc_3x*/ + [InstrStage<1, [SLOT3]>], [4, 1], + [Hex_FWD, Hex_FWD]>, + InstrItinData <tc_97743097, /*tc_2early*/ [InstrStage<1, [SLOT2]>], [2, 1], [Hex_FWD, Hex_FWD]>, @@ -3243,10 +2244,6 @@ class DepScalarItinV62 { [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData <tc_b5a33b22, /*tc_2early*/ - [InstrStage<1, [SLOT2]>], [3, 2, 2], - [Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData <tc_b77c481f, /*tc_ld*/ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -3275,10 +2272,6 @@ class DepScalarItinV62 { [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 1], 
[Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData <tc_bd90564c, /*tc_st*/ - [InstrStage<1, [SLOT0]>], [1, 2], - [Hex_FWD, Hex_FWD]>, - InstrItinData <tc_bde7aaf4, /*tc_newvjump*/ [InstrStage<1, [SLOT0]>], [3, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -3375,10 +2368,6 @@ class DepScalarItinV62 { [InstrStage<1, [SLOT2]>], [2, 1], [Hex_FWD, Hex_FWD]>, - InstrItinData <tc_e1e0a2dc, /*tc_3stall*/ - [InstrStage<1, [SLOT2]>], [], - []>, - InstrItinData <tc_e1e99bfa, /*tc_2early*/ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2], [Hex_FWD, Hex_FWD]>, @@ -3391,10 +2380,6 @@ class DepScalarItinV62 { [InstrStage<1, [SLOT0]>], [3, 1, 1, 2, 3], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData <tc_e6b38e01, /*tc_3x*/ - [InstrStage<1, [SLOT3]>], [4, 2], - [Hex_FWD, Hex_FWD]>, - InstrItinData <tc_e7624c08, /*tc_newvjump*/ [InstrStage<1, [SLOT0]>], [3], [Hex_FWD]>, @@ -3415,10 +2400,6 @@ class DepScalarItinV62 { [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2], [Hex_FWD, Hex_FWD]>, - InstrItinData <tc_ef20db1c, /*tc_3x*/ - [InstrStage<1, [SLOT3]>], [4, 1], - [Hex_FWD, Hex_FWD]>, - InstrItinData <tc_ef52ed71, /*tc_ld*/ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -3443,10 +2424,6 @@ class DepScalarItinV62 { [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData <tc_f4f43fb5, /*tc_ld*/ - [InstrStage<1, [SLOT0]>], [4, 1, 1], - [Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData <tc_f7dd9c9f, /*tc_st*/ [InstrStage<1, [SLOT0]>], [1, 2, 3], [Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -3471,10 +2448,6 @@ class DepScalarItinV62 { class DepScalarItinV65 { list<InstrItinData> DepScalarItinV65_list = [ - InstrItinData <tc_0077f68c, /*tc_st*/ - [InstrStage<1, [SLOT0]>], [2], - [Hex_FWD]>, - InstrItinData <tc_00afc57e, /*tc_2*/ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2], [Hex_FWD, Hex_FWD]>, @@ -3499,10 +2472,6 @@ class DepScalarItinV65 { [InstrStage<1, [SLOT0, SLOT1]>], [1, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData <tc_0a2b8c7c, /*tc_3stall*/ - [InstrStage<1, [SLOT0]>], [4, 1], - [Hex_FWD, Hex_FWD]>, - InstrItinData <tc_0cd51c76, /*tc_ld*/ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -3519,18 +2488,10 @@ class DepScalarItinV65 { [InstrStage<1, [SLOT2]>], [2, 1], [Hex_FWD, Hex_FWD]>, - InstrItinData <tc_128f96e3, /*tc_3stall*/ - [InstrStage<1, [SLOT0]>], [1, 1], - [Hex_FWD, Hex_FWD]>, - InstrItinData <tc_1372bca1, /*tc_3stall*/ [InstrStage<1, [SLOT0]>], [4, 1, 1], [Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData <tc_1432937d, /*tc_3stall*/ - [InstrStage<1, [SLOT2]>], [1, 1], - [Hex_FWD, Hex_FWD]>, - InstrItinData <tc_14cd4cfa, /*tc_2early*/ [InstrStage<1, [SLOT2]>], [2], [Hex_FWD]>, @@ -3559,10 +2520,6 @@ class DepScalarItinV65 { [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData <tc_1c0005f9, /*tc_3stall*/ - [InstrStage<1, [SLOT3]>], [4, 1], - [Hex_FWD, Hex_FWD]>, - InstrItinData <tc_1d5a38a8, /*tc_ld*/ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -3571,10 +2528,6 @@ class DepScalarItinV65 { [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData <tc_20280784, /*tc_3stall*/ - [InstrStage<1, [SLOT0]>], [4, 1], - [Hex_FWD, Hex_FWD]>, - InstrItinData <tc_234a11a5, /*tc_2*/ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2], [Hex_FWD, Hex_FWD]>, @@ -3587,10 +2540,6 @@ class DepScalarItinV65 { [InstrStage<1, [SLOT3]>], [4, 2], [Hex_FWD, Hex_FWD]>, - InstrItinData <tc_29641329, 
/*tc_3stall*/ - [InstrStage<1, [SLOT3]>], [4, 1, 1], - [Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData <tc_2a160009, /*tc_2early*/ [InstrStage<1, [SLOT0]>], [], []>, @@ -3603,10 +2552,6 @@ class DepScalarItinV65 { [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData <tc_2e00db30, /*tc_3stall*/ - [InstrStage<1, [SLOT0]>], [], - []>, - InstrItinData <tc_2f185f5c, /*tc_3*/ [InstrStage<1, [SLOT2, SLOT3]>], [2, 2], [Hex_FWD, Hex_FWD]>, @@ -3715,10 +2660,6 @@ class DepScalarItinV65 { [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [], []>, - InstrItinData <tc_53173427, /*tc_3stall*/ - [InstrStage<1, [SLOT3]>], [1, 1], - [Hex_FWD, Hex_FWD]>, - InstrItinData <tc_53bc8a6a, /*tc_1*/ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -3735,10 +2676,6 @@ class DepScalarItinV65 { [InstrStage<1, [SLOT2, SLOT3]>], [3, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData <tc_56d25411, /*tc_3stall*/ - [InstrStage<1, [SLOT2]>], [4, 1], - [Hex_FWD, Hex_FWD]>, - InstrItinData <tc_57288781, /*tc_st*/ [InstrStage<1, [SLOT0, SLOT1]>], [1, 2], [Hex_FWD, Hex_FWD]>, @@ -3747,6 +2684,10 @@ class DepScalarItinV65 { [InstrStage<1, [SLOT0]>], [2, 1, 2, 3], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData <tc_59a01ead, /*tc_3stall*/ + [InstrStage<1, [SLOT2]>], [4, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData <tc_5acef64a, /*tc_ld*/ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -3771,14 +2712,6 @@ class DepScalarItinV65 { [InstrStage<1, [SLOT0, SLOT1]>], [3, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData <tc_60d76817, /*tc_3stall*/ - [InstrStage<1, [SLOT3]>], [], - []>, - - InstrItinData <tc_60f5738d, /*tc_3stall*/ - [InstrStage<1, [SLOT3]>], [1], - [Hex_FWD]>, - InstrItinData <tc_63fe3df7, /*tc_latepredldaia*/ [InstrStage<1, [SLOT0, SLOT1]>], [4, 4, 3, 1, 2], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -3815,6 +2748,10 @@ class DepScalarItinV65 { [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [], []>, + InstrItinData <tc_6fa4db47, /*tc_3x*/ + [InstrStage<1, [SLOT3]>], [4, 2], + [Hex_FWD, Hex_FWD]>, + InstrItinData <tc_73043bf4, /*tc_1*/ [InstrStage<1, [SLOT3]>], [2, 2], [Hex_FWD, Hex_FWD]>, @@ -3843,10 +2780,6 @@ class DepScalarItinV65 { [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData <tc_85523bcb, /*tc_3x*/ - [InstrStage<1, [SLOT3]>], [4, 1, 2], - [Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData <tc_855b0b61, /*tc_1*/ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -3855,10 +2788,6 @@ class DepScalarItinV65 { [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData <tc_88fa1a78, /*tc_3x*/ - [InstrStage<1, [SLOT3]>], [4, 1], - [Hex_FWD, Hex_FWD]>, - InstrItinData <tc_897d1a9d, /*tc_2*/ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -3867,10 +2796,6 @@ class DepScalarItinV65 { [InstrStage<1, [SLOT0, SLOT1]>], [2, 1, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData <tc_8bb285ec, /*tc_st*/ - [InstrStage<1, [SLOT0]>], [1], - [Hex_FWD]>, - InstrItinData <tc_8fd5f294, /*tc_3x*/ [InstrStage<1, [SLOT2, SLOT3]>], [4, 1, 1], [Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -3891,6 +2816,10 @@ class DepScalarItinV65 { [InstrStage<1, [SLOT2]>], [2, 2], [Hex_FWD, Hex_FWD]>, + InstrItinData <tc_994333cd, /*tc_3x*/ + [InstrStage<1, [SLOT3]>], [4, 1], + [Hex_FWD, Hex_FWD]>, + InstrItinData <tc_999d32db, /*tc_3stall*/ [InstrStage<1, 
[SLOT2]>], [1], [Hex_FWD]>, @@ -3963,10 +2892,6 @@ class DepScalarItinV65 { [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData <tc_b5a33b22, /*tc_3stall*/ - [InstrStage<1, [SLOT2]>], [4, 1, 2], - [Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData <tc_b77c481f, /*tc_ld*/ [InstrStage<1, [SLOT0, SLOT1]>], [4, 3, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -3995,10 +2920,6 @@ class DepScalarItinV65 { [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2, 1], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData <tc_bd90564c, /*tc_st*/ - [InstrStage<1, [SLOT0]>], [1, 2], - [Hex_FWD, Hex_FWD]>, - InstrItinData <tc_bde7aaf4, /*tc_newvjump*/ [InstrStage<1, [SLOT0]>], [3, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -4095,10 +3016,6 @@ class DepScalarItinV65 { [InstrStage<1, [SLOT2]>], [2, 2], [Hex_FWD, Hex_FWD]>, - InstrItinData <tc_e1e0a2dc, /*tc_3stall*/ - [InstrStage<1, [SLOT2]>], [], - []>, - InstrItinData <tc_e1e99bfa, /*tc_1*/ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2], [Hex_FWD, Hex_FWD]>, @@ -4111,10 +3028,6 @@ class DepScalarItinV65 { [InstrStage<1, [SLOT0]>], [3, 1, 1, 2, 3], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData <tc_e6b38e01, /*tc_3x*/ - [InstrStage<1, [SLOT3]>], [4, 2], - [Hex_FWD, Hex_FWD]>, - InstrItinData <tc_e7624c08, /*tc_newvjump*/ [InstrStage<1, [SLOT0]>], [3], [Hex_FWD]>, @@ -4135,10 +3048,6 @@ class DepScalarItinV65 { [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2], [Hex_FWD, Hex_FWD]>, - InstrItinData <tc_ef20db1c, /*tc_3x*/ - [InstrStage<1, [SLOT3]>], [4, 1], - [Hex_FWD, Hex_FWD]>, - InstrItinData <tc_ef52ed71, /*tc_ld*/ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 1, 2], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -4163,10 +3072,6 @@ class DepScalarItinV65 { [InstrStage<1, [SLOT2, SLOT3]>], [4, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData <tc_f4f43fb5, /*tc_ld*/ - [InstrStage<1, [SLOT0]>], [4, 1, 1], - [Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData <tc_f7dd9c9f, /*tc_st*/ [InstrStage<1, [SLOT0]>], [1, 2, 3], [Hex_FWD, Hex_FWD, Hex_FWD]>, diff --git a/lib/Target/Hexagon/HexagonDepInstrInfo.td b/lib/Target/Hexagon/HexagonDepInstrInfo.td index 6e16762ac0eb..b6824fa33106 100644 --- a/lib/Target/Hexagon/HexagonDepInstrInfo.td +++ b/lib/Target/Hexagon/HexagonDepInstrInfo.td @@ -991,7 +991,7 @@ def A2_roundsat : HInst< (outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32), "$Rd32 = round($Rss32):sat", -tc_c2f7d806, TypeS_2op>, Enc_90cd8b, Requires<[HasV5T]> { +tc_c2f7d806, TypeS_2op>, Enc_90cd8b, Requires<[HasV5]> { let Inst{13-5} = 0b000000001; let Inst{31-21} = 0b10001000110; let hasNewValue = 1; @@ -3301,7 +3301,7 @@ def A5_ACS : HInst< (outs DoubleRegs:$Rxx32, PredRegs:$Pe4), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rxx32,$Pe4 = vacsh($Rss32,$Rtt32)", -tc_caaebcba, TypeM>, Enc_831a7d, Requires<[HasV55T]> { +tc_caaebcba, TypeM>, Enc_831a7d, Requires<[HasV55]> { let Inst{7-7} = 0b0; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101010101; @@ -3314,7 +3314,7 @@ def A5_vaddhubs : HInst< (outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rd32 = vaddhub($Rss32,$Rtt32):sat", -tc_2b6f77c6, TypeS_3op>, Enc_d2216a, Requires<[HasV5T]> { +tc_2b6f77c6, TypeS_3op>, Enc_d2216a, Requires<[HasV5]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000001010; @@ -3327,7 +3327,7 @@ def A6_vcmpbeq_notany : HInst< (outs PredRegs:$Pd4), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Pd4 = !any8(vcmpb.eq($Rss32,$Rtt32))", -tc_55050d58, TypeALU64>, Enc_fcf7a7, Requires<[HasV65T]> { +tc_55050d58, 
TypeALU64>, Enc_fcf7a7, Requires<[HasV65]> { let Inst{7-2} = 0b001000; let Inst{13-13} = 0b1; let Inst{31-21} = 0b11010010000; @@ -3336,7 +3336,7 @@ def A6_vminub_RdP : HInst< (outs DoubleRegs:$Rdd32, PredRegs:$Pe4), (ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32), "$Rdd32,$Pe4 = vminub($Rtt32,$Rss32)", -tc_ef84f62f, TypeM>, Enc_d2c7f1, Requires<[HasV62T]> { +tc_ef84f62f, TypeM>, Enc_d2c7f1, Requires<[HasV62]> { let Inst{7-7} = 0b0; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101010111; @@ -4059,7 +4059,7 @@ def F2_conv_d2df : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32), "$Rdd32 = convert_d2df($Rss32)", -tc_f3eaa14b, TypeS_2op>, Enc_b9c5fb, Requires<[HasV5T]> { +tc_f3eaa14b, TypeS_2op>, Enc_b9c5fb, Requires<[HasV5]> { let Inst{13-5} = 0b000000011; let Inst{31-21} = 0b10000000111; let isFP = 1; @@ -4069,7 +4069,7 @@ def F2_conv_d2sf : HInst< (outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32), "$Rd32 = convert_d2sf($Rss32)", -tc_f3eaa14b, TypeS_2op>, Enc_90cd8b, Requires<[HasV5T]> { +tc_f3eaa14b, TypeS_2op>, Enc_90cd8b, Requires<[HasV5]> { let Inst{13-5} = 0b000000001; let Inst{31-21} = 0b10001000010; let hasNewValue = 1; @@ -4081,7 +4081,7 @@ def F2_conv_df2d : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32), "$Rdd32 = convert_df2d($Rss32)", -tc_f3eaa14b, TypeS_2op>, Enc_b9c5fb, Requires<[HasV5T]> { +tc_f3eaa14b, TypeS_2op>, Enc_b9c5fb, Requires<[HasV5]> { let Inst{13-5} = 0b000000000; let Inst{31-21} = 0b10000000111; let isFP = 1; @@ -4091,7 +4091,7 @@ def F2_conv_df2d_chop : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32), "$Rdd32 = convert_df2d($Rss32):chop", -tc_f3eaa14b, TypeS_2op>, Enc_b9c5fb, Requires<[HasV5T]> { +tc_f3eaa14b, TypeS_2op>, Enc_b9c5fb, Requires<[HasV5]> { let Inst{13-5} = 0b000000110; let Inst{31-21} = 0b10000000111; let isFP = 1; @@ -4101,7 +4101,7 @@ def F2_conv_df2sf : HInst< (outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32), "$Rd32 = convert_df2sf($Rss32)", -tc_f3eaa14b, TypeS_2op>, Enc_90cd8b, Requires<[HasV5T]> { +tc_f3eaa14b, TypeS_2op>, Enc_90cd8b, Requires<[HasV5]> { let Inst{13-5} = 0b000000001; let Inst{31-21} = 0b10001000000; let hasNewValue = 1; @@ -4113,7 +4113,7 @@ def F2_conv_df2ud : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32), "$Rdd32 = convert_df2ud($Rss32)", -tc_f3eaa14b, TypeS_2op>, Enc_b9c5fb, Requires<[HasV5T]> { +tc_f3eaa14b, TypeS_2op>, Enc_b9c5fb, Requires<[HasV5]> { let Inst{13-5} = 0b000000001; let Inst{31-21} = 0b10000000111; let isFP = 1; @@ -4123,7 +4123,7 @@ def F2_conv_df2ud_chop : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32), "$Rdd32 = convert_df2ud($Rss32):chop", -tc_f3eaa14b, TypeS_2op>, Enc_b9c5fb, Requires<[HasV5T]> { +tc_f3eaa14b, TypeS_2op>, Enc_b9c5fb, Requires<[HasV5]> { let Inst{13-5} = 0b000000111; let Inst{31-21} = 0b10000000111; let isFP = 1; @@ -4133,7 +4133,7 @@ def F2_conv_df2uw : HInst< (outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32), "$Rd32 = convert_df2uw($Rss32)", -tc_f3eaa14b, TypeS_2op>, Enc_90cd8b, Requires<[HasV5T]> { +tc_f3eaa14b, TypeS_2op>, Enc_90cd8b, Requires<[HasV5]> { let Inst{13-5} = 0b000000001; let Inst{31-21} = 0b10001000011; let hasNewValue = 1; @@ -4145,7 +4145,7 @@ def F2_conv_df2uw_chop : HInst< (outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32), "$Rd32 = convert_df2uw($Rss32):chop", -tc_f3eaa14b, TypeS_2op>, Enc_90cd8b, Requires<[HasV5T]> { +tc_f3eaa14b, TypeS_2op>, Enc_90cd8b, Requires<[HasV5]> { let Inst{13-5} = 0b000000001; let Inst{31-21} = 0b10001000101; let hasNewValue = 1; @@ -4157,7 +4157,7 @@ def F2_conv_df2w : HInst< (outs IntRegs:$Rd32), (ins 
DoubleRegs:$Rss32), "$Rd32 = convert_df2w($Rss32)", -tc_f3eaa14b, TypeS_2op>, Enc_90cd8b, Requires<[HasV5T]> { +tc_f3eaa14b, TypeS_2op>, Enc_90cd8b, Requires<[HasV5]> { let Inst{13-5} = 0b000000001; let Inst{31-21} = 0b10001000100; let hasNewValue = 1; @@ -4169,7 +4169,7 @@ def F2_conv_df2w_chop : HInst< (outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32), "$Rd32 = convert_df2w($Rss32):chop", -tc_f3eaa14b, TypeS_2op>, Enc_90cd8b, Requires<[HasV5T]> { +tc_f3eaa14b, TypeS_2op>, Enc_90cd8b, Requires<[HasV5]> { let Inst{13-5} = 0b000000001; let Inst{31-21} = 0b10001000111; let hasNewValue = 1; @@ -4181,7 +4181,7 @@ def F2_conv_sf2d : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32), "$Rdd32 = convert_sf2d($Rs32)", -tc_f3eaa14b, TypeS_2op>, Enc_3a3d62, Requires<[HasV5T]> { +tc_f3eaa14b, TypeS_2op>, Enc_3a3d62, Requires<[HasV5]> { let Inst{13-5} = 0b000000100; let Inst{31-21} = 0b10000100100; let isFP = 1; @@ -4191,7 +4191,7 @@ def F2_conv_sf2d_chop : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32), "$Rdd32 = convert_sf2d($Rs32):chop", -tc_f3eaa14b, TypeS_2op>, Enc_3a3d62, Requires<[HasV5T]> { +tc_f3eaa14b, TypeS_2op>, Enc_3a3d62, Requires<[HasV5]> { let Inst{13-5} = 0b000000110; let Inst{31-21} = 0b10000100100; let isFP = 1; @@ -4201,7 +4201,7 @@ def F2_conv_sf2df : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32), "$Rdd32 = convert_sf2df($Rs32)", -tc_f3eaa14b, TypeS_2op>, Enc_3a3d62, Requires<[HasV5T]> { +tc_f3eaa14b, TypeS_2op>, Enc_3a3d62, Requires<[HasV5]> { let Inst{13-5} = 0b000000000; let Inst{31-21} = 0b10000100100; let isFP = 1; @@ -4211,7 +4211,7 @@ def F2_conv_sf2ud : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32), "$Rdd32 = convert_sf2ud($Rs32)", -tc_f3eaa14b, TypeS_2op>, Enc_3a3d62, Requires<[HasV5T]> { +tc_f3eaa14b, TypeS_2op>, Enc_3a3d62, Requires<[HasV5]> { let Inst{13-5} = 0b000000011; let Inst{31-21} = 0b10000100100; let isFP = 1; @@ -4221,7 +4221,7 @@ def F2_conv_sf2ud_chop : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32), "$Rdd32 = convert_sf2ud($Rs32):chop", -tc_f3eaa14b, TypeS_2op>, Enc_3a3d62, Requires<[HasV5T]> { +tc_f3eaa14b, TypeS_2op>, Enc_3a3d62, Requires<[HasV5]> { let Inst{13-5} = 0b000000101; let Inst{31-21} = 0b10000100100; let isFP = 1; @@ -4231,7 +4231,7 @@ def F2_conv_sf2uw : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = convert_sf2uw($Rs32)", -tc_f3eaa14b, TypeS_2op>, Enc_5e2823, Requires<[HasV5T]> { +tc_f3eaa14b, TypeS_2op>, Enc_5e2823, Requires<[HasV5]> { let Inst{13-5} = 0b000000000; let Inst{31-21} = 0b10001011011; let hasNewValue = 1; @@ -4243,7 +4243,7 @@ def F2_conv_sf2uw_chop : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = convert_sf2uw($Rs32):chop", -tc_f3eaa14b, TypeS_2op>, Enc_5e2823, Requires<[HasV5T]> { +tc_f3eaa14b, TypeS_2op>, Enc_5e2823, Requires<[HasV5]> { let Inst{13-5} = 0b000000001; let Inst{31-21} = 0b10001011011; let hasNewValue = 1; @@ -4255,7 +4255,7 @@ def F2_conv_sf2w : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = convert_sf2w($Rs32)", -tc_f3eaa14b, TypeS_2op>, Enc_5e2823, Requires<[HasV5T]> { +tc_f3eaa14b, TypeS_2op>, Enc_5e2823, Requires<[HasV5]> { let Inst{13-5} = 0b000000000; let Inst{31-21} = 0b10001011100; let hasNewValue = 1; @@ -4267,7 +4267,7 @@ def F2_conv_sf2w_chop : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = convert_sf2w($Rs32):chop", -tc_f3eaa14b, TypeS_2op>, Enc_5e2823, Requires<[HasV5T]> { +tc_f3eaa14b, TypeS_2op>, Enc_5e2823, Requires<[HasV5]> { let Inst{13-5} = 0b000000001; let Inst{31-21} = 0b10001011100; let hasNewValue = 1; @@ -4279,7 
+4279,7 @@ def F2_conv_ud2df : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32), "$Rdd32 = convert_ud2df($Rss32)", -tc_f3eaa14b, TypeS_2op>, Enc_b9c5fb, Requires<[HasV5T]> { +tc_f3eaa14b, TypeS_2op>, Enc_b9c5fb, Requires<[HasV5]> { let Inst{13-5} = 0b000000010; let Inst{31-21} = 0b10000000111; let isFP = 1; @@ -4289,7 +4289,7 @@ def F2_conv_ud2sf : HInst< (outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32), "$Rd32 = convert_ud2sf($Rss32)", -tc_f3eaa14b, TypeS_2op>, Enc_90cd8b, Requires<[HasV5T]> { +tc_f3eaa14b, TypeS_2op>, Enc_90cd8b, Requires<[HasV5]> { let Inst{13-5} = 0b000000001; let Inst{31-21} = 0b10001000001; let hasNewValue = 1; @@ -4301,7 +4301,7 @@ def F2_conv_uw2df : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32), "$Rdd32 = convert_uw2df($Rs32)", -tc_f3eaa14b, TypeS_2op>, Enc_3a3d62, Requires<[HasV5T]> { +tc_f3eaa14b, TypeS_2op>, Enc_3a3d62, Requires<[HasV5]> { let Inst{13-5} = 0b000000001; let Inst{31-21} = 0b10000100100; let isFP = 1; @@ -4311,7 +4311,7 @@ def F2_conv_uw2sf : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = convert_uw2sf($Rs32)", -tc_f3eaa14b, TypeS_2op>, Enc_5e2823, Requires<[HasV5T]> { +tc_f3eaa14b, TypeS_2op>, Enc_5e2823, Requires<[HasV5]> { let Inst{13-5} = 0b000000000; let Inst{31-21} = 0b10001011001; let hasNewValue = 1; @@ -4323,7 +4323,7 @@ def F2_conv_w2df : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32), "$Rdd32 = convert_w2df($Rs32)", -tc_f3eaa14b, TypeS_2op>, Enc_3a3d62, Requires<[HasV5T]> { +tc_f3eaa14b, TypeS_2op>, Enc_3a3d62, Requires<[HasV5]> { let Inst{13-5} = 0b000000010; let Inst{31-21} = 0b10000100100; let isFP = 1; @@ -4333,7 +4333,7 @@ def F2_conv_w2sf : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = convert_w2sf($Rs32)", -tc_f3eaa14b, TypeS_2op>, Enc_5e2823, Requires<[HasV5T]> { +tc_f3eaa14b, TypeS_2op>, Enc_5e2823, Requires<[HasV5]> { let Inst{13-5} = 0b000000000; let Inst{31-21} = 0b10001011010; let hasNewValue = 1; @@ -4345,7 +4345,7 @@ def F2_dfclass : HInst< (outs PredRegs:$Pd4), (ins DoubleRegs:$Rss32, u5_0Imm:$Ii), "$Pd4 = dfclass($Rss32,#$Ii)", -tc_7a830544, TypeALU64>, Enc_1f19b5, Requires<[HasV5T]> { +tc_7a830544, TypeALU64>, Enc_1f19b5, Requires<[HasV5]> { let Inst{4-2} = 0b100; let Inst{13-10} = 0b0000; let Inst{31-21} = 0b11011100100; @@ -4356,7 +4356,7 @@ def F2_dfcmpeq : HInst< (outs PredRegs:$Pd4), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Pd4 = dfcmp.eq($Rss32,$Rtt32)", -tc_1e856f58, TypeALU64>, Enc_fcf7a7, Requires<[HasV5T]> { +tc_1e856f58, TypeALU64>, Enc_fcf7a7, Requires<[HasV5]> { let Inst{7-2} = 0b000000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010010111; @@ -4368,7 +4368,7 @@ def F2_dfcmpge : HInst< (outs PredRegs:$Pd4), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Pd4 = dfcmp.ge($Rss32,$Rtt32)", -tc_1e856f58, TypeALU64>, Enc_fcf7a7, Requires<[HasV5T]> { +tc_1e856f58, TypeALU64>, Enc_fcf7a7, Requires<[HasV5]> { let Inst{7-2} = 0b010000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010010111; @@ -4380,7 +4380,7 @@ def F2_dfcmpgt : HInst< (outs PredRegs:$Pd4), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Pd4 = dfcmp.gt($Rss32,$Rtt32)", -tc_1e856f58, TypeALU64>, Enc_fcf7a7, Requires<[HasV5T]> { +tc_1e856f58, TypeALU64>, Enc_fcf7a7, Requires<[HasV5]> { let Inst{7-2} = 0b001000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010010111; @@ -4392,7 +4392,7 @@ def F2_dfcmpuo : HInst< (outs PredRegs:$Pd4), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Pd4 = dfcmp.uo($Rss32,$Rtt32)", -tc_1e856f58, TypeALU64>, Enc_fcf7a7, Requires<[HasV5T]> { +tc_1e856f58, TypeALU64>, Enc_fcf7a7, 
Requires<[HasV5]> { let Inst{7-2} = 0b011000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11010010111; @@ -4404,7 +4404,7 @@ def F2_dfimm_n : HInst< (outs DoubleRegs:$Rdd32), (ins u10_0Imm:$Ii), "$Rdd32 = dfmake(#$Ii):neg", -tc_234a11a5, TypeALU64>, Enc_e6c957, Requires<[HasV5T]> { +tc_234a11a5, TypeALU64>, Enc_e6c957, Requires<[HasV5]> { let Inst{20-16} = 0b00000; let Inst{31-22} = 0b1101100101; let prefersSlot3 = 1; @@ -4413,7 +4413,7 @@ def F2_dfimm_p : HInst< (outs DoubleRegs:$Rdd32), (ins u10_0Imm:$Ii), "$Rdd32 = dfmake(#$Ii):pos", -tc_234a11a5, TypeALU64>, Enc_e6c957, Requires<[HasV5T]> { +tc_234a11a5, TypeALU64>, Enc_e6c957, Requires<[HasV5]> { let Inst{20-16} = 0b00000; let Inst{31-22} = 0b1101100100; let prefersSlot3 = 1; @@ -4422,7 +4422,7 @@ def F2_sfadd : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = sfadd($Rs32,$Rt32)", -tc_6792d5ff, TypeM>, Enc_5ab2be, Requires<[HasV5T]> { +tc_6792d5ff, TypeM>, Enc_5ab2be, Requires<[HasV5]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101011000; @@ -4436,7 +4436,7 @@ def F2_sfclass : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, u5_0Imm:$Ii), "$Pd4 = sfclass($Rs32,#$Ii)", -tc_7a830544, TypeS_2op>, Enc_83ee64, Requires<[HasV5T]> { +tc_7a830544, TypeS_2op>, Enc_83ee64, Requires<[HasV5]> { let Inst{7-2} = 0b000000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10000101111; @@ -4447,7 +4447,7 @@ def F2_sfcmpeq : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Pd4 = sfcmp.eq($Rs32,$Rt32)", -tc_1e856f58, TypeS_3op>, Enc_c2b48e, Requires<[HasV5T]> { +tc_1e856f58, TypeS_3op>, Enc_c2b48e, Requires<[HasV5]> { let Inst{7-2} = 0b011000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000111111; @@ -4459,7 +4459,7 @@ def F2_sfcmpge : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Pd4 = sfcmp.ge($Rs32,$Rt32)", -tc_1e856f58, TypeS_3op>, Enc_c2b48e, Requires<[HasV5T]> { +tc_1e856f58, TypeS_3op>, Enc_c2b48e, Requires<[HasV5]> { let Inst{7-2} = 0b000000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000111111; @@ -4471,7 +4471,7 @@ def F2_sfcmpgt : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Pd4 = sfcmp.gt($Rs32,$Rt32)", -tc_1e856f58, TypeS_3op>, Enc_c2b48e, Requires<[HasV5T]> { +tc_1e856f58, TypeS_3op>, Enc_c2b48e, Requires<[HasV5]> { let Inst{7-2} = 0b100000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000111111; @@ -4483,7 +4483,7 @@ def F2_sfcmpuo : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Pd4 = sfcmp.uo($Rs32,$Rt32)", -tc_1e856f58, TypeS_3op>, Enc_c2b48e, Requires<[HasV5T]> { +tc_1e856f58, TypeS_3op>, Enc_c2b48e, Requires<[HasV5]> { let Inst{7-2} = 0b001000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000111111; @@ -4495,7 +4495,7 @@ def F2_sffixupd : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = sffixupd($Rs32,$Rt32)", -tc_6792d5ff, TypeM>, Enc_5ab2be, Requires<[HasV5T]> { +tc_6792d5ff, TypeM>, Enc_5ab2be, Requires<[HasV5]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101011110; @@ -4507,7 +4507,7 @@ def F2_sffixupn : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = sffixupn($Rs32,$Rt32)", -tc_6792d5ff, TypeM>, Enc_5ab2be, Requires<[HasV5T]> { +tc_6792d5ff, TypeM>, Enc_5ab2be, Requires<[HasV5]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101011110; @@ -4519,7 +4519,7 @@ def F2_sffixupr : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = sffixupr($Rs32)", -tc_f3eaa14b, TypeS_2op>, Enc_5e2823, 
Requires<[HasV5T]> { +tc_f3eaa14b, TypeS_2op>, Enc_5e2823, Requires<[HasV5]> { let Inst{13-5} = 0b000000000; let Inst{31-21} = 0b10001011101; let hasNewValue = 1; @@ -4530,7 +4530,7 @@ def F2_sffma : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 += sfmpy($Rs32,$Rt32)", -tc_d580173f, TypeM>, Enc_2ae154, Requires<[HasV5T]> { +tc_d580173f, TypeM>, Enc_2ae154, Requires<[HasV5]> { let Inst{7-5} = 0b100; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101111000; @@ -4544,7 +4544,7 @@ def F2_sffma_lib : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 += sfmpy($Rs32,$Rt32):lib", -tc_d580173f, TypeM>, Enc_2ae154, Requires<[HasV5T]> { +tc_d580173f, TypeM>, Enc_2ae154, Requires<[HasV5]> { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101111000; @@ -4558,7 +4558,7 @@ def F2_sffma_sc : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32, PredRegs:$Pu4), "$Rx32 += sfmpy($Rs32,$Rt32,$Pu4):scale", -tc_038a1342, TypeM>, Enc_437f33, Requires<[HasV5T]> { +tc_038a1342, TypeM>, Enc_437f33, Requires<[HasV5]> { let Inst{7-7} = 0b1; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101111011; @@ -4572,7 +4572,7 @@ def F2_sffms : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 -= sfmpy($Rs32,$Rt32)", -tc_d580173f, TypeM>, Enc_2ae154, Requires<[HasV5T]> { +tc_d580173f, TypeM>, Enc_2ae154, Requires<[HasV5]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101111000; @@ -4586,7 +4586,7 @@ def F2_sffms_lib : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, IntRegs:$Rt32), "$Rx32 -= sfmpy($Rs32,$Rt32):lib", -tc_d580173f, TypeM>, Enc_2ae154, Requires<[HasV5T]> { +tc_d580173f, TypeM>, Enc_2ae154, Requires<[HasV5]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101111000; @@ -4600,7 +4600,7 @@ def F2_sfimm_n : HInst< (outs IntRegs:$Rd32), (ins u10_0Imm:$Ii), "$Rd32 = sfmake(#$Ii):neg", -tc_234a11a5, TypeALU64>, Enc_6c9440, Requires<[HasV5T]> { +tc_234a11a5, TypeALU64>, Enc_6c9440, Requires<[HasV5]> { let Inst{20-16} = 0b00000; let Inst{31-22} = 0b1101011001; let hasNewValue = 1; @@ -4611,7 +4611,7 @@ def F2_sfimm_p : HInst< (outs IntRegs:$Rd32), (ins u10_0Imm:$Ii), "$Rd32 = sfmake(#$Ii):pos", -tc_234a11a5, TypeALU64>, Enc_6c9440, Requires<[HasV5T]> { +tc_234a11a5, TypeALU64>, Enc_6c9440, Requires<[HasV5]> { let Inst{20-16} = 0b00000; let Inst{31-22} = 0b1101011000; let hasNewValue = 1; @@ -4622,7 +4622,7 @@ def F2_sfinvsqrta : HInst< (outs IntRegs:$Rd32, PredRegs:$Pe4), (ins IntRegs:$Rs32), "$Rd32,$Pe4 = sfinvsqrta($Rs32)", -tc_4d99bca9, TypeS_2op>, Enc_890909, Requires<[HasV5T]> { +tc_4d99bca9, TypeS_2op>, Enc_890909, Requires<[HasV5]> { let Inst{13-7} = 0b0000000; let Inst{31-21} = 0b10001011111; let hasNewValue = 1; @@ -4634,7 +4634,7 @@ def F2_sfmax : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = sfmax($Rs32,$Rt32)", -tc_976ddc4f, TypeM>, Enc_5ab2be, Requires<[HasV5T]> { +tc_976ddc4f, TypeM>, Enc_5ab2be, Requires<[HasV5]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101011100; @@ -4648,7 +4648,7 @@ def F2_sfmin : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = sfmin($Rs32,$Rt32)", -tc_976ddc4f, TypeM>, Enc_5ab2be, Requires<[HasV5T]> { +tc_976ddc4f, TypeM>, Enc_5ab2be, Requires<[HasV5]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101011100; @@ -4662,7 +4662,7 @@ def F2_sfmpy : HInst< (outs 
IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = sfmpy($Rs32,$Rt32)", -tc_6792d5ff, TypeM>, Enc_5ab2be, Requires<[HasV5T]> { +tc_6792d5ff, TypeM>, Enc_5ab2be, Requires<[HasV5]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101011010; @@ -4676,7 +4676,7 @@ def F2_sfrecipa : HInst< (outs IntRegs:$Rd32, PredRegs:$Pe4), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32,$Pe4 = sfrecipa($Rs32,$Rt32)", -tc_9c00ce8d, TypeM>, Enc_a94f3b, Requires<[HasV5T]> { +tc_9c00ce8d, TypeM>, Enc_a94f3b, Requires<[HasV5]> { let Inst{7-7} = 0b1; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101011111; @@ -4689,7 +4689,7 @@ def F2_sfsub : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = sfsub($Rs32,$Rt32)", -tc_6792d5ff, TypeM>, Enc_5ab2be, Requires<[HasV5T]> { +tc_6792d5ff, TypeM>, Enc_5ab2be, Requires<[HasV5]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101011000; @@ -4698,6 +4698,44 @@ let opNewValue = 0; let isFP = 1; let Uses = [USR]; } +def G4_tfrgcpp : HInst< +(outs DoubleRegs:$Rdd32), +(ins GuestRegs64:$Gss32), +"$Rdd32 = $Gss32", +tc_6fa4db47, TypeCR>, Enc_0aa344 { +let Inst{13-5} = 0b000000000; +let Inst{31-21} = 0b01101000001; +} +def G4_tfrgcrr : HInst< +(outs IntRegs:$Rd32), +(ins GuestRegs:$Gs32), +"$Rd32 = $Gs32", +tc_6fa4db47, TypeCR>, Enc_44271f { +let Inst{13-5} = 0b000000000; +let Inst{31-21} = 0b01101010001; +let hasNewValue = 1; +let opNewValue = 0; +} +def G4_tfrgpcp : HInst< +(outs GuestRegs64:$Gdd32), +(ins DoubleRegs:$Rss32), +"$Gdd32 = $Rss32", +tc_994333cd, TypeCR>, Enc_ed5027 { +let Inst{13-5} = 0b000000000; +let Inst{31-21} = 0b01100011000; +let hasNewValue = 1; +let opNewValue = 0; +} +def G4_tfrgrcr : HInst< +(outs GuestRegs:$Gd32), +(ins IntRegs:$Rs32), +"$Gd32 = $Rs32", +tc_994333cd, TypeCR>, Enc_621fba { +let Inst{13-5} = 0b000000000; +let Inst{31-21} = 0b01100010000; +let hasNewValue = 1; +let opNewValue = 0; +} def J2_call : HInst< (outs), (ins a30_2Imm:$Ii), @@ -4905,7 +4943,7 @@ def J2_jumpf_nopred_map : HInst< (outs), (ins PredRegs:$Pu4, b15_2Imm:$Ii), "if (!$Pu4) jump $Ii", -tc_e9fae2d6, TypeMAPPING>, Requires<[HasV60T]> { +tc_e9fae2d6, TypeMAPPING>, Requires<[HasV60]> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -4967,7 +5005,7 @@ def J2_jumpfpt : HInst< (outs), (ins PredRegs:$Pu4, b30_2Imm:$Ii), "if (!$Pu4) jump:t $Ii", -tc_e1e99bfa, TypeJ>, Enc_daea09, Requires<[HasV60T]>, PredNewRel { +tc_e1e99bfa, TypeJ>, Enc_daea09, Requires<[HasV60]>, PredNewRel { let Inst{0-0} = 0b0; let Inst{12-10} = 0b100; let Inst{21-21} = 0b1; @@ -5029,7 +5067,7 @@ def J2_jumprf_nopred_map : HInst< (outs), (ins PredRegs:$Pu4, IntRegs:$Rs32), "if (!$Pu4) jumpr $Rs32", -tc_e0739b8c, TypeMAPPING>, Requires<[HasV60T]> { +tc_e0739b8c, TypeMAPPING>, Requires<[HasV60]> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -5077,7 +5115,7 @@ def J2_jumprfpt : HInst< (outs), (ins PredRegs:$Pu4, IntRegs:$Rs32), "if (!$Pu4) jumpr:t $Rs32", -tc_97743097, TypeJ>, Enc_88d4d9, Requires<[HasV60T]>, PredNewRel { +tc_97743097, TypeJ>, Enc_88d4d9, Requires<[HasV60]>, PredNewRel { let Inst{7-0} = 0b00000000; let Inst{13-10} = 0b0100; let Inst{31-21} = 0b01010011011; @@ -5222,7 +5260,7 @@ def J2_jumprt_nopred_map : HInst< (outs), (ins PredRegs:$Pu4, IntRegs:$Rs32), "if ($Pu4) jumpr $Rs32", -tc_e0739b8c, TypeMAPPING>, Requires<[HasV60T]> { +tc_e0739b8c, TypeMAPPING>, Requires<[HasV60]> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -5268,7 +5306,7 @@ def J2_jumprtpt : HInst< (outs), (ins PredRegs:$Pu4, IntRegs:$Rs32), "if ($Pu4) jumpr:t 
$Rs32", -tc_97743097, TypeJ>, Enc_88d4d9, Requires<[HasV60T]>, PredNewRel { +tc_97743097, TypeJ>, Enc_88d4d9, Requires<[HasV60]>, PredNewRel { let Inst{7-0} = 0b00000000; let Inst{13-10} = 0b0100; let Inst{31-21} = 0b01010011010; @@ -5347,7 +5385,7 @@ def J2_jumpt_nopred_map : HInst< (outs), (ins PredRegs:$Pu4, b15_2Imm:$Ii), "if ($Pu4) jump $Ii", -tc_e9fae2d6, TypeMAPPING>, Requires<[HasV60T]> { +tc_e9fae2d6, TypeMAPPING>, Requires<[HasV60]> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -5407,7 +5445,7 @@ def J2_jumptpt : HInst< (outs), (ins PredRegs:$Pu4, b30_2Imm:$Ii), "if ($Pu4) jump:t $Ii", -tc_e1e99bfa, TypeJ>, Enc_daea09, Requires<[HasV60T]>, PredNewRel { +tc_e1e99bfa, TypeJ>, Enc_daea09, Requires<[HasV60]>, PredNewRel { let Inst{0-0} = 0b0; let Inst{12-10} = 0b100; let Inst{21-21} = 0b0; @@ -5631,6 +5669,30 @@ let Inst{13-13} = 0b0; let Inst{31-16} = 0b0101010000000000; let isSolo = 1; } +def J2_trap1 : HInst< +(outs IntRegs:$Rx32), +(ins IntRegs:$Rx32in, u8_0Imm:$Ii), +"trap1($Rx32,#$Ii)", +tc_59a01ead, TypeJ>, Enc_33f8ba { +let Inst{1-0} = 0b00; +let Inst{7-5} = 0b000; +let Inst{13-13} = 0b0; +let Inst{31-21} = 0b01010100100; +let hasNewValue = 1; +let opNewValue = 0; +let isSolo = 1; +let Uses = [GOSP]; +let Defs = [GOSP, PC]; +let Constraints = "$Rx32 = $Rx32in"; +} +def J2_trap1_noregmap : HInst< +(outs), +(ins u8_0Imm:$Ii), +"trap1(#$Ii)", +tc_59a01ead, TypeMAPPING> { +let isPseudo = 1; +let isCodeGenOnly = 1; +} def J4_cmpeq_f_jumpnv_nt : HInst< (outs), (ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii), @@ -13334,7 +13396,7 @@ def L4_return_map_to_raw_f : HInst< (outs), (ins PredRegs:$Pv4), "if (!$Pv4) dealloc_return", -tc_513bef45, TypeMAPPING>, Requires<[HasV65T]> { +tc_513bef45, TypeMAPPING>, Requires<[HasV65]> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -13342,7 +13404,7 @@ def L4_return_map_to_raw_fnew_pnt : HInst< (outs), (ins PredRegs:$Pv4), "if (!$Pv4.new) dealloc_return:nt", -tc_395dc00f, TypeMAPPING>, Requires<[HasV65T]> { +tc_395dc00f, TypeMAPPING>, Requires<[HasV65]> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -13350,7 +13412,7 @@ def L4_return_map_to_raw_fnew_pt : HInst< (outs), (ins PredRegs:$Pv4), "if (!$Pv4.new) dealloc_return:t", -tc_395dc00f, TypeMAPPING>, Requires<[HasV65T]> { +tc_395dc00f, TypeMAPPING>, Requires<[HasV65]> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -13358,7 +13420,7 @@ def L4_return_map_to_raw_t : HInst< (outs), (ins PredRegs:$Pv4), "if ($Pv4) dealloc_return", -tc_3bc2c5d3, TypeMAPPING>, Requires<[HasV65T]> { +tc_3bc2c5d3, TypeMAPPING>, Requires<[HasV65]> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -13366,7 +13428,7 @@ def L4_return_map_to_raw_tnew_pnt : HInst< (outs), (ins PredRegs:$Pv4), "if ($Pv4.new) dealloc_return:nt", -tc_e7624c08, TypeMAPPING>, Requires<[HasV65T]> { +tc_e7624c08, TypeMAPPING>, Requires<[HasV65]> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -13374,7 +13436,7 @@ def L4_return_map_to_raw_tnew_pt : HInst< (outs), (ins PredRegs:$Pv4), "if ($Pv4.new) dealloc_return:t", -tc_e7624c08, TypeMAPPING>, Requires<[HasV65T]> { +tc_e7624c08, TypeMAPPING>, Requires<[HasV65]> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -13528,7 +13590,7 @@ def L6_deallocframe_map_to_raw : HInst< (outs), (ins), "deallocframe", -tc_d1090e34, TypeMAPPING>, Requires<[HasV65T]> { +tc_d1090e34, TypeMAPPING>, Requires<[HasV65]> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -13536,7 +13598,7 @@ def L6_return_map_to_raw : HInst< (outs), (ins), "dealloc_return", -tc_3d04548d, TypeMAPPING>, Requires<[HasV65T]> { +tc_3d04548d, 
TypeMAPPING>, Requires<[HasV65]> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -16916,7 +16978,7 @@ def M4_cmpyi_whc : HInst< (outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32, IntRegs:$Rt32), "$Rd32 = cmpyiwh($Rss32,$Rt32*):<<1:rnd:sat", -tc_8fd5f294, TypeS_3op>, Enc_3d5b28, Requires<[HasV5T]> { +tc_8fd5f294, TypeS_3op>, Enc_3d5b28, Requires<[HasV5]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000101000; @@ -16942,7 +17004,7 @@ def M4_cmpyr_whc : HInst< (outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32, IntRegs:$Rt32), "$Rd32 = cmpyrwh($Rss32,$Rt32*):<<1:rnd:sat", -tc_8fd5f294, TypeS_3op>, Enc_3d5b28, Requires<[HasV5T]> { +tc_8fd5f294, TypeS_3op>, Enc_3d5b28, Requires<[HasV5]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000101000; @@ -17295,7 +17357,7 @@ def M5_vdmacbsu : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rxx32 += vdmpybsu($Rss32,$Rtt32):sat", -tc_e913dc32, TypeM>, Enc_88c16c, Requires<[HasV5T]> { +tc_e913dc32, TypeM>, Enc_88c16c, Requires<[HasV5]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101010001; @@ -17307,7 +17369,7 @@ def M5_vdmpybsu : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vdmpybsu($Rss32,$Rtt32):sat", -tc_8fd5f294, TypeM>, Enc_a56825, Requires<[HasV5T]> { +tc_8fd5f294, TypeM>, Enc_a56825, Requires<[HasV5]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101000101; @@ -17402,7 +17464,7 @@ def M6_vabsdiffb : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32), "$Rdd32 = vabsdiffb($Rtt32,$Rss32)", -tc_f49e76f4, TypeM>, Enc_ea23e4, Requires<[HasV62T]> { +tc_f49e76f4, TypeM>, Enc_ea23e4, Requires<[HasV62]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101000111; @@ -17412,7 +17474,7 @@ def M6_vabsdiffub : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rtt32, DoubleRegs:$Rss32), "$Rdd32 = vabsdiffub($Rtt32,$Rss32)", -tc_f49e76f4, TypeM>, Enc_ea23e4, Requires<[HasV62T]> { +tc_f49e76f4, TypeM>, Enc_ea23e4, Requires<[HasV62]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11101000101; @@ -18142,7 +18204,7 @@ def S2_asr_i_p_rnd : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, u6_0Imm:$Ii), "$Rdd32 = asr($Rss32,#$Ii):rnd", -tc_2b6f77c6, TypeS_2op>, Enc_5eac98, Requires<[HasV5T]> { +tc_2b6f77c6, TypeS_2op>, Enc_5eac98, Requires<[HasV5]> { let Inst{7-5} = 0b111; let Inst{31-21} = 0b10000000110; let prefersSlot3 = 1; @@ -18151,7 +18213,7 @@ def S2_asr_i_p_rnd_goodsyntax : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, u6_0Imm:$Ii), "$Rdd32 = asrrnd($Rss32,#$Ii)", -tc_2b6f77c6, TypeS_2op>, Requires<[HasV5T]> { +tc_2b6f77c6, TypeS_2op>, Requires<[HasV5]> { let isPseudo = 1; } def S2_asr_i_r : HInst< @@ -25086,7 +25148,7 @@ def S5_asrhub_rnd_sat : HInst< (outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32, u4_0Imm:$Ii), "$Rd32 = vasrhub($Rss32,#$Ii):raw", -tc_2b6f77c6, TypeS_2op>, Enc_11a146, Requires<[HasV5T]> { +tc_2b6f77c6, TypeS_2op>, Enc_11a146, Requires<[HasV5]> { let Inst{7-5} = 0b100; let Inst{13-12} = 0b00; let Inst{31-21} = 0b10001000011; @@ -25099,7 +25161,7 @@ def S5_asrhub_rnd_sat_goodsyntax : HInst< (outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32, u4_0Imm:$Ii), "$Rd32 = vasrhub($Rss32,#$Ii):rnd:sat", -tc_2b6f77c6, TypeS_2op>, Requires<[HasV5T]> { +tc_2b6f77c6, TypeS_2op>, Requires<[HasV5]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -25108,7 +25170,7 @@ def S5_asrhub_sat : HInst< 
(outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32, u4_0Imm:$Ii), "$Rd32 = vasrhub($Rss32,#$Ii):sat", -tc_2b6f77c6, TypeS_2op>, Enc_11a146, Requires<[HasV5T]> { +tc_2b6f77c6, TypeS_2op>, Enc_11a146, Requires<[HasV5]> { let Inst{7-5} = 0b101; let Inst{13-12} = 0b00; let Inst{31-21} = 0b10001000011; @@ -25121,7 +25183,7 @@ def S5_popcountp : HInst< (outs IntRegs:$Rd32), (ins DoubleRegs:$Rss32), "$Rd32 = popcount($Rss32)", -tc_00afc57e, TypeS_2op>, Enc_90cd8b, Requires<[HasV5T]> { +tc_00afc57e, TypeS_2op>, Enc_90cd8b, Requires<[HasV5]> { let Inst{13-5} = 0b000000011; let Inst{31-21} = 0b10001000011; let hasNewValue = 1; @@ -25132,7 +25194,7 @@ def S5_vasrhrnd : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, u4_0Imm:$Ii), "$Rdd32 = vasrh($Rss32,#$Ii):raw", -tc_2b6f77c6, TypeS_2op>, Enc_12b6e9, Requires<[HasV5T]> { +tc_2b6f77c6, TypeS_2op>, Enc_12b6e9, Requires<[HasV5]> { let Inst{7-5} = 0b000; let Inst{13-12} = 0b00; let Inst{31-21} = 0b10000000001; @@ -25142,14 +25204,14 @@ def S5_vasrhrnd_goodsyntax : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, u4_0Imm:$Ii), "$Rdd32 = vasrh($Rss32,#$Ii):rnd", -tc_2b6f77c6, TypeS_2op>, Requires<[HasV5T]> { +tc_2b6f77c6, TypeS_2op>, Requires<[HasV5]> { let isPseudo = 1; } def S6_allocframe_to_raw : HInst< (outs), (ins u11_3Imm:$Ii), "allocframe(#$Ii)", -tc_e216a5db, TypeMAPPING>, Requires<[HasV65T]> { +tc_e216a5db, TypeMAPPING>, Requires<[HasV65]> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -25157,7 +25219,7 @@ def S6_rol_i_p : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, u6_0Imm:$Ii), "$Rdd32 = rol($Rss32,#$Ii)", -tc_55050d58, TypeS_2op>, Enc_5eac98, Requires<[HasV60T]> { +tc_55050d58, TypeS_2op>, Enc_5eac98, Requires<[HasV60]> { let Inst{7-5} = 0b011; let Inst{31-21} = 0b10000000000; } @@ -25165,7 +25227,7 @@ def S6_rol_i_p_acc : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii), "$Rxx32 += rol($Rss32,#$Ii)", -tc_41d5298e, TypeS_2op>, Enc_70fb07, Requires<[HasV60T]> { +tc_41d5298e, TypeS_2op>, Enc_70fb07, Requires<[HasV60]> { let Inst{7-5} = 0b111; let Inst{31-21} = 0b10000010000; let prefersSlot3 = 1; @@ -25175,7 +25237,7 @@ def S6_rol_i_p_and : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii), "$Rxx32 &= rol($Rss32,#$Ii)", -tc_41d5298e, TypeS_2op>, Enc_70fb07, Requires<[HasV60T]> { +tc_41d5298e, TypeS_2op>, Enc_70fb07, Requires<[HasV60]> { let Inst{7-5} = 0b011; let Inst{31-21} = 0b10000010010; let prefersSlot3 = 1; @@ -25185,7 +25247,7 @@ def S6_rol_i_p_nac : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii), "$Rxx32 -= rol($Rss32,#$Ii)", -tc_41d5298e, TypeS_2op>, Enc_70fb07, Requires<[HasV60T]> { +tc_41d5298e, TypeS_2op>, Enc_70fb07, Requires<[HasV60]> { let Inst{7-5} = 0b011; let Inst{31-21} = 0b10000010000; let prefersSlot3 = 1; @@ -25195,7 +25257,7 @@ def S6_rol_i_p_or : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii), "$Rxx32 |= rol($Rss32,#$Ii)", -tc_41d5298e, TypeS_2op>, Enc_70fb07, Requires<[HasV60T]> { +tc_41d5298e, TypeS_2op>, Enc_70fb07, Requires<[HasV60]> { let Inst{7-5} = 0b111; let Inst{31-21} = 0b10000010010; let prefersSlot3 = 1; @@ -25205,7 +25267,7 @@ def S6_rol_i_p_xacc : HInst< (outs DoubleRegs:$Rxx32), (ins DoubleRegs:$Rxx32in, DoubleRegs:$Rss32, u6_0Imm:$Ii), "$Rxx32 ^= rol($Rss32,#$Ii)", -tc_41d5298e, TypeS_2op>, Enc_70fb07, Requires<[HasV60T]> { +tc_41d5298e, TypeS_2op>, Enc_70fb07, Requires<[HasV60]> { let Inst{7-5} = 0b011; let 
Inst{31-21} = 0b10000010100; let prefersSlot3 = 1; @@ -25215,7 +25277,7 @@ def S6_rol_i_r : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, u5_0Imm:$Ii), "$Rd32 = rol($Rs32,#$Ii)", -tc_55050d58, TypeS_2op>, Enc_a05677, Requires<[HasV60T]> { +tc_55050d58, TypeS_2op>, Enc_a05677, Requires<[HasV60]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10001100000; @@ -25226,7 +25288,7 @@ def S6_rol_i_r_acc : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii), "$Rx32 += rol($Rs32,#$Ii)", -tc_41d5298e, TypeS_2op>, Enc_28a2dc, Requires<[HasV60T]> { +tc_41d5298e, TypeS_2op>, Enc_28a2dc, Requires<[HasV60]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10001110000; @@ -25239,7 +25301,7 @@ def S6_rol_i_r_and : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii), "$Rx32 &= rol($Rs32,#$Ii)", -tc_41d5298e, TypeS_2op>, Enc_28a2dc, Requires<[HasV60T]> { +tc_41d5298e, TypeS_2op>, Enc_28a2dc, Requires<[HasV60]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10001110010; @@ -25252,7 +25314,7 @@ def S6_rol_i_r_nac : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii), "$Rx32 -= rol($Rs32,#$Ii)", -tc_41d5298e, TypeS_2op>, Enc_28a2dc, Requires<[HasV60T]> { +tc_41d5298e, TypeS_2op>, Enc_28a2dc, Requires<[HasV60]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10001110000; @@ -25265,7 +25327,7 @@ def S6_rol_i_r_or : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii), "$Rx32 |= rol($Rs32,#$Ii)", -tc_41d5298e, TypeS_2op>, Enc_28a2dc, Requires<[HasV60T]> { +tc_41d5298e, TypeS_2op>, Enc_28a2dc, Requires<[HasV60]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10001110010; @@ -25278,7 +25340,7 @@ def S6_rol_i_r_xacc : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, IntRegs:$Rs32, u5_0Imm:$Ii), "$Rx32 ^= rol($Rs32,#$Ii)", -tc_41d5298e, TypeS_2op>, Enc_28a2dc, Requires<[HasV60T]> { +tc_41d5298e, TypeS_2op>, Enc_28a2dc, Requires<[HasV60]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10001110100; @@ -25291,7 +25353,7 @@ def S6_vsplatrbp : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32), "$Rdd32 = vsplatb($Rs32)", -tc_be706f30, TypeS_2op>, Enc_3a3d62, Requires<[HasV62T]> { +tc_be706f30, TypeS_2op>, Enc_3a3d62, Requires<[HasV62]> { let Inst{13-5} = 0b000000100; let Inst{31-21} = 0b10000100010; } @@ -25299,7 +25361,7 @@ def S6_vtrunehb_ppp : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vtrunehb($Rss32,$Rtt32)", -tc_55050d58, TypeS_3op>, Enc_a56825, Requires<[HasV62T]> { +tc_55050d58, TypeS_3op>, Enc_a56825, Requires<[HasV62]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000001100; @@ -25308,7 +25370,7 @@ def S6_vtrunohb_ppp : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = vtrunohb($Rss32,$Rtt32)", -tc_55050d58, TypeS_3op>, Enc_a56825, Requires<[HasV62T]> { +tc_55050d58, TypeS_3op>, Enc_a56825, Requires<[HasV62]> { let Inst{7-5} = 0b101; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000001100; @@ -26288,7 +26350,7 @@ def V6_ldntnt0 : HInst< (outs HvxVR:$Vd32), (ins IntRegs:$Rt32), "$Vd32 = vmem($Rt32):nt", -PSEUDO, TypeMAPPING>, Requires<[HasV62T]> { +PSEUDO, TypeMAPPING>, Requires<[HasV62]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -30301,7 +30363,7 @@ def V6_vasrhbrndsat_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8), "$Vd32 
= vasrhb($Vu32,$Vv32,$Rt8):rnd:sat", -tc_7fa8b40f, TypeMAPPING>, Requires<[HasV60T]> { +tc_7fa8b40f, TypeMAPPING>, Requires<[HasV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -30335,7 +30397,7 @@ def V6_vasrhubrndsat_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8), "$Vd32 = vasrhub($Vu32,$Vv32,$Rt8):rnd:sat", -tc_7fa8b40f, TypeMAPPING>, Requires<[HasV60T]> { +tc_7fa8b40f, TypeMAPPING>, Requires<[HasV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -30357,7 +30419,7 @@ def V6_vasrhubsat_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8), "$Vd32 = vasrhub($Vu32,$Vv32,$Rt8):sat", -tc_7fa8b40f, TypeMAPPING>, Requires<[HasV60T]> { +tc_7fa8b40f, TypeMAPPING>, Requires<[HasV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -30500,7 +30562,7 @@ def V6_vasrwh_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8), "$Vd32 = vasrwh($Vu32,$Vv32,$Rt8)", -tc_7fa8b40f, TypeMAPPING>, Requires<[HasV60T]> { +tc_7fa8b40f, TypeMAPPING>, Requires<[HasV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -30522,7 +30584,7 @@ def V6_vasrwhrndsat_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8), "$Vd32 = vasrwh($Vu32,$Vv32,$Rt8):rnd:sat", -tc_7fa8b40f, TypeMAPPING>, Requires<[HasV60T]> { +tc_7fa8b40f, TypeMAPPING>, Requires<[HasV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -30544,7 +30606,7 @@ def V6_vasrwhsat_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8), "$Vd32 = vasrwh($Vu32,$Vv32,$Rt8):sat", -tc_7fa8b40f, TypeMAPPING>, Requires<[HasV60T]> { +tc_7fa8b40f, TypeMAPPING>, Requires<[HasV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -30578,7 +30640,7 @@ def V6_vasrwuhsat_alt : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32, IntRegsLow8:$Rt8), "$Vd32 = vasrwuh($Vu32,$Vv32,$Rt8):sat", -tc_7fa8b40f, TypeMAPPING>, Requires<[HasV60T]> { +tc_7fa8b40f, TypeMAPPING>, Requires<[HasV60]> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -36942,7 +37004,7 @@ def Y5_l2fetch : HInst< (outs), (ins IntRegs:$Rs32, DoubleRegs:$Rtt32), "l2fetch($Rs32,$Rtt32)", -tc_daa058fa, TypeST>, Enc_e6abcf, Requires<[HasV5T]> { +tc_daa058fa, TypeST>, Enc_e6abcf, Requires<[HasV5]> { let Inst{7-0} = 0b00000000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b10100110100; diff --git a/lib/Target/Hexagon/HexagonDepMappings.td b/lib/Target/Hexagon/HexagonDepMappings.td index 7a156c39da9c..03c504ff0b08 100644 --- a/lib/Target/Hexagon/HexagonDepMappings.td +++ b/lib/Target/Hexagon/HexagonDepMappings.td @@ -26,6 +26,7 @@ def J2_jumpf_nopred_mapAlias : InstAlias<"if (!$Pu4) jump $Ii", (J2_jumpf PredRe def J2_jumprf_nopred_mapAlias : InstAlias<"if (!$Pu4) jumpr $Rs32", (J2_jumprf PredRegs:$Pu4, IntRegs:$Rs32)>; def J2_jumprt_nopred_mapAlias : InstAlias<"if ($Pu4) jumpr $Rs32", (J2_jumprt PredRegs:$Pu4, IntRegs:$Rs32)>; def J2_jumpt_nopred_mapAlias : InstAlias<"if ($Pu4) jump $Ii", (J2_jumpt PredRegs:$Pu4, b30_2Imm:$Ii)>; +def J2_trap1_noregmapAlias : InstAlias<"trap1(#$Ii)", (J2_trap1 R0, u8_0Imm:$Ii)>; def L2_loadalignb_zomapAlias : InstAlias<"$Ryy32 = memb_fifo($Rs32)", (L2_loadalignb_io DoubleRegs:$Ryy32, IntRegs:$Rs32, 0)>; def L2_loadalignh_zomapAlias : InstAlias<"$Ryy32 = memh_fifo($Rs32)", (L2_loadalignh_io DoubleRegs:$Ryy32, IntRegs:$Rs32, 0)>; def L2_loadbsw2_zomapAlias : InstAlias<"$Rd32 = membh($Rs32)", (L2_loadbsw2_io IntRegs:$Rd32, 
IntRegs:$Rs32, 0)>; diff --git a/lib/Target/Hexagon/HexagonEarlyIfConv.cpp b/lib/Target/Hexagon/HexagonEarlyIfConv.cpp index 0f1b9a4733c5..557e6384be6a 100644 --- a/lib/Target/Hexagon/HexagonEarlyIfConv.cpp +++ b/lib/Target/Hexagon/HexagonEarlyIfConv.cpp @@ -100,7 +100,7 @@ namespace llvm { } // end namespace llvm static cl::opt<bool> EnableHexagonBP("enable-hexagon-br-prob", cl::Hidden, - cl::init(false), cl::desc("Enable branch probability info")); + cl::init(true), cl::desc("Enable branch probability info")); static cl::opt<unsigned> SizeLimit("eif-limit", cl::init(6), cl::Hidden, cl::desc("Size limit in Hexagon early if-conversion")); static cl::opt<bool> SkipExitBranches("eif-no-loop-exit", cl::init(false), @@ -191,6 +191,7 @@ namespace { bool isProfitable(const FlowPattern &FP) const; bool isPredicableStore(const MachineInstr *MI) const; bool isSafeToSpeculate(const MachineInstr *MI) const; + bool isPredicate(unsigned R) const; unsigned getCondStoreOpcode(unsigned Opc, bool IfTrue) const; void predicateInstr(MachineBasicBlock *ToB, MachineBasicBlock::iterator At, @@ -207,7 +208,6 @@ namespace { void removeBlock(MachineBasicBlock *B); void eliminatePhis(MachineBasicBlock *B); - void replacePhiEdges(MachineBasicBlock *OldB, MachineBasicBlock *NewB); void mergeBlocks(MachineBasicBlock *PredB, MachineBasicBlock *SuccB); void simplifyFlowGraph(const FlowPattern &FP); @@ -238,11 +238,12 @@ bool HexagonEarlyIfConversion::isPreheader(const MachineBasicBlock *B) const { bool HexagonEarlyIfConversion::matchFlowPattern(MachineBasicBlock *B, MachineLoop *L, FlowPattern &FP) { - DEBUG(dbgs() << "Checking flow pattern at " << printMBBReference(*B) << "\n"); + LLVM_DEBUG(dbgs() << "Checking flow pattern at " << printMBBReference(*B) + << "\n"); // Interested only in conditional branches, no .new, no new-value, etc. // Check the terminators directly, it's easier than handling all responses - // from AnalyzeBranch. + // from analyzeBranch. MachineBasicBlock *TB = nullptr, *FB = nullptr; MachineBasicBlock::const_iterator T1I = B->getFirstTerminator(); if (T1I == B->end()) @@ -325,17 +326,17 @@ bool HexagonEarlyIfConversion::matchFlowPattern(MachineBasicBlock *B, } // Don't try to predicate loop preheaders. if ((TB && isPreheader(TB)) || (FB && isPreheader(FB))) { - DEBUG(dbgs() << "One of blocks " << PrintMB(TB) << ", " << PrintMB(FB) - << " is a loop preheader. Skipping.\n"); + LLVM_DEBUG(dbgs() << "One of blocks " << PrintMB(TB) << ", " << PrintMB(FB) + << " is a loop preheader. Skipping.\n"); return false; } FP = FlowPattern(B, PredR, TB, FB, JB); - DEBUG(dbgs() << "Detected " << PrintFP(FP, *TRI) << "\n"); + LLVM_DEBUG(dbgs() << "Detected " << PrintFP(FP, *TRI) << "\n"); return true; } -// KLUDGE: HexagonInstrInfo::AnalyzeBranch won't work on a block that +// KLUDGE: HexagonInstrInfo::analyzeBranch won't work on a block that // contains EH_LABEL. bool HexagonEarlyIfConversion::hasEHLabel(const MachineBasicBlock *B) const { for (auto &I : *B) @@ -344,7 +345,7 @@ bool HexagonEarlyIfConversion::hasEHLabel(const MachineBasicBlock *B) const { return false; } -// KLUDGE: HexagonInstrInfo::AnalyzeBranch may be unable to recognize +// KLUDGE: HexagonInstrInfo::analyzeBranch may be unable to recognize // that a block can never fall-through. 
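The EnableHexagonBP hunk above turns branch-probability info on by default simply by changing cl::init(false) to cl::init(true). For readers unfamiliar with LLVM's command-line machinery, here is a minimal standalone sketch of the same cl::opt pattern; the option name, description, and tool are invented for illustration.

#include "llvm/Support/CommandLine.h"
using namespace llvm;

// Hidden boolean option, on by default -- the same shape as EnableHexagonBP.
// Changing cl::init() is the whole mechanism behind the default flip above.
static cl::opt<bool> EnableMyFeature("enable-my-feature", cl::Hidden,
                                     cl::init(true),
                                     cl::desc("Enable my feature"));

int main(int argc, char **argv) {
  cl::ParseCommandLineOptions(argc, argv, "cl::opt sketch\n");
  return EnableMyFeature ? 0 : 1; // flip with -enable-my-feature=false
}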
bool HexagonEarlyIfConversion::hasUncondBranch(const MachineBasicBlock *B) const { @@ -367,7 +368,7 @@ bool HexagonEarlyIfConversion::isValidCandidate(const MachineBasicBlock *B) return false; for (auto &MI : *B) { - if (MI.isDebugValue()) + if (MI.isDebugInstr()) continue; if (MI.isConditionalBranch()) return false; @@ -387,13 +388,8 @@ bool HexagonEarlyIfConversion::isValidCandidate(const MachineBasicBlock *B) unsigned R = MO.getReg(); if (!TargetRegisterInfo::isVirtualRegister(R)) continue; - switch (MRI->getRegClass(R)->getID()) { - case Hexagon::PredRegsRegClassID: - case Hexagon::HvxQRRegClassID: - break; - default: - continue; - } + if (!isPredicate(R)) + continue; for (auto U = MRI->use_begin(R); U != MRI->use_end(); ++U) if (U->getParent()->isPHI()) return false; @@ -443,8 +439,7 @@ bool HexagonEarlyIfConversion::isValid(const FlowPattern &FP) const { if (usesUndefVReg(&MI)) return false; unsigned DefR = MI.getOperand(0).getReg(); - const TargetRegisterClass *RC = MRI->getRegClass(DefR); - if (RC == &Hexagon::PredRegsRegClass) + if (isPredicate(DefR)) return false; } } @@ -500,7 +495,7 @@ unsigned HexagonEarlyIfConversion::countPredicateDefs( unsigned R = MO.getReg(); if (!TargetRegisterInfo::isVirtualRegister(R)) continue; - if (MRI->getRegClass(R) == &Hexagon::PredRegsRegClass) + if (isPredicate(R)) PredDefs++; } } @@ -508,10 +503,21 @@ unsigned HexagonEarlyIfConversion::countPredicateDefs( } bool HexagonEarlyIfConversion::isProfitable(const FlowPattern &FP) const { + BranchProbability JumpProb(1, 10); + BranchProbability Prob(9, 10); + if (MBPI && FP.TrueB && !FP.FalseB && + (MBPI->getEdgeProbability(FP.SplitB, FP.TrueB) < JumpProb || + MBPI->getEdgeProbability(FP.SplitB, FP.TrueB) > Prob)) + return false; + + if (MBPI && !FP.TrueB && FP.FalseB && + (MBPI->getEdgeProbability(FP.SplitB, FP.FalseB) < JumpProb || + MBPI->getEdgeProbability(FP.SplitB, FP.FalseB) > Prob)) + return false; + if (FP.TrueB && FP.FalseB) { // Do not IfCovert if the branch is one sided. 
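The new guards added to isProfitable above reject one-sided single-successor patterns before the existing checks run. A hedged sketch of the probability window they enforce, using the same 1/10 and 9/10 fractions from the hunk (the helper name is assumed):

#include "llvm/Support/BranchProbability.h"
using llvm::BranchProbability;

// An edge is a reasonable if-conversion candidate only when its probability
// lies inside the (1/10, 9/10) window, i.e. the branch is not already
// heavily biased toward one side.
static bool insideWindow(BranchProbability EdgeProb) {
  BranchProbability JumpProb(1, 10), Prob(9, 10);
  return !(EdgeProb < JumpProb || EdgeProb > Prob);
}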
if (MBPI) { - BranchProbability Prob(9, 10); if (MBPI->getEdgeProbability(FP.SplitB, FP.TrueB) > Prob) return false; if (MBPI->getEdgeProbability(FP.SplitB, FP.FalseB) > Prob) @@ -546,8 +552,9 @@ bool HexagonEarlyIfConversion::isProfitable(const FlowPattern &FP) const { }; unsigned Spare = 0; unsigned TotalIn = TotalCount(FP.TrueB, Spare) + TotalCount(FP.FalseB, Spare); - DEBUG(dbgs() << "Total number of instructions to be predicated/speculated: " - << TotalIn << ", spare room: " << Spare << "\n"); + LLVM_DEBUG( + dbgs() << "Total number of instructions to be predicated/speculated: " + << TotalIn << ", spare room: " << Spare << "\n"); if (TotalIn >= SizeLimit+Spare) return false; @@ -574,12 +581,13 @@ bool HexagonEarlyIfConversion::isProfitable(const FlowPattern &FP) const { PredDefs += countPredicateDefs(SB); } } - DEBUG(dbgs() << "Total number of extra muxes from converted phis: " - << TotalPh << "\n"); + LLVM_DEBUG(dbgs() << "Total number of extra muxes from converted phis: " + << TotalPh << "\n"); if (TotalIn+TotalPh >= SizeLimit+Spare) return false; - DEBUG(dbgs() << "Total number of predicate registers: " << PredDefs << "\n"); + LLVM_DEBUG(dbgs() << "Total number of predicate registers: " << PredDefs + << "\n"); if (PredDefs > 4) return false; @@ -620,11 +628,11 @@ bool HexagonEarlyIfConversion::visitBlock(MachineBasicBlock *B, return Changed; if (!isValid(FP)) { - DEBUG(dbgs() << "Conversion is not valid\n"); + LLVM_DEBUG(dbgs() << "Conversion is not valid\n"); return Changed; } if (!isProfitable(FP)) { - DEBUG(dbgs() << "Conversion is not profitable\n"); + LLVM_DEBUG(dbgs() << "Conversion is not profitable\n"); return Changed; } @@ -635,8 +643,9 @@ bool HexagonEarlyIfConversion::visitBlock(MachineBasicBlock *B, bool HexagonEarlyIfConversion::visitLoop(MachineLoop *L) { MachineBasicBlock *HB = L ? L->getHeader() : nullptr; - DEBUG((L ? dbgs() << "Visiting loop H:" << PrintMB(HB) - : dbgs() << "Visiting function") << "\n"); + LLVM_DEBUG((L ? 
dbgs() << "Visiting loop H:" << PrintMB(HB) + : dbgs() << "Visiting function") + << "\n"); bool Changed = false; if (L) { for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I) @@ -680,10 +689,18 @@ bool HexagonEarlyIfConversion::isSafeToSpeculate(const MachineInstr *MI) return false; if (MI->hasUnmodeledSideEffects()) return false; + if (MI->getOpcode() == TargetOpcode::LIFETIME_END) + return false; return true; } +bool HexagonEarlyIfConversion::isPredicate(unsigned R) const { + const TargetRegisterClass *RC = MRI->getRegClass(R); + return RC == &Hexagon::PredRegsRegClass || + RC == &Hexagon::HvxQRRegClass; +} + unsigned HexagonEarlyIfConversion::getCondStoreOpcode(unsigned Opc, bool IfTrue) const { return HII->getCondOpcode(Opc, !IfTrue); @@ -745,7 +762,7 @@ void HexagonEarlyIfConversion::predicateInstr(MachineBasicBlock *ToB, void HexagonEarlyIfConversion::predicateBlockNB(MachineBasicBlock *ToB, MachineBasicBlock::iterator At, MachineBasicBlock *FromB, unsigned PredR, bool IfTrue) { - DEBUG(dbgs() << "Predicating block " << PrintMB(FromB) << "\n"); + LLVM_DEBUG(dbgs() << "Predicating block " << PrintMB(FromB) << "\n"); MachineBasicBlock::iterator End = FromB->getFirstTerminator(); MachineBasicBlock::iterator I, NextI; @@ -765,9 +782,11 @@ unsigned HexagonEarlyIfConversion::buildMux(MachineBasicBlock *B, unsigned Opc = 0; switch (DRC->getID()) { case Hexagon::IntRegsRegClassID: + case Hexagon::IntRegsLow8RegClassID: Opc = Hexagon::C2_mux; break; case Hexagon::DoubleRegsRegClassID: + case Hexagon::GeneralDoubleLow8RegsRegClassID: Opc = Hexagon::PS_pselect; break; case Hexagon::HvxVRRegClassID: @@ -935,7 +954,7 @@ void HexagonEarlyIfConversion::convert(const FlowPattern &FP) { } void HexagonEarlyIfConversion::removeBlock(MachineBasicBlock *B) { - DEBUG(dbgs() << "Removing block " << PrintMB(B) << "\n"); + LLVM_DEBUG(dbgs() << "Removing block " << PrintMB(B) << "\n"); // Transfer the immediate dominator information from B to its descendants. 
MachineDomTreeNode *N = MDT->getNode(B); @@ -965,7 +984,7 @@ void HexagonEarlyIfConversion::removeBlock(MachineBasicBlock *B) { } void HexagonEarlyIfConversion::eliminatePhis(MachineBasicBlock *B) { - DEBUG(dbgs() << "Removing phi nodes from block " << PrintMB(B) << "\n"); + LLVM_DEBUG(dbgs() << "Removing phi nodes from block " << PrintMB(B) << "\n"); MachineBasicBlock::iterator I, NextI, NonPHI = B->getFirstNonPHI(); for (I = B->begin(); I != NonPHI; I = NextI) { NextI = std::next(I); @@ -990,34 +1009,16 @@ void HexagonEarlyIfConversion::eliminatePhis(MachineBasicBlock *B) { } } -void HexagonEarlyIfConversion::replacePhiEdges(MachineBasicBlock *OldB, - MachineBasicBlock *NewB) { - for (auto I = OldB->succ_begin(), E = OldB->succ_end(); I != E; ++I) { - MachineBasicBlock *SB = *I; - MachineBasicBlock::iterator P, N = SB->getFirstNonPHI(); - for (P = SB->begin(); P != N; ++P) { - MachineInstr &PN = *P; - for (MachineOperand &MO : PN.operands()) - if (MO.isMBB() && MO.getMBB() == OldB) - MO.setMBB(NewB); - } - } -} - void HexagonEarlyIfConversion::mergeBlocks(MachineBasicBlock *PredB, MachineBasicBlock *SuccB) { - DEBUG(dbgs() << "Merging blocks " << PrintMB(PredB) << " and " - << PrintMB(SuccB) << "\n"); + LLVM_DEBUG(dbgs() << "Merging blocks " << PrintMB(PredB) << " and " + << PrintMB(SuccB) << "\n"); bool TermOk = hasUncondBranch(SuccB); eliminatePhis(SuccB); HII->removeBranch(*PredB); PredB->removeSuccessor(SuccB); PredB->splice(PredB->end(), SuccB, SuccB->begin(), SuccB->end()); - MachineBasicBlock::succ_iterator I, E = SuccB->succ_end(); - for (I = SuccB->succ_begin(); I != E; ++I) - PredB->addSuccessor(*I); - PredB->normalizeSuccProbs(); - replacePhiEdges(SuccB, PredB); + PredB->transferSuccessorsAndUpdatePHIs(SuccB); removeBlock(SuccB); if (!TermOk) PredB->updateTerminator(); @@ -1039,7 +1040,7 @@ void HexagonEarlyIfConversion::simplifyFlowGraph(const FlowPattern &FP) { // By now, the split block has only one successor (SB), and SB has only // one predecessor. We can try to merge them. We will need to update ter- - // minators in FP.Split+SB, and that requires working AnalyzeBranch, which + // minators in FP.Split+SB, and that requires working analyzeBranch, which // fails on Hexagon for blocks that have EH_LABELs. However, if SB ends // with an unconditional branch, we won't need to touch the terminators. if (!hasEHLabel(SB) || hasUncondBranch(SB)) diff --git a/lib/Target/Hexagon/HexagonExpandCondsets.cpp b/lib/Target/Hexagon/HexagonExpandCondsets.cpp index c2feaf5737b2..7e774674e0c0 100644 --- a/lib/Target/Hexagon/HexagonExpandCondsets.cpp +++ b/lib/Target/Hexagon/HexagonExpandCondsets.cpp @@ -316,8 +316,10 @@ void HexagonExpandCondsets::updateKillFlags(unsigned Reg) { auto KillAt = [this,Reg] (SlotIndex K, LaneBitmask LM) -> void { // Set the <kill> flag on a use of Reg whose lane mask is contained in LM. 
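The KillAt lambda that follows sets <kill> flags based on lane masks. A small sketch of the containment test it relies on, with the masks treated as assumed inputs:

#include "llvm/MC/LaneBitmask.h"
using llvm::LaneBitmask;

// A use reading lanes SLM may be marked <kill> only when SLM is a subset of
// the lanes LM known to die at this point: (SLM & LM) == SLM.
static bool fullyCovered(LaneBitmask SLM, LaneBitmask LM) {
  return (SLM & LM) == SLM;
}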
MachineInstr *MI = LIS->getInstructionFromIndex(K); - for (auto &Op : MI->operands()) { - if (!Op.isReg() || !Op.isUse() || Op.getReg() != Reg) + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &Op = MI->getOperand(i); + if (!Op.isReg() || !Op.isUse() || Op.getReg() != Reg || + MI->isRegTiedToDefOperand(i)) continue; LaneBitmask SLM = getLaneMask(Reg, Op.getSubReg()); if ((SLM & LM) == SLM) { @@ -497,14 +499,18 @@ void HexagonExpandCondsets::updateDeadsInRange(unsigned Reg, LaneBitmask LM, if (!Op.isReg() || !DefRegs.count(Op)) continue; if (Op.isDef()) { - ImpUses.insert({Op, i}); + // Tied defs will always have corresponding uses, so no extra + // implicit uses are needed. + if (!Op.isTied()) + ImpUses.insert({Op, i}); } else { // This function can be called for the same register with different // lane masks. If the def in this instruction was for the whole // register, we can get here more than once. Avoid adding multiple // implicit uses (or adding an implicit use when an explicit one is // present). - ImpUses.erase(Op); + if (Op.isTied()) + ImpUses.erase(Op); } } if (ImpUses.empty()) @@ -545,7 +551,14 @@ void HexagonExpandCondsets::removeInstr(MachineInstr &MI) { void HexagonExpandCondsets::updateLiveness(std::set<unsigned> &RegSet, bool Recalc, bool UpdateKills, bool UpdateDeads) { UpdateKills |= UpdateDeads; - for (auto R : RegSet) { + for (unsigned R : RegSet) { + if (!TargetRegisterInfo::isVirtualRegister(R)) { + assert(TargetRegisterInfo::isPhysicalRegister(R)); + // There shouldn't be any physical registers as operands, except + // possibly reserved registers. + assert(MRI->isReserved(R)); + continue; + } if (Recalc) recalculateLiveInterval(R); if (UpdateKills) @@ -641,7 +654,7 @@ MachineInstr *HexagonExpandCondsets::genCondTfrFor(MachineOperand &SrcOp, .add(SrcOp); } - DEBUG(dbgs() << "created an initial copy: " << *MIB); + LLVM_DEBUG(dbgs() << "created an initial copy: " << *MIB); return &*MIB; } @@ -654,8 +667,8 @@ bool HexagonExpandCondsets::split(MachineInstr &MI, return false; TfrCounter++; } - DEBUG(dbgs() << "\nsplitting " << printMBBReference(*MI.getParent()) << ": " - << MI); + LLVM_DEBUG(dbgs() << "\nsplitting " << printMBBReference(*MI.getParent()) + << ": " << MI); MachineOperand &MD = MI.getOperand(0); // Definition MachineOperand &MP = MI.getOperand(1); // Predicate register assert(MD.isDef()); @@ -932,8 +945,8 @@ bool HexagonExpandCondsets::predicate(MachineInstr &TfrI, bool Cond, unsigned Opc = TfrI.getOpcode(); (void)Opc; assert(Opc == Hexagon::A2_tfrt || Opc == Hexagon::A2_tfrf); - DEBUG(dbgs() << "\nattempt to predicate if-" << (Cond ? "true" : "false") - << ": " << TfrI); + LLVM_DEBUG(dbgs() << "\nattempt to predicate if-" << (Cond ? "true" : "false") + << ": " << TfrI); MachineOperand &MD = TfrI.getOperand(0); MachineOperand &MP = TfrI.getOperand(1); @@ -954,7 +967,7 @@ bool HexagonExpandCondsets::predicate(MachineInstr &TfrI, bool Cond, if (!DefI || !isPredicable(DefI)) return false; - DEBUG(dbgs() << "Source def: " << *DefI); + LLVM_DEBUG(dbgs() << "Source def: " << *DefI); // Collect the information about registers defined and used between the // DefI and the TfrI. @@ -1039,8 +1052,8 @@ bool HexagonExpandCondsets::predicate(MachineInstr &TfrI, bool Cond, if (!canMoveMemTo(*DefI, TfrI, true)) CanDown = false; - DEBUG(dbgs() << "Can move up: " << (CanUp ? "yes" : "no") - << ", can move down: " << (CanDown ? "yes\n" : "no\n")); + LLVM_DEBUG(dbgs() << "Can move up: " << (CanUp ? 
"yes" : "no") + << ", can move down: " << (CanDown ? "yes\n" : "no\n")); MachineBasicBlock::iterator PastDefIt = std::next(DefIt); if (CanUp) predicateAt(MD, *DefI, PastDefIt, MP, Cond, UpdRegs); @@ -1135,10 +1148,10 @@ bool HexagonExpandCondsets::coalesceRegisters(RegisterRef R1, RegisterRef R2) { return false; bool Overlap = L1.overlaps(L2); - DEBUG(dbgs() << "compatible registers: (" - << (Overlap ? "overlap" : "disjoint") << ")\n " - << printReg(R1.Reg, TRI, R1.Sub) << " " << L1 << "\n " - << printReg(R2.Reg, TRI, R2.Sub) << " " << L2 << "\n"); + LLVM_DEBUG(dbgs() << "compatible registers: (" + << (Overlap ? "overlap" : "disjoint") << ")\n " + << printReg(R1.Reg, TRI, R1.Sub) << " " << L1 << "\n " + << printReg(R2.Reg, TRI, R2.Sub) << " " << L2 << "\n"); if (R1.Sub || R2.Sub) return false; if (Overlap) @@ -1171,7 +1184,7 @@ bool HexagonExpandCondsets::coalesceRegisters(RegisterRef R1, RegisterRef R2) { LIS->removeInterval(R2.Reg); updateKillFlags(R1.Reg); - DEBUG(dbgs() << "coalesced: " << L1 << "\n"); + LLVM_DEBUG(dbgs() << "coalesced: " << L1 << "\n"); L1.verify(); return true; @@ -1252,8 +1265,8 @@ bool HexagonExpandCondsets::runOnMachineFunction(MachineFunction &MF) { LIS = &getAnalysis<LiveIntervals>(); MRI = &MF.getRegInfo(); - DEBUG(LIS->print(dbgs() << "Before expand-condsets\n", - MF.getFunction().getParent())); + LLVM_DEBUG(LIS->print(dbgs() << "Before expand-condsets\n", + MF.getFunction().getParent())); bool Changed = false; std::set<unsigned> CoalUpd, PredUpd; @@ -1280,8 +1293,8 @@ bool HexagonExpandCondsets::runOnMachineFunction(MachineFunction &MF) { if (!CoalUpd.count(Op.getReg())) KillUpd.insert(Op.getReg()); updateLiveness(KillUpd, false, true, false); - DEBUG(LIS->print(dbgs() << "After coalescing\n", - MF.getFunction().getParent())); + LLVM_DEBUG( + LIS->print(dbgs() << "After coalescing\n", MF.getFunction().getParent())); // First, simply split all muxes into a pair of conditional transfers // and update the live intervals to reflect the new arrangement. The @@ -1297,8 +1310,8 @@ bool HexagonExpandCondsets::runOnMachineFunction(MachineFunction &MF) { // predication, and after splitting they are difficult to recalculate // (because of predicated defs), so make sure they are left untouched. // Predication does not use live intervals. - DEBUG(LIS->print(dbgs() << "After splitting\n", - MF.getFunction().getParent())); + LLVM_DEBUG( + LIS->print(dbgs() << "After splitting\n", MF.getFunction().getParent())); // Traverse all blocks and collapse predicable instructions feeding // conditional transfers into predicated instructions. @@ -1306,13 +1319,13 @@ bool HexagonExpandCondsets::runOnMachineFunction(MachineFunction &MF) { // cases that were not created in the previous step. 
for (auto &B : MF) Changed |= predicateInBlock(B, PredUpd); - DEBUG(LIS->print(dbgs() << "After predicating\n", - MF.getFunction().getParent())); + LLVM_DEBUG(LIS->print(dbgs() << "After predicating\n", + MF.getFunction().getParent())); PredUpd.insert(CoalUpd.begin(), CoalUpd.end()); updateLiveness(PredUpd, true, true, true); - DEBUG({ + LLVM_DEBUG({ if (Changed) LIS->print(dbgs() << "After expand-condsets\n", MF.getFunction().getParent()); @@ -1324,7 +1337,6 @@ bool HexagonExpandCondsets::runOnMachineFunction(MachineFunction &MF) { //===----------------------------------------------------------------------===// // Public Constructor Functions //===----------------------------------------------------------------------===// - FunctionPass *llvm::createHexagonExpandCondsets() { return new HexagonExpandCondsets(); } diff --git a/lib/Target/Hexagon/HexagonFixupHwLoops.cpp b/lib/Target/Hexagon/HexagonFixupHwLoops.cpp index a842b672736c..e9067e2285a8 100644 --- a/lib/Target/Hexagon/HexagonFixupHwLoops.cpp +++ b/lib/Target/Hexagon/HexagonFixupHwLoops.cpp @@ -20,6 +20,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/Support/MathExtras.h" #include "llvm/PassSupport.h" using namespace llvm; @@ -59,12 +60,12 @@ namespace { } private: - /// \brief Check the offset between each loop instruction and + /// Check the offset between each loop instruction and /// the loop basic block to determine if we can use the LOOP instruction /// or if we need to set the LC/SA registers explicitly. bool fixupLoopInstrs(MachineFunction &MF); - /// \brief Replace loop instruction with the constant extended + /// Replace loop instruction with the constant extended /// version if the loop label is too far from the loop instruction. void useExtLoopInstr(MachineFunction &MF, MachineBasicBlock::iterator &MII); @@ -80,7 +81,7 @@ FunctionPass *llvm::createHexagonFixupHwLoops() { return new HexagonFixupHwLoops(); } -/// \brief Returns true if the instruction is a hardware loop instruction. +/// Returns true if the instruction is a hardware loop instruction. static bool isHardwareLoop(const MachineInstr &MI) { return MI.getOpcode() == Hexagon::J2_loop0r || MI.getOpcode() == Hexagon::J2_loop0i || @@ -94,7 +95,7 @@ bool HexagonFixupHwLoops::runOnMachineFunction(MachineFunction &MF) { return fixupLoopInstrs(MF); } -/// \brief For Hexagon, if the loop label is to far from the +/// For Hexagon, if the loop label is to far from the /// loop instruction then we need to set the LC0 and SA0 registers /// explicitly instead of using LOOP(start,count). This function /// checks the distance, and generates register assignments if needed. 
@@ -137,7 +138,7 @@ bool HexagonFixupHwLoops::fixupLoopInstrs(MachineFunction &MF) { MachineBasicBlock::iterator MII = MBB.begin(); MachineBasicBlock::iterator MIE = MBB.end(); while (MII != MIE) { - InstOffset += HII->getSize(*MII); + unsigned InstSize = HII->getSize(*MII); if (MII->isMetaInstruction()) { ++MII; continue; @@ -145,8 +146,10 @@ bool HexagonFixupHwLoops::fixupLoopInstrs(MachineFunction &MF) { if (isHardwareLoop(*MII)) { assert(MII->getOperand(0).isMBB() && "Expect a basic block as loop operand"); - int diff = InstOffset - BlockToInstOffset[MII->getOperand(0).getMBB()]; - if ((unsigned)abs(diff) > MaxLoopRange) { + MachineBasicBlock *TargetBB = MII->getOperand(0).getMBB(); + unsigned Diff = AbsoluteDifference(InstOffset, + BlockToInstOffset[TargetBB]); + if (Diff > MaxLoopRange) { useExtLoopInstr(MF, MII); MII = MBB.erase(MII); Changed = true; @@ -156,13 +159,14 @@ bool HexagonFixupHwLoops::fixupLoopInstrs(MachineFunction &MF) { } else { ++MII; } + InstOffset += InstSize; } } return Changed; } -/// \brief Replace loop instructions with the constant extended version. +/// Replace loop instructions with the constant extended version. void HexagonFixupHwLoops::useExtLoopInstr(MachineFunction &MF, MachineBasicBlock::iterator &MII) { const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); diff --git a/lib/Target/Hexagon/HexagonFrameLowering.cpp b/lib/Target/Hexagon/HexagonFrameLowering.cpp index 65a2fc35b11b..97b02e2b34cb 100644 --- a/lib/Target/Hexagon/HexagonFrameLowering.cpp +++ b/lib/Target/Hexagon/HexagonFrameLowering.cpp @@ -442,7 +442,7 @@ void HexagonFrameLowering::findShrunkPrologEpilog(MachineFunction &MF, if (needsStackFrame(I, CSR, HRI)) SFBlocks.push_back(&I); - DEBUG({ + LLVM_DEBUG({ dbgs() << "Blocks needing SF: {"; for (auto &B : SFBlocks) dbgs() << " " << printMBBReference(*B); @@ -465,7 +465,7 @@ void HexagonFrameLowering::findShrunkPrologEpilog(MachineFunction &MF, if (!PDomB) break; } - DEBUG({ + LLVM_DEBUG({ dbgs() << "Computed dom block: "; if (DomB) dbgs() << printMBBReference(*DomB); @@ -483,11 +483,11 @@ void HexagonFrameLowering::findShrunkPrologEpilog(MachineFunction &MF, // Make sure that DomB dominates PDomB and PDomB post-dominates DomB. if (!MDT.dominates(DomB, PDomB)) { - DEBUG(dbgs() << "Dom block does not dominate pdom block\n"); + LLVM_DEBUG(dbgs() << "Dom block does not dominate pdom block\n"); return; } if (!MPT.dominates(PDomB, DomB)) { - DEBUG(dbgs() << "PDom block does not post-dominate dom block\n"); + LLVM_DEBUG(dbgs() << "PDom block does not post-dominate dom block\n"); return; } @@ -1396,7 +1396,7 @@ static void dump_registers(BitVector &Regs, const TargetRegisterInfo &TRI) { bool HexagonFrameLowering::assignCalleeSavedSpillSlots(MachineFunction &MF, const TargetRegisterInfo *TRI, std::vector<CalleeSavedInfo> &CSI) const { - DEBUG(dbgs() << __func__ << " on " << MF.getName() << '\n'); + LLVM_DEBUG(dbgs() << __func__ << " on " << MF.getName() << '\n'); MachineFrameInfo &MFI = MF.getFrameInfo(); BitVector SRegs(Hexagon::NUM_TARGET_REGS); @@ -1406,15 +1406,16 @@ bool HexagonFrameLowering::assignCalleeSavedSpillSlots(MachineFunction &MF, // (1) For each callee-saved register, add that register and all of its // sub-registers to SRegs. 
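The fixupLoopInstrs hunk above replaces a signed abs() on an offset difference with AbsoluteDifference from MathExtras.h, which computes |a - b| on unsigned values without wraparound. A small worked check (the offsets are invented):

#include "llvm/Support/MathExtras.h"
#include <cassert>

int main() {
  unsigned LoopInsnOffset = 16, TargetOffset = 4096;
  assert(llvm::AbsoluteDifference(LoopInsnOffset, TargetOffset) == 4080u);
  assert(llvm::AbsoluteDifference(TargetOffset, LoopInsnOffset) == 4080u);
  // Naive unsigned subtraction would have wrapped: 16 - 4096 is a huge value.
  return 0;
}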
- DEBUG(dbgs() << "Initial CS registers: {"); + LLVM_DEBUG(dbgs() << "Initial CS registers: {"); for (unsigned i = 0, n = CSI.size(); i < n; ++i) { unsigned R = CSI[i].getReg(); - DEBUG(dbgs() << ' ' << printReg(R, TRI)); + LLVM_DEBUG(dbgs() << ' ' << printReg(R, TRI)); for (MCSubRegIterator SR(R, TRI, true); SR.isValid(); ++SR) SRegs[*SR] = true; } - DEBUG(dbgs() << " }\n"); - DEBUG(dbgs() << "SRegs.1: "; dump_registers(SRegs, *TRI); dbgs() << "\n"); + LLVM_DEBUG(dbgs() << " }\n"); + LLVM_DEBUG(dbgs() << "SRegs.1: "; dump_registers(SRegs, *TRI); + dbgs() << "\n"); // (2) For each reserved register, remove that register and all of its // sub- and super-registers from SRegs. @@ -1424,8 +1425,10 @@ bool HexagonFrameLowering::assignCalleeSavedSpillSlots(MachineFunction &MF, for (MCSuperRegIterator SR(R, TRI, true); SR.isValid(); ++SR) SRegs[*SR] = false; } - DEBUG(dbgs() << "Res: "; dump_registers(Reserved, *TRI); dbgs() << "\n"); - DEBUG(dbgs() << "SRegs.2: "; dump_registers(SRegs, *TRI); dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "Res: "; dump_registers(Reserved, *TRI); + dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "SRegs.2: "; dump_registers(SRegs, *TRI); + dbgs() << "\n"); // (3) Collect all registers that have at least one sub-register in SRegs, // and also have no sub-registers that are reserved. These will be the can- @@ -1446,11 +1449,13 @@ bool HexagonFrameLowering::assignCalleeSavedSpillSlots(MachineFunction &MF, break; } } - DEBUG(dbgs() << "TmpSup: "; dump_registers(TmpSup, *TRI); dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "TmpSup: "; dump_registers(TmpSup, *TRI); + dbgs() << "\n"); // (4) Include all super-registers found in (3) into SRegs. SRegs |= TmpSup; - DEBUG(dbgs() << "SRegs.4: "; dump_registers(SRegs, *TRI); dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "SRegs.4: "; dump_registers(SRegs, *TRI); + dbgs() << "\n"); // (5) For each register R in SRegs, if any super-register of R is in SRegs, // remove R from SRegs. @@ -1463,7 +1468,8 @@ bool HexagonFrameLowering::assignCalleeSavedSpillSlots(MachineFunction &MF, break; } } - DEBUG(dbgs() << "SRegs.5: "; dump_registers(SRegs, *TRI); dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "SRegs.5: "; dump_registers(SRegs, *TRI); + dbgs() << "\n"); // Now, for each register that has a fixed stack slot, create the stack // object for it. @@ -1501,7 +1507,7 @@ bool HexagonFrameLowering::assignCalleeSavedSpillSlots(MachineFunction &MF, SRegs[R] = false; } - DEBUG({ + LLVM_DEBUG({ dbgs() << "CS information: {"; for (unsigned i = 0, n = CSI.size(); i < n; ++i) { int FI = CSI[i].getFrameIdx(); @@ -1706,11 +1712,6 @@ bool HexagonFrameLowering::expandStoreVec2(MachineBasicBlock &B, for (auto R = B.begin(); R != It; ++R) { Clobbers.clear(); LPR.stepForward(*R, Clobbers); - // Dead defs are recorded in Clobbers, but are not automatically removed - // from the live set. 
- for (auto &C : Clobbers) - if (C.second->isReg() && C.second->isDead()) - LPR.removeReg(C.first); } DebugLoc DL = MI->getDebugLoc(); @@ -1867,11 +1868,11 @@ bool HexagonFrameLowering::expandSpillMacros(MachineFunction &MF, Changed |= expandCopy(B, I, MRI, HII, NewRegs); break; case Hexagon::STriw_pred: - case Hexagon::STriw_mod: + case Hexagon::STriw_ctr: Changed |= expandStoreInt(B, I, MRI, HII, NewRegs); break; case Hexagon::LDriw_pred: - case Hexagon::LDriw_mod: + case Hexagon::LDriw_ctr: Changed |= expandLoadInt(B, I, MRI, HII, NewRegs); break; case Hexagon::PS_vstorerq_ai: @@ -1914,7 +1915,7 @@ void HexagonFrameLowering::determineCalleeSaves(MachineFunction &MF, if (OptimizeSpillSlots && !isOptNone(MF)) optimizeSpillSlots(MF, NewRegs); - // We need to reserve a a spill slot if scavenging could potentially require + // We need to reserve a spill slot if scavenging could potentially require // spilling a scavenged register. if (!NewRegs.empty() || mayOverflowFrameOffset(MF)) { MachineFrameInfo &MFI = MF.getFrameInfo(); @@ -2026,8 +2027,8 @@ void HexagonFrameLowering::optimizeSpillSlots(MachineFunction &MF, auto P = BlockIndexes.insert( std::make_pair(&B, HexagonBlockRanges::InstrIndexMap(B))); auto &IndexMap = P.first->second; - DEBUG(dbgs() << "Index map for " << printMBBReference(B) << "\n" - << IndexMap << '\n'); + LLVM_DEBUG(dbgs() << "Index map for " << printMBBReference(B) << "\n" + << IndexMap << '\n'); for (auto &In : B) { int LFI, SFI; @@ -2134,7 +2135,7 @@ void HexagonFrameLowering::optimizeSpillSlots(MachineFunction &MF, } } - DEBUG({ + LLVM_DEBUG({ for (auto &P : FIRangeMap) { dbgs() << "fi#" << P.first; if (BadFIs.count(P.first)) @@ -2173,7 +2174,7 @@ void HexagonFrameLowering::optimizeSpillSlots(MachineFunction &MF, } } - DEBUG({ + LLVM_DEBUG({ dbgs() << "Block-to-FI map (* -- live-on-exit):\n"; for (auto &P : BlockFIMap) { auto &FIs = P.second; @@ -2200,16 +2201,16 @@ void HexagonFrameLowering::optimizeSpillSlots(MachineFunction &MF, HexagonBlockRanges::InstrIndexMap &IM = F->second; HexagonBlockRanges::RegToRangeMap LM = HBR.computeLiveMap(IM); HexagonBlockRanges::RegToRangeMap DM = HBR.computeDeadMap(IM, LM); - DEBUG(dbgs() << printMBBReference(B) << " dead map\n" - << HexagonBlockRanges::PrintRangeMap(DM, HRI)); + LLVM_DEBUG(dbgs() << printMBBReference(B) << " dead map\n" + << HexagonBlockRanges::PrintRangeMap(DM, HRI)); for (auto FI : BlockFIMap[&B]) { if (BadFIs.count(FI)) continue; - DEBUG(dbgs() << "Working on fi#" << FI << '\n'); + LLVM_DEBUG(dbgs() << "Working on fi#" << FI << '\n'); HexagonBlockRanges::RangeList &RL = FIRangeMap[FI].Map[&B]; for (auto &Range : RL) { - DEBUG(dbgs() << "--Examining range:" << RL << '\n'); + LLVM_DEBUG(dbgs() << "--Examining range:" << RL << '\n'); if (!IndexType::isInstr(Range.start()) || !IndexType::isInstr(Range.end())) continue; @@ -2224,7 +2225,8 @@ void HexagonFrameLowering::optimizeSpillSlots(MachineFunction &MF, auto *RC = HII.getRegClass(SI.getDesc(), 2, &HRI, MF); // The this-> is needed to unconfuse MSVC. 
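determineCalleeSaves above notes that a spill slot must be reserved when scavenging might itself need to spill. A hedged sketch of such a reservation -- size and alignment are assumed for a 32-bit scalar register, and the exact MachineFrameInfo signature has varied across LLVM versions:

#include "llvm/CodeGen/MachineFrameInfo.h"
using namespace llvm;

static int reserveScavengingSlot(MachineFrameInfo &MFI) {
  // One word, word-aligned -- enough to hold a scavenged GPR.
  return MFI.CreateSpillStackObject(/*Size=*/4, /*Alignment=*/4);
}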
unsigned FoundR = this->findPhysReg(MF, Range, IM, DM, RC); - DEBUG(dbgs() << "Replacement reg:" << printReg(FoundR, &HRI) << '\n'); + LLVM_DEBUG(dbgs() << "Replacement reg:" << printReg(FoundR, &HRI) + << '\n'); if (FoundR == 0) continue; #ifndef NDEBUG diff --git a/lib/Target/Hexagon/HexagonGatherPacketize.cpp b/lib/Target/Hexagon/HexagonGatherPacketize.cpp index 253f09d12839..63ec9c3d3124 100644 --- a/lib/Target/Hexagon/HexagonGatherPacketize.cpp +++ b/lib/Target/Hexagon/HexagonGatherPacketize.cpp @@ -62,7 +62,7 @@ bool HexagonGatherPacketize::runOnMachineFunction(MachineFunction &Fn) { if (!EnableGatherPacketize) return false; auto &ST = Fn.getSubtarget<HexagonSubtarget>(); - bool HasV65 = ST.hasV65TOps(); + bool HasV65 = ST.hasV65Ops(); bool UseHVX = ST.useHVXOps(); if (!(HasV65 & UseHVX)) return false; diff --git a/lib/Target/Hexagon/HexagonGenInsert.cpp b/lib/Target/Hexagon/HexagonGenInsert.cpp index c1841d735b8c..2582a021e956 100644 --- a/lib/Target/Hexagon/HexagonGenInsert.cpp +++ b/lib/Target/Hexagon/HexagonGenInsert.cpp @@ -55,6 +55,12 @@ static cl::opt<unsigned> VRegDistCutoff("insert-dist-cutoff", cl::init(30U), cl::Hidden, cl::ZeroOrMore, cl::desc("Vreg distance cutoff for insert " "generation.")); +// Limit the container sizes for extreme cases where we run out of memory. +static cl::opt<unsigned> MaxORLSize("insert-max-orl", cl::init(4096), + cl::Hidden, cl::ZeroOrMore, cl::desc("Maximum size of OrderedRegisterList")); +static cl::opt<unsigned> MaxIFMSize("insert-max-ifmap", cl::init(1024), + cl::Hidden, cl::ZeroOrMore, cl::desc("Maximum size of IFMap")); + static cl::opt<bool> OptTiming("insert-timing", cl::init(false), cl::Hidden, cl::ZeroOrMore, cl::desc("Enable timing of insert generation")); static cl::opt<bool> OptTimingDetail("insert-timing-detail", cl::init(false), @@ -86,6 +92,7 @@ namespace { struct RegisterSet : private BitVector { RegisterSet() = default; explicit RegisterSet(unsigned s, bool t = false) : BitVector(s, t) {} + RegisterSet(const RegisterSet &RS) : BitVector(RS) {} using BitVector::clear; @@ -370,9 +377,11 @@ namespace { class OrderedRegisterList { using ListType = std::vector<unsigned>; + const unsigned MaxSize; public: - OrderedRegisterList(const RegisterOrdering &RO) : Ord(RO) {} + OrderedRegisterList(const RegisterOrdering &RO) + : MaxSize(MaxORLSize), Ord(RO) {} void insert(unsigned VR); void remove(unsigned VR); @@ -433,12 +442,17 @@ void OrderedRegisterList::insert(unsigned VR) { Seq.push_back(VR); else Seq.insert(L, VR); + + unsigned S = Seq.size(); + if (S > MaxSize) + Seq.resize(MaxSize); + assert(Seq.size() <= MaxSize); } void OrderedRegisterList::remove(unsigned VR) { iterator L = std::lower_bound(Seq.begin(), Seq.end(), VR, Ord); - assert(L != Seq.end()); - Seq.erase(L); + if (L != Seq.end()) + Seq.erase(L); } namespace { @@ -618,7 +632,7 @@ void HexagonGenInsert::buildOrderingBT(RegisterOrdering &RB, SortableVectorType VRs; for (RegisterOrdering::iterator I = RB.begin(), E = RB.end(); I != E; ++I) VRs.push_back(I->first); - std::sort(VRs.begin(), VRs.end(), LexCmp); + llvm::sort(VRs.begin(), VRs.end(), LexCmp); // Transfer the results to the outgoing register ordering. for (unsigned i = 0, n = VRs.size(); i < n; ++i) RO.insert(std::make_pair(VRs[i], i)); @@ -950,6 +964,9 @@ void HexagonGenInsert::collectInBlock(MachineBasicBlock *B, continue; findRecordInsertForms(VR, AVs); + // Stop if the map size is too large. 
+ if (IFMap.size() > MaxIFMSize) + return; } } diff --git a/lib/Target/Hexagon/HexagonGenMux.cpp b/lib/Target/Hexagon/HexagonGenMux.cpp index 5a001d6ed9c1..e5af96468af1 100644 --- a/lib/Target/Hexagon/HexagonGenMux.cpp +++ b/lib/Target/Hexagon/HexagonGenMux.cpp @@ -40,6 +40,7 @@ #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/MathExtras.h" #include <algorithm> #include <cassert> @@ -56,6 +57,11 @@ namespace llvm { } // end namespace llvm +// Initialize this to 0 to always prefer generating mux by default. +static cl::opt<unsigned> MinPredDist("hexagon-gen-mux-threshold", cl::Hidden, + cl::init(0), cl::desc("Minimum distance between predicate definition and " + "farther of the two predicated uses")); + namespace { class HexagonGenMux : public MachineFunctionPass { @@ -269,11 +275,13 @@ bool HexagonGenMux::genMuxInBlock(MachineBasicBlock &B) { // There is now a complete definition of DR, i.e. we have the predicate // register, the definition if-true, and definition if-false. - // First, check if both definitions are far enough from the definition + // First, check if the definitions are far enough from the definition // of the predicate register. unsigned MinX = std::min(CI.TrueX, CI.FalseX); unsigned MaxX = std::max(CI.TrueX, CI.FalseX); - unsigned SearchX = (MaxX > 4) ? MaxX-4 : 0; + // Specifically, check if the predicate definition is within a prescribed + // distance from the farther of the two predicated instructions. + unsigned SearchX = (MaxX >= MinPredDist) ? MaxX-MinPredDist : 0; bool NearDef = false; for (unsigned X = SearchX; X < MaxX; ++X) { const DefUseInfo &DU = DUM.lookup(X); @@ -348,7 +356,7 @@ bool HexagonGenMux::genMuxInBlock(MachineBasicBlock &B) { return false; }; for (auto I = B.rbegin(), E = B.rend(); I != E; ++I) { - if (I->isDebugValue()) + if (I->isDebugInstr()) continue; // This isn't 100% accurate, but it's safe. 
// It won't detect (as a kill) a case like this diff --git a/lib/Target/Hexagon/HexagonGenPredicate.cpp b/lib/Target/Hexagon/HexagonGenPredicate.cpp index 9288ed03d4d2..c0d2de90467a 100644 --- a/lib/Target/Hexagon/HexagonGenPredicate.cpp +++ b/lib/Target/Hexagon/HexagonGenPredicate.cpp @@ -222,13 +222,12 @@ void HexagonGenPredicate::collectPredicateGPR(MachineFunction &MF) { } void HexagonGenPredicate::processPredicateGPR(const Register &Reg) { - DEBUG(dbgs() << __func__ << ": " - << printReg(Reg.R, TRI, Reg.S) << "\n"); + LLVM_DEBUG(dbgs() << __func__ << ": " << printReg(Reg.R, TRI, Reg.S) << "\n"); using use_iterator = MachineRegisterInfo::use_iterator; use_iterator I = MRI->use_begin(Reg.R), E = MRI->use_end(); if (I == E) { - DEBUG(dbgs() << "Dead reg: " << printReg(Reg.R, TRI, Reg.S) << '\n'); + LLVM_DEBUG(dbgs() << "Dead reg: " << printReg(Reg.R, TRI, Reg.S) << '\n'); MachineInstr *DefI = MRI->getVRegDef(Reg.R); DefI->eraseFromParent(); return; @@ -250,7 +249,7 @@ Register HexagonGenPredicate::getPredRegFor(const Register &Reg) { if (F != G2P.end()) return F->second; - DEBUG(dbgs() << __func__ << ": " << PrintRegister(Reg, *TRI)); + LLVM_DEBUG(dbgs() << __func__ << ": " << PrintRegister(Reg, *TRI)); MachineInstr *DefI = MRI->getVRegDef(Reg.R); assert(DefI); unsigned Opc = DefI->getOpcode(); @@ -258,7 +257,7 @@ Register HexagonGenPredicate::getPredRegFor(const Register &Reg) { assert(DefI->getOperand(0).isDef() && DefI->getOperand(1).isUse()); Register PR = DefI->getOperand(1); G2P.insert(std::make_pair(Reg, PR)); - DEBUG(dbgs() << " -> " << PrintRegister(PR, *TRI) << '\n'); + LLVM_DEBUG(dbgs() << " -> " << PrintRegister(PR, *TRI) << '\n'); return PR; } @@ -274,7 +273,8 @@ Register HexagonGenPredicate::getPredRegFor(const Register &Reg) { BuildMI(B, std::next(DefIt), DL, TII->get(TargetOpcode::COPY), NewPR) .addReg(Reg.R, 0, Reg.S); G2P.insert(std::make_pair(Reg, Register(NewPR))); - DEBUG(dbgs() << " -> !" << PrintRegister(Register(NewPR), *TRI) << '\n'); + LLVM_DEBUG(dbgs() << " -> !" << PrintRegister(Register(NewPR), *TRI) + << '\n'); return Register(NewPR); } @@ -364,7 +364,7 @@ bool HexagonGenPredicate::isScalarPred(Register PredReg) { } bool HexagonGenPredicate::convertToPredForm(MachineInstr *MI) { - DEBUG(dbgs() << __func__ << ": " << MI << " " << *MI); + LLVM_DEBUG(dbgs() << __func__ << ": " << MI << " " << *MI); unsigned Opc = MI->getOpcode(); assert(isConvertibleToPredForm(MI)); @@ -426,7 +426,7 @@ bool HexagonGenPredicate::convertToPredForm(MachineInstr *MI) { Register Pred = getPredRegFor(GPR); MIB.addReg(Pred.R, 0, Pred.S); } - DEBUG(dbgs() << "generated: " << *MIB); + LLVM_DEBUG(dbgs() << "generated: " << *MIB); // Generate a copy-out: NewGPR = NewPR, and replace all uses of OutR // with NewGPR. @@ -449,7 +449,7 @@ bool HexagonGenPredicate::convertToPredForm(MachineInstr *MI) { } bool HexagonGenPredicate::eliminatePredCopies(MachineFunction &MF) { - DEBUG(dbgs() << __func__ << "\n"); + LLVM_DEBUG(dbgs() << __func__ << "\n"); const TargetRegisterClass *PredRC = &Hexagon::PredRegsRegClass; bool Changed = false; VectOfInst Erase; diff --git a/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/lib/Target/Hexagon/HexagonHardwareLoops.cpp index 715fd52f3acd..0e33976a58ac 100644 --- a/lib/Target/Hexagon/HexagonHardwareLoops.cpp +++ b/lib/Target/Hexagon/HexagonHardwareLoops.cpp @@ -168,7 +168,7 @@ namespace { } }; - /// \brief Find the register that contains the loop controlling + /// Find the register that contains the loop controlling /// induction variable. 
/// If successful, it will return true and set the \p Reg, \p IVBump /// and \p IVOp arguments. Otherwise it will return false. @@ -183,19 +183,19 @@ namespace { bool findInductionRegister(MachineLoop *L, unsigned &Reg, int64_t &IVBump, MachineInstr *&IVOp) const; - /// \brief Return the comparison kind for the specified opcode. + /// Return the comparison kind for the specified opcode. Comparison::Kind getComparisonKind(unsigned CondOpc, MachineOperand *InitialValue, const MachineOperand *Endvalue, int64_t IVBump) const; - /// \brief Analyze the statements in a loop to determine if the loop + /// Analyze the statements in a loop to determine if the loop /// has a computable trip count and, if so, return a value that represents /// the trip count expression. CountValue *getLoopTripCount(MachineLoop *L, SmallVectorImpl<MachineInstr *> &OldInsts); - /// \brief Return the expression that represents the number of times + /// Return the expression that represents the number of times /// a loop iterates. The function takes the operands that represent the /// loop start value, loop end value, and induction value. Based upon /// these operands, the function attempts to compute the trip count. @@ -206,64 +206,64 @@ namespace { const MachineOperand *End, unsigned IVReg, int64_t IVBump, Comparison::Kind Cmp) const; - /// \brief Return true if the instruction is not valid within a hardware + /// Return true if the instruction is not valid within a hardware /// loop. bool isInvalidLoopOperation(const MachineInstr *MI, bool IsInnerHWLoop) const; - /// \brief Return true if the loop contains an instruction that inhibits + /// Return true if the loop contains an instruction that inhibits /// using the hardware loop. bool containsInvalidInstruction(MachineLoop *L, bool IsInnerHWLoop) const; - /// \brief Given a loop, check if we can convert it to a hardware loop. + /// Given a loop, check if we can convert it to a hardware loop. /// If so, then perform the conversion and return true. bool convertToHardwareLoop(MachineLoop *L, bool &L0used, bool &L1used); - /// \brief Return true if the instruction is now dead. + /// Return true if the instruction is now dead. bool isDead(const MachineInstr *MI, SmallVectorImpl<MachineInstr *> &DeadPhis) const; - /// \brief Remove the instruction if it is now dead. + /// Remove the instruction if it is now dead. void removeIfDead(MachineInstr *MI); - /// \brief Make sure that the "bump" instruction executes before the + /// Make sure that the "bump" instruction executes before the /// compare. We need that for the IV fixup, so that the compare /// instruction would not use a bumped value that has not yet been /// defined. If the instructions are out of order, try to reorder them. bool orderBumpCompare(MachineInstr *BumpI, MachineInstr *CmpI); - /// \brief Return true if MO and MI pair is visited only once. If visited + /// Return true if MO and MI pair is visited only once. If visited /// more than once, this indicates there is recursion. In such a case, /// return false. bool isLoopFeeder(MachineLoop *L, MachineBasicBlock *A, MachineInstr *MI, const MachineOperand *MO, LoopFeederMap &LoopFeederPhi) const; - /// \brief Return true if the Phi may generate a value that may underflow, + /// Return true if the Phi may generate a value that may underflow, /// or may wrap. 
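getLoopTripCount/computeCount, documented above, derive a trip-count expression from the loop start value, end value, and induction bump. A heavily simplified worked sketch for the easiest case only -- constant operands, positive bump, exclusive upper bound (i < End); the real code also handles registers, signedness, and every comparison kind:

#include <cassert>

static long tripCount(long Start, long End, long Bump) {
  assert(Bump > 0 && End >= Start);
  return (End - Start + Bump - 1) / Bump; // ceiling((End - Start) / Bump)
}

int main() {
  assert(tripCount(0, 10, 1) == 10);
  assert(tripCount(0, 10, 3) == 4); // iterations at i = 0, 3, 6, 9
  return 0;
}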
bool phiMayWrapOrUnderflow(MachineInstr *Phi, const MachineOperand *EndVal, MachineBasicBlock *MBB, MachineLoop *L, LoopFeederMap &LoopFeederPhi) const; - /// \brief Return true if the induction variable may underflow an unsigned + /// Return true if the induction variable may underflow an unsigned /// value in the first iteration. bool loopCountMayWrapOrUnderFlow(const MachineOperand *InitVal, const MachineOperand *EndVal, MachineBasicBlock *MBB, MachineLoop *L, LoopFeederMap &LoopFeederPhi) const; - /// \brief Check if the given operand has a compile-time known constant + /// Check if the given operand has a compile-time known constant /// value. Return true if yes, and false otherwise. When returning true, set /// Val to the corresponding constant value. bool checkForImmediate(const MachineOperand &MO, int64_t &Val) const; - /// \brief Check if the operand has a compile-time known constant value. + /// Check if the operand has a compile-time known constant value. bool isImmediate(const MachineOperand &MO) const { int64_t V; return checkForImmediate(MO, V); } - /// \brief Return the immediate for the specified operand. + /// Return the immediate for the specified operand. int64_t getImmediate(const MachineOperand &MO) const { int64_t V; if (!checkForImmediate(MO, V)) @@ -271,12 +271,12 @@ namespace { return V; } - /// \brief Reset the given machine operand to now refer to a new immediate + /// Reset the given machine operand to now refer to a new immediate /// value. Assumes that the operand was already referencing an immediate /// value, either directly, or via a register. void setImmediate(MachineOperand &MO, int64_t Val); - /// \brief Fix the data flow of the induction variable. + /// Fix the data flow of the induction variable. /// The desired flow is: phi ---> bump -+-> comparison-in-latch. /// | /// +-> back to phi @@ -297,7 +297,7 @@ namespace { /// cannot be adjusted to reflect the post-bump value. bool fixupInductionVariable(MachineLoop *L); - /// \brief Given a loop, if it does not have a preheader, create one. + /// Given a loop, if it does not have a preheader, create one. /// Return the block that is the preheader. MachineBasicBlock *createPreheaderForLoop(MachineLoop *L); }; @@ -307,7 +307,7 @@ namespace { int HexagonHardwareLoops::Counter = 0; #endif - /// \brief Abstraction for a trip count of a loop. A smaller version + /// Abstraction for a trip count of a loop. A smaller version /// of the MachineOperand class without the concerns of changing the /// operand representation. class CountValue { @@ -376,7 +376,7 @@ FunctionPass *llvm::createHexagonHardwareLoops() { } bool HexagonHardwareLoops::runOnMachineFunction(MachineFunction &MF) { - DEBUG(dbgs() << "********* Hexagon Hardware Loops *********\n"); + LLVM_DEBUG(dbgs() << "********* Hexagon Hardware Loops *********\n"); if (skipFunction(MF.getFunction())) return false; @@ -556,7 +556,7 @@ HexagonHardwareLoops::getComparisonKind(unsigned CondOpc, return Cmp; } -/// \brief Analyze the statements in a loop to determine if the loop has +/// Analyze the statements in a loop to determine if the loop has /// a computable trip count and, if so, return a value that represents /// the trip count expression. 
/// @@ -718,7 +718,7 @@ CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L, return computeCount(L, InitialValue, EndValue, IVReg, IVBump, Cmp); } -/// \brief Helper function that returns the expression that represents the +/// Helper function that returns the expression that represents the /// number of times a loop iterates. The function takes the operands that /// represent the loop start value, loop end value, and induction value. /// Based upon these operands, the function attempts to compute the trip count. @@ -928,6 +928,7 @@ CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop, // 'Add' instruction. const MachineInstr *EndValInstr = MRI->getVRegDef(End->getReg()); if (EndValInstr->getOpcode() == Hexagon::A2_addi && + EndValInstr->getOperand(1).getSubReg() == 0 && EndValInstr->getOperand(2).getImm() == StartV) { DistR = EndValInstr->getOperand(1).getReg(); } else { @@ -984,7 +985,7 @@ CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop, return new CountValue(CountValue::CV_Register, CountR, CountSR); } -/// \brief Return true if the operation is invalid within hardware loop. +/// Return true if the operation is invalid within hardware loop. bool HexagonHardwareLoops::isInvalidLoopOperation(const MachineInstr *MI, bool IsInnerHWLoop) const { // Call is not allowed because the callee may use a hardware loop except for @@ -1006,19 +1007,20 @@ bool HexagonHardwareLoops::isInvalidLoopOperation(const MachineInstr *MI, return false; } -/// \brief Return true if the loop contains an instruction that inhibits +/// Return true if the loop contains an instruction that inhibits /// the use of the hardware loop instruction. bool HexagonHardwareLoops::containsInvalidInstruction(MachineLoop *L, bool IsInnerHWLoop) const { const std::vector<MachineBasicBlock *> &Blocks = L->getBlocks(); - DEBUG(dbgs() << "\nhw_loop head, " << printMBBReference(*Blocks[0])); + LLVM_DEBUG(dbgs() << "\nhw_loop head, " << printMBBReference(*Blocks[0])); for (unsigned i = 0, e = Blocks.size(); i != e; ++i) { MachineBasicBlock *MBB = Blocks[i]; for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end(); MII != E; ++MII) { const MachineInstr *MI = &*MII; if (isInvalidLoopOperation(MI, IsInnerHWLoop)) { - DEBUG(dbgs()<< "\nCannot convert to hw_loop due to:"; MI->dump();); + LLVM_DEBUG(dbgs() << "\nCannot convert to hw_loop due to:"; + MI->dump();); return true; } } @@ -1026,7 +1028,7 @@ bool HexagonHardwareLoops::containsInvalidInstruction(MachineLoop *L, return false; } -/// \brief Returns true if the instruction is dead. This was essentially +/// Returns true if the instruction is dead. This was essentially /// copied from DeadMachineInstructionElim::isDead, but with special cases /// for inline asm, physical registers and instructions with side effects /// removed. @@ -1083,7 +1085,7 @@ void HexagonHardwareLoops::removeIfDead(MachineInstr *MI) { SmallVector<MachineInstr*, 1> DeadPhis; if (isDead(MI, DeadPhis)) { - DEBUG(dbgs() << "HW looping will remove: " << *MI); + LLVM_DEBUG(dbgs() << "HW looping will remove: " << *MI); // It is possible that some DBG_VALUE instructions refer to this // instruction. Examine each def operand for such references; @@ -1112,7 +1114,7 @@ void HexagonHardwareLoops::removeIfDead(MachineInstr *MI) { } } -/// \brief Check if the loop is a candidate for converting to a hardware +/// Check if the loop is a candidate for converting to a hardware /// loop. If so, then perform the transformation. /// /// This function works on innermost loops first. 
A loop can be converted @@ -1237,7 +1239,7 @@ bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L, LoopStart = TopBlock; // Convert the loop to a hardware loop. - DEBUG(dbgs() << "Change to hardware loop at "; L->dump()); + LLVM_DEBUG(dbgs() << "Change to hardware loop at "; L->dump()); DebugLoc DL; if (InsertPos != Preheader->end()) DL = InsertPos->getDebugLoc(); @@ -1367,7 +1369,7 @@ bool HexagonHardwareLoops::isLoopFeeder(MachineLoop *L, MachineBasicBlock *A, LoopFeederMap &LoopFeederPhi) const { if (LoopFeederPhi.find(MO->getReg()) == LoopFeederPhi.end()) { const std::vector<MachineBasicBlock *> &Blocks = L->getBlocks(); - DEBUG(dbgs() << "\nhw_loop head, " << printMBBReference(*Blocks[0])); + LLVM_DEBUG(dbgs() << "\nhw_loop head, " << printMBBReference(*Blocks[0])); // Ignore all BBs that form Loop. for (unsigned i = 0, e = Blocks.size(); i != e; ++i) { MachineBasicBlock *MBB = Blocks[i]; @@ -1768,16 +1770,16 @@ bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) { for (unsigned i = 1, n = PredDef->getNumOperands(); i < n; ++i) { MachineOperand &MO = PredDef->getOperand(i); if (MO.isReg() && MO.getReg() == RB.first) { - DEBUG(dbgs() << "\n DefMI(" << i << ") = " - << *(MRI->getVRegDef(I->first))); + LLVM_DEBUG(dbgs() << "\n DefMI(" << i + << ") = " << *(MRI->getVRegDef(I->first))); if (IndI) return false; IndI = MRI->getVRegDef(I->first); IndMO = &MO; } else if (MO.isReg()) { - DEBUG(dbgs() << "\n DefMI(" << i << ") = " - << *(MRI->getVRegDef(MO.getReg()))); + LLVM_DEBUG(dbgs() << "\n DefMI(" << i + << ") = " << *(MRI->getVRegDef(MO.getReg()))); if (nonIndI) return false; diff --git a/lib/Target/Hexagon/HexagonHazardRecognizer.cpp b/lib/Target/Hexagon/HexagonHazardRecognizer.cpp index 036b18678709..44f1f554c662 100644 --- a/lib/Target/Hexagon/HexagonHazardRecognizer.cpp +++ b/lib/Target/Hexagon/HexagonHazardRecognizer.cpp @@ -26,11 +26,13 @@ using namespace llvm; #define DEBUG_TYPE "post-RA-sched" void HexagonHazardRecognizer::Reset() { - DEBUG(dbgs() << "Reset hazard recognizer\n"); + LLVM_DEBUG(dbgs() << "Reset hazard recognizer\n"); Resources->clearResources(); PacketNum = 0; UsesDotCur = nullptr; DotCurPNum = -1; + UsesLoad = false; + PrefVectorStoreNew = nullptr; RegDefs.clear(); } @@ -41,7 +43,7 @@ HexagonHazardRecognizer::getHazardType(SUnit *SU, int stalls) { return NoHazard; if (!Resources->canReserveResources(*MI)) { - DEBUG(dbgs() << "*** Hazard in cycle " << PacketNum << ", " << *MI); + LLVM_DEBUG(dbgs() << "*** Hazard in cycle " << PacketNum << ", " << *MI); HazardType RetVal = Hazard; if (TII->mayBeNewStore(*MI)) { // Make sure the register to be stored is defined by an instruction in the @@ -57,14 +59,16 @@ HexagonHazardRecognizer::getHazardType(SUnit *SU, int stalls) { MI->getDebugLoc()); if (Resources->canReserveResources(*NewMI)) RetVal = NoHazard; - DEBUG(dbgs() << "*** Try .new version? " << (RetVal == NoHazard) << "\n"); + LLVM_DEBUG(dbgs() << "*** Try .new version? 
" << (RetVal == NoHazard) + << "\n"); MF->DeleteMachineInstr(NewMI); } return RetVal; } if (SU == UsesDotCur && DotCurPNum != (int)PacketNum) { - DEBUG(dbgs() << "*** .cur Hazard in cycle " << PacketNum << ", " << *MI); + LLVM_DEBUG(dbgs() << "*** .cur Hazard in cycle " << PacketNum << ", " + << *MI); return Hazard; } @@ -72,21 +76,33 @@ HexagonHazardRecognizer::getHazardType(SUnit *SU, int stalls) { } void HexagonHazardRecognizer::AdvanceCycle() { - DEBUG(dbgs() << "Advance cycle, clear state\n"); + LLVM_DEBUG(dbgs() << "Advance cycle, clear state\n"); Resources->clearResources(); if (DotCurPNum != -1 && DotCurPNum != (int)PacketNum) { UsesDotCur = nullptr; DotCurPNum = -1; } + UsesLoad = false; + PrefVectorStoreNew = nullptr; PacketNum++; RegDefs.clear(); } -/// If a packet contains a dot cur instruction, then we may prefer the -/// instruction that can use the dot cur result. Or, if the use -/// isn't scheduled in the same packet, then prefer other instructions -/// in the subsequent packet. +/// Handle the cases when we prefer one instruction over another. Case 1 - we +/// prefer not to generate multiple loads in the packet to avoid a potential +/// bank conflict. Case 2 - if a packet contains a dot cur instruction, then we +/// prefer the instruction that can use the dot cur result. However, if the use +/// is not scheduled in the same packet, then prefer other instructions in the +/// subsequent packet. Case 3 - we prefer a vector store that can be converted +/// to a .new store. The packetizer will not generate the .new store if the +/// store doesn't have resources to fit in the packet (but the .new store may +/// have resources). We attempt to schedule the store as soon as possible to +/// help packetize the two instructions together. bool HexagonHazardRecognizer::ShouldPreferAnother(SUnit *SU) { + if (PrefVectorStoreNew != nullptr && PrefVectorStoreNew != SU) + return true; + if (UsesLoad && SU->isInstr() && SU->getInstr()->mayLoad()) + return true; return UsesDotCur && ((SU == UsesDotCur) ^ (DotCurPNum == (int)PacketNum)); } @@ -118,17 +134,16 @@ void HexagonHazardRecognizer::EmitInstruction(SUnit *SU) { } else Resources->reserveResources(*MI); - DEBUG(dbgs() << " Add instruction " << *MI); + LLVM_DEBUG(dbgs() << " Add instruction " << *MI); // When scheduling a dot cur instruction, check if there is an instruction // that can use the dot cur in the same packet. If so, we'll attempt to - // schedule it before other instructions. We only do this if the use has - // the same height as the dot cur. Otherwise, we may miss scheduling an - // instruction with a greater height, which is more important. + // schedule it before other instructions. We only do this if the load has a + // single zero-latency use. 
if (TII->mayBeCurLoad(*MI))
for (auto &S : SU->Succs)
if (S.isAssignedRegDep() && S.getLatency() == 0 &&
- SU->getHeight() == S.getSUnit()->getHeight()) {
+ S.getSUnit()->NumPredsLeft == 1) {
UsesDotCur = S.getSUnit();
DotCurPNum = PacketNum;
break;
@@ -137,4 +152,15 @@ void HexagonHazardRecognizer::EmitInstruction(SUnit *SU) {
UsesDotCur = nullptr;
DotCurPNum = -1;
}
+
+ UsesLoad = MI->mayLoad();
+
+ if (TII->isHVXVec(*MI) && !MI->mayLoad() && !MI->mayStore())
+ for (auto &S : SU->Succs)
+ if (S.isAssignedRegDep() && S.getLatency() == 0 &&
+ TII->mayBeNewStore(*S.getSUnit()->getInstr()) &&
+ Resources->canReserveResources(*S.getSUnit()->getInstr())) {
+ PrefVectorStoreNew = S.getSUnit();
+ break;
+ }
}
diff --git a/lib/Target/Hexagon/HexagonHazardRecognizer.h b/lib/Target/Hexagon/HexagonHazardRecognizer.h
index 70efcb7a9f76..2874d73ce819 100644
--- a/lib/Target/Hexagon/HexagonHazardRecognizer.h
+++ b/lib/Target/Hexagon/HexagonHazardRecognizer.h
@@ -23,13 +23,21 @@ namespace llvm {
class HexagonHazardRecognizer : public ScheduleHazardRecognizer {
DFAPacketizer *Resources;
const HexagonInstrInfo *TII;
- unsigned PacketNum;
+ unsigned PacketNum = 0;
// If the packet contains a potential dot cur instruction. This is
// used for the scheduling priority function.
- SUnit *UsesDotCur;
+ SUnit *UsesDotCur = nullptr;
// The packet number when a dot cur is emitted. If its use is not generated
// in the same packet, then try to wait another cycle before emitting.
- int DotCurPNum;
+ int DotCurPNum = -1;
+ // Does the packet contain a load? Used to restrict another load, if possible.
+ bool UsesLoad = false;
+ // Check if we should prefer a vector store that will become a .new version.
+ // The .new store uses different resources than a normal store, and the
+ // packetizer will not generate the .new if the regular store does not have
+ // resources available (even if the .new version does). To help, the scheduler
+ // attempts to schedule the .new as soon as possible in the packet.
+ SUnit *PrefVectorStoreNew = nullptr;
// The set of registers defined by instructions in the current packet.
SmallSet<unsigned, 8> RegDefs;
@@ -37,8 +45,7 @@ public:
HexagonHazardRecognizer(const InstrItineraryData *II,
const HexagonInstrInfo *HII,
const HexagonSubtarget &ST)
- : Resources(ST.createDFAPacketizer(II)), TII(HII), PacketNum(0),
- UsesDotCur(nullptr), DotCurPNum(-1) { }
+ : Resources(ST.createDFAPacketizer(II)), TII(HII) { }
~HexagonHazardRecognizer() override {
if (Resources)
diff --git a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
index a6ac4e3df745..efb4c2eb0fc3 100644
--- a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
+++ b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
@@ -64,51 +64,6 @@ FunctionPass *createHexagonISelDag(HexagonTargetMachine &TM,
}
}
-// Intrinsics that return a a predicate.
-static bool doesIntrinsicReturnPredicate(unsigned ID) { - switch (ID) { - default: - return false; - case Intrinsic::hexagon_C2_cmpeq: - case Intrinsic::hexagon_C2_cmpgt: - case Intrinsic::hexagon_C2_cmpgtu: - case Intrinsic::hexagon_C2_cmpgtup: - case Intrinsic::hexagon_C2_cmpgtp: - case Intrinsic::hexagon_C2_cmpeqp: - case Intrinsic::hexagon_C2_bitsset: - case Intrinsic::hexagon_C2_bitsclr: - case Intrinsic::hexagon_C2_cmpeqi: - case Intrinsic::hexagon_C2_cmpgti: - case Intrinsic::hexagon_C2_cmpgtui: - case Intrinsic::hexagon_C2_cmpgei: - case Intrinsic::hexagon_C2_cmpgeui: - case Intrinsic::hexagon_C2_cmplt: - case Intrinsic::hexagon_C2_cmpltu: - case Intrinsic::hexagon_C2_bitsclri: - case Intrinsic::hexagon_C2_and: - case Intrinsic::hexagon_C2_or: - case Intrinsic::hexagon_C2_xor: - case Intrinsic::hexagon_C2_andn: - case Intrinsic::hexagon_C2_not: - case Intrinsic::hexagon_C2_orn: - case Intrinsic::hexagon_C2_pxfer_map: - case Intrinsic::hexagon_C2_any8: - case Intrinsic::hexagon_C2_all8: - case Intrinsic::hexagon_A2_vcmpbeq: - case Intrinsic::hexagon_A2_vcmpbgtu: - case Intrinsic::hexagon_A2_vcmpheq: - case Intrinsic::hexagon_A2_vcmphgt: - case Intrinsic::hexagon_A2_vcmphgtu: - case Intrinsic::hexagon_A2_vcmpweq: - case Intrinsic::hexagon_A2_vcmpwgt: - case Intrinsic::hexagon_A2_vcmpwgtu: - case Intrinsic::hexagon_C2_tfrrp: - case Intrinsic::hexagon_S2_tstbit_i: - case Intrinsic::hexagon_S2_tstbit_r: - return true; - } -} - void HexagonDAGToDAGISel::SelectIndexedLoad(LoadSDNode *LD, const SDLoc &dl) { SDValue Chain = LD->getChain(); SDValue Base = LD->getBasePtr(); @@ -138,12 +93,18 @@ void HexagonDAGToDAGISel::SelectIndexedLoad(LoadSDNode *LD, const SDLoc &dl) { Opcode = IsValidInc ? Hexagon::L2_loadrh_pi : Hexagon::L2_loadrh_io; break; case MVT::i32: + case MVT::f32: + case MVT::v2i16: + case MVT::v4i8: Opcode = IsValidInc ? Hexagon::L2_loadri_pi : Hexagon::L2_loadri_io; break; case MVT::i64: + case MVT::f64: + case MVT::v2i32: + case MVT::v4i16: + case MVT::v8i8: Opcode = IsValidInc ? Hexagon::L2_loadrd_pi : Hexagon::L2_loadrd_io; break; - // 64B case MVT::v64i8: case MVT::v32i16: case MVT::v16i32: @@ -223,7 +184,6 @@ void HexagonDAGToDAGISel::SelectIndexedLoad(LoadSDNode *LD, const SDLoc &dl) { CurDAG->RemoveDeadNode(LD); } - MachineSDNode *HexagonDAGToDAGISel::LoadInstrForLoadIntrinsic(SDNode *IntN) { if (IntN->getOpcode() != ISD::INTRINSIC_W_CHAIN) return nullptr; @@ -241,35 +201,14 @@ MachineSDNode *HexagonDAGToDAGISel::LoadInstrForLoadIntrinsic(SDNode *IntN) { }; auto FLC = LoadPciMap.find(IntNo); if (FLC != LoadPciMap.end()) { - SDNode *Mod = CurDAG->getMachineNode(Hexagon::A2_tfrrcr, dl, MVT::i32, - IntN->getOperand(4)); EVT ValTy = (IntNo == Intrinsic::hexagon_circ_ldd) ? 
MVT::i64 : MVT::i32; EVT RTys[] = { ValTy, MVT::i32, MVT::Other }; // Operands: { Base, Increment, Modifier, Chain } auto Inc = cast<ConstantSDNode>(IntN->getOperand(5)); SDValue I = CurDAG->getTargetConstant(Inc->getSExtValue(), dl, MVT::i32); MachineSDNode *Res = CurDAG->getMachineNode(FLC->second, dl, RTys, - { IntN->getOperand(2), I, SDValue(Mod,0), IntN->getOperand(0) }); - return Res; - } - - static std::map<unsigned,unsigned> LoadPbrMap = { - { Intrinsic::hexagon_brev_ldb, Hexagon::L2_loadrb_pbr }, - { Intrinsic::hexagon_brev_ldub, Hexagon::L2_loadrub_pbr }, - { Intrinsic::hexagon_brev_ldh, Hexagon::L2_loadrh_pbr }, - { Intrinsic::hexagon_brev_lduh, Hexagon::L2_loadruh_pbr }, - { Intrinsic::hexagon_brev_ldw, Hexagon::L2_loadri_pbr }, - { Intrinsic::hexagon_brev_ldd, Hexagon::L2_loadrd_pbr }, - }; - auto FLB = LoadPbrMap.find(IntNo); - if (FLB != LoadPbrMap.end()) { - SDNode *Mod = CurDAG->getMachineNode(Hexagon::A2_tfrrcr, dl, MVT::i32, - IntN->getOperand(4)); - EVT ValTy = (IntNo == Intrinsic::hexagon_brev_ldd) ? MVT::i64 : MVT::i32; - EVT RTys[] = { ValTy, MVT::i32, MVT::Other }; - // Operands: { Base, Modifier, Chain } - MachineSDNode *Res = CurDAG->getMachineNode(FLB->second, dl, RTys, - { IntN->getOperand(2), SDValue(Mod,0), IntN->getOperand(0) }); + { IntN->getOperand(2), I, IntN->getOperand(4), + IntN->getOperand(0) }); return Res; } @@ -343,14 +282,10 @@ bool HexagonDAGToDAGISel::tryLoadOfLoadIntrinsic(LoadSDNode *N) { // a sign-extending intrinsic into (or the other way around). ISD::LoadExtType IntExt; switch (cast<ConstantSDNode>(C->getOperand(1))->getZExtValue()) { - case Intrinsic::hexagon_brev_ldub: - case Intrinsic::hexagon_brev_lduh: case Intrinsic::hexagon_circ_ldub: case Intrinsic::hexagon_circ_lduh: IntExt = ISD::ZEXTLOAD; break; - case Intrinsic::hexagon_brev_ldw: - case Intrinsic::hexagon_brev_ldd: case Intrinsic::hexagon_circ_ldw: case Intrinsic::hexagon_circ_ldd: IntExt = ISD::NON_EXTLOAD; @@ -378,6 +313,134 @@ bool HexagonDAGToDAGISel::tryLoadOfLoadIntrinsic(LoadSDNode *N) { CurDAG->RemoveDeadNode(C); return true; } + return false; +} + +// Convert the bit-reverse load intrinsic to appropriate target instruction. +bool HexagonDAGToDAGISel::SelectBrevLdIntrinsic(SDNode *IntN) { + if (IntN->getOpcode() != ISD::INTRINSIC_W_CHAIN) + return false; + + const SDLoc &dl(IntN); + unsigned IntNo = cast<ConstantSDNode>(IntN->getOperand(1))->getZExtValue(); + + static const std::map<unsigned, unsigned> LoadBrevMap = { + { Intrinsic::hexagon_L2_loadrb_pbr, Hexagon::L2_loadrb_pbr }, + { Intrinsic::hexagon_L2_loadrub_pbr, Hexagon::L2_loadrub_pbr }, + { Intrinsic::hexagon_L2_loadrh_pbr, Hexagon::L2_loadrh_pbr }, + { Intrinsic::hexagon_L2_loadruh_pbr, Hexagon::L2_loadruh_pbr }, + { Intrinsic::hexagon_L2_loadri_pbr, Hexagon::L2_loadri_pbr }, + { Intrinsic::hexagon_L2_loadrd_pbr, Hexagon::L2_loadrd_pbr } + }; + auto FLI = LoadBrevMap.find(IntNo); + if (FLI != LoadBrevMap.end()) { + EVT ValTy = + (IntNo == Intrinsic::hexagon_L2_loadrd_pbr) ? MVT::i64 : MVT::i32; + EVT RTys[] = { ValTy, MVT::i32, MVT::Other }; + // Operands of Intrinsic: {chain, enum ID of intrinsic, baseptr, + // modifier}. + // Operands of target instruction: { Base, Modifier, Chain }. 
+ MachineSDNode *Res = CurDAG->getMachineNode(
+ FLI->second, dl, RTys,
+ {IntN->getOperand(2), IntN->getOperand(3), IntN->getOperand(0)});
+
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = cast<MemIntrinsicSDNode>(IntN)->getMemOperand();
+ Res->setMemRefs(MemOp, MemOp + 1);
+
+ ReplaceUses(SDValue(IntN, 0), SDValue(Res, 0));
+ ReplaceUses(SDValue(IntN, 1), SDValue(Res, 1));
+ ReplaceUses(SDValue(IntN, 2), SDValue(Res, 2));
+ CurDAG->RemoveDeadNode(IntN);
+ return true;
+ }
+ return false;
+}
+
+/// Generate a machine instruction node for the new circular buffer intrinsics.
+/// The new versions use a CSx register instead of the K field.
+bool HexagonDAGToDAGISel::SelectNewCircIntrinsic(SDNode *IntN) {
+ if (IntN->getOpcode() != ISD::INTRINSIC_W_CHAIN)
+ return false;
+
+ SDLoc DL(IntN);
+ unsigned IntNo = cast<ConstantSDNode>(IntN->getOperand(1))->getZExtValue();
+ SmallVector<SDValue, 7> Ops;
+
+ static std::map<unsigned,unsigned> LoadNPcMap = {
+ { Intrinsic::hexagon_L2_loadrub_pci, Hexagon::PS_loadrub_pci },
+ { Intrinsic::hexagon_L2_loadrb_pci, Hexagon::PS_loadrb_pci },
+ { Intrinsic::hexagon_L2_loadruh_pci, Hexagon::PS_loadruh_pci },
+ { Intrinsic::hexagon_L2_loadrh_pci, Hexagon::PS_loadrh_pci },
+ { Intrinsic::hexagon_L2_loadri_pci, Hexagon::PS_loadri_pci },
+ { Intrinsic::hexagon_L2_loadrd_pci, Hexagon::PS_loadrd_pci },
+ { Intrinsic::hexagon_L2_loadrub_pcr, Hexagon::PS_loadrub_pcr },
+ { Intrinsic::hexagon_L2_loadrb_pcr, Hexagon::PS_loadrb_pcr },
+ { Intrinsic::hexagon_L2_loadruh_pcr, Hexagon::PS_loadruh_pcr },
+ { Intrinsic::hexagon_L2_loadrh_pcr, Hexagon::PS_loadrh_pcr },
+ { Intrinsic::hexagon_L2_loadri_pcr, Hexagon::PS_loadri_pcr },
+ { Intrinsic::hexagon_L2_loadrd_pcr, Hexagon::PS_loadrd_pcr }
+ };
+ auto FLI = LoadNPcMap.find (IntNo);
+ if (FLI != LoadNPcMap.end()) {
+ EVT ValTy = MVT::i32;
+ if (IntNo == Intrinsic::hexagon_L2_loadrd_pci ||
+ IntNo == Intrinsic::hexagon_L2_loadrd_pcr)
+ ValTy = MVT::i64;
+ EVT RTys[] = { ValTy, MVT::i32, MVT::Other };
+ // Handle load.*_pci case which has 6 operands.
+ if (IntN->getNumOperands() == 6) {
+ auto Inc = cast<ConstantSDNode>(IntN->getOperand(3));
+ SDValue I = CurDAG->getTargetConstant(Inc->getSExtValue(), DL, MVT::i32);
+ // Operands: { Base, Increment, Modifier, Start, Chain }.
+ Ops = { IntN->getOperand(2), I, IntN->getOperand(4), IntN->getOperand(5),
+ IntN->getOperand(0) };
+ } else
+ // Handle load.*_pcr case which has 5 operands.
+ // Operands: { Base, Modifier, Start, Chain }.
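The _pci/_pcr pseudo-instructions selected here implement circular addressing, and the _pbr forms above implement bit-reversed addressing; both are classic DSP access patterns. A sketch of the semantics only (plain C++, element-granular for clarity; the hardware works on byte addresses and takes the buffer length from the modifier register):

#include <cstdint>

// Circular load: read Base[Off], then advance Off by IncElems, wrapping
// modulo BufElems (handles negative increments too).
int32_t circLoad(const int32_t *Base, int &Off, int IncElems, int BufElems) {
  int32_t V = Base[Off];
  Off = ((Off + IncElems) % BufElems + BufElems) % BufElems;
  return V;
}

// Bit-reversed index: reverse the low Bits bits of I. A bit-reversed load
// walks Base[bitrev(I, Bits)] while I advances linearly, the access order
// FFT butterflies want.
unsigned bitrev(unsigned I, unsigned Bits) {
  unsigned R = 0;
  for (unsigned B = 0; B != Bits; ++B)
    R |= ((I >> B) & 1u) << (Bits - 1 - B);
  return R;
}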
+ Ops = { IntN->getOperand(2), IntN->getOperand(3), IntN->getOperand(4), + IntN->getOperand(0) }; + MachineSDNode *Res = CurDAG->getMachineNode(FLI->second, DL, RTys, Ops); + ReplaceUses(SDValue(IntN, 0), SDValue(Res, 0)); + ReplaceUses(SDValue(IntN, 1), SDValue(Res, 1)); + ReplaceUses(SDValue(IntN, 2), SDValue(Res, 2)); + CurDAG->RemoveDeadNode(IntN); + return true; + } + + static std::map<unsigned,unsigned> StoreNPcMap = { + { Intrinsic::hexagon_S2_storerb_pci, Hexagon::PS_storerb_pci }, + { Intrinsic::hexagon_S2_storerh_pci, Hexagon::PS_storerh_pci }, + { Intrinsic::hexagon_S2_storerf_pci, Hexagon::PS_storerf_pci }, + { Intrinsic::hexagon_S2_storeri_pci, Hexagon::PS_storeri_pci }, + { Intrinsic::hexagon_S2_storerd_pci, Hexagon::PS_storerd_pci }, + { Intrinsic::hexagon_S2_storerb_pcr, Hexagon::PS_storerb_pcr }, + { Intrinsic::hexagon_S2_storerh_pcr, Hexagon::PS_storerh_pcr }, + { Intrinsic::hexagon_S2_storerf_pcr, Hexagon::PS_storerf_pcr }, + { Intrinsic::hexagon_S2_storeri_pcr, Hexagon::PS_storeri_pcr }, + { Intrinsic::hexagon_S2_storerd_pcr, Hexagon::PS_storerd_pcr } + }; + auto FSI = StoreNPcMap.find (IntNo); + if (FSI != StoreNPcMap.end()) { + EVT RTys[] = { MVT::i32, MVT::Other }; + // Handle store.*_pci case which has 7 operands. + if (IntN->getNumOperands() == 7) { + auto Inc = cast<ConstantSDNode>(IntN->getOperand(3)); + SDValue I = CurDAG->getTargetConstant(Inc->getSExtValue(), DL, MVT::i32); + // Operands: { Base, Increment, Modifier, Value, Start, Chain }. + Ops = { IntN->getOperand(2), I, IntN->getOperand(4), IntN->getOperand(5), + IntN->getOperand(6), IntN->getOperand(0) }; + } else + // Handle store.*_pcr case which has 6 operands. + // Operands: { Base, Modifier, Value, Start, Chain }. + Ops = { IntN->getOperand(2), IntN->getOperand(3), IntN->getOperand(4), + IntN->getOperand(5), IntN->getOperand(0) }; + MachineSDNode *Res = CurDAG->getMachineNode(FSI->second, DL, RTys, Ops); + ReplaceUses(SDValue(IntN, 0), SDValue(Res, 0)); + ReplaceUses(SDValue(IntN, 1), SDValue(Res, 1)); + CurDAG->RemoveDeadNode(IntN); + return true; + } return false; } @@ -385,9 +448,9 @@ bool HexagonDAGToDAGISel::tryLoadOfLoadIntrinsic(LoadSDNode *N) { void HexagonDAGToDAGISel::SelectLoad(SDNode *N) { SDLoc dl(N); LoadSDNode *LD = cast<LoadSDNode>(N); - ISD::MemIndexedMode AM = LD->getAddressingMode(); // Handle indexed loads. + ISD::MemIndexedMode AM = LD->getAddressingMode(); if (AM != ISD::UNINDEXED) { SelectIndexedLoad(LD, dl); return; @@ -422,9 +485,16 @@ void HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, const SDLoc &dl) { Opcode = IsValidInc ? Hexagon::S2_storerh_pi : Hexagon::S2_storerh_io; break; case MVT::i32: + case MVT::f32: + case MVT::v2i16: + case MVT::v4i8: Opcode = IsValidInc ? Hexagon::S2_storeri_pi : Hexagon::S2_storeri_io; break; case MVT::i64: + case MVT::f64: + case MVT::v2i32: + case MVT::v4i16: + case MVT::v8i8: Opcode = IsValidInc ? Hexagon::S2_storerd_pi : Hexagon::S2_storerd_io; break; case MVT::v64i8: @@ -488,9 +558,9 @@ void HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, const SDLoc &dl) { void HexagonDAGToDAGISel::SelectStore(SDNode *N) { SDLoc dl(N); StoreSDNode *ST = cast<StoreSDNode>(N); - ISD::MemIndexedMode AM = ST->getAddressingMode(); // Handle indexed stores. 
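The _pi/_io opcode pairs chosen in SelectIndexedStore (and its load counterpart) differ only in addressing: _io is a plain base+offset access, while _pi performs the access and writes the bumped base back in one instruction; the selector uses _pi only when the increment fits the instruction's immediate range (the IsValidInc test). In effect (illustrative helpers, byte increments):

#include <cstdint>

// S2_storeri_io-style: store at Base + OffBytes; Base is unchanged.
void store_io(int32_t *Base, int OffBytes, int32_t V) {
  *reinterpret_cast<int32_t *>(reinterpret_cast<char *>(Base) + OffBytes) = V;
}

// S2_storeri_pi-style: store at Base, then write back Base + IncBytes.
void store_pi(int32_t *&Base, int IncBytes, int32_t V) {
  *Base = V;
  Base = reinterpret_cast<int32_t *>(reinterpret_cast<char *>(Base) + IncBytes);
}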
+ ISD::MemIndexedMode AM = ST->getAddressingMode(); if (AM != ISD::UNINDEXED) { SelectIndexedStore(ST, dl); return; @@ -553,85 +623,6 @@ void HexagonDAGToDAGISel::SelectSHL(SDNode *N) { return Default(); } - -// -// If there is an zero_extend followed an intrinsic in DAG (this means - the -// result of the intrinsic is predicate); convert the zero_extend to -// transfer instruction. -// -// Zero extend -> transfer is lowered here. Otherwise, zero_extend will be -// converted into a MUX as predicate registers defined as 1 bit in the -// compiler. Architecture defines them as 8-bit registers. -// We want to preserve all the lower 8-bits and, not just 1 LSB bit. -// -void HexagonDAGToDAGISel::SelectZeroExtend(SDNode *N) { - SDLoc dl(N); - - SDValue Op0 = N->getOperand(0); - EVT OpVT = Op0.getValueType(); - unsigned OpBW = OpVT.getSizeInBits(); - - // Special handling for zero-extending a vector of booleans. - if (OpVT.isVector() && OpVT.getVectorElementType() == MVT::i1 && OpBW <= 64) { - SDNode *Mask = CurDAG->getMachineNode(Hexagon::C2_mask, dl, MVT::i64, Op0); - unsigned NE = OpVT.getVectorNumElements(); - EVT ExVT = N->getValueType(0); - unsigned ES = ExVT.getScalarSizeInBits(); - uint64_t MV = 0, Bit = 1; - for (unsigned i = 0; i < NE; ++i) { - MV |= Bit; - Bit <<= ES; - } - SDValue Ones = CurDAG->getTargetConstant(MV, dl, MVT::i64); - SDNode *OnesReg = CurDAG->getMachineNode(Hexagon::CONST64, dl, - MVT::i64, Ones); - if (ExVT.getSizeInBits() == 32) { - SDNode *And = CurDAG->getMachineNode(Hexagon::A2_andp, dl, MVT::i64, - SDValue(Mask,0), SDValue(OnesReg,0)); - SDValue SubR = CurDAG->getTargetConstant(Hexagon::isub_lo, dl, MVT::i32); - ReplaceNode(N, CurDAG->getMachineNode(Hexagon::EXTRACT_SUBREG, dl, ExVT, - SDValue(And, 0), SubR)); - return; - } - ReplaceNode(N, - CurDAG->getMachineNode(Hexagon::A2_andp, dl, ExVT, - SDValue(Mask, 0), SDValue(OnesReg, 0))); - return; - } - - SDNode *Int = N->getOperand(0).getNode(); - if ((Int->getOpcode() == ISD::INTRINSIC_WO_CHAIN)) { - unsigned ID = cast<ConstantSDNode>(Int->getOperand(0))->getZExtValue(); - if (doesIntrinsicReturnPredicate(ID)) { - // Now we need to differentiate target data types. - if (N->getValueType(0) == MVT::i64) { - // Convert the zero_extend to Rs = Pd followed by A2_combinew(0,Rs). - SDValue TargetConst0 = CurDAG->getTargetConstant(0, dl, MVT::i32); - SDNode *Result_1 = CurDAG->getMachineNode(Hexagon::C2_tfrpr, dl, - MVT::i32, SDValue(Int, 0)); - SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::A2_tfrsi, dl, - MVT::i32, TargetConst0); - SDNode *Result_3 = CurDAG->getMachineNode(Hexagon::A2_combinew, dl, - MVT::i64, MVT::Other, - SDValue(Result_2, 0), - SDValue(Result_1, 0)); - ReplaceNode(N, Result_3); - return; - } - if (N->getValueType(0) == MVT::i32) { - // Convert the zero_extend to Rs = Pd - SDNode* RsPd = CurDAG->getMachineNode(Hexagon::C2_tfrpr, dl, - MVT::i32, SDValue(Int, 0)); - ReplaceNode(N, RsPd); - return; - } - llvm_unreachable("Unexpected value type"); - } - } - SelectCode(N); -} - - // // Handling intrinsics for circular load and bitreverse load. // @@ -642,6 +633,13 @@ void HexagonDAGToDAGISel::SelectIntrinsicWChain(SDNode *N) { return; } + // Handle bit-reverse load intrinsics. 
+ if (SelectBrevLdIntrinsic(N)) + return; + + if (SelectNewCircIntrinsic(N)) + return; + unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); if (IntNo == Intrinsic::hexagon_V6_vgathermw || IntNo == Intrinsic::hexagon_V6_vgathermw_128B || @@ -735,7 +733,6 @@ void HexagonDAGToDAGISel::SelectConstant(SDNode *N) { SelectCode(N); } - void HexagonDAGToDAGISel::SelectFrameIndex(SDNode *N) { MachineFrameInfo &MFI = MF->getFrameInfo(); const HexagonFrameLowering *HFI = HST->getFrameLowering(); @@ -765,20 +762,113 @@ void HexagonDAGToDAGISel::SelectFrameIndex(SDNode *N) { ReplaceNode(N, R); } +void HexagonDAGToDAGISel::SelectAddSubCarry(SDNode *N) { + unsigned OpcCarry = N->getOpcode() == HexagonISD::ADDC ? Hexagon::A4_addp_c + : Hexagon::A4_subp_c; + SDNode *C = CurDAG->getMachineNode(OpcCarry, SDLoc(N), N->getVTList(), + { N->getOperand(0), N->getOperand(1), + N->getOperand(2) }); + ReplaceNode(N, C); +} -void HexagonDAGToDAGISel::SelectBitcast(SDNode *N) { - EVT SVT = N->getOperand(0).getValueType(); - EVT DVT = N->getValueType(0); - if (!SVT.isVector() || !DVT.isVector() || - SVT.getVectorElementType() == MVT::i1 || - DVT.getVectorElementType() == MVT::i1 || - SVT.getSizeInBits() != DVT.getSizeInBits()) { - SelectCode(N); - return; +void HexagonDAGToDAGISel::SelectVAlign(SDNode *N) { + MVT ResTy = N->getValueType(0).getSimpleVT(); + if (HST->isHVXVectorType(ResTy, true)) + return SelectHvxVAlign(N); + + const SDLoc &dl(N); + unsigned VecLen = ResTy.getSizeInBits(); + if (VecLen == 32) { + SDValue Ops[] = { + CurDAG->getTargetConstant(Hexagon::DoubleRegsRegClassID, dl, MVT::i32), + N->getOperand(0), + CurDAG->getTargetConstant(Hexagon::isub_hi, dl, MVT::i32), + N->getOperand(1), + CurDAG->getTargetConstant(Hexagon::isub_lo, dl, MVT::i32) + }; + SDNode *R = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, + MVT::i64, Ops); + + // Shift right by "(Addr & 0x3) * 8" bytes. + SDValue M0 = CurDAG->getTargetConstant(0x18, dl, MVT::i32); + SDValue M1 = CurDAG->getTargetConstant(0x03, dl, MVT::i32); + SDNode *C = CurDAG->getMachineNode(Hexagon::S4_andi_asl_ri, dl, MVT::i32, + M0, N->getOperand(2), M1); + SDNode *S = CurDAG->getMachineNode(Hexagon::S2_lsr_r_p, dl, MVT::i64, + SDValue(R, 0), SDValue(C, 0)); + SDValue E = CurDAG->getTargetExtractSubreg(Hexagon::isub_lo, dl, ResTy, + SDValue(S, 0)); + ReplaceNode(N, E.getNode()); + } else { + assert(VecLen == 64); + SDNode *Pu = CurDAG->getMachineNode(Hexagon::C2_tfrrp, dl, MVT::v8i1, + N->getOperand(2)); + SDNode *VA = CurDAG->getMachineNode(Hexagon::S2_valignrb, dl, ResTy, + N->getOperand(0), N->getOperand(1), + SDValue(Pu,0)); + ReplaceNode(N, VA); } +} + +void HexagonDAGToDAGISel::SelectVAlignAddr(SDNode *N) { + const SDLoc &dl(N); + SDValue A = N->getOperand(1); + int Mask = -cast<ConstantSDNode>(A.getNode())->getSExtValue(); + assert(isPowerOf2_32(-Mask)); + + SDValue M = CurDAG->getTargetConstant(Mask, dl, MVT::i32); + SDNode *AA = CurDAG->getMachineNode(Hexagon::A2_andir, dl, MVT::i32, + N->getOperand(0), M); + ReplaceNode(N, AA); +} + +// Handle these nodes here to avoid having to write patterns for all +// combinations of input/output types. In all cases, the resulting +// instruction is the same. 
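The predicate/vector cast selectors that follow (SelectP2D and SelectD2P below) have simple bit-level semantics, as their chosen instructions suggest: C2_mask widens each predicate bit to a 0xFF/0x00 byte, and A4_vcmpbgtui against zero collapses nonzero bytes back into predicate bits. A round-trip sketch on plain integers (illustrative only):

#include <cstdint>

uint64_t p2d(uint8_t P) {                 // predicate -> byte mask (C2_mask)
  uint64_t D = 0;
  for (int I = 0; I != 8; ++I)
    if (P & (1u << I))
      D |= UINT64_C(0xFF) << (8 * I);
  return D;
}

uint8_t d2p(uint64_t D) {                 // byte mask -> predicate
  uint8_t P = 0;
  for (int I = 0; I != 8; ++I)
    if ((D >> (8 * I)) & 0xFF)            // byte compares > 0 unsigned
      P |= 1u << I;
  return P;
}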
+void HexagonDAGToDAGISel::SelectTypecast(SDNode *N) { + SDValue Op = N->getOperand(0); + MVT OpTy = Op.getValueType().getSimpleVT(); + SDNode *T = CurDAG->MorphNodeTo(N, N->getOpcode(), + CurDAG->getVTList(OpTy), {Op}); + ReplaceNode(T, Op.getNode()); +} + +void HexagonDAGToDAGISel::SelectP2D(SDNode *N) { + MVT ResTy = N->getValueType(0).getSimpleVT(); + SDNode *T = CurDAG->getMachineNode(Hexagon::C2_mask, SDLoc(N), ResTy, + N->getOperand(0)); + ReplaceNode(N, T); +} + +void HexagonDAGToDAGISel::SelectD2P(SDNode *N) { + const SDLoc &dl(N); + MVT ResTy = N->getValueType(0).getSimpleVT(); + SDValue Zero = CurDAG->getTargetConstant(0, dl, MVT::i32); + SDNode *T = CurDAG->getMachineNode(Hexagon::A4_vcmpbgtui, dl, ResTy, + N->getOperand(0), Zero); + ReplaceNode(N, T); +} + +void HexagonDAGToDAGISel::SelectV2Q(SDNode *N) { + const SDLoc &dl(N); + MVT ResTy = N->getValueType(0).getSimpleVT(); - CurDAG->ReplaceAllUsesOfValueWith(SDValue(N,0), N->getOperand(0)); - CurDAG->RemoveDeadNode(N); + SDValue C = CurDAG->getTargetConstant(-1, dl, MVT::i32); + SDNode *R = CurDAG->getMachineNode(Hexagon::A2_tfrsi, dl, MVT::i32, C); + SDNode *T = CurDAG->getMachineNode(Hexagon::V6_vandvrt, dl, ResTy, + N->getOperand(0), SDValue(R,0)); + ReplaceNode(N, T); +} + +void HexagonDAGToDAGISel::SelectQ2V(SDNode *N) { + const SDLoc &dl(N); + MVT ResTy = N->getValueType(0).getSimpleVT(); + + SDValue C = CurDAG->getTargetConstant(-1, dl, MVT::i32); + SDNode *R = CurDAG->getMachineNode(Hexagon::A2_tfrsi, dl, MVT::i32, C); + SDNode *T = CurDAG->getMachineNode(Hexagon::V6_vandqrt, dl, ResTy, + N->getOperand(0), SDValue(R,0)); + ReplaceNode(N, T); } void HexagonDAGToDAGISel::Select(SDNode *N) { @@ -789,13 +879,21 @@ void HexagonDAGToDAGISel::Select(SDNode *N) { case ISD::Constant: return SelectConstant(N); case ISD::ConstantFP: return SelectConstantFP(N); case ISD::FrameIndex: return SelectFrameIndex(N); - case ISD::BITCAST: return SelectBitcast(N); case ISD::SHL: return SelectSHL(N); case ISD::LOAD: return SelectLoad(N); case ISD::STORE: return SelectStore(N); - case ISD::ZERO_EXTEND: return SelectZeroExtend(N); case ISD::INTRINSIC_W_CHAIN: return SelectIntrinsicWChain(N); case ISD::INTRINSIC_WO_CHAIN: return SelectIntrinsicWOChain(N); + + case HexagonISD::ADDC: + case HexagonISD::SUBC: return SelectAddSubCarry(N); + case HexagonISD::VALIGN: return SelectVAlign(N); + case HexagonISD::VALIGNADDR: return SelectVAlignAddr(N); + case HexagonISD::TYPECAST: return SelectTypecast(N); + case HexagonISD::P2D: return SelectP2D(N); + case HexagonISD::D2P: return SelectD2P(N); + case HexagonISD::Q2V: return SelectQ2V(N); + case HexagonISD::V2Q: return SelectV2Q(N); } if (HST->useHVXOps()) { @@ -1240,7 +1338,7 @@ bool HexagonDAGToDAGISel::SelectAnyImmediate(SDValue &N, SDValue &R, } case HexagonISD::JT: case HexagonISD::CP: - // These are assumed to always be aligned at at least 8-byte boundary. + // These are assumed to always be aligned at least 8-byte boundary. if (LogAlign > 3) return false; R = N.getOperand(0); @@ -1252,7 +1350,7 @@ bool HexagonDAGToDAGISel::SelectAnyImmediate(SDValue &N, SDValue &R, R = N; return true; case ISD::BlockAddress: - // Block address is always aligned at at least 4-byte boundary. + // Block address is always aligned at least 4-byte boundary. if (LogAlign > 2 || !IsAligned(cast<BlockAddressSDNode>(N)->getOffset())) return false; R = N; @@ -1345,9 +1443,13 @@ bool HexagonDAGToDAGISel::DetectUseSxtw(SDValue &N, SDValue &R) { EVT T = Opc == ISD::SIGN_EXTEND ? 
N.getOperand(0).getValueType() : cast<VTSDNode>(N.getOperand(1))->getVT(); - if (T.getSizeInBits() != 32) + unsigned SW = T.getSizeInBits(); + if (SW == 32) + R = N.getOperand(0); + else if (SW < 32) + R = N; + else return false; - R = N.getOperand(0); break; } case ISD::LOAD: { @@ -1361,6 +1463,13 @@ bool HexagonDAGToDAGISel::DetectUseSxtw(SDValue &N, SDValue &R) { R = N; break; } + case ISD::SRA: { + auto *S = dyn_cast<ConstantSDNode>(N.getOperand(1)); + if (!S || S->getZExtValue() != 32) + return false; + R = N; + break; + } default: return false; } @@ -1500,7 +1609,7 @@ static bool isOpcodeHandled(const SDNode *N) { } } -/// \brief Return the weight of an SDNode +/// Return the weight of an SDNode int HexagonDAGToDAGISel::getWeight(SDNode *N) { if (!isOpcodeHandled(N)) return 1; @@ -1799,15 +1908,15 @@ SDValue HexagonDAGToDAGISel::balanceSubTree(SDNode *N, bool TopLevel) { RootHeights[N] = std::max(getHeight(N->getOperand(0).getNode()), getHeight(N->getOperand(1).getNode())) + 1; - DEBUG(dbgs() << "--> No need to balance root (Weight=" << Weight - << " Height=" << RootHeights[N] << "): "); - DEBUG(N->dump()); + LLVM_DEBUG(dbgs() << "--> No need to balance root (Weight=" << Weight + << " Height=" << RootHeights[N] << "): "); + LLVM_DEBUG(N->dump(CurDAG)); return SDValue(N, 0); } - DEBUG(dbgs() << "** Balancing root node: "); - DEBUG(N->dump()); + LLVM_DEBUG(dbgs() << "** Balancing root node: "); + LLVM_DEBUG(N->dump(CurDAG)); unsigned NOpcode = N->getOpcode(); @@ -1855,7 +1964,7 @@ SDValue HexagonDAGToDAGISel::balanceSubTree(SDNode *N, bool TopLevel) { // Whoops, this node was RAUWd by one of the balanceSubTree calls we // made. Our worklist isn't up to date anymore. // Restart the whole process. - DEBUG(dbgs() << "--> Subtree was RAUWd. Restarting...\n"); + LLVM_DEBUG(dbgs() << "--> Subtree was RAUWd. Restarting...\n"); return balanceSubTree(N, TopLevel); } @@ -1926,15 +2035,15 @@ SDValue HexagonDAGToDAGISel::balanceSubTree(SDNode *N, bool TopLevel) { } } - DEBUG(dbgs() << "--> Current height=" << NodeHeights[SDValue(N, 0)] - << " weight=" << CurrentWeight << " imbalanced=" - << Imbalanced << "\n"); + LLVM_DEBUG(dbgs() << "--> Current height=" << NodeHeights[SDValue(N, 0)] + << " weight=" << CurrentWeight + << " imbalanced=" << Imbalanced << "\n"); // Transform MUL(x, C * 2^Y) + SHL(z, Y) -> SHL(ADD(MUL(x, C), z), Y) // This factors out a shift in order to match memw(a<<Y+b). 
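The rewrite described in the comment above rests on the distributive identity x*(C<<Y) + (z<<Y) == ((x*C + z)<<Y), which holds exactly even in wrap-around machine arithmetic, so factoring the shift out never changes the computed address. A tiny self-contained check:

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t X = 0; X != 8; ++X)
    for (uint32_t Z = 0; Z != 8; ++Z)
      for (uint32_t Y = 0; Y != 4; ++Y) {
        const uint32_t C = 5;
        // MUL(x, C*2^Y) + SHL(z, Y) == SHL(ADD(MUL(x, C), z), Y)
        assert(X * (C << Y) + (Z << Y) == ((X * C + Z) << Y));
      }
  return 0;
}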
if (CanFactorize && (willShiftRightEliminate(Mul1.Value, MaxPowerOf2) || willShiftRightEliminate(Mul2.Value, MaxPowerOf2))) { - DEBUG(dbgs() << "--> Found common factor for two MUL children!\n"); + LLVM_DEBUG(dbgs() << "--> Found common factor for two MUL children!\n"); int Weight = Mul1.Weight + Mul2.Weight; int Height = std::max(NodeHeights[Mul1.Value], NodeHeights[Mul2.Value]) + 1; SDValue Mul1Factored = factorOutPowerOf2(Mul1.Value, MaxPowerOf2); @@ -1968,9 +2077,9 @@ SDValue HexagonDAGToDAGISel::balanceSubTree(SDNode *N, bool TopLevel) { if (getUsesInFunction(GANode->getGlobal()) == 1 && Offset->hasOneUse() && getTargetLowering()->isOffsetFoldingLegal(GANode)) { - DEBUG(dbgs() << "--> Combining GA and offset (" << Offset->getSExtValue() - << "): "); - DEBUG(GANode->dump()); + LLVM_DEBUG(dbgs() << "--> Combining GA and offset (" + << Offset->getSExtValue() << "): "); + LLVM_DEBUG(GANode->dump(CurDAG)); SDValue NewTGA = CurDAG->getTargetGlobalAddress(GANode->getGlobal(), SDLoc(GA.Value), @@ -2014,7 +2123,7 @@ SDValue HexagonDAGToDAGISel::balanceSubTree(SDNode *N, bool TopLevel) { // If this is the top level and we haven't factored out a shift, we should try // to move a constant to the bottom to match addressing modes like memw(rX+C) if (TopLevel && !CanFactorize && Leaves.hasConst()) { - DEBUG(dbgs() << "--> Pushing constant to tip of tree."); + LLVM_DEBUG(dbgs() << "--> Pushing constant to tip of tree."); Leaves.pushToBottom(Leaves.pop()); } @@ -2041,7 +2150,7 @@ SDValue HexagonDAGToDAGISel::balanceSubTree(SDNode *N, bool TopLevel) { // Make sure that none of these nodes have been RAUW'd if ((RootWeights.count(V0.getNode()) && RootWeights[V0.getNode()] == -2) || (RootWeights.count(V1.getNode()) && RootWeights[V1.getNode()] == -2)) { - DEBUG(dbgs() << "--> Subtree was RAUWd. Restarting...\n"); + LLVM_DEBUG(dbgs() << "--> Subtree was RAUWd. 
Restarting...\n"); return balanceSubTree(N, TopLevel); } @@ -2075,9 +2184,9 @@ SDValue HexagonDAGToDAGISel::balanceSubTree(SDNode *N, bool TopLevel) { int Weight = V0Weight + V1Weight; Leaves.push(WeightedLeaf(NewNode, Weight, L0.InsertionOrder)); - DEBUG(dbgs() << "--> Built new node (Weight=" << Weight << ",Height=" - << Height << "):\n"); - DEBUG(NewNode.dump()); + LLVM_DEBUG(dbgs() << "--> Built new node (Weight=" << Weight + << ",Height=" << Height << "):\n"); + LLVM_DEBUG(NewNode.dump()); } assert(Leaves.size() == 1); @@ -2101,15 +2210,15 @@ SDValue HexagonDAGToDAGISel::balanceSubTree(SDNode *N, bool TopLevel) { } if (N != NewRoot.getNode()) { - DEBUG(dbgs() << "--> Root is now: "); - DEBUG(NewRoot.dump()); + LLVM_DEBUG(dbgs() << "--> Root is now: "); + LLVM_DEBUG(NewRoot.dump()); // Replace all uses of old root by new root CurDAG->ReplaceAllUsesWith(N, NewRoot.getNode()); // Mark that we have RAUW'd N RootWeights[N] = -2; } else { - DEBUG(dbgs() << "--> Root unchanged.\n"); + LLVM_DEBUG(dbgs() << "--> Root unchanged.\n"); } RootWeights[NewRoot.getNode()] = Leaves.top().Weight; @@ -2132,8 +2241,8 @@ void HexagonDAGToDAGISel::rebalanceAddressTrees() { if (RootWeights.count(BasePtr.getNode())) continue; - DEBUG(dbgs() << "** Rebalancing address calculation in node: "); - DEBUG(N->dump()); + LLVM_DEBUG(dbgs() << "** Rebalancing address calculation in node: "); + LLVM_DEBUG(N->dump(CurDAG)); // FindRoots SmallVector<SDNode *, 4> Worklist; @@ -2173,8 +2282,8 @@ void HexagonDAGToDAGISel::rebalanceAddressTrees() { N = CurDAG->UpdateNodeOperands(N, N->getOperand(0), N->getOperand(1), NewBasePtr, N->getOperand(3)); - DEBUG(dbgs() << "--> Final node: "); - DEBUG(N->dump()); + LLVM_DEBUG(dbgs() << "--> Final node: "); + LLVM_DEBUG(N->dump(CurDAG)); } CurDAG->RemoveDeadNodes(); @@ -2182,4 +2291,3 @@ void HexagonDAGToDAGISel::rebalanceAddressTrees() { RootHeights.clear(); RootWeights.clear(); } - diff --git a/lib/Target/Hexagon/HexagonISelDAGToDAG.h b/lib/Target/Hexagon/HexagonISelDAGToDAG.h index fc66940ee52d..f4f09dd4e758 100644 --- a/lib/Target/Hexagon/HexagonISelDAGToDAG.h +++ b/lib/Target/Hexagon/HexagonISelDAGToDAG.h @@ -90,6 +90,8 @@ public: unsigned ConstraintID, std::vector<SDValue> &OutOps) override; bool tryLoadOfLoadIntrinsic(LoadSDNode *N); + bool SelectBrevLdIntrinsic(SDNode *IntN); + bool SelectNewCircIntrinsic(SDNode *IntN); void SelectLoad(SDNode *N); void SelectIndexedLoad(LoadSDNode *LD, const SDLoc &dl); void SelectIndexedStore(StoreSDNode *ST, const SDLoc &dl); @@ -100,10 +102,17 @@ public: void SelectIntrinsicWOChain(SDNode *N); void SelectConstant(SDNode *N); void SelectConstantFP(SDNode *N); - void SelectBitcast(SDNode *N); void SelectV65Gather(SDNode *N); void SelectV65GatherPred(SDNode *N); void SelectHVXDualOutput(SDNode *N); + void SelectAddSubCarry(SDNode *N); + void SelectVAlign(SDNode *N); + void SelectVAlignAddr(SDNode *N); + void SelectTypecast(SDNode *N); + void SelectP2D(SDNode *N); + void SelectD2P(SDNode *N); + void SelectQ2V(SDNode *N); + void SelectV2Q(SDNode *N); // Include the declarations autogenerated from the selection patterns. 
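Among the selectors declared here, the VALIGN/VALIGNADDR pair implements an unaligned vector access out of two aligned loads (see the scalar and HVX lowerings earlier in this diff). What VALIGN computes, sketched on plain bytes (illustrative, little-endian byte order):

#include <cstddef>
#include <cstdint>

// Extract one VecLen-byte vector starting at unaligned address Addr, given
// the two aligned vectors that straddle it (Lo at Addr & -VecLen, Hi at the
// next aligned address).
void valign(uint8_t *Out, const uint8_t *Lo, const uint8_t *Hi,
            size_t VecLen, uintptr_t Addr) {
  size_t Off = Addr & (VecLen - 1);       // misalignment within a vector
  for (size_t I = 0; I != VecLen; ++I)
    Out[I] = (I + Off < VecLen) ? Lo[I + Off] : Hi[I + Off - VecLen];
}

VALIGNADDR is the trivial companion: it clears the low bits of the address (Addr & -VecLen) to form the aligned base, which is what the A2_andir selection above does.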
#define GET_DAGISEL_DECL @@ -122,6 +131,7 @@ private: void SelectHvxShuffle(SDNode *N); void SelectHvxRor(SDNode *N); + void SelectHvxVAlign(SDNode *N); bool keepsLowBits(const SDValue &Val, unsigned NumBits, SDValue &Src); bool isAlignedMemNode(const MemSDNode *N) const; diff --git a/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp b/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp index 740861851185..8aef9b4560d5 100644 --- a/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp +++ b/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp @@ -11,6 +11,7 @@ #include "HexagonISelDAGToDAG.h" #include "HexagonISelLowering.h" #include "HexagonTargetMachine.h" +#include "llvm/ADT/SetVector.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/IR/Intrinsics.h" @@ -94,18 +95,13 @@ namespace { // Benes network is a forward delta network immediately followed by // a reverse delta network. +enum class ColorKind { None, Red, Black }; // Graph coloring utility used to partition nodes into two groups: // they will correspond to nodes routed to the upper and lower networks. struct Coloring { - enum : uint8_t { - None = 0, - Red, - Black - }; - using Node = int; - using MapType = std::map<Node,uint8_t>; + using MapType = std::map<Node, ColorKind>; static constexpr Node Ignore = Node(-1); Coloring(ArrayRef<Node> Ord) : Order(Ord) { @@ -118,10 +114,10 @@ struct Coloring { return Colors; } - uint8_t other(uint8_t Color) { - if (Color == None) - return Red; - return Color == Red ? Black : Red; + ColorKind other(ColorKind Color) { + if (Color == ColorKind::None) + return ColorKind::Red; + return Color == ColorKind::Red ? ColorKind::Black : ColorKind::Red; } void dump() const; @@ -139,28 +135,28 @@ private: return (Pos < Num/2) ? Pos + Num/2 : Pos - Num/2; } - uint8_t getColor(Node N) { + ColorKind getColor(Node N) { auto F = Colors.find(N); - return F != Colors.end() ? F->second : (uint8_t)None; + return F != Colors.end() ? F->second : ColorKind::None; } - std::pair<bool,uint8_t> getUniqueColor(const NodeSet &Nodes); + std::pair<bool, ColorKind> getUniqueColor(const NodeSet &Nodes); void build(); bool color(); }; } // namespace -std::pair<bool,uint8_t> Coloring::getUniqueColor(const NodeSet &Nodes) { - uint8_t Color = None; +std::pair<bool, ColorKind> Coloring::getUniqueColor(const NodeSet &Nodes) { + auto Color = ColorKind::None; for (Node N : Nodes) { - uint8_t ColorN = getColor(N); - if (ColorN == None) + ColorKind ColorN = getColor(N); + if (ColorN == ColorKind::None) continue; - if (Color == None) + if (Color == ColorKind::None) Color = ColorN; - else if (Color != None && Color != ColorN) - return { false, None }; + else if (Color != ColorKind::None && Color != ColorN) + return { false, ColorKind::None }; } return { true, Color }; } @@ -245,12 +241,12 @@ bool Coloring::color() { // Coloring failed. Split this node. Node C = conj(N); - uint8_t ColorN = other(None); - uint8_t ColorC = other(ColorN); + ColorKind ColorN = other(ColorKind::None); + ColorKind ColorC = other(ColorN); NodeSet &Cs = Edges[C]; NodeSet CopyNs = Ns; for (Node M : CopyNs) { - uint8_t ColorM = getColor(M); + ColorKind ColorM = getColor(M); if (ColorM == ColorC) { // Connect M with C, disconnect M from N. Cs.insert(M); @@ -263,10 +259,10 @@ bool Coloring::color() { Colors[C] = ColorC; } - // Explicitly assign "None" all all uncolored nodes. + // Explicitly assign "None" to all uncolored nodes. 
for (unsigned I = 0; I != Order.size(); ++I) if (Colors.count(I) == 0) - Colors[I] = None; + Colors[I] = ColorKind::None; return true; } @@ -296,10 +292,21 @@ void Coloring::dump() const { } dbgs() << " }\n"; - static const char *const Names[] = { "None", "Red", "Black" }; + auto ColorKindToName = [](ColorKind C) { + switch (C) { + case ColorKind::None: + return "None"; + case ColorKind::Red: + return "Red"; + case ColorKind::Black: + return "Black"; + } + llvm_unreachable("all ColorKinds should be handled by the switch above"); + }; + dbgs() << " Colors: {\n"; for (auto C : Colors) - dbgs() << " " << C.first << " -> " << Names[C.second] << "\n"; + dbgs() << " " << C.first << " -> " << ColorKindToName(C.second) << "\n"; dbgs() << " }\n}\n"; } @@ -471,21 +478,21 @@ bool ReverseDeltaNetwork::route(ElemType *P, RowType *T, unsigned Size, if (M.empty()) return false; - uint8_t ColorUp = Coloring::None; + ColorKind ColorUp = ColorKind::None; for (ElemType J = 0; J != Num; ++J) { ElemType I = P[J]; // I is the position in the input, // J is the position in the output. if (I == Ignore) continue; - uint8_t C = M.at(I); - if (C == Coloring::None) + ColorKind C = M.at(I); + if (C == ColorKind::None) continue; // During "Step", inputs cannot switch halves, so if the "up" color // is still unknown, make sure that it is selected in such a way that // "I" will stay in the same half. bool InpUp = I < Num/2; - if (ColorUp == Coloring::None) + if (ColorUp == ColorKind::None) ColorUp = InpUp ? C : G.other(C); if ((C == ColorUp) != InpUp) { // If I should go to a different half than where is it now, give up. @@ -545,16 +552,16 @@ bool BenesNetwork::route(ElemType *P, RowType *T, unsigned Size, // Both assignments, i.e. Red->Up and Red->Down are valid, but they will // result in different controls. Let's pick the one where the first // control will be "Pass". - uint8_t ColorUp = Coloring::None; + ColorKind ColorUp = ColorKind::None; for (ElemType J = 0; J != Num; ++J) { ElemType I = P[J]; if (I == Ignore) continue; - uint8_t C = M.at(I); - if (C == Coloring::None) + ColorKind C = M.at(I); + if (C == ColorKind::None) continue; - if (ColorUp == Coloring::None) { - ColorUp = (I < Num/2) ? Coloring::Red : Coloring::Black; + if (ColorUp == ColorKind::None) { + ColorUp = (I < Num / 2) ? ColorKind::Red : ColorKind::Black; } unsigned CI = (I < Num/2) ? I+Num/2 : I-Num/2; if (C == ColorUp) { @@ -769,6 +776,13 @@ struct ShuffleMask { size_t H = Mask.size()/2; return ShuffleMask(Mask.take_back(H)); } + + void print(raw_ostream &OS) const { + OS << "MinSrc:" << MinSrc << ", MaxSrc:" << MaxSrc << " {"; + for (int M : Mask) + OS << ' ' << M; + OS << " }"; + } }; } // namespace @@ -806,6 +820,7 @@ namespace llvm { void selectShuffle(SDNode *N); void selectRor(SDNode *N); + void selectVAlign(SDNode *N); private: void materialize(const ResultStack &Results); @@ -821,7 +836,6 @@ namespace llvm { MutableArrayRef<int> NewMask, unsigned Options = None); OpRef packp(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &Results, MutableArrayRef<int> NewMask); - OpRef zerous(ShuffleMask SM, OpRef Va, ResultStack &Results); OpRef vmuxs(ArrayRef<uint8_t> Bytes, OpRef Va, OpRef Vb, ResultStack &Results); OpRef vmuxp(ArrayRef<uint8_t> Bytes, OpRef Va, OpRef Vb, @@ -905,42 +919,55 @@ static bool isPermutation(ArrayRef<int> Mask) { } bool HvxSelector::selectVectorConstants(SDNode *N) { - // Constant vectors are generated as loads from constant pools. 
- // Since they are generated during the selection process, the main - // selection algorithm is not aware of them. Select them directly - // here. - SmallVector<SDNode*,4> Loads; - SmallVector<SDNode*,16> WorkQ; + // Constant vectors are generated as loads from constant pools or as + // splats of a constant value. Since they are generated during the + // selection process, the main selection algorithm is not aware of them. + // Select them directly here. + SmallVector<SDNode*,4> Nodes; + SetVector<SDNode*> WorkQ; + + // The one-use test for VSPLATW's operand may fail due to dead nodes + // left over in the DAG. + DAG.RemoveDeadNodes(); // The DAG can change (due to CSE) during selection, so cache all the // unselected nodes first to avoid traversing a mutating DAG. - auto IsLoadToSelect = [] (SDNode *N) { - if (!N->isMachineOpcode() && N->getOpcode() == ISD::LOAD) { - SDValue Addr = cast<LoadSDNode>(N)->getBasePtr(); - unsigned AddrOpc = Addr.getOpcode(); - if (AddrOpc == HexagonISD::AT_PCREL || AddrOpc == HexagonISD::CP) - if (Addr.getOperand(0).getOpcode() == ISD::TargetConstantPool) - return true; + auto IsNodeToSelect = [] (SDNode *N) { + if (N->isMachineOpcode()) + return false; + switch (N->getOpcode()) { + case HexagonISD::VZERO: + case HexagonISD::VSPLATW: + return true; + case ISD::LOAD: { + SDValue Addr = cast<LoadSDNode>(N)->getBasePtr(); + unsigned AddrOpc = Addr.getOpcode(); + if (AddrOpc == HexagonISD::AT_PCREL || AddrOpc == HexagonISD::CP) + if (Addr.getOperand(0).getOpcode() == ISD::TargetConstantPool) + return true; + } + break; } - return false; + // Make sure to select the operand of VSPLATW. + bool IsSplatOp = N->hasOneUse() && + N->use_begin()->getOpcode() == HexagonISD::VSPLATW; + return IsSplatOp; }; - WorkQ.push_back(N); + WorkQ.insert(N); for (unsigned i = 0; i != WorkQ.size(); ++i) { SDNode *W = WorkQ[i]; - if (IsLoadToSelect(W)) { - Loads.push_back(W); - continue; - } + if (IsNodeToSelect(W)) + Nodes.push_back(W); for (unsigned j = 0, f = W->getNumOperands(); j != f; ++j) - WorkQ.push_back(W->getOperand(j).getNode()); + WorkQ.insert(W->getOperand(j).getNode()); } - for (SDNode *L : Loads) + for (SDNode *L : Nodes) ISel.Select(L); - return !Loads.empty(); + return !Nodes.empty(); } void HvxSelector::materialize(const ResultStack &Results) { @@ -977,15 +1004,11 @@ void HvxSelector::materialize(const ResultStack &Results) { MVT OpTy = Op.getValueType().getSimpleVT(); if (Part != OpRef::Whole) { assert(Part == OpRef::LoHalf || Part == OpRef::HiHalf); - if (Op.getOpcode() == HexagonISD::VCOMBINE) { - Op = (Part == OpRef::HiHalf) ? Op.getOperand(0) : Op.getOperand(1); - } else { - MVT HalfTy = MVT::getVectorVT(OpTy.getVectorElementType(), - OpTy.getVectorNumElements()/2); - unsigned Sub = (Part == OpRef::LoHalf) ? Hexagon::vsub_lo - : Hexagon::vsub_hi; - Op = DAG.getTargetExtractSubreg(Sub, dl, HalfTy, Op); - } + MVT HalfTy = MVT::getVectorVT(OpTy.getVectorElementType(), + OpTy.getVectorNumElements()/2); + unsigned Sub = (Part == OpRef::LoHalf) ? 
Hexagon::vsub_lo + : Hexagon::vsub_hi; + Op = DAG.getTargetExtractSubreg(Sub, dl, HalfTy, Op); } Ops.push_back(Op); } // for (Node : Results) @@ -1031,25 +1054,53 @@ OpRef HvxSelector::packs(ShuffleMask SM, OpRef Va, OpRef Vb, int VecLen = SM.Mask.size(); MVT Ty = getSingleVT(MVT::i8); - if (SM.MaxSrc - SM.MinSrc < int(HwLen)) { - if (SM.MaxSrc < int(HwLen)) { - memcpy(NewMask.data(), SM.Mask.data(), sizeof(int)*VecLen); - return Va; + auto IsExtSubvector = [] (ShuffleMask M) { + assert(M.MinSrc >= 0 && M.MaxSrc >= 0); + for (int I = 0, E = M.Mask.size(); I != E; ++I) { + if (M.Mask[I] >= 0 && M.Mask[I]-I != M.MinSrc) + return false; } - if (SM.MinSrc >= int(HwLen)) { - for (int I = 0; I != VecLen; ++I) { - int M = SM.Mask[I]; - if (M != -1) - M -= HwLen; - NewMask[I] = M; + return true; + }; + + if (SM.MaxSrc - SM.MinSrc < int(HwLen)) { + if (SM.MinSrc == 0 || SM.MinSrc == int(HwLen) || !IsExtSubvector(SM)) { + // If the mask picks elements from only one of the operands, return + // that operand, and update the mask to use index 0 to refer to the + // first element of that operand. + // If the mask extracts a subvector, it will be handled below, so + // skip it here. + if (SM.MaxSrc < int(HwLen)) { + memcpy(NewMask.data(), SM.Mask.data(), sizeof(int)*VecLen); + return Va; + } + if (SM.MinSrc >= int(HwLen)) { + for (int I = 0; I != VecLen; ++I) { + int M = SM.Mask[I]; + if (M != -1) + M -= HwLen; + NewMask[I] = M; + } + return Vb; } - return Vb; + } + int MinSrc = SM.MinSrc; + if (SM.MaxSrc < int(HwLen)) { + Vb = Va; + } else if (SM.MinSrc > int(HwLen)) { + Va = Vb; + MinSrc = SM.MinSrc - HwLen; } const SDLoc &dl(Results.InpNode); - SDValue S = DAG.getTargetConstant(SM.MinSrc, dl, MVT::i32); - if (isUInt<3>(SM.MinSrc)) { - Results.push(Hexagon::V6_valignbi, Ty, {Vb, Va, S}); + if (isUInt<3>(MinSrc) || isUInt<3>(HwLen-MinSrc)) { + bool IsRight = isUInt<3>(MinSrc); // Right align. + SDValue S = DAG.getTargetConstant(IsRight ? MinSrc : HwLen-MinSrc, + dl, MVT::i32); + unsigned Opc = IsRight ? 
Hexagon::V6_valignbi + : Hexagon::V6_vlalignbi; + Results.push(Opc, Ty, {Vb, Va, S}); } else { + SDValue S = DAG.getTargetConstant(MinSrc, dl, MVT::i32); Results.push(Hexagon::A2_tfrsi, MVT::i32, {S}); unsigned Top = Results.top(); Results.push(Hexagon::V6_valignb, Ty, {Vb, Va, OpRef::res(Top)}); @@ -1139,25 +1190,6 @@ OpRef HvxSelector::packp(ShuffleMask SM, OpRef Va, OpRef Vb, return concat(Out[0], Out[1], Results); } -OpRef HvxSelector::zerous(ShuffleMask SM, OpRef Va, ResultStack &Results) { - DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';}); - - int VecLen = SM.Mask.size(); - SmallVector<uint8_t,128> UsedBytes(VecLen); - bool HasUnused = false; - for (int I = 0; I != VecLen; ++I) { - if (SM.Mask[I] != -1) - UsedBytes[I] = 0xFF; - else - HasUnused = true; - } - if (!HasUnused) - return Va; - SDValue B = getVectorConstant(UsedBytes, SDLoc(Results.InpNode)); - Results.push(Hexagon::V6_vand, getSingleVT(MVT::i8), {Va, OpRef(B)}); - return OpRef::res(Results.top()); -} - OpRef HvxSelector::vmuxs(ArrayRef<uint8_t> Bytes, OpRef Va, OpRef Vb, ResultStack &Results) { DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';}); @@ -1279,6 +1311,8 @@ OpRef HvxSelector::shuffp2(ShuffleMask SM, OpRef Va, OpRef Vb, return shuffp1(ShuffleMask(PackedMask), P, Results); SmallVector<int,256> MaskL(VecLen), MaskR(VecLen); + splitMask(SM.Mask, MaskL, MaskR); + OpRef L = shuffp1(ShuffleMask(MaskL), Va, Results); OpRef R = shuffp1(ShuffleMask(MaskR), Vb, Results); if (!L.isValid() || !R.isValid()) @@ -1934,7 +1968,6 @@ void HvxSelector::selectShuffle(SDNode *N) { // If the mask is all -1's, generate "undef". if (!UseLeft && !UseRight) { ISel.ReplaceNode(N, ISel.selectUndef(SDLoc(SN), ResTy).getNode()); - DAG.RemoveDeadNode(N); return; } @@ -1976,8 +2009,8 @@ void HvxSelector::selectRor(SDNode *N) { SDNode *NewN = nullptr; if (auto *CN = dyn_cast<ConstantSDNode>(RotV.getNode())) { - unsigned S = CN->getZExtValue(); - if (S % HST.getVectorLength() == 0) { + unsigned S = CN->getZExtValue() % HST.getVectorLength(); + if (S == 0) { NewN = VecV.getNode(); } else if (isUInt<3>(S)) { SDValue C = DAG.getTargetConstant(S, dl, MVT::i32); @@ -1990,6 +2023,15 @@ void HvxSelector::selectRor(SDNode *N) { NewN = DAG.getMachineNode(Hexagon::V6_vror, dl, Ty, {VecV, RotV}); ISel.ReplaceNode(N, NewN); +} + +void HvxSelector::selectVAlign(SDNode *N) { + SDValue Vv = N->getOperand(0); + SDValue Vu = N->getOperand(1); + SDValue Rt = N->getOperand(2); + SDNode *NewN = DAG.getMachineNode(Hexagon::V6_valignb, SDLoc(N), + N->getValueType(0), {Vv, Vu, Rt}); + ISel.ReplaceNode(N, NewN); DAG.RemoveDeadNode(N); } @@ -2001,7 +2043,15 @@ void HexagonDAGToDAGISel::SelectHvxRor(SDNode *N) { HvxSelector(*this, *CurDAG).selectRor(N); } +void HexagonDAGToDAGISel::SelectHvxVAlign(SDNode *N) { + HvxSelector(*this, *CurDAG).selectVAlign(N); +} + void HexagonDAGToDAGISel::SelectV65GatherPred(SDNode *N) { + if (!HST->usePackets()) { + report_fatal_error("Support for gather requires packets, " + "which are disabled"); + } const SDLoc &dl(N); SDValue Chain = N->getOperand(0); SDValue Address = N->getOperand(2); @@ -2037,11 +2087,14 @@ void HexagonDAGToDAGISel::SelectV65GatherPred(SDNode *N) { MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand(); cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1); - ReplaceUses(N, Result); - CurDAG->RemoveDeadNode(N); + ReplaceNode(N, Result); } void HexagonDAGToDAGISel::SelectV65Gather(SDNode *N) { + if (!HST->usePackets()) { + report_fatal_error("Support for gather requires packets, " + "which are 
disabled"); + } const SDLoc &dl(N); SDValue Chain = N->getOperand(0); SDValue Address = N->getOperand(2); @@ -2076,8 +2129,7 @@ void HexagonDAGToDAGISel::SelectV65Gather(SDNode *N) { MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand(); cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1); - ReplaceUses(N, Result); - CurDAG->RemoveDeadNode(N); + ReplaceNode(N, Result); } void HexagonDAGToDAGISel::SelectHVXDualOutput(SDNode *N) { @@ -2120,5 +2172,3 @@ void HexagonDAGToDAGISel::SelectHVXDualOutput(SDNode *N) { ReplaceUses(SDValue(N, 1), SDValue(Result, 1)); CurDAG->RemoveDeadNode(N); } - - diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp index 0e0da2ddc400..604d84994b6c 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -40,6 +40,7 @@ #include "llvm/IR/InlineAsm.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" @@ -103,427 +104,52 @@ static cl::opt<int> MaxStoresPerMemsetOptSizeCL("max-store-memset-Os", cl::Hidden, cl::ZeroOrMore, cl::init(4), cl::desc("Max #stores to inline memset")); +static cl::opt<bool> AlignLoads("hexagon-align-loads", + cl::Hidden, cl::init(false), + cl::desc("Rewrite unaligned loads as a pair of aligned loads")); + namespace { class HexagonCCState : public CCState { - unsigned NumNamedVarArgParams; + unsigned NumNamedVarArgParams = 0; public: - HexagonCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF, + HexagonCCState(CallingConv::ID CC, bool IsVarArg, MachineFunction &MF, SmallVectorImpl<CCValAssign> &locs, LLVMContext &C, - int NumNamedVarArgParams) - : CCState(CC, isVarArg, MF, locs, C), - NumNamedVarArgParams(NumNamedVarArgParams) {} - + unsigned NumNamedArgs) + : CCState(CC, IsVarArg, MF, locs, C), + NumNamedVarArgParams(NumNamedArgs) {} unsigned getNumNamedVarArgParams() const { return NumNamedVarArgParams; } }; - enum StridedLoadKind { - Even = 0, - Odd, - NoPattern - }; - } // end anonymous namespace -// Implement calling convention for Hexagon. 
- -static const MVT LegalV64[] = { MVT::v64i8, MVT::v32i16, MVT::v16i32 }; -static const MVT LegalW64[] = { MVT::v128i8, MVT::v64i16, MVT::v32i32 }; -static const MVT LegalV128[] = { MVT::v128i8, MVT::v64i16, MVT::v32i32 }; -static const MVT LegalW128[] = { MVT::v256i8, MVT::v128i16, MVT::v64i32 }; - -static bool -CC_Hexagon(unsigned ValNo, MVT ValVT, - MVT LocVT, CCValAssign::LocInfo LocInfo, - ISD::ArgFlagsTy ArgFlags, CCState &State); - -static bool -CC_Hexagon32(unsigned ValNo, MVT ValVT, - MVT LocVT, CCValAssign::LocInfo LocInfo, - ISD::ArgFlagsTy ArgFlags, CCState &State); - -static bool -CC_Hexagon64(unsigned ValNo, MVT ValVT, - MVT LocVT, CCValAssign::LocInfo LocInfo, - ISD::ArgFlagsTy ArgFlags, CCState &State); - -static bool -CC_HexagonVector(unsigned ValNo, MVT ValVT, - MVT LocVT, CCValAssign::LocInfo LocInfo, - ISD::ArgFlagsTy ArgFlags, CCState &State); - -static bool -RetCC_Hexagon(unsigned ValNo, MVT ValVT, - MVT LocVT, CCValAssign::LocInfo LocInfo, - ISD::ArgFlagsTy ArgFlags, CCState &State); - -static bool -RetCC_Hexagon32(unsigned ValNo, MVT ValVT, - MVT LocVT, CCValAssign::LocInfo LocInfo, - ISD::ArgFlagsTy ArgFlags, CCState &State); - -static bool -RetCC_Hexagon64(unsigned ValNo, MVT ValVT, - MVT LocVT, CCValAssign::LocInfo LocInfo, - ISD::ArgFlagsTy ArgFlags, CCState &State); - -static bool -RetCC_HexagonVector(unsigned ValNo, MVT ValVT, - MVT LocVT, CCValAssign::LocInfo LocInfo, - ISD::ArgFlagsTy ArgFlags, CCState &State); - -static bool -CC_Hexagon_VarArg (unsigned ValNo, MVT ValVT, - MVT LocVT, CCValAssign::LocInfo LocInfo, - ISD::ArgFlagsTy ArgFlags, CCState &State) { - HexagonCCState &HState = static_cast<HexagonCCState &>(State); - - if (ValNo < HState.getNumNamedVarArgParams()) { - // Deal with named arguments. - return CC_Hexagon(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State); - } - - // Deal with un-named arguments. - unsigned Offset; - if (ArgFlags.isByVal()) { - // If pass-by-value, the size allocated on stack is decided - // by ArgFlags.getByValSize(), not by the size of LocVT. 
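// Editorial example of the byval rule stated above: a 24-byte struct
// passed byval reserves a 24-byte stack slot at the struct's own
// alignment (getByValSize()/getByValAlign()), not a slot sized by LocVT.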
- Offset = State.AllocateStack(ArgFlags.getByValSize(), - ArgFlags.getByValAlign()); - State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); - return false; - } - if (LocVT == MVT::i1 || LocVT == MVT::i8 || LocVT == MVT::i16) { - LocVT = MVT::i32; - ValVT = MVT::i32; - if (ArgFlags.isSExt()) - LocInfo = CCValAssign::SExt; - else if (ArgFlags.isZExt()) - LocInfo = CCValAssign::ZExt; - else - LocInfo = CCValAssign::AExt; - } - if (LocVT == MVT::i32 || LocVT == MVT::f32) { - Offset = State.AllocateStack(4, 4); - State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); - return false; - } - if (LocVT == MVT::i64 || LocVT == MVT::f64) { - Offset = State.AllocateStack(8, 8); - State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); - return false; - } - if (LocVT == MVT::v2i64 || LocVT == MVT::v4i32 || LocVT == MVT::v8i16 || - LocVT == MVT::v16i8) { - Offset = State.AllocateStack(16, 16); - State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); - return false; - } - if (LocVT == MVT::v4i64 || LocVT == MVT::v8i32 || LocVT == MVT::v16i16 || - LocVT == MVT::v32i8) { - Offset = State.AllocateStack(32, 32); - State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); - return false; - } - if (LocVT == MVT::v16i32 || LocVT == MVT::v32i16 || - LocVT == MVT::v64i8 || LocVT == MVT::v512i1) { - Offset = State.AllocateStack(64, 64); - State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); - return false; - } - if (LocVT == MVT::v32i32 || LocVT == MVT::v64i16 || - LocVT == MVT::v128i8 || LocVT == MVT::v1024i1) { - Offset = State.AllocateStack(128, 128); - State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); - return false; - } - if (LocVT == MVT::v64i32 || LocVT == MVT::v128i16 || - LocVT == MVT::v256i8) { - Offset = State.AllocateStack(256, 256); - State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); - return false; - } - - llvm_unreachable(nullptr); -} - -static bool CC_Hexagon (unsigned ValNo, MVT ValVT, MVT LocVT, - CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State) { - if (ArgFlags.isByVal()) { - // Passed on stack. 
- unsigned Offset = State.AllocateStack(ArgFlags.getByValSize(), - ArgFlags.getByValAlign()); - State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); - return false; - } - - if (LocVT == MVT::i1) { - LocVT = MVT::i32; - } else if (LocVT == MVT::i8 || LocVT == MVT::i16) { - LocVT = MVT::i32; - ValVT = MVT::i32; - if (ArgFlags.isSExt()) - LocInfo = CCValAssign::SExt; - else if (ArgFlags.isZExt()) - LocInfo = CCValAssign::ZExt; - else - LocInfo = CCValAssign::AExt; - } else if (LocVT == MVT::v4i8 || LocVT == MVT::v2i16) { - LocVT = MVT::i32; - LocInfo = CCValAssign::BCvt; - } else if (LocVT == MVT::v8i8 || LocVT == MVT::v4i16 || LocVT == MVT::v2i32) { - LocVT = MVT::i64; - LocInfo = CCValAssign::BCvt; - } - - if (LocVT == MVT::i32 || LocVT == MVT::f32) { - if (!CC_Hexagon32(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State)) - return false; - } - - if (LocVT == MVT::i64 || LocVT == MVT::f64) { - if (!CC_Hexagon64(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State)) - return false; - } - - if (LocVT == MVT::v8i32 || LocVT == MVT::v16i16 || LocVT == MVT::v32i8) { - unsigned Offset = State.AllocateStack(ArgFlags.getByValSize(), 32); - State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); - return false; - } - - auto &HST = State.getMachineFunction().getSubtarget<HexagonSubtarget>(); - if (HST.isHVXVectorType(LocVT)) { - if (!CC_HexagonVector(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State)) - return false; - } - - return true; // CC didn't match. -} +// Implement calling convention for Hexagon. -static bool CC_Hexagon32(unsigned ValNo, MVT ValVT, - MVT LocVT, CCValAssign::LocInfo LocInfo, - ISD::ArgFlagsTy ArgFlags, CCState &State) { - static const MCPhysReg RegList[] = { - Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3, Hexagon::R4, - Hexagon::R5 +static bool CC_SkipOdd(unsigned &ValNo, MVT &ValVT, MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, CCState &State) { + static const MCPhysReg ArgRegs[] = { + Hexagon::R0, Hexagon::R1, Hexagon::R2, + Hexagon::R3, Hexagon::R4, Hexagon::R5 }; - if (unsigned Reg = State.AllocateReg(RegList)) { - State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); - return false; - } + const unsigned NumArgRegs = array_lengthof(ArgRegs); + unsigned RegNum = State.getFirstUnallocated(ArgRegs); - unsigned Offset = State.AllocateStack(4, 4); - State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); - return false; -} - -static bool CC_Hexagon64(unsigned ValNo, MVT ValVT, - MVT LocVT, CCValAssign::LocInfo LocInfo, - ISD::ArgFlagsTy ArgFlags, CCState &State) { - if (unsigned Reg = State.AllocateReg(Hexagon::D0)) { - State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); - return false; - } - - static const MCPhysReg RegList1[] = { - Hexagon::D1, Hexagon::D2 - }; - static const MCPhysReg RegList2[] = { - Hexagon::R1, Hexagon::R3 - }; - if (unsigned Reg = State.AllocateReg(RegList1, RegList2)) { - State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); - return false; - } + // RegNum is an index into ArgRegs: skip a register if RegNum is odd. + if (RegNum != NumArgRegs && RegNum % 2 == 1) + State.AllocateReg(ArgRegs[RegNum]); - unsigned Offset = State.AllocateStack(8, 8, Hexagon::D2); - State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); + // Always return false here, as this function only makes sure that the first + // unallocated register has an even register number and does not actually + // allocate a register for the current argument. 
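// Hedged standalone sketch (illustration only, not LLVM code) of the
// even-register rule enforced by CC_SkipOdd above: a 64-bit value must
// start at an even register index so it can occupy an aligned pair such
// as R3:R2. Register names here are just labels for the simulation.
#include <cstdio>

int main() {
  unsigned NextFreeReg = 0;                        // index into {R0..R5}
  unsigned A = NextFreeReg++;                      // i32 arg -> R0
  if (NextFreeReg % 2 == 1)                        // CC_SkipOdd: burn R1
    ++NextFreeReg;
  unsigned Lo = NextFreeReg++, Hi = NextFreeReg++; // i64 arg -> R3:R2
  std::printf("i32 in R%u, i64 in R%u:R%u\n", A, Hi, Lo);
  return 0;
}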
return false; } -static bool CC_HexagonVector(unsigned ValNo, MVT ValVT, - MVT LocVT, CCValAssign::LocInfo LocInfo, - ISD::ArgFlagsTy ArgFlags, CCState &State) { - static const MCPhysReg VecLstS[] = { - Hexagon::V0, Hexagon::V1, Hexagon::V2, Hexagon::V3, Hexagon::V4, - Hexagon::V5, Hexagon::V6, Hexagon::V7, Hexagon::V8, Hexagon::V9, - Hexagon::V10, Hexagon::V11, Hexagon::V12, Hexagon::V13, Hexagon::V14, - Hexagon::V15 - }; - static const MCPhysReg VecLstD[] = { - Hexagon::W0, Hexagon::W1, Hexagon::W2, Hexagon::W3, Hexagon::W4, - Hexagon::W5, Hexagon::W6, Hexagon::W7 - }; - auto &MF = State.getMachineFunction(); - auto &HST = MF.getSubtarget<HexagonSubtarget>(); - - if (HST.useHVX64BOps() && - (LocVT == MVT::v16i32 || LocVT == MVT::v32i16 || - LocVT == MVT::v64i8 || LocVT == MVT::v512i1)) { - if (unsigned Reg = State.AllocateReg(VecLstS)) { - State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); - return false; - } - unsigned Offset = State.AllocateStack(64, 64); - State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); - return false; - } - if (HST.useHVX64BOps() && (LocVT == MVT::v32i32 || - LocVT == MVT::v64i16 || LocVT == MVT::v128i8)) { - if (unsigned Reg = State.AllocateReg(VecLstD)) { - State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); - return false; - } - unsigned Offset = State.AllocateStack(128, 128); - State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); - return false; - } - // 128B Mode - if (HST.useHVX128BOps() && (LocVT == MVT::v64i32 || - LocVT == MVT::v128i16 || LocVT == MVT::v256i8)) { - if (unsigned Reg = State.AllocateReg(VecLstD)) { - State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); - return false; - } - unsigned Offset = State.AllocateStack(256, 256); - State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); - return false; - } - if (HST.useHVX128BOps() && - (LocVT == MVT::v32i32 || LocVT == MVT::v64i16 || - LocVT == MVT::v128i8 || LocVT == MVT::v1024i1)) { - if (unsigned Reg = State.AllocateReg(VecLstS)) { - State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); - return false; - } - unsigned Offset = State.AllocateStack(128, 128); - State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); - return false; - } - return true; -} - -static bool RetCC_Hexagon(unsigned ValNo, MVT ValVT, - MVT LocVT, CCValAssign::LocInfo LocInfo, - ISD::ArgFlagsTy ArgFlags, CCState &State) { - auto &MF = State.getMachineFunction(); - auto &HST = MF.getSubtarget<HexagonSubtarget>(); - - if (LocVT == MVT::i1) { - // Return values of type MVT::i1 still need to be assigned to R0, but - // the value type needs to remain i1. LowerCallResult will deal with it, - // but it needs to recognize i1 as the value type. 
- LocVT = MVT::i32; - } else if (LocVT == MVT::i8 || LocVT == MVT::i16) { - LocVT = MVT::i32; - ValVT = MVT::i32; - if (ArgFlags.isSExt()) - LocInfo = CCValAssign::SExt; - else if (ArgFlags.isZExt()) - LocInfo = CCValAssign::ZExt; - else - LocInfo = CCValAssign::AExt; - } else if (LocVT == MVT::v4i8 || LocVT == MVT::v2i16) { - LocVT = MVT::i32; - LocInfo = CCValAssign::BCvt; - } else if (LocVT == MVT::v8i8 || LocVT == MVT::v4i16 || LocVT == MVT::v2i32) { - LocVT = MVT::i64; - LocInfo = CCValAssign::BCvt; - } else if (LocVT == MVT::v64i8 || LocVT == MVT::v32i16 || - LocVT == MVT::v16i32 || LocVT == MVT::v512i1) { - LocVT = MVT::v16i32; - ValVT = MVT::v16i32; - LocInfo = CCValAssign::Full; - } else if (LocVT == MVT::v128i8 || LocVT == MVT::v64i16 || - LocVT == MVT::v32i32 || - (LocVT == MVT::v1024i1 && HST.useHVX128BOps())) { - LocVT = MVT::v32i32; - ValVT = MVT::v32i32; - LocInfo = CCValAssign::Full; - } else if (LocVT == MVT::v256i8 || LocVT == MVT::v128i16 || - LocVT == MVT::v64i32) { - LocVT = MVT::v64i32; - ValVT = MVT::v64i32; - LocInfo = CCValAssign::Full; - } - if (LocVT == MVT::i32 || LocVT == MVT::f32) { - if (!RetCC_Hexagon32(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State)) - return false; - } - - if (LocVT == MVT::i64 || LocVT == MVT::f64) { - if (!RetCC_Hexagon64(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State)) - return false; - } - if (LocVT == MVT::v16i32 || LocVT == MVT::v32i32 || LocVT == MVT::v64i32) { - if (!RetCC_HexagonVector(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State)) - return false; - } - return true; // CC didn't match. -} - -static bool RetCC_Hexagon32(unsigned ValNo, MVT ValVT, - MVT LocVT, CCValAssign::LocInfo LocInfo, - ISD::ArgFlagsTy ArgFlags, CCState &State) { - if (LocVT == MVT::i32 || LocVT == MVT::f32) { - // Note that use of registers beyond R1 is not ABI compliant. However there - // are (experimental) IR passes which generate internal functions that - // return structs using these additional registers. - static const uint16_t RegList[] = { Hexagon::R0, Hexagon::R1, - Hexagon::R2, Hexagon::R3, - Hexagon::R4, Hexagon::R5 }; - if (unsigned Reg = State.AllocateReg(RegList)) { - State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); - return false; - } - } - - return true; -} - -static bool RetCC_Hexagon64(unsigned ValNo, MVT ValVT, - MVT LocVT, CCValAssign::LocInfo LocInfo, - ISD::ArgFlagsTy ArgFlags, CCState &State) { - if (LocVT == MVT::i64 || LocVT == MVT::f64) { - if (unsigned Reg = State.AllocateReg(Hexagon::D0)) { - State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); - return false; - } - } - - return true; -} +#include "HexagonGenCallingConv.inc" -static bool RetCC_HexagonVector(unsigned ValNo, MVT ValVT, - MVT LocVT, CCValAssign::LocInfo LocInfo, - ISD::ArgFlagsTy ArgFlags, CCState &State) { - auto &MF = State.getMachineFunction(); - auto &HST = MF.getSubtarget<HexagonSubtarget>(); - - if (LocVT == MVT::v16i32) { - if (unsigned Reg = State.AllocateReg(Hexagon::V0)) { - State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); - return false; - } - } else if (LocVT == MVT::v32i32) { - unsigned Req = HST.useHVX128BOps() ? 
Hexagon::V0 : Hexagon::W0; - if (unsigned Reg = State.AllocateReg(Req)) { - State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); - return false; - } - } else if (LocVT == MVT::v64i32) { - if (unsigned Reg = State.AllocateReg(Hexagon::W0)) { - State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); - return false; - } - } - - return true; -} void HexagonTargetLowering::promoteLdStType(MVT VT, MVT PromotedLdStVT) { if (VT != PromotedLdStVT) { @@ -558,11 +184,14 @@ static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst, bool HexagonTargetLowering::CanLowerReturn( - CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg, + CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const { SmallVector<CCValAssign, 16> RVLocs; - CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context); + CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context); + + if (MF.getSubtarget<HexagonSubtarget>().useHVXOps()) + return CCInfo.CheckReturn(Outs, RetCC_Hexagon_HVX); return CCInfo.CheckReturn(Outs, RetCC_Hexagon); } @@ -571,7 +200,7 @@ HexagonTargetLowering::CanLowerReturn( // the value is stored in memory pointed by a pointer passed by caller. SDValue HexagonTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, - bool isVarArg, + bool IsVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, const SDLoc &dl, SelectionDAG &DAG) const { @@ -579,11 +208,14 @@ HexagonTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, SmallVector<CCValAssign, 16> RVLocs; // CCState - Info about the registers and stack slot. - CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, + CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs, *DAG.getContext()); // Analyze return values of ISD::RET - CCInfo.AnalyzeReturn(Outs, RetCC_Hexagon); + if (Subtarget.useHVXOps()) + CCInfo.AnalyzeReturn(Outs, RetCC_Hexagon_HVX); + else + CCInfo.AnalyzeReturn(Outs, RetCC_Hexagon); SDValue Flag; SmallVector<SDValue, 4> RetOps(1, Chain); @@ -624,17 +256,20 @@ bool HexagonTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const { /// being lowered. Returns a SDNode with the same number of values as the /// ISD::CALL. SDValue HexagonTargetLowering::LowerCallResult( - SDValue Chain, SDValue Glue, CallingConv::ID CallConv, bool isVarArg, + SDValue Chain, SDValue Glue, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, const SmallVectorImpl<SDValue> &OutVals, SDValue Callee) const { // Assign locations to each value returned by this call. SmallVector<CCValAssign, 16> RVLocs; - CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, + CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs, *DAG.getContext()); - CCInfo.AnalyzeCallResult(Ins, RetCC_Hexagon); + if (Subtarget.useHVXOps()) + CCInfo.AnalyzeCallResult(Ins, RetCC_Hexagon_HVX); + else + CCInfo.AnalyzeCallResult(Ins, RetCC_Hexagon); // Copy all of the result registers out of their specified physreg. 
for (unsigned i = 0; i != RVLocs.size(); ++i) { @@ -683,67 +318,57 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; SDValue Chain = CLI.Chain; SDValue Callee = CLI.Callee; - bool &IsTailCall = CLI.IsTailCall; CallingConv::ID CallConv = CLI.CallConv; bool IsVarArg = CLI.IsVarArg; bool DoesNotReturn = CLI.DoesNotReturn; - bool IsStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet(); + bool IsStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet(); MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo &MFI = MF.getFrameInfo(); auto PtrVT = getPointerTy(MF.getDataLayout()); - // Check for varargs. - unsigned NumNamedVarArgParams = -1U; - if (GlobalAddressSDNode *GAN = dyn_cast<GlobalAddressSDNode>(Callee)) { - const GlobalValue *GV = GAN->getGlobal(); - Callee = DAG.getTargetGlobalAddress(GV, dl, MVT::i32); - if (const Function* F = dyn_cast<Function>(GV)) { - // If a function has zero args and is a vararg function, that's - // disallowed so it must be an undeclared function. Do not assume - // varargs if the callee is undefined. - if (F->isVarArg() && F->getFunctionType()->getNumParams() != 0) - NumNamedVarArgParams = F->getFunctionType()->getNumParams(); - } - } + unsigned NumParams = CLI.CS.getInstruction() + ? CLI.CS.getFunctionType()->getNumParams() + : 0; + if (GlobalAddressSDNode *GAN = dyn_cast<GlobalAddressSDNode>(Callee)) + Callee = DAG.getTargetGlobalAddress(GAN->getGlobal(), dl, MVT::i32); // Analyze operands of the call, assigning locations to each operand. SmallVector<CCValAssign, 16> ArgLocs; - HexagonCCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs, - *DAG.getContext(), NumNamedVarArgParams); + HexagonCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext(), + NumParams); - if (IsVarArg) - CCInfo.AnalyzeCallOperands(Outs, CC_Hexagon_VarArg); + if (Subtarget.useHVXOps()) + CCInfo.AnalyzeCallOperands(Outs, CC_Hexagon_HVX); else CCInfo.AnalyzeCallOperands(Outs, CC_Hexagon); auto Attr = MF.getFunction().getFnAttribute("disable-tail-calls"); if (Attr.getValueAsString() == "true") - IsTailCall = false; + CLI.IsTailCall = false; - if (IsTailCall) { + if (CLI.IsTailCall) { bool StructAttrFlag = MF.getFunction().hasStructRetAttr(); - IsTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, - IsVarArg, IsStructRet, - StructAttrFlag, - Outs, OutVals, Ins, DAG); + CLI.IsTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, + IsVarArg, IsStructRet, StructAttrFlag, Outs, + OutVals, Ins, DAG); for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; if (VA.isMemLoc()) { - IsTailCall = false; + CLI.IsTailCall = false; break; } } - DEBUG(dbgs() << (IsTailCall ? "Eligible for Tail Call\n" - : "Argument must be passed on stack. " - "Not eligible for Tail Call\n")); + LLVM_DEBUG(dbgs() << (CLI.IsTailCall ? "Eligible for Tail Call\n" + : "Argument must be passed on stack. " + "Not eligible for Tail Call\n")); } // Get a count of how many bytes are to be pushed on the stack. 
unsigned NumBytes = CCInfo.getNextStackOffset(); SmallVector<std::pair<unsigned, SDValue>, 16> RegsToPass; SmallVector<SDValue, 8> MemOpChains; - auto &HRI = *Subtarget.getRegisterInfo(); + const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo(); SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, HRI.getStackRegister(), PtrVT); @@ -789,7 +414,7 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, VA.getLocVT().getStoreSizeInBits() >> 3); if (Flags.isByVal()) { // The argument is a struct passed by value. According to LLVM, "Arg" - // is is pointer. + // is a pointer. MemOpChains.push_back(CreateCopyOfByValArgument(Arg, MemAddr, Chain, Flags, DAG, dl)); } else { @@ -807,14 +432,10 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); } - if (NeedsArgAlign && Subtarget.hasV60TOps()) { - DEBUG(dbgs() << "Function needs byte stack align due to call args\n"); - // V6 vectors passed by value have 64 or 128 byte alignment depending - // on whether we are 64 byte vector mode or 128 byte. - bool UseHVX128B = Subtarget.useHVX128BOps(); - assert(Subtarget.useHVXOps()); - const unsigned ObjAlign = UseHVX128B ? 128 : 64; - LargestAlignSeen = std::max(LargestAlignSeen, ObjAlign); + if (NeedsArgAlign && Subtarget.hasV60Ops()) { + LLVM_DEBUG(dbgs() << "Function needs byte stack align due to call args\n"); + unsigned VecAlign = HRI.getSpillAlignment(Hexagon::HvxVRRegClass); + LargestAlignSeen = std::max(LargestAlignSeen, VecAlign); MFI.ensureMaxAlignment(LargestAlignSeen); } // Transform all store nodes into one single node because all store @@ -823,7 +444,7 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); SDValue Glue; - if (!IsTailCall) { + if (!CLI.IsTailCall) { Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl); Glue = Chain.getValue(1); } @@ -832,7 +453,7 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // chain and flag operands which copy the outgoing args into registers. // The Glue is necessary since all emitted instructions must be // stuck together. - if (!IsTailCall) { + if (!CLI.IsTailCall) { for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, RegsToPass[i].second, Glue); @@ -891,7 +512,7 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, if (Glue.getNode()) Ops.push_back(Glue); - if (IsTailCall) { + if (CLI.IsTailCall) { MFI.setHasTailCall(); return DAG.getNode(HexagonISD::TC_RETURN, dl, NodeTys, Ops); } @@ -916,66 +537,36 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, InVals, OutVals, Callee); } -static bool getIndexedAddressParts(SDNode *Ptr, EVT VT, - SDValue &Base, SDValue &Offset, - bool &IsInc, SelectionDAG &DAG) { - if (Ptr->getOpcode() != ISD::ADD) - return false; - - auto &HST = static_cast<const HexagonSubtarget&>(DAG.getSubtarget()); - - bool ValidHVX128BType = - HST.useHVX128BOps() && (VT == MVT::v32i32 || - VT == MVT::v64i16 || VT == MVT::v128i8); - bool ValidHVXType = - HST.useHVX64BOps() && (VT == MVT::v16i32 || - VT == MVT::v32i16 || VT == MVT::v64i8); - - if (ValidHVX128BType || ValidHVXType || VT == MVT::i64 || VT == MVT::i32 || - VT == MVT::i16 || VT == MVT::i8) { - IsInc = (Ptr->getOpcode() == ISD::ADD); - Base = Ptr->getOperand(0); - Offset = Ptr->getOperand(1); - // Ensure that Offset is a constant. 
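// Editorial example, hedged, of the post-indexed pattern this code matches:
//   %v = load i32, i32* %p
//   %p.next = getelementptr i32, i32* %p, i32 1   ; base plus constant 4
// folds into a single post-increment access, roughly "r1 = memw(r0++#4)",
// provided isValidAutoIncImm(i32, 4) accepts the immediate.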
- return isa<ConstantSDNode>(Offset); - } - - return false; -} - -/// getPostIndexedAddressParts - returns true by value, base pointer and -/// offset pointer and addressing mode by reference if this node can be -/// combined with a load / store to form a post-indexed load / store. +/// Returns true by value, base pointer and offset pointer and addressing +/// mode by reference if this node can be combined with a load / store to +/// form a post-indexed load / store. bool HexagonTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, - SDValue &Base, - SDValue &Offset, - ISD::MemIndexedMode &AM, - SelectionDAG &DAG) const -{ - EVT VT; - - if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { - VT = LD->getMemoryVT(); - } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { - VT = ST->getMemoryVT(); - if (ST->getValue().getValueType() == MVT::i64 && ST->isTruncatingStore()) - return false; - } else { + SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, + SelectionDAG &DAG) const { + LSBaseSDNode *LSN = dyn_cast<LSBaseSDNode>(N); + if (!LSN) + return false; + EVT VT = LSN->getMemoryVT(); + if (!VT.isSimple()) + return false; + bool IsLegalType = VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 || + VT == MVT::i64 || VT == MVT::f32 || VT == MVT::f64 || + VT == MVT::v2i16 || VT == MVT::v2i32 || VT == MVT::v4i8 || + VT == MVT::v4i16 || VT == MVT::v8i8 || + Subtarget.isHVXVectorType(VT.getSimpleVT()); + if (!IsLegalType) return false; - } - bool IsInc = false; - bool isLegal = getIndexedAddressParts(Op, VT, Base, Offset, IsInc, DAG); - if (isLegal) { - auto &HII = *Subtarget.getInstrInfo(); - int32_t OffsetVal = cast<ConstantSDNode>(Offset.getNode())->getSExtValue(); - if (HII.isValidAutoIncImm(VT, OffsetVal)) { - AM = IsInc ? ISD::POST_INC : ISD::POST_DEC; - return true; - } - } + if (Op->getOpcode() != ISD::ADD) + return false; + Base = Op->getOperand(0); + Offset = Op->getOperand(1); + if (!isa<ConstantSDNode>(Offset.getNode())) + return false; + AM = ISD::POST_INC; - return false; + int32_t V = cast<ConstantSDNode>(Offset.getNode())->getSExtValue(); + return Subtarget.getInstrInfo()->isValidAutoIncImm(VT, V); } SDValue @@ -1080,7 +671,7 @@ HexagonTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, if (A == 0) A = HFI.getStackAlignment(); - DEBUG({ + LLVM_DEBUG({ dbgs () << __func__ << " Align: " << A << " Size: "; Size.getNode()->dump(&DAG); dbgs() << "\n"; @@ -1095,20 +686,22 @@ HexagonTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, } SDValue HexagonTargetLowering::LowerFormalArguments( - SDValue Chain, CallingConv::ID CallConv, bool isVarArg, + SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo &MFI = MF.getFrameInfo(); - MachineRegisterInfo &RegInfo = MF.getRegInfo(); - auto &FuncInfo = *MF.getInfo<HexagonMachineFunctionInfo>(); + MachineRegisterInfo &MRI = MF.getRegInfo(); // Assign locations to all of the incoming arguments. 
SmallVector<CCValAssign, 16> ArgLocs; - CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs, - *DAG.getContext()); + HexagonCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext(), + MF.getFunction().getFunctionType()->getNumParams()); - CCInfo.AnalyzeFormalArguments(Ins, CC_Hexagon); + if (Subtarget.useHVXOps()) + CCInfo.AnalyzeFormalArguments(Ins, CC_Hexagon_HVX); + else + CCInfo.AnalyzeFormalArguments(Ins, CC_Hexagon); // For LLVM, in the case when returning a struct by value (>8byte), // the first argument is a pointer that points to the location on caller's @@ -1117,110 +710,62 @@ SDValue HexagonTargetLowering::LowerFormalArguments( // equal to) 8 bytes. If not, no address will be passed into callee and // callee return the result direclty through R0/R1. - SmallVector<SDValue, 8> MemOps; + auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>(); for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; ISD::ArgFlagsTy Flags = Ins[i].Flags; - unsigned ObjSize; - unsigned StackLocation; - int FI; - - if ( (VA.isRegLoc() && !Flags.isByVal()) - || (VA.isRegLoc() && Flags.isByVal() && Flags.getByValSize() > 8)) { - // Arguments passed in registers - // 1. int, long long, ptr args that get allocated in register. - // 2. Large struct that gets an register to put its address in. - EVT RegVT = VA.getLocVT(); - if (RegVT == MVT::i8 || RegVT == MVT::i16 || - RegVT == MVT::i32 || RegVT == MVT::f32) { - unsigned VReg = - RegInfo.createVirtualRegister(&Hexagon::IntRegsRegClass); - RegInfo.addLiveIn(VA.getLocReg(), VReg); - if (VA.getLocInfo() == CCValAssign::BCvt) - RegVT = VA.getValVT(); - SDValue Copy = DAG.getCopyFromReg(Chain, dl, VReg, RegVT); - // Treat values of type MVT::i1 specially: they are passed in - // registers of type i32, but they need to remain as values of - // type i1 for consistency of the argument lowering. - if (VA.getValVT() == MVT::i1) { - // Generate a copy into a predicate register and use the value - // of the register as the "InVal". 
- unsigned PReg = - RegInfo.createVirtualRegister(&Hexagon::PredRegsRegClass); - SDNode *T = DAG.getMachineNode(Hexagon::C2_tfrrp, dl, MVT::i1, - Copy.getValue(0)); - Copy = DAG.getCopyToReg(Copy.getValue(1), dl, PReg, SDValue(T, 0)); - Copy = DAG.getCopyFromReg(Copy, dl, PReg, MVT::i1); - } - InVals.push_back(Copy); - Chain = Copy.getValue(1); - } else if (RegVT == MVT::i64 || RegVT == MVT::f64) { - unsigned VReg = - RegInfo.createVirtualRegister(&Hexagon::DoubleRegsRegClass); - RegInfo.addLiveIn(VA.getLocReg(), VReg); - if (VA.getLocInfo() == CCValAssign::BCvt) - RegVT = VA.getValVT(); - InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT)); - - // Single Vector - } else if ((RegVT == MVT::v16i32 || - RegVT == MVT::v32i16 || RegVT == MVT::v64i8)) { - unsigned VReg = - RegInfo.createVirtualRegister(&Hexagon::HvxVRRegClass); - RegInfo.addLiveIn(VA.getLocReg(), VReg); - InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT)); - } else if (Subtarget.useHVX128BOps() && - ((RegVT == MVT::v32i32 || - RegVT == MVT::v64i16 || RegVT == MVT::v128i8))) { - unsigned VReg = - RegInfo.createVirtualRegister(&Hexagon::HvxVRRegClass); - RegInfo.addLiveIn(VA.getLocReg(), VReg); - InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT)); - - // Double Vector - } else if ((RegVT == MVT::v32i32 || - RegVT == MVT::v64i16 || RegVT == MVT::v128i8)) { - unsigned VReg = - RegInfo.createVirtualRegister(&Hexagon::HvxWRRegClass); - RegInfo.addLiveIn(VA.getLocReg(), VReg); - InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT)); - } else if (Subtarget.useHVX128BOps() && - ((RegVT == MVT::v64i32 || - RegVT == MVT::v128i16 || RegVT == MVT::v256i8))) { - unsigned VReg = - RegInfo.createVirtualRegister(&Hexagon::HvxWRRegClass); - RegInfo.addLiveIn(VA.getLocReg(), VReg); - InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT)); - } else if (RegVT == MVT::v512i1 || RegVT == MVT::v1024i1) { - assert(0 && "need to support VecPred regs"); - unsigned VReg = - RegInfo.createVirtualRegister(&Hexagon::HvxQRRegClass); - RegInfo.addLiveIn(VA.getLocReg(), VReg); - InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT)); + bool ByVal = Flags.isByVal(); + + // Arguments passed in registers: + // 1. 32- and 64-bit values and HVX vectors are passed directly, + // 2. Large structs are passed via an address, and the address is + // passed in a register. + if (VA.isRegLoc() && ByVal && Flags.getByValSize() <= 8) + llvm_unreachable("ByValSize must be bigger than 8 bytes"); + + bool InReg = VA.isRegLoc() && + (!ByVal || (ByVal && Flags.getByValSize() > 8)); + + if (InReg) { + MVT RegVT = VA.getLocVT(); + if (VA.getLocInfo() == CCValAssign::BCvt) + RegVT = VA.getValVT(); + + const TargetRegisterClass *RC = getRegClassFor(RegVT); + unsigned VReg = MRI.createVirtualRegister(RC); + SDValue Copy = DAG.getCopyFromReg(Chain, dl, VReg, RegVT); + + // Treat values of type MVT::i1 specially: they are passed in + // registers of type i32, but they need to remain as values of + // type i1 for consistency of the argument lowering. 
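// Editorial note on the i1 handling just below: the AND with 1 keeps only
// bit 0 of the incoming i32 register, so stale upper bits cannot leak
// into the rebuilt boolean before the SETNE-against-zero compare.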
+ if (VA.getValVT() == MVT::i1) { + assert(RegVT.getSizeInBits() <= 32); + SDValue T = DAG.getNode(ISD::AND, dl, RegVT, + Copy, DAG.getConstant(1, dl, RegVT)); + Copy = DAG.getSetCC(dl, MVT::i1, T, DAG.getConstant(0, dl, RegVT), + ISD::SETNE); } else { - assert (0); +#ifndef NDEBUG + unsigned RegSize = RegVT.getSizeInBits(); + assert(RegSize == 32 || RegSize == 64 || + Subtarget.isHVXVectorType(RegVT)); +#endif } - } else if (VA.isRegLoc() && Flags.isByVal() && Flags.getByValSize() <= 8) { - assert (0 && "ByValSize must be bigger than 8 bytes"); + InVals.push_back(Copy); + MRI.addLiveIn(VA.getLocReg(), VReg); } else { - // Sanity check. - assert(VA.isMemLoc()); - - if (Flags.isByVal()) { - // If it's a byval parameter, then we need to compute the - // "real" size, not the size of the pointer. - ObjSize = Flags.getByValSize(); - } else { - ObjSize = VA.getLocVT().getStoreSizeInBits() >> 3; - } + assert(VA.isMemLoc() && "Argument should be passed in memory"); - StackLocation = HEXAGON_LRFP_SIZE + VA.getLocMemOffset(); - // Create the frame index object for this incoming parameter... - FI = MFI.CreateFixedObject(ObjSize, StackLocation, true); + // If it's a byval parameter, then we need to compute the + // "real" size, not the size of the pointer. + unsigned ObjSize = Flags.isByVal() + ? Flags.getByValSize() + : VA.getLocVT().getStoreSizeInBits() / 8; - // Create the SelectionDAG nodes cordl, responding to a load - // from this parameter. + // Create the frame index object for this incoming parameter. + int Offset = HEXAGON_LRFP_SIZE + VA.getLocMemOffset(); + int FI = MFI.CreateFixedObject(ObjSize, Offset, true); SDValue FIN = DAG.getFrameIndex(FI, MVT::i32); if (Flags.isByVal()) { @@ -1229,22 +774,19 @@ SDValue HexagonTargetLowering::LowerFormalArguments( // location. InVals.push_back(FIN); } else { - InVals.push_back( - DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo())); + SDValue L = DAG.getLoad(VA.getValVT(), dl, Chain, FIN, + MachinePointerInfo::getFixedStack(MF, FI, 0)); + InVals.push_back(L); } } } - if (!MemOps.empty()) - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps); - if (isVarArg) { + if (IsVarArg) { // This will point to the next argument passed via stack. - int FrameIndex = MFI.CreateFixedObject(Hexagon_PointerSize, - HEXAGON_LRFP_SIZE + - CCInfo.getNextStackOffset(), - true); - FuncInfo.setVarArgsFrameIndex(FrameIndex); + int Offset = HEXAGON_LRFP_SIZE + CCInfo.getNextStackOffset(); + int FI = MFI.CreateFixedObject(Hexagon_PointerSize, Offset, true); + HMFI.setVarArgsFrameIndex(FI); } return Chain; @@ -1262,66 +804,62 @@ HexagonTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { MachinePointerInfo(SV)); } -static bool isSExtFree(SDValue N) { - // A sign-extend of a truncate of a sign-extend is free. - if (N.getOpcode() == ISD::TRUNCATE && - N.getOperand(0).getOpcode() == ISD::AssertSext) - return true; - // We have sign-extended loads. 
- if (N.getOpcode() == ISD::LOAD) - return true; - return false; -} - SDValue HexagonTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { - SDLoc dl(Op); - + const SDLoc &dl(Op); SDValue LHS = Op.getOperand(0); SDValue RHS = Op.getOperand(1); - if (Subtarget.useHVXOps() && Subtarget.isHVXVectorType(ty(LHS))) - return LowerHvxSetCC(Op, DAG); - - SDValue Cmp = Op.getOperand(2); - ISD::CondCode CC = cast<CondCodeSDNode>(Cmp)->get(); - - EVT VT = Op.getValueType(); - EVT LHSVT = LHS.getValueType(); - EVT RHSVT = RHS.getValueType(); + ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); + MVT ResTy = ty(Op); + MVT OpTy = ty(LHS); - if (LHSVT == MVT::v2i16) { - assert(ISD::isSignedIntSetCC(CC) || ISD::isUnsignedIntSetCC(CC)); - unsigned ExtOpc = ISD::isSignedIntSetCC(CC) ? ISD::SIGN_EXTEND - : ISD::ZERO_EXTEND; - SDValue LX = DAG.getNode(ExtOpc, dl, MVT::v2i32, LHS); - SDValue RX = DAG.getNode(ExtOpc, dl, MVT::v2i32, RHS); - SDValue SC = DAG.getNode(ISD::SETCC, dl, MVT::v2i1, LX, RX, Cmp); - return SC; + if (OpTy == MVT::v2i16 || OpTy == MVT::v4i8) { + MVT ElemTy = OpTy.getVectorElementType(); + assert(ElemTy.isScalarInteger()); + MVT WideTy = MVT::getVectorVT(MVT::getIntegerVT(2*ElemTy.getSizeInBits()), + OpTy.getVectorNumElements()); + return DAG.getSetCC(dl, ResTy, + DAG.getSExtOrTrunc(LHS, SDLoc(LHS), WideTy), + DAG.getSExtOrTrunc(RHS, SDLoc(RHS), WideTy), CC); } // Treat all other vector types as legal. - if (VT.isVector()) + if (ResTy.isVector()) return Op; - // Equals and not equals should use sign-extend, not zero-extend, since - // we can represent small negative values in the compare instructions. + // Comparisons of short integers should use sign-extend, not zero-extend, + // since we can represent small negative values in the compare instructions. // The LLVM default is to use zero-extend arbitrarily in these cases. - if ((CC == ISD::SETEQ || CC == ISD::SETNE) && - (RHSVT == MVT::i8 || RHSVT == MVT::i16) && - (LHSVT == MVT::i8 || LHSVT == MVT::i16)) { - ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS); - if (C && C->getAPIntValue().isNegative()) { - LHS = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, LHS); - RHS = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, RHS); - return DAG.getNode(ISD::SETCC, dl, Op.getValueType(), - LHS, RHS, Op.getOperand(2)); - } - if (isSExtFree(LHS) || isSExtFree(RHS)) { - LHS = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, LHS); - RHS = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, RHS); - return DAG.getNode(ISD::SETCC, dl, Op.getValueType(), - LHS, RHS, Op.getOperand(2)); + auto isSExtFree = [this](SDValue N) { + switch (N.getOpcode()) { + case ISD::TRUNCATE: { + // A sign-extend of a truncate of a sign-extend is free. + SDValue Op = N.getOperand(0); + if (Op.getOpcode() != ISD::AssertSext) + return false; + EVT OrigTy = cast<VTSDNode>(Op.getOperand(1))->getVT(); + unsigned ThisBW = ty(N).getSizeInBits(); + unsigned OrigBW = OrigTy.getSizeInBits(); + // The type that was sign-extended to get the AssertSext must be + // narrower than the type of N (so that N has still the same value + // as the original). + return ThisBW >= OrigBW; + } + case ISD::LOAD: + // We have sign-extended loads. 
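// Editorial worked example for the TRUNCATE case above: given
// (AssertSext i32, i16) truncated to i16, ThisBW (16) >= OrigBW (16), so
// the low bits are already sign bits and re-extending to i32 reproduces
// the original value exactly; truncating to i8 instead (8 < 16) could
// lose the sign, so that is not considered free. Loads qualify because
// Hexagon has natively sign-extending loads.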
+ return true; } + return false; + }; + + if (OpTy == MVT::i8 || OpTy == MVT::i16) { + ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS); + bool IsNegative = C && C->getAPIntValue().isNegative(); + if (IsNegative || isSExtFree(LHS) || isSExtFree(RHS)) + return DAG.getSetCC(dl, ResTy, + DAG.getSExtOrTrunc(LHS, SDLoc(LHS), MVT::i32), + DAG.getSExtOrTrunc(RHS, SDLoc(RHS), MVT::i32), CC); } + return SDValue(); } @@ -1393,8 +931,7 @@ HexagonTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const { else if (isVTi1Type) T = DAG.getTargetConstantPool(CVal, ValTy, Align, Offset, TF); else - T = DAG.getTargetConstantPool(CPN->getConstVal(), ValTy, Align, Offset, - TF); + T = DAG.getTargetConstantPool(CPN->getConstVal(), ValTy, Align, Offset, TF); assert(cast<ConstantPoolSDNode>(T)->getTargetFlags() == TF && "Inconsistent target flag encountered"); @@ -1480,7 +1017,7 @@ HexagonTargetLowering::LowerGLOBALADDRESS(SDValue Op, SelectionDAG &DAG) const { if (RM == Reloc::Static) { SDValue GA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, Offset); const GlobalObject *GO = GV->getBaseObject(); - if (GO && HLOF.isGlobalInSmallSection(GO, HTM)) + if (GO && Subtarget.useSmallData() && HLOF.isGlobalInSmallSection(GO, HTM)) return DAG.getNode(HexagonISD::CONST32_GP, dl, PtrVT, GA); return DAG.getNode(HexagonISD::CONST32, dl, PtrVT, GA); } @@ -1688,13 +1225,15 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, const HexagonSubtarget &ST) : TargetLowering(TM), HTM(static_cast<const HexagonTargetMachine&>(TM)), Subtarget(ST) { - bool IsV4 = !Subtarget.hasV5TOps(); + bool IsV4 = !Subtarget.hasV5Ops(); auto &HRI = *Subtarget.getRegisterInfo(); setPrefLoopAlignment(4); setPrefFunctionAlignment(4); setMinFunctionAlignment(2); setStackPointerRegisterToSaveRestore(HRI.getStackRegister()); + setBooleanContents(TargetLoweringBase::UndefinedBooleanContent); + setBooleanVectorContents(TargetLoweringBase::UndefinedBooleanContent); setMaxAtomicSizeInBitsSupported(64); setMinCmpXchgSizeInBits(32); @@ -1728,45 +1267,11 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, addRegisterClass(MVT::v4i16, &Hexagon::DoubleRegsRegClass); addRegisterClass(MVT::v2i32, &Hexagon::DoubleRegsRegClass); - if (Subtarget.hasV5TOps()) { + if (Subtarget.hasV5Ops()) { addRegisterClass(MVT::f32, &Hexagon::IntRegsRegClass); addRegisterClass(MVT::f64, &Hexagon::DoubleRegsRegClass); } - if (Subtarget.hasV60TOps()) { - if (Subtarget.useHVX64BOps()) { - addRegisterClass(MVT::v64i8, &Hexagon::HvxVRRegClass); - addRegisterClass(MVT::v32i16, &Hexagon::HvxVRRegClass); - addRegisterClass(MVT::v16i32, &Hexagon::HvxVRRegClass); - addRegisterClass(MVT::v128i8, &Hexagon::HvxWRRegClass); - addRegisterClass(MVT::v64i16, &Hexagon::HvxWRRegClass); - addRegisterClass(MVT::v32i32, &Hexagon::HvxWRRegClass); - // These "short" boolean vector types should be legal because - // they will appear as results of vector compares. If they were - // not legal, type legalization would try to make them legal - // and that would require using operations that do not use or - // produce such types. That, in turn, would imply using custom - // nodes, which would be unoptimizable by the DAG combiner. - // The idea is to rely on target-independent operations as much - // as possible. 
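// Editorial example of the rationale above: a vector compare such as
// setcc(v16i32, v16i32) naturally produces a v16i1, so keeping these
// short boolean types legal lets the DAG combiner operate on plain
// target-independent nodes instead of opaque custom ones.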
- addRegisterClass(MVT::v16i1, &Hexagon::HvxQRRegClass); - addRegisterClass(MVT::v32i1, &Hexagon::HvxQRRegClass); - addRegisterClass(MVT::v64i1, &Hexagon::HvxQRRegClass); - addRegisterClass(MVT::v512i1, &Hexagon::HvxQRRegClass); - } else if (Subtarget.useHVX128BOps()) { - addRegisterClass(MVT::v128i8, &Hexagon::HvxVRRegClass); - addRegisterClass(MVT::v64i16, &Hexagon::HvxVRRegClass); - addRegisterClass(MVT::v32i32, &Hexagon::HvxVRRegClass); - addRegisterClass(MVT::v256i8, &Hexagon::HvxWRRegClass); - addRegisterClass(MVT::v128i16, &Hexagon::HvxWRRegClass); - addRegisterClass(MVT::v64i32, &Hexagon::HvxWRRegClass); - addRegisterClass(MVT::v32i1, &Hexagon::HvxQRRegClass); - addRegisterClass(MVT::v64i1, &Hexagon::HvxQRRegClass); - addRegisterClass(MVT::v128i1, &Hexagon::HvxQRRegClass); - addRegisterClass(MVT::v1024i1, &Hexagon::HvxQRRegClass); - } - } - // // Handling of scalar operations. // @@ -1801,13 +1306,16 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, setOperationAction(ISD::BlockAddress, MVT::i32, Custom); // Hexagon needs to optimize cases with negative constants. - setOperationAction(ISD::SETCC, MVT::i8, Custom); - setOperationAction(ISD::SETCC, MVT::i16, Custom); + setOperationAction(ISD::SETCC, MVT::i8, Custom); + setOperationAction(ISD::SETCC, MVT::i16, Custom); + setOperationAction(ISD::SETCC, MVT::v4i8, Custom); + setOperationAction(ISD::SETCC, MVT::v2i16, Custom); // VASTART needs to be custom lowered to use the VarArgsFrameIndex. setOperationAction(ISD::VASTART, MVT::Other, Custom); setOperationAction(ISD::VAEND, MVT::Other, Expand); setOperationAction(ISD::VAARG, MVT::Other, Expand); + setOperationAction(ISD::VACOPY, MVT::Other, Expand); setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); @@ -1819,35 +1327,21 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, setMinimumJumpTableEntries(std::numeric_limits<int>::max()); setOperationAction(ISD::BR_JT, MVT::Other, Expand); - // Hexagon has instructions for add/sub with carry. The problem with - // modeling these instructions is that they produce 2 results: Rdd and Px. - // To model the update of Px, we will have to use Defs[p0..p3] which will - // cause any predicate live range to spill. So, we pretend we dont't have - // these instructions. - setOperationAction(ISD::ADDE, MVT::i8, Expand); - setOperationAction(ISD::ADDE, MVT::i16, Expand); - setOperationAction(ISD::ADDE, MVT::i32, Expand); - setOperationAction(ISD::ADDE, MVT::i64, Expand); - setOperationAction(ISD::SUBE, MVT::i8, Expand); - setOperationAction(ISD::SUBE, MVT::i16, Expand); - setOperationAction(ISD::SUBE, MVT::i32, Expand); - setOperationAction(ISD::SUBE, MVT::i64, Expand); - setOperationAction(ISD::ADDC, MVT::i8, Expand); - setOperationAction(ISD::ADDC, MVT::i16, Expand); - setOperationAction(ISD::ADDC, MVT::i32, Expand); - setOperationAction(ISD::ADDC, MVT::i64, Expand); - setOperationAction(ISD::SUBC, MVT::i8, Expand); - setOperationAction(ISD::SUBC, MVT::i16, Expand); - setOperationAction(ISD::SUBC, MVT::i32, Expand); - setOperationAction(ISD::SUBC, MVT::i64, Expand); - - // Only add and sub that detect overflow are the saturating ones. + setOperationAction(ISD::ABS, MVT::i32, Legal); + setOperationAction(ISD::ABS, MVT::i64, Legal); + + // Hexagon has A4_addp_c and A4_subp_c that take and generate a carry bit, + // but they only operate on i64. 
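// Hedged standalone sketch (plain C++, not LLVM code) of the carry flow
// that ISD::ADDCARRY models and that the i64 custom lowering below can
// map onto an A4_addp_c-style instruction:
// (Sum, CarryOut) = A + B + CarryIn.
#include <cstdint>
#include <cstdio>

static uint64_t addCarry(uint64_t A, uint64_t B, bool CarryIn,
                         bool &CarryOut) {
  uint64_t S = A + B;
  bool C1 = S < A;                    // overflow from A + B
  uint64_t R = S + (CarryIn ? 1 : 0);
  CarryOut = C1 || R < S;             // overflow from adding the carry-in
  return R;
}

int main() {
  bool C = false;
  uint64_t Lo = addCarry(~0ULL, 1, false, C); // wraps to 0, sets carry
  std::printf("lo=%llu carry=%d\n", (unsigned long long)Lo, int(C));
  return 0;
}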
for (MVT VT : MVT::integer_valuetypes()) { - setOperationAction(ISD::UADDO, VT, Expand); - setOperationAction(ISD::SADDO, VT, Expand); - setOperationAction(ISD::USUBO, VT, Expand); - setOperationAction(ISD::SSUBO, VT, Expand); + setOperationAction(ISD::UADDO, VT, Expand); + setOperationAction(ISD::USUBO, VT, Expand); + setOperationAction(ISD::SADDO, VT, Expand); + setOperationAction(ISD::SSUBO, VT, Expand); + setOperationAction(ISD::ADDCARRY, VT, Expand); + setOperationAction(ISD::SUBCARRY, VT, Expand); } + setOperationAction(ISD::ADDCARRY, MVT::i64, Custom); + setOperationAction(ISD::SUBCARRY, MVT::i64, Custom); setOperationAction(ISD::CTLZ, MVT::i8, Promote); setOperationAction(ISD::CTLZ, MVT::i16, Promote); @@ -1865,22 +1359,21 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, setOperationAction(ISD::BITREVERSE, MVT::i64, Legal); setOperationAction(ISD::BSWAP, MVT::i32, Legal); setOperationAction(ISD::BSWAP, MVT::i64, Legal); - setOperationAction(ISD::MUL, MVT::i64, Legal); for (unsigned IntExpOp : - { ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM, - ISD::SDIVREM, ISD::UDIVREM, ISD::ROTL, ISD::ROTR, - ISD::SHL_PARTS, ISD::SRA_PARTS, ISD::SRL_PARTS, - ISD::SMUL_LOHI, ISD::UMUL_LOHI }) { - setOperationAction(IntExpOp, MVT::i32, Expand); - setOperationAction(IntExpOp, MVT::i64, Expand); + {ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM, + ISD::SDIVREM, ISD::UDIVREM, ISD::ROTL, ISD::ROTR, + ISD::SHL_PARTS, ISD::SRA_PARTS, ISD::SRL_PARTS, + ISD::SMUL_LOHI, ISD::UMUL_LOHI}) { + for (MVT VT : MVT::integer_valuetypes()) + setOperationAction(IntExpOp, VT, Expand); } for (unsigned FPExpOp : {ISD::FDIV, ISD::FREM, ISD::FSQRT, ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FCOPYSIGN}) { - setOperationAction(FPExpOp, MVT::f32, Expand); - setOperationAction(FPExpOp, MVT::f64, Expand); + for (MVT VT : MVT::fp_valuetypes()) + setOperationAction(FPExpOp, VT, Expand); } // No extending loads from i32. @@ -1920,10 +1413,9 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, // either "custom" or "legal" for specific cases. static const unsigned VectExpOps[] = { // Integer arithmetic: - ISD::ADD, ISD::SUB, ISD::MUL, ISD::SDIV, ISD::UDIV, - ISD::SREM, ISD::UREM, ISD::SDIVREM, ISD::UDIVREM, ISD::ADDC, - ISD::SUBC, ISD::SADDO, ISD::UADDO, ISD::SSUBO, ISD::USUBO, - ISD::SMUL_LOHI, ISD::UMUL_LOHI, + ISD::ADD, ISD::SUB, ISD::MUL, ISD::SDIV, ISD::UDIV, + ISD::SREM, ISD::UREM, ISD::SDIVREM, ISD::UDIVREM, ISD::SADDO, + ISD::UADDO, ISD::SSUBO, ISD::USUBO, ISD::SMUL_LOHI, ISD::UMUL_LOHI, // Logical/bit: ISD::AND, ISD::OR, ISD::XOR, ISD::ROTL, ISD::ROTR, ISD::CTPOP, ISD::CTLZ, ISD::CTTZ, @@ -1970,16 +1462,16 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, // Extending loads from (native) vectors of i8 into (native) vectors of i16 // are legal. 
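// Editorial example of what the "Legal" markings below allow: IR such as
//   %v = load <4 x i8>, <4 x i8>* %p
//   %w = sext <4 x i8> %v to <4 x i16>
// can be selected as one widening load rather than a load followed by a
// separate extend.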
- setLoadExtAction(ISD::EXTLOAD, MVT::v2i16, MVT::v2i8, Legal); + setLoadExtAction(ISD::EXTLOAD, MVT::v2i16, MVT::v2i8, Legal); setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i16, MVT::v2i8, Legal); setLoadExtAction(ISD::SEXTLOAD, MVT::v2i16, MVT::v2i8, Legal); - setLoadExtAction(ISD::EXTLOAD, MVT::v4i16, MVT::v4i8, Legal); + setLoadExtAction(ISD::EXTLOAD, MVT::v4i16, MVT::v4i8, Legal); setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i16, MVT::v4i8, Legal); setLoadExtAction(ISD::SEXTLOAD, MVT::v4i16, MVT::v4i8, Legal); // Types natively supported: - for (MVT NativeVT : {MVT::v32i1, MVT::v64i1, MVT::v4i8, MVT::v8i8, MVT::v2i16, - MVT::v4i16, MVT::v1i32, MVT::v2i32, MVT::v1i64}) { + for (MVT NativeVT : {MVT::v8i1, MVT::v4i1, MVT::v2i1, MVT::v4i8, + MVT::v8i8, MVT::v2i16, MVT::v4i16, MVT::v2i32}) { setOperationAction(ISD::BUILD_VECTOR, NativeVT, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, NativeVT, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, NativeVT, Custom); @@ -1995,19 +1487,34 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, setOperationAction(ISD::XOR, NativeVT, Legal); } + // Custom lower unaligned loads. + for (MVT VecVT : {MVT::i32, MVT::v4i8, MVT::i64, MVT::v8i8, + MVT::v2i16, MVT::v4i16, MVT::v2i32}) { + setOperationAction(ISD::LOAD, VecVT, Custom); + } + + for (MVT VT : {MVT::v2i16, MVT::v4i8, MVT::v2i32, MVT::v4i16, MVT::v2i32}) { + setCondCodeAction(ISD::SETLT, VT, Expand); + setCondCodeAction(ISD::SETLE, VT, Expand); + setCondCodeAction(ISD::SETULT, VT, Expand); + setCondCodeAction(ISD::SETULE, VT, Expand); + } + + // Custom-lower bitcasts from i8 to v8i1. + setOperationAction(ISD::BITCAST, MVT::i8, Custom); setOperationAction(ISD::SETCC, MVT::v2i16, Custom); setOperationAction(ISD::VSELECT, MVT::v2i16, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i8, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8, Custom); - auto setPromoteTo = [this] (unsigned Opc, MVT FromTy, MVT ToTy) { - setOperationAction(Opc, FromTy, Promote); - AddPromotedToType(Opc, FromTy, ToTy); - }; - // Subtarget-specific operation actions. // - if (Subtarget.hasV5TOps()) { + if (Subtarget.hasV60Ops()) { + setOperationAction(ISD::ROTL, MVT::i32, Custom); + setOperationAction(ISD::ROTL, MVT::i64, Custom); + } + if (Subtarget.hasV5Ops()) { setOperationAction(ISD::FMA, MVT::f64, Expand); setOperationAction(ISD::FADD, MVT::f64, Expand); setOperationAction(ISD::FSUB, MVT::f64, Expand); @@ -2061,71 +1568,14 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, // Handling of indexed loads/stores: default is "expand". // - for (MVT VT : {MVT::i8, MVT::i16, MVT::i32, MVT::i64}) { + for (MVT VT : {MVT::i8, MVT::i16, MVT::i32, MVT::i64, MVT::f32, MVT::f64, + MVT::v2i16, MVT::v2i32, MVT::v4i8, MVT::v4i16, MVT::v8i8}) { setIndexedLoadAction(ISD::POST_INC, VT, Legal); setIndexedStoreAction(ISD::POST_INC, VT, Legal); } - if (Subtarget.useHVXOps()) { - bool Use64b = Subtarget.useHVX64BOps(); - ArrayRef<MVT> LegalV = Use64b ? LegalV64 : LegalV128; - ArrayRef<MVT> LegalW = Use64b ? LegalW64 : LegalW128; - MVT ByteV = Use64b ? MVT::v64i8 : MVT::v128i8; - MVT ByteW = Use64b ? 
MVT::v128i8 : MVT::v256i8; - - setOperationAction(ISD::VECTOR_SHUFFLE, ByteV, Legal); - setOperationAction(ISD::VECTOR_SHUFFLE, ByteW, Legal); - setOperationAction(ISD::CONCAT_VECTORS, ByteW, Legal); - setOperationAction(ISD::AND, ByteV, Legal); - setOperationAction(ISD::OR, ByteV, Legal); - setOperationAction(ISD::XOR, ByteV, Legal); - - for (MVT T : LegalV) { - setIndexedLoadAction(ISD::POST_INC, T, Legal); - setIndexedStoreAction(ISD::POST_INC, T, Legal); - - setOperationAction(ISD::ADD, T, Legal); - setOperationAction(ISD::SUB, T, Legal); - if (T != ByteV) { - setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, T, Legal); - setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, T, Legal); - } - - setOperationAction(ISD::MUL, T, Custom); - setOperationAction(ISD::SETCC, T, Custom); - setOperationAction(ISD::BUILD_VECTOR, T, Custom); - setOperationAction(ISD::INSERT_SUBVECTOR, T, Custom); - setOperationAction(ISD::INSERT_VECTOR_ELT, T, Custom); - setOperationAction(ISD::EXTRACT_SUBVECTOR, T, Custom); - setOperationAction(ISD::EXTRACT_VECTOR_ELT, T, Custom); - if (T != ByteV) - setOperationAction(ISD::ANY_EXTEND_VECTOR_INREG, T, Custom); - } - - for (MVT T : LegalV) { - if (T == ByteV) - continue; - // Promote all shuffles and concats to operate on vectors of bytes. - setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteV); - setPromoteTo(ISD::CONCAT_VECTORS, T, ByteV); - setPromoteTo(ISD::AND, T, ByteV); - setPromoteTo(ISD::OR, T, ByteV); - setPromoteTo(ISD::XOR, T, ByteV); - } - - for (MVT T : LegalW) { - // Custom-lower BUILD_VECTOR for vector pairs. The standard (target- - // independent) handling of it would convert it to a load, which is - // not always the optimal choice. - setOperationAction(ISD::BUILD_VECTOR, T, Custom); - - if (T == ByteW) - continue; - // Promote all shuffles and concats to operate on vectors of bytes. 
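// Editorial note, hedged: the promotions being removed below rewrote, for
// instance, a v32i16 shuffle as a v64i8 one, expanding each mask index i
// into the byte pair {2*i, 2*i+1}; this whole block now lives in the
// initializeHVXLowering() hook invoked further down.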
- setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteW); - setPromoteTo(ISD::CONCAT_VECTORS, T, ByteW); - } - } + if (Subtarget.useHVXOps()) + initializeHVXLowering(); computeRegisterProperties(&HRI); @@ -2195,7 +1645,7 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, setLibcallName(RTLIB::DIV_F32, "__hexagon_divsf3"); } - if (Subtarget.hasV5TOps()) { + if (Subtarget.hasV5Ops()) { if (FastMath) setLibcallName(RTLIB::SQRT_F32, "__hexagon_fast2_sqrtf"); else @@ -2242,6 +1692,8 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const { switch ((HexagonISD::NodeType)Opcode) { + case HexagonISD::ADDC: return "HexagonISD::ADDC"; + case HexagonISD::SUBC: return "HexagonISD::SUBC"; case HexagonISD::ALLOCA: return "HexagonISD::ALLOCA"; case HexagonISD::AT_GOT: return "HexagonISD::AT_GOT"; case HexagonISD::AT_PCREL: return "HexagonISD::AT_PCREL"; @@ -2255,16 +1707,12 @@ const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const { case HexagonISD::CP: return "HexagonISD::CP"; case HexagonISD::DCFETCH: return "HexagonISD::DCFETCH"; case HexagonISD::EH_RETURN: return "HexagonISD::EH_RETURN"; + case HexagonISD::TSTBIT: return "HexagonISD::TSTBIT"; case HexagonISD::EXTRACTU: return "HexagonISD::EXTRACTU"; - case HexagonISD::EXTRACTURP: return "HexagonISD::EXTRACTURP"; case HexagonISD::INSERT: return "HexagonISD::INSERT"; - case HexagonISD::INSERTRP: return "HexagonISD::INSERTRP"; case HexagonISD::JT: return "HexagonISD::JT"; case HexagonISD::RET_FLAG: return "HexagonISD::RET_FLAG"; case HexagonISD::TC_RETURN: return "HexagonISD::TC_RETURN"; - case HexagonISD::VCOMBINE: return "HexagonISD::VCOMBINE"; - case HexagonISD::VPACKE: return "HexagonISD::VPACKE"; - case HexagonISD::VPACKO: return "HexagonISD::VPACKO"; case HexagonISD::VASL: return "HexagonISD::VASL"; case HexagonISD::VASR: return "HexagonISD::VASR"; case HexagonISD::VLSR: return "HexagonISD::VLSR"; @@ -2274,11 +1722,97 @@ const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const { case HexagonISD::VROR: return "HexagonISD::VROR"; case HexagonISD::READCYCLE: return "HexagonISD::READCYCLE"; case HexagonISD::VZERO: return "HexagonISD::VZERO"; + case HexagonISD::VSPLATW: return "HexagonISD::VSPLATW"; + case HexagonISD::D2P: return "HexagonISD::D2P"; + case HexagonISD::P2D: return "HexagonISD::P2D"; + case HexagonISD::V2Q: return "HexagonISD::V2Q"; + case HexagonISD::Q2V: return "HexagonISD::Q2V"; + case HexagonISD::QCAT: return "HexagonISD::QCAT"; + case HexagonISD::QTRUE: return "HexagonISD::QTRUE"; + case HexagonISD::QFALSE: return "HexagonISD::QFALSE"; + case HexagonISD::TYPECAST: return "HexagonISD::TYPECAST"; + case HexagonISD::VALIGN: return "HexagonISD::VALIGN"; + case HexagonISD::VALIGNADDR: return "HexagonISD::VALIGNADDR"; case HexagonISD::OP_END: break; } return nullptr; } +// Bit-reverse Load Intrinsic: Check if the instruction is a bit reverse load +// intrinsic. +static bool isBrevLdIntrinsic(const Value *Inst) { + unsigned ID = cast<IntrinsicInst>(Inst)->getIntrinsicID(); + return (ID == Intrinsic::hexagon_L2_loadrd_pbr || + ID == Intrinsic::hexagon_L2_loadri_pbr || + ID == Intrinsic::hexagon_L2_loadrh_pbr || + ID == Intrinsic::hexagon_L2_loadruh_pbr || + ID == Intrinsic::hexagon_L2_loadrb_pbr || + ID == Intrinsic::hexagon_L2_loadrub_pbr); +} + +// Bit-reverse Load Intrinsic :Crawl up and figure out the object from previous +// instruction. 
So far we only handle bitcast, extract value and bit reverse +// load intrinsic instructions. Should we handle CGEP ? +static Value *getBrevLdObject(Value *V) { + if (Operator::getOpcode(V) == Instruction::ExtractValue || + Operator::getOpcode(V) == Instruction::BitCast) + V = cast<Operator>(V)->getOperand(0); + else if (isa<IntrinsicInst>(V) && isBrevLdIntrinsic(V)) + V = cast<Instruction>(V)->getOperand(0); + return V; +} + +// Bit-reverse Load Intrinsic: For a PHI Node return either an incoming edge or +// a back edge. If the back edge comes from the intrinsic itself, the incoming +// edge is returned. +static Value *returnEdge(const PHINode *PN, Value *IntrBaseVal) { + const BasicBlock *Parent = PN->getParent(); + int Idx = -1; + for (unsigned i = 0, e = PN->getNumIncomingValues(); i < e; ++i) { + BasicBlock *Blk = PN->getIncomingBlock(i); + // Determine if the back edge is originated from intrinsic. + if (Blk == Parent) { + Value *BackEdgeVal = PN->getIncomingValue(i); + Value *BaseVal; + // Loop over till we return the same Value or we hit the IntrBaseVal. + do { + BaseVal = BackEdgeVal; + BackEdgeVal = getBrevLdObject(BackEdgeVal); + } while ((BaseVal != BackEdgeVal) && (IntrBaseVal != BackEdgeVal)); + // If the getBrevLdObject returns IntrBaseVal, we should return the + // incoming edge. + if (IntrBaseVal == BackEdgeVal) + continue; + Idx = i; + break; + } else // Set the node to incoming edge. + Idx = i; + } + assert(Idx >= 0 && "Unexpected index to incoming argument in PHI"); + return PN->getIncomingValue(Idx); +} + +// Bit-reverse Load Intrinsic: Figure out the underlying object the base +// pointer points to, for the bit-reverse load intrinsic. Setting this to +// memoperand might help alias analysis to figure out the dependencies. +static Value *getUnderLyingObjectForBrevLdIntr(Value *V) { + Value *IntrBaseVal = V; + Value *BaseVal; + // Loop over till we return the same Value, implies we either figure out + // the object or we hit a PHI + do { + BaseVal = V; + V = getBrevLdObject(V); + } while (BaseVal != V); + + // Identify the object from PHINode. + if (const PHINode *PN = dyn_cast<PHINode>(V)) + return returnEdge(PN, IntrBaseVal); + // For non PHI nodes, the object is the last value returned by getBrevLdObject + else + return V; +} + /// Given an intrinsic, checks if on the target the intrinsic will need to map /// to a MemIntrinsicNode (touches memory). If this is the case, it returns /// true and store the intrinsic information into the IntrinsicInfo that was @@ -2288,6 +1822,32 @@ bool HexagonTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, MachineFunction &MF, unsigned Intrinsic) const { switch (Intrinsic) { + case Intrinsic::hexagon_L2_loadrd_pbr: + case Intrinsic::hexagon_L2_loadri_pbr: + case Intrinsic::hexagon_L2_loadrh_pbr: + case Intrinsic::hexagon_L2_loadruh_pbr: + case Intrinsic::hexagon_L2_loadrb_pbr: + case Intrinsic::hexagon_L2_loadrub_pbr: { + Info.opc = ISD::INTRINSIC_W_CHAIN; + auto &DL = I.getCalledFunction()->getParent()->getDataLayout(); + auto &Cont = I.getCalledFunction()->getParent()->getContext(); + // The intrinsic function call is of the form { ElTy, i8* } + // @llvm.hexagon.L2.loadXX.pbr(i8*, i32). The pointer and memory access type + // should be derived from ElTy. 
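// Editorial example of the crawl implemented above, hedged: for a loop
//   %p = phi i8* [ %base, %entry ], [ %next, %loop ]
//   %r = call { i32, i8* } @llvm.hexagon.L2.loadri.pbr(i8* %p, i32 %m)
//   %next = extractvalue { i32, i8* } %r, 1
// getBrevLdObject steps %next -> %r -> %p; since the back edge resolves
// to the PHI itself, returnEdge skips it and reports the entry value
// %base as the underlying object recorded in the memoperand.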
+ PointerType *PtrTy = I.getCalledFunction() + ->getReturnType() + ->getContainedType(0) + ->getPointerTo(); + Info.memVT = MVT::getVT(PtrTy->getElementType()); + llvm::Value *BasePtrVal = I.getOperand(0); + Info.ptrVal = getUnderLyingObjectForBrevLdIntr(BasePtrVal); + // The offset value comes through Modifier register. For now, assume the + // offset is 0. + Info.offset = 0; + Info.align = DL.getABITypeAlignment(Info.memVT.getTypeForEVT(Cont)); + Info.flags = MachineMemOperand::MOLoad; + return true; + } case Intrinsic::hexagon_V6_vgathermw: case Intrinsic::hexagon_V6_vgathermw_128B: case Intrinsic::hexagon_V6_vgathermh: @@ -2319,17 +1879,13 @@ bool HexagonTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, } bool HexagonTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const { - EVT MTy1 = EVT::getEVT(Ty1); - EVT MTy2 = EVT::getEVT(Ty2); - if (!MTy1.isSimple() || !MTy2.isSimple()) - return false; - return (MTy1.getSimpleVT() == MVT::i64) && (MTy2.getSimpleVT() == MVT::i32); + return isTruncateFree(EVT::getEVT(Ty1), EVT::getEVT(Ty2)); } bool HexagonTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const { if (!VT1.isSimple() || !VT2.isSimple()) return false; - return (VT1.getSimpleVT() == MVT::i64) && (VT2.getSimpleVT() == MVT::i32); + return VT1.getSimpleVT() == MVT::i64 && VT2.getSimpleVT() == MVT::i32; } bool HexagonTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const { @@ -2372,126 +1928,199 @@ HexagonTargetLowering::getPreferredVectorAction(EVT VT) const { return TargetLoweringBase::TypeSplitVector; } +std::pair<SDValue, int> +HexagonTargetLowering::getBaseAndOffset(SDValue Addr) const { + if (Addr.getOpcode() == ISD::ADD) { + SDValue Op1 = Addr.getOperand(1); + if (auto *CN = dyn_cast<const ConstantSDNode>(Op1.getNode())) + return { Addr.getOperand(0), CN->getSExtValue() }; + } + return { Addr, 0 }; +} + // Lower a vector shuffle (V1, V2, V3). V1 and V2 are the two vectors // to select data from, V3 is the permutation. SDValue HexagonTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { - const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op); - SDValue V1 = Op.getOperand(0); - SDValue V2 = Op.getOperand(1); - SDLoc dl(Op); - EVT VT = Op.getValueType(); + const auto *SVN = cast<ShuffleVectorSDNode>(Op); + ArrayRef<int> AM = SVN->getMask(); + assert(AM.size() <= 8 && "Unexpected shuffle mask"); + unsigned VecLen = AM.size(); - if (V2.isUndef()) - V2 = V1; - - if (SVN->isSplat()) { - int Lane = SVN->getSplatIndex(); - if (Lane == -1) Lane = 0; - - // Test if V1 is a SCALAR_TO_VECTOR. - if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) - return DAG.getNode(HexagonISD::VSPLAT, dl, VT, V1.getOperand(0)); - - // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR - // (and probably will turn into a SCALAR_TO_VECTOR once legalization - // reaches it). 
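The splat test being retired in the old LowerVECTOR_SHUFFLE reduces to: every operand past the first is undef. A standalone sketch of that check, with undef modeled as nullptr (illustrative, not LLVM code):

    #include <vector>

    // A BUILD_VECTOR acts like a SCALAR_TO_VECTOR when all operands
    // except the first are undef (nullptr here).
    static bool isScalarToVector(const std::vector<const void *> &Ops) {
      for (size_t i = 1; i < Ops.size(); ++i)
        if (Ops[i] != nullptr)   // a defined element past index 0
          return false;
      return true;
    }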
- if (Lane == 0 && V1.getOpcode() == ISD::BUILD_VECTOR && - !isa<ConstantSDNode>(V1.getOperand(0))) { - bool IsScalarToVector = true; - for (unsigned i = 1, e = V1.getNumOperands(); i != e; ++i) { - if (!V1.getOperand(i).isUndef()) { - IsScalarToVector = false; - break; - } - } - if (IsScalarToVector) - return DAG.getNode(HexagonISD::VSPLAT, dl, VT, V1.getOperand(0)); + MVT VecTy = ty(Op); + assert(!Subtarget.isHVXVectorType(VecTy, true) && + "HVX shuffles should be legal"); + assert(VecTy.getSizeInBits() <= 64 && "Unexpected vector length"); + + SDValue Op0 = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + const SDLoc &dl(Op); + + // If the inputs are not the same as the output, bail. This is not an + // error situation, but complicates the handling and the default expansion + // (into BUILD_VECTOR) should be adequate. + if (ty(Op0) != VecTy || ty(Op1) != VecTy) + return SDValue(); + + // Normalize the mask so that the first non-negative index comes from + // the first operand. + SmallVector<int,8> Mask(AM.begin(), AM.end()); + unsigned F = llvm::find_if(AM, [](int M) { return M >= 0; }) - AM.data(); + if (F == AM.size()) + return DAG.getUNDEF(VecTy); + if (AM[F] >= int(VecLen)) { + ShuffleVectorSDNode::commuteMask(Mask); + std::swap(Op0, Op1); + } + + // Express the shuffle mask in terms of bytes. + SmallVector<int,8> ByteMask; + unsigned ElemBytes = VecTy.getVectorElementType().getSizeInBits() / 8; + for (unsigned i = 0, e = Mask.size(); i != e; ++i) { + int M = Mask[i]; + if (M < 0) { + for (unsigned j = 0; j != ElemBytes; ++j) + ByteMask.push_back(-1); + } else { + for (unsigned j = 0; j != ElemBytes; ++j) + ByteMask.push_back(M*ElemBytes + j); } - return DAG.getNode(HexagonISD::VSPLAT, dl, VT, - DAG.getConstant(Lane, dl, MVT::i32)); } + assert(ByteMask.size() <= 8); + + // All non-undef (non-negative) indexes are well within [0..127], so they + // fit in a single byte. Build two 64-bit words: + // - MaskIdx where each byte is the corresponding index (for non-negative + // indexes), and 0xFF for negative indexes, and + // - MaskUnd that has 0xFF for each negative index. + uint64_t MaskIdx = 0; + uint64_t MaskUnd = 0; + for (unsigned i = 0, e = ByteMask.size(); i != e; ++i) { + unsigned S = 8*i; + uint64_t M = ByteMask[i] & 0xFF; + if (M == 0xFF) + MaskUnd |= M << S; + MaskIdx |= M << S; + } + + if (ByteMask.size() == 4) { + // Identity. + if (MaskIdx == (0x03020100 | MaskUnd)) + return Op0; + // Byte swap. + if (MaskIdx == (0x00010203 | MaskUnd)) { + SDValue T0 = DAG.getBitcast(MVT::i32, Op0); + SDValue T1 = DAG.getNode(ISD::BSWAP, dl, MVT::i32, T0); + return DAG.getBitcast(VecTy, T1); + } - // FIXME: We need to support more general vector shuffles. See - // below the comment from the ARM backend that deals in the general - // case with the vector shuffles. For now, let expand handle these. - return SDValue(); + // Byte packs. 
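The matching below works by packing the byte-level mask into a 64-bit word and comparing it against fixed patterns, with 0xFF marking undef bytes. A small self-contained demo of that encoding, assuming a v4i8-style input:

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    int main() {
      std::vector<int> Mask = {0, 1, 2, 3};   // element shuffle mask
      unsigned ElemBytes = 1;                 // i8 elements
      std::vector<int> ByteMask;
      for (int M : Mask)
        for (unsigned j = 0; j != ElemBytes; ++j)
          ByteMask.push_back(M < 0 ? -1 : int(M * ElemBytes + j));

      uint64_t MaskIdx = 0, MaskUnd = 0;
      for (unsigned i = 0; i != ByteMask.size(); ++i) {
        uint64_t M = ByteMask[i] & 0xFF;      // -1 becomes 0xFF
        if (M == 0xFF)
          MaskUnd |= M << (8 * i);            // record undef positions
        MaskIdx |= M << (8 * i);
      }
      // Identity if every non-undef byte matches 0x03020100.
      std::printf("%s\n",
                  MaskIdx == (0x03020100 | MaskUnd) ? "identity" : "other");
    }

Run as-is this prints "identity"; replacing the mask with {3, 2, 1, 0} makes it match the byte-swap pattern 0x00010203 instead.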
+ SDValue Concat10 = DAG.getNode(HexagonISD::COMBINE, dl, + typeJoin({ty(Op1), ty(Op0)}), {Op1, Op0}); + if (MaskIdx == (0x06040200 | MaskUnd)) + return getInstr(Hexagon::S2_vtrunehb, dl, VecTy, {Concat10}, DAG); + if (MaskIdx == (0x07050301 | MaskUnd)) + return getInstr(Hexagon::S2_vtrunohb, dl, VecTy, {Concat10}, DAG); + + SDValue Concat01 = DAG.getNode(HexagonISD::COMBINE, dl, + typeJoin({ty(Op0), ty(Op1)}), {Op0, Op1}); + if (MaskIdx == (0x02000604 | MaskUnd)) + return getInstr(Hexagon::S2_vtrunehb, dl, VecTy, {Concat01}, DAG); + if (MaskIdx == (0x03010705 | MaskUnd)) + return getInstr(Hexagon::S2_vtrunohb, dl, VecTy, {Concat01}, DAG); + } + + if (ByteMask.size() == 8) { + // Identity. + if (MaskIdx == (0x0706050403020100ull | MaskUnd)) + return Op0; + // Byte swap. + if (MaskIdx == (0x0001020304050607ull | MaskUnd)) { + SDValue T0 = DAG.getBitcast(MVT::i64, Op0); + SDValue T1 = DAG.getNode(ISD::BSWAP, dl, MVT::i64, T0); + return DAG.getBitcast(VecTy, T1); + } - // If the shuffle is not directly supported and it has 4 elements, use - // the PerfectShuffle-generated table to synthesize it from other shuffles. -} + // Halfword picks. + if (MaskIdx == (0x0d0c050409080100ull | MaskUnd)) + return getInstr(Hexagon::S2_shuffeh, dl, VecTy, {Op1, Op0}, DAG); + if (MaskIdx == (0x0f0e07060b0a0302ull | MaskUnd)) + return getInstr(Hexagon::S2_shuffoh, dl, VecTy, {Op1, Op0}, DAG); + if (MaskIdx == (0x0d0c090805040100ull | MaskUnd)) + return getInstr(Hexagon::S2_vtrunewh, dl, VecTy, {Op1, Op0}, DAG); + if (MaskIdx == (0x0f0e0b0a07060302ull | MaskUnd)) + return getInstr(Hexagon::S2_vtrunowh, dl, VecTy, {Op1, Op0}, DAG); + if (MaskIdx == (0x0706030205040100ull | MaskUnd)) { + VectorPair P = opSplit(Op0, dl, DAG); + return getInstr(Hexagon::S2_packhl, dl, VecTy, {P.second, P.first}, DAG); + } -// If BUILD_VECTOR has same base element repeated several times, -// report true. -static bool isCommonSplatElement(BuildVectorSDNode *BVN) { - unsigned NElts = BVN->getNumOperands(); - SDValue V0 = BVN->getOperand(0); + // Byte packs. + if (MaskIdx == (0x0e060c040a020800ull | MaskUnd)) + return getInstr(Hexagon::S2_shuffeb, dl, VecTy, {Op1, Op0}, DAG); + if (MaskIdx == (0x0f070d050b030901ull | MaskUnd)) + return getInstr(Hexagon::S2_shuffob, dl, VecTy, {Op1, Op0}, DAG); + } - for (unsigned i = 1, e = NElts; i != e; ++i) { - if (BVN->getOperand(i) != V0) - return false; + return SDValue(); +} + +// Create a Hexagon-specific node for shifting a vector by an integer. +SDValue +HexagonTargetLowering::getVectorShiftByInt(SDValue Op, SelectionDAG &DAG) + const { + if (auto *BVN = dyn_cast<BuildVectorSDNode>(Op.getOperand(1).getNode())) { + if (SDValue S = BVN->getSplatValue()) { + unsigned NewOpc; + switch (Op.getOpcode()) { + case ISD::SHL: + NewOpc = HexagonISD::VASL; + break; + case ISD::SRA: + NewOpc = HexagonISD::VASR; + break; + case ISD::SRL: + NewOpc = HexagonISD::VLSR; + break; + default: + llvm_unreachable("Unexpected shift opcode"); + } + return DAG.getNode(NewOpc, SDLoc(Op), ty(Op), Op.getOperand(0), S); + } } - return true; + + return SDValue(); } -// Lower a vector shift. Try to convert -// <VT> = SHL/SRA/SRL <VT> by <VT> to Hexagon specific -// <VT> = SHL/SRA/SRL <VT> by <IT/i32>. 
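The new getVectorShiftByInt helper relies on the fact that shifting every lane by the same splat amount is a single per-lane scalar shift. A standalone per-lane analogue for a v4i16-style value (illustrative name vaslSplat):

    #include <cstdint>

    // Per-lane analogue of HexagonISD::VASL on v4i16 with a splat amount.
    static void vaslSplat(uint16_t (&Lanes)[4], unsigned Amt) {
      for (uint16_t &L : Lanes)
        L = uint16_t(L << Amt);   // each lane shifts by the common amount
    }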
SDValue HexagonTargetLowering::LowerVECTOR_SHIFT(SDValue Op, SelectionDAG &DAG) const { - BuildVectorSDNode *BVN = nullptr; - SDValue V1 = Op.getOperand(0); - SDValue V2 = Op.getOperand(1); - SDValue V3; - SDLoc dl(Op); - EVT VT = Op.getValueType(); + return getVectorShiftByInt(Op, DAG); +} - if ((BVN = dyn_cast<BuildVectorSDNode>(V1.getNode())) && - isCommonSplatElement(BVN)) - V3 = V2; - else if ((BVN = dyn_cast<BuildVectorSDNode>(V2.getNode())) && - isCommonSplatElement(BVN)) - V3 = V1; - else - return SDValue(); +SDValue +HexagonTargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const { + if (isa<ConstantSDNode>(Op.getOperand(1).getNode())) + return Op; + return SDValue(); +} - SDValue CommonSplat = BVN->getOperand(0); - SDValue Result; +SDValue +HexagonTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const { + MVT ResTy = ty(Op); + SDValue InpV = Op.getOperand(0); + MVT InpTy = ty(InpV); + assert(ResTy.getSizeInBits() == InpTy.getSizeInBits()); + const SDLoc &dl(Op); - if (VT.getSimpleVT() == MVT::v4i16) { - switch (Op.getOpcode()) { - case ISD::SRA: - Result = DAG.getNode(HexagonISD::VASR, dl, VT, V3, CommonSplat); - break; - case ISD::SHL: - Result = DAG.getNode(HexagonISD::VASL, dl, VT, V3, CommonSplat); - break; - case ISD::SRL: - Result = DAG.getNode(HexagonISD::VLSR, dl, VT, V3, CommonSplat); - break; - default: - return SDValue(); - } - } else if (VT.getSimpleVT() == MVT::v2i32) { - switch (Op.getOpcode()) { - case ISD::SRA: - Result = DAG.getNode(HexagonISD::VASR, dl, VT, V3, CommonSplat); - break; - case ISD::SHL: - Result = DAG.getNode(HexagonISD::VASL, dl, VT, V3, CommonSplat); - break; - case ISD::SRL: - Result = DAG.getNode(HexagonISD::VLSR, dl, VT, V3, CommonSplat); - break; - default: - return SDValue(); - } - } else { - return SDValue(); + // Handle conversion from i8 to v8i1. + if (ResTy == MVT::v8i1) { + SDValue Sc = DAG.getBitcast(tyScalar(InpTy), InpV); + SDValue Ext = DAG.getZExtOrTrunc(Sc, dl, MVT::i32); + return getInstr(Hexagon::C2_tfrrp, dl, ResTy, Ext, DAG); } - return DAG.getNode(ISD::BITCAST, dl, VT, Result); + return SDValue(); } bool @@ -2509,9 +2138,10 @@ HexagonTargetLowering::getBuildVectorConstInts(ArrayRef<SDValue> Values, Consts[i] = ConstantInt::get(IntTy, 0); continue; } + // Make sure to always cast to IntTy. 
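On plain integers, the "always cast to IntTy" normalization below amounts to truncating to the element width and sign-extending back, which is what getSExtValue() achieves for the ConstantInt rebuild. A standalone sketch (illustrative name castToWidth; assumes 1 <= Bits <= 64):

    #include <cstdint>

    static int64_t castToWidth(int64_t V, unsigned Bits) {
      uint64_t Mask = Bits >= 64 ? ~0ull : (1ull << Bits) - 1;
      uint64_t T = uint64_t(V) & Mask;             // truncate
      if (Bits < 64 && (T & (1ull << (Bits - 1)))) // sign bit set?
        T |= ~Mask;                                // sign-extend
      return int64_t(T);
    }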
if (auto *CN = dyn_cast<ConstantSDNode>(V.getNode())) { const ConstantInt *CI = CN->getConstantIntValue(); - Consts[i] = const_cast<ConstantInt*>(CI); + Consts[i] = ConstantInt::get(IntTy, CI->getValue().getSExtValue()); } else if (auto *CN = dyn_cast<ConstantFPSDNode>(V.getNode())) { const ConstantFP *CF = CN->getConstantFPValue(); APInt A = CF->getValueAPF().bitcastToAPInt(); @@ -2550,8 +2180,8 @@ HexagonTargetLowering::buildVector32(ArrayRef<SDValue> Elem, const SDLoc &dl, Consts[1]->getZExtValue() << 16; return DAG.getBitcast(MVT::v2i16, DAG.getConstant(V, dl, MVT::i32)); } - SDValue N = getNode(Hexagon::A2_combine_ll, dl, MVT::i32, - {Elem[1], Elem[0]}, DAG); + SDValue N = getInstr(Hexagon::A2_combine_ll, dl, MVT::i32, + {Elem[1], Elem[0]}, DAG); return DAG.getBitcast(MVT::v2i16, N); } @@ -2596,7 +2226,7 @@ HexagonTargetLowering::buildVector32(ArrayRef<SDValue> Elem, const SDLoc &dl, SDValue B0 = DAG.getNode(ISD::OR, dl, MVT::i32, {Vs[0], T0}); SDValue B1 = DAG.getNode(ISD::OR, dl, MVT::i32, {Vs[2], T1}); - SDValue R = getNode(Hexagon::A2_combine_ll, dl, MVT::i32, {B1, B0}, DAG); + SDValue R = getInstr(Hexagon::A2_combine_ll, dl, MVT::i32, {B1, B0}, DAG); return DAG.getBitcast(MVT::v4i8, R); } @@ -2651,7 +2281,7 @@ HexagonTargetLowering::buildVector64(ArrayRef<SDValue> Elem, const SDLoc &dl, uint64_t Mask = (ElemTy == MVT::i8) ? 0xFFull : (ElemTy == MVT::i16) ? 0xFFFFull : 0xFFFFFFFFull; for (unsigned i = 0; i != Num; ++i) - Val = (Val << W) | (Consts[i]->getZExtValue() & Mask); + Val = (Val << W) | (Consts[Num-1-i]->getZExtValue() & Mask); SDValue V0 = DAG.getConstant(Val, dl, MVT::i64); return DAG.getBitcast(VecTy, V0); } @@ -2677,8 +2307,56 @@ HexagonTargetLowering::extractVector(SDValue VecV, SDValue IdxV, unsigned VecWidth = VecTy.getSizeInBits(); unsigned ValWidth = ValTy.getSizeInBits(); unsigned ElemWidth = VecTy.getVectorElementType().getSizeInBits(); - assert(VecWidth == 32 || VecWidth == 64); assert((VecWidth % ElemWidth) == 0); + auto *IdxN = dyn_cast<ConstantSDNode>(IdxV); + + // Special case for v{8,4,2}i1 (the only boolean vectors legal in Hexagon + // without any coprocessors). + if (ElemWidth == 1) { + assert(VecWidth == VecTy.getVectorNumElements() && "Sanity failure"); + assert(VecWidth == 8 || VecWidth == 4 || VecWidth == 2); + // Check if this is an extract of the lowest bit. + if (IdxN) { + // Extracting the lowest bit is a no-op, but it changes the type, + // so it must be kept as an operation to avoid errors related to + // type mismatches. + if (IdxN->isNullValue() && ValTy.getSizeInBits() == 1) + return DAG.getNode(HexagonISD::TYPECAST, dl, MVT::i1, VecV); + } + + // If the value extracted is a single bit, use tstbit. + if (ValWidth == 1) { + SDValue A0 = getInstr(Hexagon::C2_tfrpr, dl, MVT::i32, {VecV}, DAG); + SDValue M0 = DAG.getConstant(8 / VecWidth, dl, MVT::i32); + SDValue I0 = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, M0); + return DAG.getNode(HexagonISD::TSTBIT, dl, MVT::i1, A0, I0); + } + + // Each bool vector (v2i1, v4i1, v8i1) always occupies 8 bits in + // a predicate register. The elements of the vector are repeated + // in the register (if necessary) so that the total number is 8. + // The extracted subvector will need to be expanded in such a way. + unsigned Scale = VecWidth / ValWidth; + + // Generate (p2d VecV) >> 8*Idx to move the interesting bytes to + // position 0. 
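A sketch of the shift-and-expand steps below on plain integers; moveBytesToFront and expandBytesToHalves are illustrative names, and the latter mirrors what S2_vsxtbh (expandPredicate) does to 0x00/0xFF predicate bytes:

    #include <cstdint>

    // After P2D, each predicate element occupies whole bytes of an 8-byte
    // value, so a right shift by 8*ByteIdx moves the bytes of interest down
    // to position 0 (assumes the shift amount stays below 64).
    static uint64_t moveBytesToFront(uint64_t P2DVal, unsigned ByteIdx) {
      return P2DVal >> (8 * ByteIdx);
    }

    // Analogue of S2_vsxtbh: sign-extend each of the four low bytes to a
    // halfword; 0x00/0xFF predicate bytes stay 0x0000/0xFFFF.
    static uint64_t expandBytesToHalves(uint32_t V) {
      uint64_t R = 0;
      for (unsigned i = 0; i != 4; ++i) {
        int16_t H = int8_t((V >> (8 * i)) & 0xFF); // sign-extend the byte
        R |= uint64_t(uint16_t(H)) << (16 * i);
      }
      return R;
    }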
+ assert(ty(IdxV) == MVT::i32); + SDValue S0 = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, + DAG.getConstant(8*Scale, dl, MVT::i32)); + SDValue T0 = DAG.getNode(HexagonISD::P2D, dl, MVT::i64, VecV); + SDValue T1 = DAG.getNode(ISD::SRL, dl, MVT::i64, T0, S0); + while (Scale > 1) { + // The longest possible subvector is at most 32 bits, so it is always + // contained in the low subregister. + T1 = DAG.getTargetExtractSubreg(Hexagon::isub_lo, dl, MVT::i32, T1); + T1 = expandPredicate(T1, dl, DAG); + Scale /= 2; + } + + return DAG.getNode(HexagonISD::D2P, dl, ResTy, T1); + } + + assert(VecWidth == 32 || VecWidth == 64); // Cast everything to scalar integer types. MVT ScalarTy = tyScalar(VecTy); @@ -2687,8 +2365,8 @@ HexagonTargetLowering::extractVector(SDValue VecV, SDValue IdxV, SDValue WidthV = DAG.getConstant(ValWidth, dl, MVT::i32); SDValue ExtV; - if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(IdxV)) { - unsigned Off = C->getZExtValue() * ElemWidth; + if (IdxN) { + unsigned Off = IdxN->getZExtValue() * ElemWidth; if (VecWidth == 64 && ValWidth == 32) { assert(Off == 0 || Off == 32); unsigned SubIdx = Off == 0 ? Hexagon::isub_lo : Hexagon::isub_hi; @@ -2707,11 +2385,8 @@ HexagonTargetLowering::extractVector(SDValue VecV, SDValue IdxV, IdxV = DAG.getZExtOrTrunc(IdxV, dl, MVT::i32); SDValue OffV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, DAG.getConstant(ElemWidth, dl, MVT::i32)); - // EXTRACTURP takes width/offset in a 64-bit pair. - SDValue CombV = DAG.getNode(HexagonISD::COMBINE, dl, MVT::i64, - {WidthV, OffV}); - ExtV = DAG.getNode(HexagonISD::EXTRACTURP, dl, ScalarTy, - {VecV, CombV}); + ExtV = DAG.getNode(HexagonISD::EXTRACTU, dl, ScalarTy, + {VecV, WidthV, OffV}); } // Cast ExtV to the requested result type. @@ -2725,6 +2400,33 @@ HexagonTargetLowering::insertVector(SDValue VecV, SDValue ValV, SDValue IdxV, const SDLoc &dl, MVT ValTy, SelectionDAG &DAG) const { MVT VecTy = ty(VecV); + if (VecTy.getVectorElementType() == MVT::i1) { + MVT ValTy = ty(ValV); + assert(ValTy.getVectorElementType() == MVT::i1); + SDValue ValR = DAG.getNode(HexagonISD::P2D, dl, MVT::i64, ValV); + unsigned VecLen = VecTy.getVectorNumElements(); + unsigned Scale = VecLen / ValTy.getVectorNumElements(); + assert(Scale > 1); + + for (unsigned R = Scale; R > 1; R /= 2) { + ValR = contractPredicate(ValR, dl, DAG); + ValR = DAG.getNode(HexagonISD::COMBINE, dl, MVT::i64, + DAG.getUNDEF(MVT::i32), ValR); + } + // The longest possible subvector is at most 32 bits, so it is always + // contained in the low subregister. + ValR = DAG.getTargetExtractSubreg(Hexagon::isub_lo, dl, MVT::i32, ValR); + + unsigned ValBytes = 64 / Scale; + SDValue Width = DAG.getConstant(ValBytes*8, dl, MVT::i32); + SDValue Idx = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, + DAG.getConstant(8, dl, MVT::i32)); + SDValue VecR = DAG.getNode(HexagonISD::P2D, dl, MVT::i64, VecV); + SDValue Ins = DAG.getNode(HexagonISD::INSERT, dl, MVT::i32, + {VecR, ValR, Width, Idx}); + return DAG.getNode(HexagonISD::D2P, dl, VecTy, Ins); + } + unsigned VecWidth = VecTy.getSizeInBits(); unsigned ValWidth = ValTy.getSizeInBits(); assert(VecWidth == 32 || VecWidth == 64); @@ -2752,17 +2454,32 @@ HexagonTargetLowering::insertVector(SDValue VecV, SDValue ValV, SDValue IdxV, if (ty(IdxV) != MVT::i32) IdxV = DAG.getZExtOrTrunc(IdxV, dl, MVT::i32); SDValue OffV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, WidthV); - // INSERTRP takes width/offset in a 64-bit pair. 
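On a plain scalar, the EXTRACTU node used above performs an ordinary field extract: take Width bits starting at bit Off, zero-extended. A standalone sketch (illustrative name extractu):

    #include <cstdint>

    static uint64_t extractu(uint64_t X, unsigned Width, unsigned Off) {
      uint64_t Mask = Width >= 64 ? ~0ull : (1ull << Width) - 1;
      return (X >> Off) & Mask;   // shift field down, mask to Width bits
    }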
- SDValue CombV = DAG.getNode(HexagonISD::COMBINE, dl, MVT::i64, - {WidthV, OffV}); - InsV = DAG.getNode(HexagonISD::INSERTRP, dl, ScalarTy, - {VecV, ValV, CombV}); + InsV = DAG.getNode(HexagonISD::INSERT, dl, ScalarTy, + {VecV, ValV, WidthV, OffV}); } return DAG.getNode(ISD::BITCAST, dl, VecTy, InsV); } SDValue +HexagonTargetLowering::expandPredicate(SDValue Vec32, const SDLoc &dl, + SelectionDAG &DAG) const { + assert(ty(Vec32).getSizeInBits() == 32); + if (isUndef(Vec32)) + return DAG.getUNDEF(MVT::i64); + return getInstr(Hexagon::S2_vsxtbh, dl, MVT::i64, {Vec32}, DAG); +} + +SDValue +HexagonTargetLowering::contractPredicate(SDValue Vec64, const SDLoc &dl, + SelectionDAG &DAG) const { + assert(ty(Vec64).getSizeInBits() == 64); + if (isUndef(Vec64)) + return DAG.getUNDEF(MVT::i32); + return getInstr(Hexagon::S2_vtrunehb, dl, MVT::i32, {Vec64}, DAG); +} + +SDValue HexagonTargetLowering::getZero(const SDLoc &dl, MVT Ty, SelectionDAG &DAG) const { if (Ty.isVector()) { @@ -2784,18 +2501,34 @@ SDValue HexagonTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { MVT VecTy = ty(Op); unsigned BW = VecTy.getSizeInBits(); - - if (Subtarget.useHVXOps() && Subtarget.isHVXVectorType(VecTy, true)) - return LowerHvxBuildVector(Op, DAG); - - if (BW == 32 || BW == 64) { - const SDLoc &dl(Op); - SmallVector<SDValue,8> Ops; - for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) - Ops.push_back(Op.getOperand(i)); - if (BW == 32) - return buildVector32(Ops, dl, VecTy, DAG); + const SDLoc &dl(Op); + SmallVector<SDValue,8> Ops; + for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) + Ops.push_back(Op.getOperand(i)); + + if (BW == 32) + return buildVector32(Ops, dl, VecTy, DAG); + if (BW == 64) return buildVector64(Ops, dl, VecTy, DAG); + + if (VecTy == MVT::v8i1 || VecTy == MVT::v4i1 || VecTy == MVT::v2i1) { + // For each i1 element in the resulting predicate register, put 1 + // shifted by the index of the element into a general-purpose register, + // then or them together and transfer it back into a predicate register. + SDValue Rs[8]; + SDValue Z = getZero(dl, MVT::i32, DAG); + // Always produce 8 bits, repeat inputs if necessary. + unsigned Rep = 8 / VecTy.getVectorNumElements(); + for (unsigned i = 0; i != 8; ++i) { + SDValue S = DAG.getConstant(1ull << i, dl, MVT::i32); + Rs[i] = DAG.getSelect(dl, MVT::i32, Ops[i/Rep], S, Z); + } + for (ArrayRef<SDValue> A(Rs); A.size() != 1; A = A.drop_back(A.size()/2)) { + for (unsigned i = 0, e = A.size()/2; i != e; ++i) + Rs[i] = DAG.getNode(ISD::OR, dl, MVT::i32, Rs[2*i], Rs[2*i+1]); + } + // Move the value directly to a predicate register. + return getInstr(Hexagon::C2_tfrrp, dl, VecTy, {Rs[0]}, DAG); } return SDValue(); @@ -2805,14 +2538,64 @@ SDValue HexagonTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const { MVT VecTy = ty(Op); - assert(!Subtarget.useHVXOps() || !Subtarget.isHVXVectorType(VecTy)); - + const SDLoc &dl(Op); if (VecTy.getSizeInBits() == 64) { assert(Op.getNumOperands() == 2); - return DAG.getNode(HexagonISD::COMBINE, SDLoc(Op), VecTy, Op.getOperand(1), + return DAG.getNode(HexagonISD::COMBINE, dl, VecTy, Op.getOperand(1), Op.getOperand(0)); } + MVT ElemTy = VecTy.getVectorElementType(); + if (ElemTy == MVT::i1) { + assert(VecTy == MVT::v2i1 || VecTy == MVT::v4i1 || VecTy == MVT::v8i1); + MVT OpTy = ty(Op.getOperand(0)); + // Scale is how many times the operands need to be contracted to match + // the representation in the target register. 
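The matching HexagonISD::INSERT operation, generated above for element inserts and again below for the predicate concat case, is a field insert. Sketched standalone on a 64-bit value (illustrative name insertBits):

    #include <cstdint>

    // Replace Width bits of Vec at offset Off with the low Width bits of Val.
    static uint64_t insertBits(uint64_t Vec, uint64_t Val,
                               unsigned Width, unsigned Off) {
      uint64_t Mask = (Width >= 64 ? ~0ull : (1ull << Width) - 1) << Off;
      return (Vec & ~Mask) | ((Val << Off) & Mask);
    }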
+ unsigned Scale = VecTy.getVectorNumElements() / OpTy.getVectorNumElements(); + assert(Scale == Op.getNumOperands() && Scale > 1); + + // First, convert all bool vectors to integers, then generate pairwise + // inserts to form values of doubled length. Up until there are only + // two values left to concatenate, all of these values will fit in a + // 32-bit integer, so keep them as i32 to use 32-bit inserts. + SmallVector<SDValue,4> Words[2]; + unsigned IdxW = 0; + + for (SDValue P : Op.getNode()->op_values()) { + SDValue W = DAG.getNode(HexagonISD::P2D, dl, MVT::i64, P); + for (unsigned R = Scale; R > 1; R /= 2) { + W = contractPredicate(W, dl, DAG); + W = DAG.getNode(HexagonISD::COMBINE, dl, MVT::i64, + DAG.getUNDEF(MVT::i32), W); + } + W = DAG.getTargetExtractSubreg(Hexagon::isub_lo, dl, MVT::i32, W); + Words[IdxW].push_back(W); + } + + while (Scale > 2) { + SDValue WidthV = DAG.getConstant(64 / Scale, dl, MVT::i32); + Words[IdxW ^ 1].clear(); + + for (unsigned i = 0, e = Words[IdxW].size(); i != e; i += 2) { + SDValue W0 = Words[IdxW][i], W1 = Words[IdxW][i+1]; + // Insert W1 into W0 right next to the significant bits of W0. + SDValue T = DAG.getNode(HexagonISD::INSERT, dl, MVT::i32, + {W0, W1, WidthV, WidthV}); + Words[IdxW ^ 1].push_back(T); + } + IdxW ^= 1; + Scale /= 2; + } + + // Another sanity check. At this point there should only be two words + // left, and Scale should be 2. + assert(Scale == 2 && Words[IdxW].size() == 2); + + SDValue WW = DAG.getNode(HexagonISD::COMBINE, dl, MVT::i64, + Words[IdxW][1], Words[IdxW][0]); + return DAG.getNode(HexagonISD::D2P, dl, VecTy, WW); + } + return SDValue(); } @@ -2820,10 +2603,6 @@ SDValue HexagonTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { SDValue Vec = Op.getOperand(0); - MVT VecTy = ty(Vec); - if (Subtarget.useHVXOps() && Subtarget.isHVXVectorType(VecTy)) - return LowerHvxExtractElement(Op, DAG); - MVT ElemTy = ty(Vec).getVectorElementType(); return extractVector(Vec, Op.getOperand(1), SDLoc(Op), ElemTy, ty(Op), DAG); } @@ -2831,31 +2610,20 @@ HexagonTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, SDValue HexagonTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const { - SDValue Vec = Op.getOperand(0); - MVT VecTy = ty(Vec); - if (Subtarget.useHVXOps() && Subtarget.isHVXVectorType(VecTy)) - return LowerHvxExtractSubvector(Op, DAG); - - return extractVector(Vec, Op.getOperand(1), SDLoc(Op), ty(Op), ty(Op), DAG); + return extractVector(Op.getOperand(0), Op.getOperand(1), SDLoc(Op), + ty(Op), ty(Op), DAG); } SDValue HexagonTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { - MVT VecTy = ty(Op); - if (Subtarget.useHVXOps() && Subtarget.isHVXVectorType(VecTy)) - return LowerHvxInsertElement(Op, DAG); - return insertVector(Op.getOperand(0), Op.getOperand(1), Op.getOperand(2), - SDLoc(Op), VecTy.getVectorElementType(), DAG); + SDLoc(Op), ty(Op).getVectorElementType(), DAG); } SDValue HexagonTargetLowering::LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const { - if (Subtarget.useHVXOps() && Subtarget.isHVXVectorType(ty(Op))) - return LowerHvxInsertSubvector(Op, DAG); - SDValue ValV = Op.getOperand(1); return insertVector(Op.getOperand(0), ValV, Op.getOperand(2), SDLoc(Op), ty(ValV), DAG); @@ -2875,6 +2643,109 @@ HexagonTargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const { } SDValue +HexagonTargetLowering::LowerUnalignedLoad(SDValue Op, SelectionDAG &DAG) + const { + LoadSDNode *LN = cast<LoadSDNode>(Op.getNode()); + unsigned HaveAlign 
= LN->getAlignment(); + MVT LoadTy = ty(Op); + unsigned NeedAlign = Subtarget.getTypeAlignment(LoadTy); + if (HaveAlign >= NeedAlign) + return Op; + + const SDLoc &dl(Op); + const DataLayout &DL = DAG.getDataLayout(); + LLVMContext &Ctx = *DAG.getContext(); + unsigned AS = LN->getAddressSpace(); + + // If the load aligning is disabled or the load can be broken up into two + // smaller legal loads, do the default (target-independent) expansion. + bool DoDefault = false; + // Handle it in the default way if this is an indexed load. + if (!LN->isUnindexed()) + DoDefault = true; + + if (!AlignLoads) { + if (allowsMemoryAccess(Ctx, DL, LN->getMemoryVT(), AS, HaveAlign)) + return Op; + DoDefault = true; + } + if (!DoDefault && 2*HaveAlign == NeedAlign) { + // The PartTy is the equivalent of "getLoadableTypeOfSize(HaveAlign)". + MVT PartTy = HaveAlign <= 8 ? MVT::getIntegerVT(8*HaveAlign) + : MVT::getVectorVT(MVT::i8, HaveAlign); + DoDefault = allowsMemoryAccess(Ctx, DL, PartTy, AS, HaveAlign); + } + if (DoDefault) { + std::pair<SDValue, SDValue> P = expandUnalignedLoad(LN, DAG); + return DAG.getMergeValues({P.first, P.second}, dl); + } + + // The code below generates two loads, both aligned as NeedAlign, and + // with the distance of NeedAlign between them. For that to cover the + // bits that need to be loaded (and without overlapping), the size of + // the loads should be equal to NeedAlign. This is true for all loadable + // types, but add an assertion in case something changes in the future. + assert(LoadTy.getSizeInBits() == 8*NeedAlign); + + unsigned LoadLen = NeedAlign; + SDValue Base = LN->getBasePtr(); + SDValue Chain = LN->getChain(); + auto BO = getBaseAndOffset(Base); + unsigned BaseOpc = BO.first.getOpcode(); + if (BaseOpc == HexagonISD::VALIGNADDR && BO.second % LoadLen == 0) + return Op; + + if (BO.second % LoadLen != 0) { + BO.first = DAG.getNode(ISD::ADD, dl, MVT::i32, BO.first, + DAG.getConstant(BO.second % LoadLen, dl, MVT::i32)); + BO.second -= BO.second % LoadLen; + } + SDValue BaseNoOff = (BaseOpc != HexagonISD::VALIGNADDR) + ? 
DAG.getNode(HexagonISD::VALIGNADDR, dl, MVT::i32, BO.first, + DAG.getConstant(NeedAlign, dl, MVT::i32)) + : BO.first; + SDValue Base0 = DAG.getMemBasePlusOffset(BaseNoOff, BO.second, dl); + SDValue Base1 = DAG.getMemBasePlusOffset(BaseNoOff, BO.second+LoadLen, dl); + + MachineMemOperand *WideMMO = nullptr; + if (MachineMemOperand *MMO = LN->getMemOperand()) { + MachineFunction &MF = DAG.getMachineFunction(); + WideMMO = MF.getMachineMemOperand(MMO->getPointerInfo(), MMO->getFlags(), + 2*LoadLen, LoadLen, MMO->getAAInfo(), MMO->getRanges(), + MMO->getSyncScopeID(), MMO->getOrdering(), + MMO->getFailureOrdering()); + } + + SDValue Load0 = DAG.getLoad(LoadTy, dl, Chain, Base0, WideMMO); + SDValue Load1 = DAG.getLoad(LoadTy, dl, Chain, Base1, WideMMO); + + SDValue Aligned = DAG.getNode(HexagonISD::VALIGN, dl, LoadTy, + {Load1, Load0, BaseNoOff.getOperand(0)}); + SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + Load0.getValue(1), Load1.getValue(1)); + SDValue M = DAG.getMergeValues({Aligned, NewChain}, dl); + return M; +} + +SDValue +HexagonTargetLowering::LowerAddSubCarry(SDValue Op, SelectionDAG &DAG) const { + const SDLoc &dl(Op); + unsigned Opc = Op.getOpcode(); + SDValue X = Op.getOperand(0), Y = Op.getOperand(1), C = Op.getOperand(2); + + if (Opc == ISD::ADDCARRY) + return DAG.getNode(HexagonISD::ADDC, dl, Op.getNode()->getVTList(), + { X, Y, C }); + + EVT CarryTy = C.getValueType(); + SDValue SubC = DAG.getNode(HexagonISD::SUBC, dl, Op.getNode()->getVTList(), + { X, Y, DAG.getLogicalNOT(dl, C, CarryTy) }); + SDValue Out[] = { SubC.getValue(0), + DAG.getLogicalNOT(dl, SubC.getValue(1), CarryTy) }; + return DAG.getMergeValues(Out, dl); +} + +SDValue HexagonTargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = Op.getOperand(0); SDValue Offset = Op.getOperand(1); @@ -2904,6 +2775,17 @@ HexagonTargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const { SDValue HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { unsigned Opc = Op.getOpcode(); + + // Handle INLINEASM first. + if (Opc == ISD::INLINEASM) + return LowerINLINEASM(Op, DAG); + + if (isHvxOperation(Op)) { + // If HVX lowering returns nothing, try the default lowering. + if (SDValue V = LowerHvxOperation(Op, DAG)) + return V; + } + switch (Opc) { default: #ifndef NDEBUG @@ -2919,13 +2801,17 @@ HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); + case ISD::BITCAST: return LowerBITCAST(Op, DAG); + case ISD::LOAD: return LowerUnalignedLoad(Op, DAG); + case ISD::ADDCARRY: + case ISD::SUBCARRY: return LowerAddSubCarry(Op, DAG); case ISD::SRA: case ISD::SHL: case ISD::SRL: return LowerVECTOR_SHIFT(Op, DAG); + case ISD::ROTL: return LowerROTL(Op, DAG); case ISD::ConstantPool: return LowerConstantPool(Op, DAG); case ISD::JumpTable: return LowerJumpTable(Op, DAG); case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG); - // Frame & Return address. Currently unimplemented. 
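LowerAddSubCarry above leans on the identity X - Y - b = X + ~Y + (1 - b), with the outgoing borrow being the complement of the carry out of the addition. A small self-checking sketch of that identity on 32-bit values:

    #include <cassert>
    #include <cstdint>
    #include <initializer_list>

    int main() {
      for (uint32_t X : {0u, 1u, 0xFFFFFFFFu, 0x80000000u})
        for (uint32_t Y : {0u, 1u, 0xFFFFFFFFu})
          for (uint32_t B : {0u, 1u}) {
            uint64_t Sub = (uint64_t)X - Y - B;            // X - Y - borrow
            uint64_t Add = (uint64_t)X + (uint32_t)~Y + (1u - B);
            assert((uint32_t)Sub == (uint32_t)Add);        // same low bits
            // borrow out of the subtract == !carry out of the add
            assert(((Sub >> 32) & 1) == 1 - ((Add >> 32) & 1));
          }
    }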
case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); @@ -2939,17 +2825,35 @@ HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::VSELECT: return LowerVSELECT(Op, DAG); case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); case ISD::INTRINSIC_VOID: return LowerINTRINSIC_VOID(Op, DAG); - case ISD::INLINEASM: return LowerINLINEASM(Op, DAG); case ISD::PREFETCH: return LowerPREFETCH(Op, DAG); case ISD::READCYCLECOUNTER: return LowerREADCYCLECOUNTER(Op, DAG); - case ISD::MUL: - if (Subtarget.useHVXOps()) - return LowerHvxMul(Op, DAG); break; } + return SDValue(); } +void +HexagonTargetLowering::ReplaceNodeResults(SDNode *N, + SmallVectorImpl<SDValue> &Results, + SelectionDAG &DAG) const { + const SDLoc &dl(N); + switch (N->getOpcode()) { + case ISD::SRL: + case ISD::SRA: + case ISD::SHL: + return; + case ISD::BITCAST: + // Handle a bitcast from v8i1 to i8. + if (N->getValueType(0) == MVT::i8) { + SDValue P = getInstr(Hexagon::C2_tfrpr, dl, MVT::i32, + N->getOperand(0), DAG); + Results.push_back(P); + } + break; + } +} + /// Returns relocation base for the given PIC jumptable. SDValue HexagonTargetLowering::getPICJumpTableRelocBase(SDValue Table, @@ -3023,7 +2927,7 @@ HexagonTargetLowering::getRegForInlineAsmConstraint( case 512: return {0u, &Hexagon::HvxVRRegClass}; case 1024: - if (Subtarget.hasV60TOps() && Subtarget.useHVX128BOps()) + if (Subtarget.hasV60Ops() && Subtarget.useHVX128BOps()) return {0u, &Hexagon::HvxVRRegClass}; return {0u, &Hexagon::HvxWRRegClass}; case 2048: @@ -3042,7 +2946,7 @@ HexagonTargetLowering::getRegForInlineAsmConstraint( /// specified FP immediate natively. If false, the legalizer will /// materialize the FP immediate as a load from a constant pool. bool HexagonTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { - return Subtarget.hasV5TOps(); + return Subtarget.hasV5Ops(); } /// isLegalAddressingMode - Return true if the addressing mode represented by @@ -3104,9 +3008,9 @@ bool HexagonTargetLowering::isLegalICmpImmediate(int64_t Imm) const { bool HexagonTargetLowering::IsEligibleForTailCallOptimization( SDValue Callee, CallingConv::ID CalleeCC, - bool isVarArg, - bool isCalleeStructRet, - bool isCallerStructRet, + bool IsVarArg, + bool IsCalleeStructRet, + bool IsCallerStructRet, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, @@ -3137,12 +3041,12 @@ bool HexagonTargetLowering::IsEligibleForTailCallOptimization( } // Do not tail call optimize vararg calls. - if (isVarArg) + if (IsVarArg) return false; // Also avoid tail call optimization if either caller or callee uses struct // return semantics. 
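For reference, the v8i1-to-i8 bitcast handled in ReplaceNodeResults above works because bit i of the transferred byte is element i of the predicate. Packed standalone (illustrative name packBools):

    #include <cstdint>

    static uint8_t packBools(const bool (&B)[8]) {
      uint8_t R = 0;
      for (int i = 0; i != 8; ++i)
        R |= uint8_t(B[i]) << i;   // element i -> bit i
      return R;
    }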
- if (isCalleeStructRet || isCallerStructRet) + if (IsCalleeStructRet || IsCallerStructRet) return false; // In addition to the cases above, we also disable Tail Call Optimization if @@ -3185,54 +3089,25 @@ bool HexagonTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, unsigned Align, bool *Fast) const { if (Fast) *Fast = false; - - switch (VT.getSimpleVT().SimpleTy) { - default: - return false; - case MVT::v64i8: - case MVT::v128i8: - case MVT::v256i8: - case MVT::v32i16: - case MVT::v64i16: - case MVT::v128i16: - case MVT::v16i32: - case MVT::v32i32: - case MVT::v64i32: - return true; - } - return false; + return Subtarget.isHVXVectorType(VT.getSimpleVT()); } std::pair<const TargetRegisterClass*, uint8_t> HexagonTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const { - const TargetRegisterClass *RRC = nullptr; + if (Subtarget.isHVXVectorType(VT, true)) { + unsigned BitWidth = VT.getSizeInBits(); + unsigned VecWidth = Subtarget.getVectorLength() * 8; - uint8_t Cost = 1; - switch (VT.SimpleTy) { - default: - return TargetLowering::findRepresentativeClass(TRI, VT); - case MVT::v64i8: - case MVT::v32i16: - case MVT::v16i32: - RRC = &Hexagon::HvxVRRegClass; - break; - case MVT::v128i8: - case MVT::v64i16: - case MVT::v32i32: - if (Subtarget.hasV60TOps() && Subtarget.useHVXOps() && - Subtarget.useHVX128BOps()) - RRC = &Hexagon::HvxVRRegClass; - else - RRC = &Hexagon::HvxWRRegClass; - break; - case MVT::v256i8: - case MVT::v128i16: - case MVT::v64i32: - RRC = &Hexagon::HvxWRRegClass; - break; + if (VT.getVectorElementType() == MVT::i1) + return std::make_pair(&Hexagon::HvxQRRegClass, 1); + if (BitWidth == VecWidth) + return std::make_pair(&Hexagon::HvxVRRegClass, 1); + assert(BitWidth == 2 * VecWidth); + return std::make_pair(&Hexagon::HvxWRRegClass, 1); } - return std::make_pair(RRC, Cost); + + return TargetLowering::findRepresentativeClass(TRI, VT); } Value *HexagonTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr, diff --git a/lib/Target/Hexagon/HexagonISelLowering.h b/lib/Target/Hexagon/HexagonISelLowering.h index 732834b464b4..3d94bd1ff6ed 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.h +++ b/lib/Target/Hexagon/HexagonISelLowering.h @@ -18,12 +18,12 @@ #include "Hexagon.h" #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/ISDOpcodes.h" -#include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/InlineAsm.h" +#include "llvm/Support/MachineValueType.h" #include <cstdint> #include <utility> @@ -36,6 +36,8 @@ namespace HexagonISD { CONST32 = OP_BEGIN, CONST32_GP, // For marking data present in GP. + ADDC, // Add with carry: (X, Y, Cin) -> (X+Y, Cout). + SUBC, // Sub with carry: (X, Y, Cin) -> (X+~Y+Cin, Cout). ALLOCA, AT_GOT, // Index in GOT. @@ -51,18 +53,15 @@ namespace HexagonISD { CP, // Constant pool. COMBINE, - VSPLAT, + VSPLAT, // Generic splat, selection depends on argument/return + // types. VASL, VASR, VLSR, + TSTBIT, INSERT, - INSERTRP, EXTRACTU, - EXTRACTURP, - VCOMBINE, - VPACKE, - VPACKO, VEXTRACTW, VINSERTW0, VROR, @@ -70,8 +69,24 @@ namespace HexagonISD { EH_RETURN, DCFETCH, READCYCLE, + D2P, // Convert 8-byte value to 8-bit predicate register. [*] + P2D, // Convert 8-bit predicate register to 8-byte value. [*] + V2Q, // Convert HVX vector to a vector predicate reg. [*] + Q2V, // Convert vector predicate to an HVX vector. 
[*] + // [*] The equivalence is defined as "Q <=> (V != 0)", + // where the != operation compares bytes. + // Note: V != 0 is implemented as V >u 0. + QCAT, + QTRUE, + QFALSE, VZERO, - + VSPLATW, // HVX splat of a 32-bit word with an arbitrary result type. + TYPECAST, // No-op that's used to convert between different legal + // types in a register. + VALIGN, // Align two vectors (in Op0, Op1) to one that would have + // been loaded from address in Op2. + VALIGNADDR, // Align vector address: Op0 & -Op1, except when it is + // an address in a vector load, then it's a no-op. OP_END }; @@ -110,6 +125,10 @@ namespace HexagonISD { bool isTruncateFree(Type *Ty1, Type *Ty2) const override; bool isTruncateFree(EVT VT1, EVT VT2) const override; + bool isCheapToSpeculateCttz() const override { return true; } + bool isCheapToSpeculateCtlz() const override { return true; } + bool isCtlzFast() const override { return true; } + bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override; /// Return true if an FMA operation is faster than a pair of mul and add @@ -127,6 +146,9 @@ namespace HexagonISD { const override; SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; + void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results, + SelectionDAG &DAG) const override; + const char *getTargetNodeName(unsigned Opcode) const override; SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; @@ -137,6 +159,13 @@ namespace HexagonISD { SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVECTOR_SHIFT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerROTL(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerANY_EXTEND(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerUnalignedLoad(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerAddSubCarry(SDValue Op, SelectionDAG &DAG) const; SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const; @@ -284,6 +313,9 @@ namespace HexagonISD { } private: + void initializeHVXLowering(); + std::pair<SDValue,int> getBaseAndOffset(SDValue Addr) const; + bool getBuildVectorConstInts(ArrayRef<SDValue> Values, MVT VecTy, SelectionDAG &DAG, MutableArrayRef<ConstantInt*> Consts) const; @@ -295,13 +327,19 @@ namespace HexagonISD { MVT ValTy, MVT ResTy, SelectionDAG &DAG) const; SDValue insertVector(SDValue VecV, SDValue ValV, SDValue IdxV, const SDLoc &dl, MVT ValTy, SelectionDAG &DAG) const; + SDValue expandPredicate(SDValue Vec32, const SDLoc &dl, + SelectionDAG &DAG) const; + SDValue contractPredicate(SDValue Vec64, const SDLoc &dl, + SelectionDAG &DAG) const; + SDValue getVectorShiftByInt(SDValue Op, SelectionDAG &DAG) const; + bool isUndef(SDValue Op) const { if (Op.isMachineOpcode()) return Op.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF; return Op.getOpcode() == ISD::UNDEF; } - SDValue getNode(unsigned MachineOpc, const SDLoc &dl, MVT Ty, - ArrayRef<SDValue> Ops, SelectionDAG &DAG) const { + SDValue getInstr(unsigned MachineOpc, const SDLoc &dl, MVT Ty, + ArrayRef<SDValue> Ops, SelectionDAG &DAG) const { SDNode *N = DAG.getMachineNode(MachineOpc, dl, Ty, Ops); return SDValue(N, 0); } @@ -328,7 +366,8 @@ namespace HexagonISD { MVT tyVector(MVT Ty, MVT ElemTy) const { if 
(Ty.isVector() && Ty.getVectorElementType() == ElemTy) return Ty; - unsigned TyWidth = Ty.getSizeInBits(), ElemWidth = ElemTy.getSizeInBits(); + unsigned TyWidth = Ty.getSizeInBits(); + unsigned ElemWidth = ElemTy.getSizeInBits(); assert((TyWidth % ElemWidth) == 0); return MVT::getVectorVT(ElemTy, TyWidth/ElemWidth); } @@ -343,31 +382,66 @@ namespace HexagonISD { VectorPair opSplit(SDValue Vec, const SDLoc &dl, SelectionDAG &DAG) const; SDValue opCastElem(SDValue Vec, MVT ElemTy, SelectionDAG &DAG) const; + bool isHvxSingleTy(MVT Ty) const; + bool isHvxPairTy(MVT Ty) const; SDValue convertToByteIndex(SDValue ElemIdx, MVT ElemTy, SelectionDAG &DAG) const; SDValue getIndexInWord32(SDValue Idx, MVT ElemTy, SelectionDAG &DAG) const; SDValue getByteShuffle(const SDLoc &dl, SDValue Op0, SDValue Op1, ArrayRef<int> Mask, SelectionDAG &DAG) const; - MVT getVecBoolVT() const; - - SDValue buildHvxVectorSingle(ArrayRef<SDValue> Values, const SDLoc &dl, - MVT VecTy, SelectionDAG &DAG) const; + SDValue buildHvxVectorReg(ArrayRef<SDValue> Values, const SDLoc &dl, + MVT VecTy, SelectionDAG &DAG) const; SDValue buildHvxVectorPred(ArrayRef<SDValue> Values, const SDLoc &dl, MVT VecTy, SelectionDAG &DAG) const; + SDValue createHvxPrefixPred(SDValue PredV, const SDLoc &dl, + unsigned BitBytes, bool ZeroFill, + SelectionDAG &DAG) const; + SDValue extractHvxElementReg(SDValue VecV, SDValue IdxV, const SDLoc &dl, + MVT ResTy, SelectionDAG &DAG) const; + SDValue extractHvxElementPred(SDValue VecV, SDValue IdxV, const SDLoc &dl, + MVT ResTy, SelectionDAG &DAG) const; + SDValue insertHvxElementReg(SDValue VecV, SDValue IdxV, SDValue ValV, + const SDLoc &dl, SelectionDAG &DAG) const; + SDValue insertHvxElementPred(SDValue VecV, SDValue IdxV, SDValue ValV, + const SDLoc &dl, SelectionDAG &DAG) const; + SDValue extractHvxSubvectorReg(SDValue VecV, SDValue IdxV, const SDLoc &dl, + MVT ResTy, SelectionDAG &DAG) const; + SDValue extractHvxSubvectorPred(SDValue VecV, SDValue IdxV, const SDLoc &dl, + MVT ResTy, SelectionDAG &DAG) const; + SDValue insertHvxSubvectorReg(SDValue VecV, SDValue SubV, SDValue IdxV, + const SDLoc &dl, SelectionDAG &DAG) const; + SDValue insertHvxSubvectorPred(SDValue VecV, SDValue SubV, SDValue IdxV, + const SDLoc &dl, SelectionDAG &DAG) const; + SDValue extendHvxVectorPred(SDValue VecV, const SDLoc &dl, MVT ResTy, + bool ZeroExt, SelectionDAG &DAG) const; SDValue LowerHvxBuildVector(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerHvxConcatVectors(SDValue Op, SelectionDAG &DAG) const; SDValue LowerHvxExtractElement(SDValue Op, SelectionDAG &DAG) const; SDValue LowerHvxInsertElement(SDValue Op, SelectionDAG &DAG) const; SDValue LowerHvxExtractSubvector(SDValue Op, SelectionDAG &DAG) const; SDValue LowerHvxInsertSubvector(SDValue Op, SelectionDAG &DAG) const; + + SDValue LowerHvxAnyExt(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerHvxSignExt(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerHvxZeroExt(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerHvxCttz(SDValue Op, SelectionDAG &DAG) const; SDValue LowerHvxMul(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerHvxMulh(SDValue Op, SelectionDAG &DAG) const; SDValue LowerHvxSetCC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerHvxExtend(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerHvxShift(SDValue Op, SelectionDAG &DAG) const; + + SDValue SplitHvxPairOp(SDValue Op, SelectionDAG &DAG) const; + SDValue SplitHvxMemOp(SDValue Op, SelectionDAG &DAG) const; std::pair<const TargetRegisterClass*, uint8_t> 
findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const override; + + bool isHvxOperation(SDValue Op) const; + SDValue LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const; }; } // end namespace llvm diff --git a/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp index 51480d09d734..2566194ca9c6 100644 --- a/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp +++ b/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp @@ -10,9 +10,192 @@ #include "HexagonISelLowering.h" #include "HexagonRegisterInfo.h" #include "HexagonSubtarget.h" +#include "llvm/Support/CommandLine.h" using namespace llvm; +static const MVT LegalV64[] = { MVT::v64i8, MVT::v32i16, MVT::v16i32 }; +static const MVT LegalW64[] = { MVT::v128i8, MVT::v64i16, MVT::v32i32 }; +static const MVT LegalV128[] = { MVT::v128i8, MVT::v64i16, MVT::v32i32 }; +static const MVT LegalW128[] = { MVT::v256i8, MVT::v128i16, MVT::v64i32 }; + + +void +HexagonTargetLowering::initializeHVXLowering() { + if (Subtarget.useHVX64BOps()) { + addRegisterClass(MVT::v64i8, &Hexagon::HvxVRRegClass); + addRegisterClass(MVT::v32i16, &Hexagon::HvxVRRegClass); + addRegisterClass(MVT::v16i32, &Hexagon::HvxVRRegClass); + addRegisterClass(MVT::v128i8, &Hexagon::HvxWRRegClass); + addRegisterClass(MVT::v64i16, &Hexagon::HvxWRRegClass); + addRegisterClass(MVT::v32i32, &Hexagon::HvxWRRegClass); + // These "short" boolean vector types should be legal because + // they will appear as results of vector compares. If they were + // not legal, type legalization would try to make them legal + // and that would require using operations that do not use or + // produce such types. That, in turn, would imply using custom + // nodes, which would be unoptimizable by the DAG combiner. + // The idea is to rely on target-independent operations as much + // as possible. + addRegisterClass(MVT::v16i1, &Hexagon::HvxQRRegClass); + addRegisterClass(MVT::v32i1, &Hexagon::HvxQRRegClass); + addRegisterClass(MVT::v64i1, &Hexagon::HvxQRRegClass); + addRegisterClass(MVT::v512i1, &Hexagon::HvxQRRegClass); + } else if (Subtarget.useHVX128BOps()) { + addRegisterClass(MVT::v128i8, &Hexagon::HvxVRRegClass); + addRegisterClass(MVT::v64i16, &Hexagon::HvxVRRegClass); + addRegisterClass(MVT::v32i32, &Hexagon::HvxVRRegClass); + addRegisterClass(MVT::v256i8, &Hexagon::HvxWRRegClass); + addRegisterClass(MVT::v128i16, &Hexagon::HvxWRRegClass); + addRegisterClass(MVT::v64i32, &Hexagon::HvxWRRegClass); + addRegisterClass(MVT::v32i1, &Hexagon::HvxQRRegClass); + addRegisterClass(MVT::v64i1, &Hexagon::HvxQRRegClass); + addRegisterClass(MVT::v128i1, &Hexagon::HvxQRRegClass); + addRegisterClass(MVT::v1024i1, &Hexagon::HvxQRRegClass); + } + + // Set up operation actions. + + bool Use64b = Subtarget.useHVX64BOps(); + ArrayRef<MVT> LegalV = Use64b ? LegalV64 : LegalV128; + ArrayRef<MVT> LegalW = Use64b ? LegalW64 : LegalW128; + MVT ByteV = Use64b ? MVT::v64i8 : MVT::v128i8; + MVT ByteW = Use64b ? 
MVT::v128i8 : MVT::v256i8; + + auto setPromoteTo = [this] (unsigned Opc, MVT FromTy, MVT ToTy) { + setOperationAction(Opc, FromTy, Promote); + AddPromotedToType(Opc, FromTy, ToTy); + }; + + setOperationAction(ISD::VECTOR_SHUFFLE, ByteV, Legal); + setOperationAction(ISD::VECTOR_SHUFFLE, ByteW, Legal); + + for (MVT T : LegalV) { + setIndexedLoadAction(ISD::POST_INC, T, Legal); + setIndexedStoreAction(ISD::POST_INC, T, Legal); + + setOperationAction(ISD::AND, T, Legal); + setOperationAction(ISD::OR, T, Legal); + setOperationAction(ISD::XOR, T, Legal); + setOperationAction(ISD::ADD, T, Legal); + setOperationAction(ISD::SUB, T, Legal); + setOperationAction(ISD::CTPOP, T, Legal); + setOperationAction(ISD::CTLZ, T, Legal); + if (T != ByteV) { + setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, T, Legal); + setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, T, Legal); + setOperationAction(ISD::BSWAP, T, Legal); + } + + setOperationAction(ISD::CTTZ, T, Custom); + setOperationAction(ISD::LOAD, T, Custom); + setOperationAction(ISD::MUL, T, Custom); + setOperationAction(ISD::MULHS, T, Custom); + setOperationAction(ISD::MULHU, T, Custom); + setOperationAction(ISD::BUILD_VECTOR, T, Custom); + // Make concat-vectors custom to handle concats of more than 2 vectors. + setOperationAction(ISD::CONCAT_VECTORS, T, Custom); + setOperationAction(ISD::INSERT_SUBVECTOR, T, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, T, Custom); + setOperationAction(ISD::EXTRACT_SUBVECTOR, T, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, T, Custom); + setOperationAction(ISD::ANY_EXTEND, T, Custom); + setOperationAction(ISD::SIGN_EXTEND, T, Custom); + setOperationAction(ISD::ZERO_EXTEND, T, Custom); + if (T != ByteV) { + setOperationAction(ISD::ANY_EXTEND_VECTOR_INREG, T, Custom); + // HVX only has shifts of words and halfwords. + setOperationAction(ISD::SRA, T, Custom); + setOperationAction(ISD::SHL, T, Custom); + setOperationAction(ISD::SRL, T, Custom); + + // Promote all shuffles to operate on vectors of bytes. + setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteV); + } + + setCondCodeAction(ISD::SETNE, T, Expand); + setCondCodeAction(ISD::SETLE, T, Expand); + setCondCodeAction(ISD::SETGE, T, Expand); + setCondCodeAction(ISD::SETLT, T, Expand); + setCondCodeAction(ISD::SETULE, T, Expand); + setCondCodeAction(ISD::SETUGE, T, Expand); + setCondCodeAction(ISD::SETULT, T, Expand); + } + + for (MVT T : LegalW) { + // Custom-lower BUILD_VECTOR for vector pairs. The standard (target- + // independent) handling of it would convert it to a load, which is + // not always the optimal choice. + setOperationAction(ISD::BUILD_VECTOR, T, Custom); + // Make concat-vectors custom to handle concats of more than 2 vectors. + setOperationAction(ISD::CONCAT_VECTORS, T, Custom); + + // Custom-lower these operations for pairs. Expand them into a concat + // of the corresponding operations on individual vectors. 
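A standalone sketch of that split-and-concat expansion, with std::array standing in for the HVX vector and vector-pair types (illustrative names):

    #include <array>

    using Vec = std::array<int, 16>;   // one HVX vector
    using Pair = std::array<Vec, 2>;   // a vector pair

    template <class F>
    static Pair splitOp(const Pair &A, const Pair &B, F Op) {
      // Operate on each half, then concatenate the results.
      return {Op(A[0], B[0]), Op(A[1], B[1])};
    }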
+ setOperationAction(ISD::ANY_EXTEND, T, Custom); + setOperationAction(ISD::SIGN_EXTEND, T, Custom); + setOperationAction(ISD::ZERO_EXTEND, T, Custom); + setOperationAction(ISD::SIGN_EXTEND_INREG, T, Custom); + setOperationAction(ISD::ANY_EXTEND_VECTOR_INREG, T, Custom); + setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, T, Legal); + setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, T, Legal); + + setOperationAction(ISD::LOAD, T, Custom); + setOperationAction(ISD::STORE, T, Custom); + setOperationAction(ISD::CTLZ, T, Custom); + setOperationAction(ISD::CTTZ, T, Custom); + setOperationAction(ISD::CTPOP, T, Custom); + + setOperationAction(ISD::ADD, T, Legal); + setOperationAction(ISD::SUB, T, Legal); + setOperationAction(ISD::MUL, T, Custom); + setOperationAction(ISD::MULHS, T, Custom); + setOperationAction(ISD::MULHU, T, Custom); + setOperationAction(ISD::AND, T, Custom); + setOperationAction(ISD::OR, T, Custom); + setOperationAction(ISD::XOR, T, Custom); + setOperationAction(ISD::SETCC, T, Custom); + setOperationAction(ISD::VSELECT, T, Custom); + if (T != ByteW) { + setOperationAction(ISD::SRA, T, Custom); + setOperationAction(ISD::SHL, T, Custom); + setOperationAction(ISD::SRL, T, Custom); + + // Promote all shuffles to operate on vectors of bytes. + setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteW); + } + } + + // Boolean vectors. + + for (MVT T : LegalW) { + // Boolean types for vector pairs will overlap with the boolean + // types for single vectors, e.g. + // v64i8 -> v64i1 (single) + // v64i16 -> v64i1 (pair) + // Set these actions first, and allow the single actions to overwrite + // any duplicates. + MVT BoolW = MVT::getVectorVT(MVT::i1, T.getVectorNumElements()); + setOperationAction(ISD::SETCC, BoolW, Custom); + setOperationAction(ISD::AND, BoolW, Custom); + setOperationAction(ISD::OR, BoolW, Custom); + setOperationAction(ISD::XOR, BoolW, Custom); + } + + for (MVT T : LegalV) { + MVT BoolV = MVT::getVectorVT(MVT::i1, T.getVectorNumElements()); + setOperationAction(ISD::BUILD_VECTOR, BoolV, Custom); + setOperationAction(ISD::CONCAT_VECTORS, BoolV, Custom); + setOperationAction(ISD::INSERT_SUBVECTOR, BoolV, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, BoolV, Custom); + setOperationAction(ISD::EXTRACT_SUBVECTOR, BoolV, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, BoolV, Custom); + setOperationAction(ISD::AND, BoolV, Legal); + setOperationAction(ISD::OR, BoolV, Legal); + setOperationAction(ISD::XOR, BoolV, Legal); + } +} + SDValue HexagonTargetLowering::getInt(unsigned IntId, MVT ResTy, ArrayRef<SDValue> Ops, const SDLoc &dl, SelectionDAG &DAG) const { @@ -75,9 +258,23 @@ HexagonTargetLowering::VectorPair HexagonTargetLowering::opSplit(SDValue Vec, const SDLoc &dl, SelectionDAG &DAG) const { TypePair Tys = typeSplit(ty(Vec)); + if (Vec.getOpcode() == HexagonISD::QCAT) + return VectorPair(Vec.getOperand(0), Vec.getOperand(1)); return DAG.SplitVector(Vec, dl, Tys.first, Tys.second); } +bool +HexagonTargetLowering::isHvxSingleTy(MVT Ty) const { + return Subtarget.isHVXVectorType(Ty) && + Ty.getSizeInBits() == 8 * Subtarget.getVectorLength(); +} + +bool +HexagonTargetLowering::isHvxPairTy(MVT Ty) const { + return Subtarget.isHVXVectorType(Ty) && + Ty.getSizeInBits() == 16 * Subtarget.getVectorLength(); +} + SDValue HexagonTargetLowering::convertToByteIndex(SDValue ElemIdx, MVT ElemTy, SelectionDAG &DAG) const { @@ -141,36 +338,16 @@ HexagonTargetLowering::getByteShuffle(const SDLoc &dl, SDValue Op0, opCastElem(Op1, MVT::i8, DAG), ByteMask); } -MVT 
-HexagonTargetLowering::getVecBoolVT() const { - return MVT::getVectorVT(MVT::i1, 8*Subtarget.getVectorLength()); -} - SDValue -HexagonTargetLowering::buildHvxVectorSingle(ArrayRef<SDValue> Values, - const SDLoc &dl, MVT VecTy, - SelectionDAG &DAG) const { +HexagonTargetLowering::buildHvxVectorReg(ArrayRef<SDValue> Values, + const SDLoc &dl, MVT VecTy, + SelectionDAG &DAG) const { unsigned VecLen = Values.size(); MachineFunction &MF = DAG.getMachineFunction(); MVT ElemTy = VecTy.getVectorElementType(); unsigned ElemWidth = ElemTy.getSizeInBits(); unsigned HwLen = Subtarget.getVectorLength(); - SmallVector<ConstantInt*, 128> Consts(VecLen); - bool AllConst = getBuildVectorConstInts(Values, VecTy, DAG, Consts); - if (AllConst) { - if (llvm::all_of(Consts, [](ConstantInt *CI) { return CI->isZero(); })) - return getZero(dl, VecTy, DAG); - - ArrayRef<Constant*> Tmp((Constant**)Consts.begin(), - (Constant**)Consts.end()); - Constant *CV = ConstantVector::get(Tmp); - unsigned Align = HwLen; - SDValue CP = LowerConstantPool(DAG.getConstantPool(CV, VecTy, Align), DAG); - return DAG.getLoad(VecTy, dl, DAG.getEntryNode(), CP, - MachinePointerInfo::getConstantPool(MF), Align); - } - unsigned ElemSize = ElemWidth / 8; assert(ElemSize*VecLen == HwLen); SmallVector<SDValue,32> Words; @@ -187,12 +364,47 @@ HexagonTargetLowering::buildHvxVectorSingle(ArrayRef<SDValue> Values, Words.assign(Values.begin(), Values.end()); } + unsigned NumWords = Words.size(); + bool IsSplat = true, IsUndef = true; + SDValue SplatV; + for (unsigned i = 0; i != NumWords && IsSplat; ++i) { + if (isUndef(Words[i])) + continue; + IsUndef = false; + if (!SplatV.getNode()) + SplatV = Words[i]; + else if (SplatV != Words[i]) + IsSplat = false; + } + if (IsUndef) + return DAG.getUNDEF(VecTy); + if (IsSplat) { + assert(SplatV.getNode()); + auto *IdxN = dyn_cast<ConstantSDNode>(SplatV.getNode()); + if (IdxN && IdxN->isNullValue()) + return getZero(dl, VecTy, DAG); + return DAG.getNode(HexagonISD::VSPLATW, dl, VecTy, SplatV); + } + + // Delay recognizing constant vectors until here, so that we can generate + // a vsplat. + SmallVector<ConstantInt*, 128> Consts(VecLen); + bool AllConst = getBuildVectorConstInts(Values, VecTy, DAG, Consts); + if (AllConst) { + ArrayRef<Constant*> Tmp((Constant**)Consts.begin(), + (Constant**)Consts.end()); + Constant *CV = ConstantVector::get(Tmp); + unsigned Align = HwLen; + SDValue CP = LowerConstantPool(DAG.getConstantPool(CV, VecTy, Align), DAG); + return DAG.getLoad(VecTy, dl, DAG.getEntryNode(), CP, + MachinePointerInfo::getConstantPool(MF), Align); + } + // Construct two halves in parallel, then or them together. 
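The insert-at-word-0/rotate idiom used here can be pictured on a plain array: insert each word at slot 0, then rotate by one slot, and after N steps the words come out in order. A standalone sketch (illustrative name buildByRotate; the code above runs two such chains in parallel and ORs the halves to shorten the dependence chain):

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    static std::vector<uint32_t> buildByRotate(const std::vector<uint32_t> &W) {
      std::vector<uint32_t> V(W.size(), 0);
      for (uint32_t X : W) {
        V[0] = X;                                       // VINSERTW0 analogue
        std::rotate(V.begin(), V.begin() + 1, V.end()); // VROR analogue
      }
      return V;
    }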
assert(4*Words.size() == Subtarget.getVectorLength());
- SDValue HalfV0 = getNode(Hexagon::V6_vd0, dl, VecTy, {}, DAG);
- SDValue HalfV1 = getNode(Hexagon::V6_vd0, dl, VecTy, {}, DAG);
+ SDValue HalfV0 = getInstr(Hexagon::V6_vd0, dl, VecTy, {}, DAG);
+ SDValue HalfV1 = getInstr(Hexagon::V6_vd0, dl, VecTy, {}, DAG);
SDValue S = DAG.getConstant(4, dl, MVT::i32);
- unsigned NumWords = Words.size();
for (unsigned i = 0; i != NumWords/2; ++i) {
SDValue N = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy,
{HalfV0, Words[i]});
@@ -209,6 +421,95 @@
}
SDValue
+HexagonTargetLowering::createHvxPrefixPred(SDValue PredV, const SDLoc &dl,
+ unsigned BitBytes, bool ZeroFill, SelectionDAG &DAG) const {
+ MVT PredTy = ty(PredV);
+ unsigned HwLen = Subtarget.getVectorLength();
+ MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
+
+ if (Subtarget.isHVXVectorType(PredTy, true)) {
+ // Move the vector predicate PredV to a vector register, and scale it
+ // down to match the representation (bytes per type element) that the
+ // resulting prefix uses. The scaling down will pick every 2nd or 4th
+ // (every Scale-th in general) element and put them at the front of the
+ // resulting vector. The caller then inserts this prefix into the Q2V
+ // of the destination predicate. To avoid having an operation that
+ // generates an illegal type (short vector), generate a full size
+ // vector.
+ //
+ SDValue T = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, PredV);
+ SmallVector<int,128> Mask(HwLen);
+ // Scale = BitBytes(PredV) / Given BitBytes.
+ unsigned Scale = HwLen / (PredTy.getVectorNumElements() * BitBytes);
+ unsigned BlockLen = PredTy.getVectorNumElements() * BitBytes;
+
+ for (unsigned i = 0; i != HwLen; ++i) {
+ unsigned Num = i % Scale;
+ unsigned Off = i / Scale;
+ Mask[BlockLen*Num + Off] = i;
+ }
+ SDValue S = DAG.getVectorShuffle(ByteTy, dl, T, DAG.getUNDEF(ByteTy), Mask);
+ if (!ZeroFill)
+ return S;
+ // Fill the bytes beyond BlockLen with 0s.
+ MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
+ SDValue Q = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
+ {DAG.getConstant(BlockLen, dl, MVT::i32)}, DAG);
+ SDValue M = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, Q);
+ return DAG.getNode(ISD::AND, dl, ByteTy, S, M);
+ }
+
+ // Make sure that this is a valid scalar predicate.
+ assert(PredTy == MVT::v2i1 || PredTy == MVT::v4i1 || PredTy == MVT::v8i1);
+
+ unsigned Bytes = 8 / PredTy.getVectorNumElements();
+ SmallVector<SDValue,4> Words[2];
+ unsigned IdxW = 0;
+
+ auto Lo32 = [&DAG, &dl] (SDValue P) {
+ return DAG.getTargetExtractSubreg(Hexagon::isub_lo, dl, MVT::i32, P);
+ };
+ auto Hi32 = [&DAG, &dl] (SDValue P) {
+ return DAG.getTargetExtractSubreg(Hexagon::isub_hi, dl, MVT::i32, P);
+ };
+
+ SDValue W0 = isUndef(PredV)
+ ? DAG.getUNDEF(MVT::i64)
+ : DAG.getNode(HexagonISD::P2D, dl, MVT::i64, PredV);
+ Words[IdxW].push_back(Hi32(W0));
+ Words[IdxW].push_back(Lo32(W0));
+
+ while (Bytes < BitBytes) {
+ IdxW ^= 1;
+ Words[IdxW].clear();
+
+ if (Bytes < 4) {
+ for (const SDValue &W : Words[IdxW ^ 1]) {
+ SDValue T = expandPredicate(W, dl, DAG);
+ Words[IdxW].push_back(Hi32(T));
+ Words[IdxW].push_back(Lo32(T));
+ }
+ } else {
+ for (const SDValue &W : Words[IdxW ^ 1]) {
+ Words[IdxW].push_back(W);
+ Words[IdxW].push_back(W);
+ }
+ }
+ Bytes *= 2;
+ }
+
+ assert(Bytes == BitBytes);
+
+ SDValue Vec = ZeroFill ?
getZero(dl, ByteTy, DAG) : DAG.getUNDEF(ByteTy); + SDValue S4 = DAG.getConstant(HwLen-4, dl, MVT::i32); + for (const SDValue &W : Words[IdxW]) { + Vec = DAG.getNode(HexagonISD::VROR, dl, ByteTy, Vec, S4); + Vec = DAG.getNode(HexagonISD::VINSERTW0, dl, ByteTy, Vec, W); + } + + return Vec; +} + +SDValue HexagonTargetLowering::buildHvxVectorPred(ArrayRef<SDValue> Values, const SDLoc &dl, MVT VecTy, SelectionDAG &DAG) const { @@ -218,6 +519,18 @@ HexagonTargetLowering::buildHvxVectorPred(ArrayRef<SDValue> Values, unsigned HwLen = Subtarget.getVectorLength(); assert(VecLen <= HwLen || VecLen == 8*HwLen); SmallVector<SDValue,128> Bytes; + bool AllT = true, AllF = true; + + auto IsTrue = [] (SDValue V) { + if (const auto *N = dyn_cast<ConstantSDNode>(V.getNode())) + return !N->isNullValue(); + return false; + }; + auto IsFalse = [] (SDValue V) { + if (const auto *N = dyn_cast<ConstantSDNode>(V.getNode())) + return N->isNullValue(); + return false; + }; if (VecLen <= HwLen) { // In the hardware, each bit of a vector predicate corresponds to a byte @@ -226,8 +539,11 @@ HexagonTargetLowering::buildHvxVectorPred(ArrayRef<SDValue> Values, assert(HwLen % VecLen == 0); unsigned BitBytes = HwLen / VecLen; for (SDValue V : Values) { + AllT &= IsTrue(V); + AllF &= IsFalse(V); + SDValue Ext = !V.isUndef() ? DAG.getZExtOrTrunc(V, dl, MVT::i8) - : DAG.getConstant(0, dl, MVT::i8); + : DAG.getUNDEF(MVT::i8); for (unsigned B = 0; B != BitBytes; ++B) Bytes.push_back(Ext); } @@ -243,8 +559,11 @@ HexagonTargetLowering::buildHvxVectorPred(ArrayRef<SDValue> Values, break; } SDValue F = Values[I+B]; + AllT &= IsTrue(F); + AllF &= IsFalse(F); + SDValue Ext = (B < 8) ? DAG.getZExtOrTrunc(F, dl, MVT::i8) - : DAG.getConstant(0, dl, MVT::i8); + : DAG.getUNDEF(MVT::i8); Bytes.push_back(Ext); // Verify that the rest of values in the group are the same as the // first. @@ -253,53 +572,25 @@ HexagonTargetLowering::buildHvxVectorPred(ArrayRef<SDValue> Values, } } - MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen); - SDValue ByteVec = buildHvxVectorSingle(Bytes, dl, ByteTy, DAG); - SDValue Cmp = DAG.getSetCC(dl, VecTy, ByteVec, getZero(dl, ByteTy, DAG), - ISD::SETUGT); - return Cmp; -} - -SDValue -HexagonTargetLowering::LowerHvxBuildVector(SDValue Op, SelectionDAG &DAG) - const { - const SDLoc &dl(Op); - MVT VecTy = ty(Op); - - unsigned Size = Op.getNumOperands(); - SmallVector<SDValue,128> Ops; - for (unsigned i = 0; i != Size; ++i) - Ops.push_back(Op.getOperand(i)); - - if (VecTy.getVectorElementType() == MVT::i1) - return buildHvxVectorPred(Ops, dl, VecTy, DAG); + if (AllT) + return DAG.getNode(HexagonISD::QTRUE, dl, VecTy); + if (AllF) + return DAG.getNode(HexagonISD::QFALSE, dl, VecTy); - if (VecTy.getSizeInBits() == 16*Subtarget.getVectorLength()) { - ArrayRef<SDValue> A(Ops); - MVT SingleTy = typeSplit(VecTy).first; - SDValue V0 = buildHvxVectorSingle(A.take_front(Size/2), dl, SingleTy, DAG); - SDValue V1 = buildHvxVectorSingle(A.drop_front(Size/2), dl, SingleTy, DAG); - return DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, V0, V1); - } - - return buildHvxVectorSingle(Ops, dl, VecTy, DAG); + MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen); + SDValue ByteVec = buildHvxVectorReg(Bytes, dl, ByteTy, DAG); + return DAG.getNode(HexagonISD::V2Q, dl, VecTy, ByteVec); } SDValue -HexagonTargetLowering::LowerHvxExtractElement(SDValue Op, SelectionDAG &DAG) - const { - // Change the type of the extracted element to i32. 
- SDValue VecV = Op.getOperand(0); +HexagonTargetLowering::extractHvxElementReg(SDValue VecV, SDValue IdxV, + const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const { MVT ElemTy = ty(VecV).getVectorElementType(); + unsigned ElemWidth = ElemTy.getSizeInBits(); assert(ElemWidth >= 8 && ElemWidth <= 32); (void)ElemWidth; - const SDLoc &dl(Op); - SDValue IdxV = Op.getOperand(1); - if (ty(IdxV) != MVT::i32) - IdxV = DAG.getBitcast(MVT::i32, IdxV); - SDValue ByteIdx = convertToByteIndex(IdxV, ElemTy, DAG); SDValue ExWord = DAG.getNode(HexagonISD::VEXTRACTW, dl, MVT::i32, {VecV, ByteIdx}); @@ -316,13 +607,29 @@ HexagonTargetLowering::LowerHvxExtractElement(SDValue Op, SelectionDAG &DAG) } SDValue -HexagonTargetLowering::LowerHvxInsertElement(SDValue Op, SelectionDAG &DAG) - const { - const SDLoc &dl(Op); - SDValue VecV = Op.getOperand(0); - SDValue ValV = Op.getOperand(1); - SDValue IdxV = Op.getOperand(2); +HexagonTargetLowering::extractHvxElementPred(SDValue VecV, SDValue IdxV, + const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const { + // Implement other return types if necessary. + assert(ResTy == MVT::i1); + + unsigned HwLen = Subtarget.getVectorLength(); + MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen); + SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV); + + unsigned Scale = HwLen / ty(VecV).getVectorNumElements(); + SDValue ScV = DAG.getConstant(Scale, dl, MVT::i32); + IdxV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, ScV); + + SDValue ExtB = extractHvxElementReg(ByteVec, IdxV, dl, MVT::i32, DAG); + SDValue Zero = DAG.getTargetConstant(0, dl, MVT::i32); + return getInstr(Hexagon::C2_cmpgtui, dl, MVT::i1, {ExtB, Zero}, DAG); +} + +SDValue +HexagonTargetLowering::insertHvxElementReg(SDValue VecV, SDValue IdxV, + SDValue ValV, const SDLoc &dl, SelectionDAG &DAG) const { MVT ElemTy = ty(VecV).getVectorElementType(); + unsigned ElemWidth = ElemTy.getSizeInBits(); assert(ElemWidth >= 8 && ElemWidth <= 32); (void)ElemWidth; @@ -336,7 +643,7 @@ HexagonTargetLowering::LowerHvxInsertElement(SDValue Op, SelectionDAG &DAG) SDValue RotV = DAG.getNode(HexagonISD::VROR, dl, VecTy, {VecV, MaskV}); SDValue InsV = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy, {RotV, ValV}); SDValue SubV = DAG.getNode(ISD::SUB, dl, MVT::i32, - {DAG.getConstant(HwLen/4, dl, MVT::i32), MaskV}); + {DAG.getConstant(HwLen, dl, MVT::i32), MaskV}); SDValue TorV = DAG.getNode(HexagonISD::VROR, dl, VecTy, {InsV, SubV}); return TorV; }; @@ -349,9 +656,8 @@ HexagonTargetLowering::LowerHvxInsertElement(SDValue Op, SelectionDAG &DAG) // 1. Extract the existing word from the target vector. SDValue WordIdx = DAG.getNode(ISD::SRL, dl, MVT::i32, {ByteIdx, DAG.getConstant(2, dl, MVT::i32)}); - SDValue Ex0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, - {opCastElem(VecV, MVT::i32, DAG), WordIdx}); - SDValue Ext = LowerHvxExtractElement(Ex0, DAG); + SDValue Ext = extractHvxElementReg(opCastElem(VecV, MVT::i32, DAG), WordIdx, + dl, MVT::i32, DAG); // 2. Treating the extracted word as a 32-bit vector, insert the given // value into it. 
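[Editor's note: illustration only, not part of this commit. The function below handles sub-word elements by extracting the containing 32-bit word, splicing the value into it, and writing the word back. This hypothetical scalar helper shows the index arithmetic and the net effect on the bits for an i8 element; the actual lowering performs the middle step by treating the word as a short vector, but the resulting bit pattern is this splice.]

#include <cassert>
#include <cstdint>

static void insertByteElem(uint32_t *Words, unsigned Idx, uint8_t Val) {
  unsigned ByteIdx = Idx;                  // Idx * sizeof(i8)
  unsigned WordIdx = ByteIdx >> 2;         // which word holds the element
  unsigned Off = (ByteIdx & 3) * 8;        // bit offset within that word
  uint32_t W = Words[WordIdx];                         // 1. extract the word
  W = (W & ~(0xFFu << Off)) | (uint32_t(Val) << Off);  // 2. insert into it
  Words[WordIdx] = W;                                  // 3. write it back
}

int main() {
  uint32_t V[4] = {0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C};
  insertByteElem(V, 6, 0xAA);              // element 6: word 1, byte 2
  assert(V[1] == 0x07AA0504);
  return 0;
}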
@@ -365,55 +671,531 @@ HexagonTargetLowering::LowerHvxInsertElement(SDValue Op, SelectionDAG &DAG) } SDValue +HexagonTargetLowering::insertHvxElementPred(SDValue VecV, SDValue IdxV, + SDValue ValV, const SDLoc &dl, SelectionDAG &DAG) const { + unsigned HwLen = Subtarget.getVectorLength(); + MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen); + SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV); + + unsigned Scale = HwLen / ty(VecV).getVectorNumElements(); + SDValue ScV = DAG.getConstant(Scale, dl, MVT::i32); + IdxV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, ScV); + ValV = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, ValV); + + SDValue InsV = insertHvxElementReg(ByteVec, IdxV, ValV, dl, DAG); + return DAG.getNode(HexagonISD::V2Q, dl, ty(VecV), InsV); +} + +SDValue +HexagonTargetLowering::extractHvxSubvectorReg(SDValue VecV, SDValue IdxV, + const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const { + MVT VecTy = ty(VecV); + unsigned HwLen = Subtarget.getVectorLength(); + unsigned Idx = cast<ConstantSDNode>(IdxV.getNode())->getZExtValue(); + MVT ElemTy = VecTy.getVectorElementType(); + unsigned ElemWidth = ElemTy.getSizeInBits(); + + // If the source vector is a vector pair, get the single vector containing + // the subvector of interest. The subvector will never overlap two single + // vectors. + if (isHvxPairTy(VecTy)) { + unsigned SubIdx; + if (Idx * ElemWidth >= 8*HwLen) { + SubIdx = Hexagon::vsub_hi; + Idx -= VecTy.getVectorNumElements() / 2; + } else { + SubIdx = Hexagon::vsub_lo; + } + VecTy = typeSplit(VecTy).first; + VecV = DAG.getTargetExtractSubreg(SubIdx, dl, VecTy, VecV); + if (VecTy == ResTy) + return VecV; + } + + // The only meaningful subvectors of a single HVX vector are those that + // fit in a scalar register. + assert(ResTy.getSizeInBits() == 32 || ResTy.getSizeInBits() == 64); + + MVT WordTy = tyVector(VecTy, MVT::i32); + SDValue WordVec = DAG.getBitcast(WordTy, VecV); + unsigned WordIdx = (Idx*ElemWidth) / 32; + + SDValue W0Idx = DAG.getConstant(WordIdx, dl, MVT::i32); + SDValue W0 = extractHvxElementReg(WordVec, W0Idx, dl, MVT::i32, DAG); + if (ResTy.getSizeInBits() == 32) + return DAG.getBitcast(ResTy, W0); + + SDValue W1Idx = DAG.getConstant(WordIdx+1, dl, MVT::i32); + SDValue W1 = extractHvxElementReg(WordVec, W1Idx, dl, MVT::i32, DAG); + SDValue WW = DAG.getNode(HexagonISD::COMBINE, dl, MVT::i64, {W1, W0}); + return DAG.getBitcast(ResTy, WW); +} + +SDValue +HexagonTargetLowering::extractHvxSubvectorPred(SDValue VecV, SDValue IdxV, + const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const { + MVT VecTy = ty(VecV); + unsigned HwLen = Subtarget.getVectorLength(); + MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen); + SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV); + // IdxV is required to be a constant. + unsigned Idx = cast<ConstantSDNode>(IdxV.getNode())->getZExtValue(); + + unsigned ResLen = ResTy.getVectorNumElements(); + unsigned BitBytes = HwLen / VecTy.getVectorNumElements(); + unsigned Offset = Idx * BitBytes; + SDValue Undef = DAG.getUNDEF(ByteTy); + SmallVector<int,128> Mask; + + if (Subtarget.isHVXVectorType(ResTy, true)) { + // Converting between two vector predicates. Since the result is shorter + // than the source, it will correspond to a vector predicate with the + // relevant bits replicated. The replication count is the ratio of the + // source and target vector lengths. 
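[Editor's note: illustration only, not part of this commit. A worked instance of the replication mask built just below, with toy sizes for readability (real HVX registers are 64 or 128 bytes). The loop emits Mask[k] = Offset + k/Rep, so each byte of the source block is replicated Rep times.]

#include <cassert>
#include <vector>

int main() {
  // Extract a v4i1 subvector at element index 4 from a v8i1 predicate
  // represented in a 16-byte register.
  unsigned HwLen = 16, SrcLen = 8, ResLen = 4, Idx = 4;
  unsigned BitBytes = HwLen / SrcLen;      // bytes per source i1: 2
  unsigned Rep = SrcLen / ResLen;          // replication count: 2
  unsigned Offset = Idx * BitBytes;        // first byte of the subvector: 8

  std::vector<int> Mask;
  for (unsigned i = 0; i != HwLen / Rep; ++i)
    for (unsigned j = 0; j != Rep; ++j)
      Mask.push_back(i + Offset);

  // Result element k reads source element Idx + k, only with Rep-times
  // wider per-element byte groups.
  for (unsigned k = 0; k != HwLen; ++k)
    assert(Mask[k] == int(Offset + k / Rep));
  return 0;
}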
+ unsigned Rep = VecTy.getVectorNumElements() / ResLen; + assert(isPowerOf2_32(Rep) && HwLen % Rep == 0); + for (unsigned i = 0; i != HwLen/Rep; ++i) { + for (unsigned j = 0; j != Rep; ++j) + Mask.push_back(i + Offset); + } + SDValue ShuffV = DAG.getVectorShuffle(ByteTy, dl, ByteVec, Undef, Mask); + return DAG.getNode(HexagonISD::V2Q, dl, ResTy, ShuffV); + } + + // Converting between a vector predicate and a scalar predicate. In the + // vector predicate, a group of BitBytes bits will correspond to a single + // i1 element of the source vector type. Those bits will all have the same + // value. The same will be true for ByteVec, where each byte corresponds + // to a bit in the vector predicate. + // The algorithm is to traverse the ByteVec, going over the i1 values from + // the source vector, and generate the corresponding representation in an + // 8-byte vector. To avoid repeated extracts from ByteVec, shuffle the + // elements so that the interesting 8 bytes will be in the low end of the + // vector. + unsigned Rep = 8 / ResLen; + // Make sure the output fill the entire vector register, so repeat the + // 8-byte groups as many times as necessary. + for (unsigned r = 0; r != HwLen/ResLen; ++r) { + // This will generate the indexes of the 8 interesting bytes. + for (unsigned i = 0; i != ResLen; ++i) { + for (unsigned j = 0; j != Rep; ++j) + Mask.push_back(Offset + i*BitBytes); + } + } + + SDValue Zero = getZero(dl, MVT::i32, DAG); + SDValue ShuffV = DAG.getVectorShuffle(ByteTy, dl, ByteVec, Undef, Mask); + // Combine the two low words from ShuffV into a v8i8, and byte-compare + // them against 0. + SDValue W0 = DAG.getNode(HexagonISD::VEXTRACTW, dl, MVT::i32, {ShuffV, Zero}); + SDValue W1 = DAG.getNode(HexagonISD::VEXTRACTW, dl, MVT::i32, + {ShuffV, DAG.getConstant(4, dl, MVT::i32)}); + SDValue Vec64 = DAG.getNode(HexagonISD::COMBINE, dl, MVT::v8i8, {W1, W0}); + return getInstr(Hexagon::A4_vcmpbgtui, dl, ResTy, + {Vec64, DAG.getTargetConstant(0, dl, MVT::i32)}, DAG); +} + +SDValue +HexagonTargetLowering::insertHvxSubvectorReg(SDValue VecV, SDValue SubV, + SDValue IdxV, const SDLoc &dl, SelectionDAG &DAG) const { + MVT VecTy = ty(VecV); + MVT SubTy = ty(SubV); + unsigned HwLen = Subtarget.getVectorLength(); + MVT ElemTy = VecTy.getVectorElementType(); + unsigned ElemWidth = ElemTy.getSizeInBits(); + + bool IsPair = isHvxPairTy(VecTy); + MVT SingleTy = MVT::getVectorVT(ElemTy, (8*HwLen)/ElemWidth); + // The two single vectors that VecV consists of, if it's a pair. + SDValue V0, V1; + SDValue SingleV = VecV; + SDValue PickHi; + + if (IsPair) { + V0 = DAG.getTargetExtractSubreg(Hexagon::vsub_lo, dl, SingleTy, VecV); + V1 = DAG.getTargetExtractSubreg(Hexagon::vsub_hi, dl, SingleTy, VecV); + + SDValue HalfV = DAG.getConstant(SingleTy.getVectorNumElements(), + dl, MVT::i32); + PickHi = DAG.getSetCC(dl, MVT::i1, IdxV, HalfV, ISD::SETUGT); + if (isHvxSingleTy(SubTy)) { + if (const auto *CN = dyn_cast<const ConstantSDNode>(IdxV.getNode())) { + unsigned Idx = CN->getZExtValue(); + assert(Idx == 0 || Idx == VecTy.getVectorNumElements()/2); + unsigned SubIdx = (Idx == 0) ? Hexagon::vsub_lo : Hexagon::vsub_hi; + return DAG.getTargetInsertSubreg(SubIdx, dl, VecTy, VecV, SubV); + } + // If IdxV is not a constant, generate the two variants: with the + // SubV as the high and as the low subregister, and select the right + // pair based on the IdxV. 
+ SDValue InLo = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {SubV, V1}); + SDValue InHi = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {V0, SubV}); + return DAG.getNode(ISD::SELECT, dl, VecTy, PickHi, InHi, InLo); + } + // The subvector being inserted must be entirely contained in one of + // the vectors V0 or V1. Set SingleV to the correct one, and update + // IdxV to be the index relative to the beginning of that vector. + SDValue S = DAG.getNode(ISD::SUB, dl, MVT::i32, IdxV, HalfV); + IdxV = DAG.getNode(ISD::SELECT, dl, MVT::i32, PickHi, S, IdxV); + SingleV = DAG.getNode(ISD::SELECT, dl, SingleTy, PickHi, V1, V0); + } + + // The only meaningful subvectors of a single HVX vector are those that + // fit in a scalar register. + assert(SubTy.getSizeInBits() == 32 || SubTy.getSizeInBits() == 64); + // Convert IdxV to be index in bytes. + auto *IdxN = dyn_cast<ConstantSDNode>(IdxV.getNode()); + if (!IdxN || !IdxN->isNullValue()) { + IdxV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, + DAG.getConstant(ElemWidth/8, dl, MVT::i32)); + SingleV = DAG.getNode(HexagonISD::VROR, dl, SingleTy, SingleV, IdxV); + } + // When inserting a single word, the rotation back to the original position + // would be by HwLen-Idx, but if two words are inserted, it will need to be + // by (HwLen-4)-Idx. + unsigned RolBase = HwLen; + if (VecTy.getSizeInBits() == 32) { + SDValue V = DAG.getBitcast(MVT::i32, SubV); + SingleV = DAG.getNode(HexagonISD::VINSERTW0, dl, SingleTy, V); + } else { + SDValue V = DAG.getBitcast(MVT::i64, SubV); + SDValue R0 = DAG.getTargetExtractSubreg(Hexagon::isub_lo, dl, MVT::i32, V); + SDValue R1 = DAG.getTargetExtractSubreg(Hexagon::isub_hi, dl, MVT::i32, V); + SingleV = DAG.getNode(HexagonISD::VINSERTW0, dl, SingleTy, SingleV, R0); + SingleV = DAG.getNode(HexagonISD::VROR, dl, SingleTy, SingleV, + DAG.getConstant(4, dl, MVT::i32)); + SingleV = DAG.getNode(HexagonISD::VINSERTW0, dl, SingleTy, SingleV, R1); + RolBase = HwLen-4; + } + // If the vector wasn't ror'ed, don't ror it back. + if (RolBase != 4 || !IdxN || !IdxN->isNullValue()) { + SDValue RolV = DAG.getNode(ISD::SUB, dl, MVT::i32, + DAG.getConstant(RolBase, dl, MVT::i32), IdxV); + SingleV = DAG.getNode(HexagonISD::VROR, dl, SingleTy, SingleV, RolV); + } + + if (IsPair) { + SDValue InLo = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {SingleV, V1}); + SDValue InHi = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {V0, SingleV}); + return DAG.getNode(ISD::SELECT, dl, VecTy, PickHi, InHi, InLo); + } + return SingleV; +} + +SDValue +HexagonTargetLowering::insertHvxSubvectorPred(SDValue VecV, SDValue SubV, + SDValue IdxV, const SDLoc &dl, SelectionDAG &DAG) const { + MVT VecTy = ty(VecV); + MVT SubTy = ty(SubV); + assert(Subtarget.isHVXVectorType(VecTy, true)); + // VecV is an HVX vector predicate. SubV may be either an HVX vector + // predicate as well, or it can be a scalar predicate. 
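[Editor's note: illustration only, not part of this commit. A byte-level model of the function below: scale the indices, rotate the target so the insertion point sits at byte 0, overwrite the first BlockLen bytes (which the real code does with a vmux against a Q(BlockLen) mask), then rotate back. The rotate direction is an assumption of the sketch.]

#include <cassert>
#include <cstdint>
#include <vector>

// rot(V, N): the byte at index N moves to index 0 (assumed direction).
static std::vector<uint8_t> rot(const std::vector<uint8_t> &V, unsigned N) {
  unsigned Len = V.size();
  std::vector<uint8_t> R(Len);
  for (unsigned i = 0; i != Len; ++i)
    R[i] = V[(i + N) % Len];
  return R;
}

int main() {
  unsigned HwLen = 16, VecLen = 8, SubLen = 4, Idx = 4;  // toy sizes
  unsigned BitBytes = HwLen / VecLen;      // 2 bytes per target i1
  unsigned Scale = VecLen / SubLen;        // 2
  unsigned BlockLen = HwLen / Scale;       // 8 bytes covered by the subvector
  unsigned ByteIdx = Idx * BitBytes;       // 8

  std::vector<uint8_t> Vec(HwLen, 0x11), Sub(HwLen, 0x00);
  for (unsigned i = 0; i != BlockLen; ++i)
    Sub[i] = 0xAA;                         // prefix form: payload up front

  std::vector<uint8_t> R = rot(Vec, ByteIdx);
  for (unsigned i = 0; i != BlockLen; ++i)
    R[i] = Sub[i];                         // the vmux step
  R = rot(R, HwLen - ByteIdx);             // rotate back

  for (unsigned i = 0; i != HwLen; ++i)
    assert(R[i] == ((i >= ByteIdx && i < ByteIdx + BlockLen) ? 0xAA : 0x11));
  return 0;
}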
+ + unsigned VecLen = VecTy.getVectorNumElements(); + unsigned HwLen = Subtarget.getVectorLength(); + assert(HwLen % VecLen == 0 && "Unexpected vector type"); + + unsigned Scale = VecLen / SubTy.getVectorNumElements(); + unsigned BitBytes = HwLen / VecLen; + unsigned BlockLen = HwLen / Scale; + + MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen); + SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV); + SDValue ByteSub = createHvxPrefixPred(SubV, dl, BitBytes, false, DAG); + SDValue ByteIdx; + + auto *IdxN = dyn_cast<ConstantSDNode>(IdxV.getNode()); + if (!IdxN || !IdxN->isNullValue()) { + ByteIdx = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, + DAG.getConstant(BitBytes, dl, MVT::i32)); + ByteVec = DAG.getNode(HexagonISD::VROR, dl, ByteTy, ByteVec, ByteIdx); + } + + // ByteVec is the target vector VecV rotated in such a way that the + // subvector should be inserted at index 0. Generate a predicate mask + // and use vmux to do the insertion. + MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen); + SDValue Q = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy, + {DAG.getConstant(BlockLen, dl, MVT::i32)}, DAG); + ByteVec = getInstr(Hexagon::V6_vmux, dl, ByteTy, {Q, ByteSub, ByteVec}, DAG); + // Rotate ByteVec back, and convert to a vector predicate. + if (!IdxN || !IdxN->isNullValue()) { + SDValue HwLenV = DAG.getConstant(HwLen, dl, MVT::i32); + SDValue ByteXdi = DAG.getNode(ISD::SUB, dl, MVT::i32, HwLenV, ByteIdx); + ByteVec = DAG.getNode(HexagonISD::VROR, dl, ByteTy, ByteVec, ByteXdi); + } + return DAG.getNode(HexagonISD::V2Q, dl, VecTy, ByteVec); +} + +SDValue +HexagonTargetLowering::extendHvxVectorPred(SDValue VecV, const SDLoc &dl, + MVT ResTy, bool ZeroExt, SelectionDAG &DAG) const { + // Sign- and any-extending of a vector predicate to a vector register is + // equivalent to Q2V. For zero-extensions, generate a vmux between 0 and + // a vector of 1s (where the 1s are of type matching the vector type). + assert(Subtarget.isHVXVectorType(ResTy)); + if (!ZeroExt) + return DAG.getNode(HexagonISD::Q2V, dl, ResTy, VecV); + + assert(ty(VecV).getVectorNumElements() == ResTy.getVectorNumElements()); + SDValue True = DAG.getNode(HexagonISD::VSPLAT, dl, ResTy, + DAG.getConstant(1, dl, MVT::i32)); + SDValue False = getZero(dl, ResTy, DAG); + return DAG.getSelect(dl, ResTy, VecV, True, False); +} + +SDValue +HexagonTargetLowering::LowerHvxBuildVector(SDValue Op, SelectionDAG &DAG) + const { + const SDLoc &dl(Op); + MVT VecTy = ty(Op); + + unsigned Size = Op.getNumOperands(); + SmallVector<SDValue,128> Ops; + for (unsigned i = 0; i != Size; ++i) + Ops.push_back(Op.getOperand(i)); + + if (VecTy.getVectorElementType() == MVT::i1) + return buildHvxVectorPred(Ops, dl, VecTy, DAG); + + if (VecTy.getSizeInBits() == 16*Subtarget.getVectorLength()) { + ArrayRef<SDValue> A(Ops); + MVT SingleTy = typeSplit(VecTy).first; + SDValue V0 = buildHvxVectorReg(A.take_front(Size/2), dl, SingleTy, DAG); + SDValue V1 = buildHvxVectorReg(A.drop_front(Size/2), dl, SingleTy, DAG); + return DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, V0, V1); + } + + return buildHvxVectorReg(Ops, dl, VecTy, DAG); +} + +SDValue +HexagonTargetLowering::LowerHvxConcatVectors(SDValue Op, SelectionDAG &DAG) + const { + // Vector concatenation of two integer (non-bool) vectors does not need + // special lowering. Custom-lower concats of bool vectors and expand + // concats of more than 2 vectors. 
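[Editor's note: illustration only, not part of this commit. A byte-level model of the rotate/OR accumulation at the end of this function for scalar-predicate operands: each operand is first turned into a "prefix" with its payload in the low S bytes, then the accumulator is rotated one block and ORed with the next prefix, from the last operand back to the first. The rotate direction is an assumption of the sketch.]

#include <cassert>
#include <cstdint>
#include <vector>

// rot(V, N) moves the byte at index 0 to index N (assumed direction).
static std::vector<uint8_t> rot(const std::vector<uint8_t> &V, unsigned N) {
  unsigned Len = V.size();
  std::vector<uint8_t> R(Len);
  for (unsigned i = 0; i != Len; ++i)
    R[(i + N) % Len] = V[i];
  return R;
}

int main() {
  unsigned HwLen = 16, NumOp = 4;
  unsigned S = HwLen / NumOp;              // bytes per operand block
  std::vector<std::vector<uint8_t>> Prefixes(
      NumOp, std::vector<uint8_t>(HwLen, 0));
  for (unsigned j = 0; j != NumOp; ++j)
    for (unsigned b = 0; b != S; ++b)
      Prefixes[j][b] = uint8_t(0x10 * (j + 1));  // payload in bytes [0, S)

  std::vector<uint8_t> Res(HwLen, 0);
  for (unsigned i = 0; i != NumOp; ++i) {  // last prefix first, as below
    Res = rot(Res, S);
    for (unsigned b = 0; b != HwLen; ++b)
      Res[b] |= Prefixes[NumOp - i - 1][b];
  }

  for (unsigned b = 0; b != HwLen; ++b)
    assert(Res[b] == 0x10 * (b / S + 1));  // operand j at block [j*S, j*S+S)
  return 0;
}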
+ MVT VecTy = ty(Op); + const SDLoc &dl(Op); + unsigned NumOp = Op.getNumOperands(); + if (VecTy.getVectorElementType() != MVT::i1) { + if (NumOp == 2) + return Op; + // Expand the other cases into a build-vector. + SmallVector<SDValue,8> Elems; + for (SDValue V : Op.getNode()->ops()) + DAG.ExtractVectorElements(V, Elems); + // A vector of i16 will be broken up into a build_vector of i16's. + // This is a problem, since at the time of operation legalization, + // all operations are expected to be type-legalized, and i16 is not + // a legal type. If any of the extracted elements is not of a valid + // type, sign-extend it to a valid one. + for (unsigned i = 0, e = Elems.size(); i != e; ++i) { + SDValue V = Elems[i]; + MVT Ty = ty(V); + if (!isTypeLegal(Ty)) { + EVT NTy = getTypeToTransformTo(*DAG.getContext(), Ty); + if (V.getOpcode() == ISD::EXTRACT_VECTOR_ELT) { + Elems[i] = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NTy, + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NTy, + V.getOperand(0), V.getOperand(1)), + DAG.getValueType(Ty)); + continue; + } + // A few less complicated cases. + if (V.getOpcode() == ISD::Constant) + Elems[i] = DAG.getSExtOrTrunc(V, dl, NTy); + else if (V.isUndef()) + Elems[i] = DAG.getUNDEF(NTy); + else + llvm_unreachable("Unexpected vector element"); + } + } + return DAG.getBuildVector(VecTy, dl, Elems); + } + + assert(VecTy.getVectorElementType() == MVT::i1); + unsigned HwLen = Subtarget.getVectorLength(); + assert(isPowerOf2_32(NumOp) && HwLen % NumOp == 0); + + SDValue Op0 = Op.getOperand(0); + + // If the operands are HVX types (i.e. not scalar predicates), then + // defer the concatenation, and create QCAT instead. + if (Subtarget.isHVXVectorType(ty(Op0), true)) { + if (NumOp == 2) + return DAG.getNode(HexagonISD::QCAT, dl, VecTy, Op0, Op.getOperand(1)); + + ArrayRef<SDUse> U(Op.getNode()->ops()); + SmallVector<SDValue,4> SV(U.begin(), U.end()); + ArrayRef<SDValue> Ops(SV); + + MVT HalfTy = typeSplit(VecTy).first; + SDValue V0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfTy, + Ops.take_front(NumOp/2)); + SDValue V1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfTy, + Ops.take_back(NumOp/2)); + return DAG.getNode(HexagonISD::QCAT, dl, VecTy, V0, V1); + } + + // Count how many bytes (in a vector register) each bit in VecTy + // corresponds to. + unsigned BitBytes = HwLen / VecTy.getVectorNumElements(); + + SmallVector<SDValue,8> Prefixes; + for (SDValue V : Op.getNode()->op_values()) { + SDValue P = createHvxPrefixPred(V, dl, BitBytes, true, DAG); + Prefixes.push_back(P); + } + + unsigned InpLen = ty(Op.getOperand(0)).getVectorNumElements(); + MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen); + SDValue S = DAG.getConstant(InpLen*BitBytes, dl, MVT::i32); + SDValue Res = getZero(dl, ByteTy, DAG); + for (unsigned i = 0, e = Prefixes.size(); i != e; ++i) { + Res = DAG.getNode(HexagonISD::VROR, dl, ByteTy, Res, S); + Res = DAG.getNode(ISD::OR, dl, ByteTy, Res, Prefixes[e-i-1]); + } + return DAG.getNode(HexagonISD::V2Q, dl, VecTy, Res); +} + +SDValue +HexagonTargetLowering::LowerHvxExtractElement(SDValue Op, SelectionDAG &DAG) + const { + // Change the type of the extracted element to i32. 
+ SDValue VecV = Op.getOperand(0); + MVT ElemTy = ty(VecV).getVectorElementType(); + const SDLoc &dl(Op); + SDValue IdxV = Op.getOperand(1); + if (ElemTy == MVT::i1) + return extractHvxElementPred(VecV, IdxV, dl, ty(Op), DAG); + + return extractHvxElementReg(VecV, IdxV, dl, ty(Op), DAG); +} + +SDValue +HexagonTargetLowering::LowerHvxInsertElement(SDValue Op, SelectionDAG &DAG) + const { + const SDLoc &dl(Op); + SDValue VecV = Op.getOperand(0); + SDValue ValV = Op.getOperand(1); + SDValue IdxV = Op.getOperand(2); + MVT ElemTy = ty(VecV).getVectorElementType(); + if (ElemTy == MVT::i1) + return insertHvxElementPred(VecV, IdxV, ValV, dl, DAG); + + return insertHvxElementReg(VecV, IdxV, ValV, dl, DAG); +} + +SDValue HexagonTargetLowering::LowerHvxExtractSubvector(SDValue Op, SelectionDAG &DAG) const { SDValue SrcV = Op.getOperand(0); MVT SrcTy = ty(SrcV); - unsigned SrcElems = SrcTy.getVectorNumElements(); + MVT DstTy = ty(Op); SDValue IdxV = Op.getOperand(1); unsigned Idx = cast<ConstantSDNode>(IdxV.getNode())->getZExtValue(); - MVT DstTy = ty(Op); - assert(Idx == 0 || DstTy.getVectorNumElements() % Idx == 0); + assert(Idx % DstTy.getVectorNumElements() == 0); + (void)Idx; const SDLoc &dl(Op); - if (Idx == 0) - return DAG.getTargetExtractSubreg(Hexagon::vsub_lo, dl, DstTy, SrcV); - if (Idx == SrcElems/2) - return DAG.getTargetExtractSubreg(Hexagon::vsub_hi, dl, DstTy, SrcV); - return SDValue(); + + MVT ElemTy = SrcTy.getVectorElementType(); + if (ElemTy == MVT::i1) + return extractHvxSubvectorPred(SrcV, IdxV, dl, DstTy, DAG); + + return extractHvxSubvectorReg(SrcV, IdxV, dl, DstTy, DAG); } SDValue HexagonTargetLowering::LowerHvxInsertSubvector(SDValue Op, SelectionDAG &DAG) const { - // Idx may be variable. + // Idx does not need to be a constant. + SDValue VecV = Op.getOperand(0); + SDValue ValV = Op.getOperand(1); SDValue IdxV = Op.getOperand(2); - auto *IdxN = dyn_cast<ConstantSDNode>(IdxV.getNode()); - if (!IdxN) - return SDValue(); - unsigned Idx = IdxN->getZExtValue(); - SDValue DstV = Op.getOperand(0); - SDValue SrcV = Op.getOperand(1); - MVT DstTy = ty(DstV); - MVT SrcTy = ty(SrcV); - unsigned DstElems = DstTy.getVectorNumElements(); - unsigned SrcElems = SrcTy.getVectorNumElements(); - if (2*SrcElems != DstElems) - return SDValue(); + const SDLoc &dl(Op); + MVT VecTy = ty(VecV); + MVT ElemTy = VecTy.getVectorElementType(); + if (ElemTy == MVT::i1) + return insertHvxSubvectorPred(VecV, ValV, IdxV, dl, DAG); + + return insertHvxSubvectorReg(VecV, ValV, IdxV, dl, DAG); +} + +SDValue +HexagonTargetLowering::LowerHvxAnyExt(SDValue Op, SelectionDAG &DAG) const { + // Lower any-extends of boolean vectors to sign-extends, since they + // translate directly to Q2V. Zero-extending could also be done equally + // fast, but Q2V is used/recognized in more places. + // For all other vectors, use zero-extend. 
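[Editor's note: illustration only, not part of this commit. Per-lane arithmetic behind the comment above: Q2V turns a true predicate bit into an all-ones element, which is exactly i1 sign-extension, so sign- and any-extends need no further work, while zero-extension must select between splat(1) and zero.]

#include <cassert>
#include <cstdint>

int main() {
  for (int Bit = 0; Bit != 2; ++Bit) {
    int8_t Q2V = Bit ? int8_t(0xFF) : int8_t(0);  // all-ones per true lane
    int8_t SExt = int8_t(-Bit);                   // i1 sign-extended to i8
    assert(Q2V == SExt);              // sext/anyext: Q2V alone suffices
    int8_t ZExt = Bit ? int8_t(1) : int8_t(0);    // select(Q, splat(1), 0)
    assert(Bit == 0 || ZExt != SExt); // 1 != -1, hence the extra select
  }
  return 0;
}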
+ MVT ResTy = ty(Op); + SDValue InpV = Op.getOperand(0); + MVT ElemTy = ty(InpV).getVectorElementType(); + if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(ResTy)) + return LowerHvxSignExt(Op, DAG); + return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Op), ResTy, InpV); +} + +SDValue +HexagonTargetLowering::LowerHvxSignExt(SDValue Op, SelectionDAG &DAG) const { + MVT ResTy = ty(Op); + SDValue InpV = Op.getOperand(0); + MVT ElemTy = ty(InpV).getVectorElementType(); + if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(ResTy)) + return extendHvxVectorPred(InpV, SDLoc(Op), ty(Op), false, DAG); + return Op; +} + +SDValue +HexagonTargetLowering::LowerHvxZeroExt(SDValue Op, SelectionDAG &DAG) const { + MVT ResTy = ty(Op); + SDValue InpV = Op.getOperand(0); + MVT ElemTy = ty(InpV).getVectorElementType(); + if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(ResTy)) + return extendHvxVectorPred(InpV, SDLoc(Op), ty(Op), true, DAG); + return Op; +} +SDValue +HexagonTargetLowering::LowerHvxCttz(SDValue Op, SelectionDAG &DAG) const { + // Lower vector CTTZ into a computation using CTLZ (Hacker's Delight): + // cttz(x) = bitwidth(x) - ctlz(~x & (x-1)) const SDLoc &dl(Op); - if (Idx == 0) - return DAG.getTargetInsertSubreg(Hexagon::vsub_lo, dl, DstTy, DstV, SrcV); - if (Idx == SrcElems) - return DAG.getTargetInsertSubreg(Hexagon::vsub_hi, dl, DstTy, DstV, SrcV); - return SDValue(); + MVT ResTy = ty(Op); + SDValue InpV = Op.getOperand(0); + assert(ResTy == ty(InpV)); + + // Calculate the vectors of 1 and bitwidth(x). + MVT ElemTy = ty(InpV).getVectorElementType(); + unsigned ElemWidth = ElemTy.getSizeInBits(); + // Using uint64_t because a shift by 32 can happen. + uint64_t Splat1 = 0, SplatW = 0; + assert(isPowerOf2_32(ElemWidth) && ElemWidth <= 32); + for (unsigned i = 0; i != 32/ElemWidth; ++i) { + Splat1 = (Splat1 << ElemWidth) | 1; + SplatW = (SplatW << ElemWidth) | ElemWidth; + } + SDValue Vec1 = DAG.getNode(HexagonISD::VSPLATW, dl, ResTy, + DAG.getConstant(uint32_t(Splat1), dl, MVT::i32)); + SDValue VecW = DAG.getNode(HexagonISD::VSPLATW, dl, ResTy, + DAG.getConstant(uint32_t(SplatW), dl, MVT::i32)); + SDValue VecN1 = DAG.getNode(HexagonISD::VSPLATW, dl, ResTy, + DAG.getConstant(-1, dl, MVT::i32)); + // Do not use DAG.getNOT, because that would create BUILD_VECTOR with + // a BITCAST. Here we can skip the BITCAST (so we don't have to handle + // it separately in custom combine or selection). + SDValue A = DAG.getNode(ISD::AND, dl, ResTy, + {DAG.getNode(ISD::XOR, dl, ResTy, {InpV, VecN1}), + DAG.getNode(ISD::SUB, dl, ResTy, {InpV, Vec1})}); + return DAG.getNode(ISD::SUB, dl, ResTy, + {VecW, DAG.getNode(ISD::CTLZ, dl, ResTy, A)}); } SDValue HexagonTargetLowering::LowerHvxMul(SDValue Op, SelectionDAG &DAG) const { MVT ResTy = ty(Op); - if (!ResTy.isVector()) - return SDValue(); + assert(ResTy.isVector() && isHvxSingleTy(ResTy)); const SDLoc &dl(Op); SmallVector<int,256> ShuffMask; @@ -423,18 +1205,14 @@ HexagonTargetLowering::LowerHvxMul(SDValue Op, SelectionDAG &DAG) const { SDValue Vt = Op.getOperand(1); switch (ElemTy.SimpleTy) { - case MVT::i8: - case MVT::i16: { + case MVT::i8: { // For i8 vectors Vs = (a0, a1, ...), Vt = (b0, b1, ...), // V6_vmpybv Vs, Vt produces a pair of i16 vectors Hi:Lo, // where Lo = (a0*b0, a2*b2, ...), Hi = (a1*b1, a3*b3, ...). - // For i16, use V6_vmpyhv, which behaves in an analogous way to - // V6_vmpybv: results Lo and Hi are products of even/odd elements - // respectively. MVT ExtTy = typeExtElem(ResTy, 2); unsigned MpyOpc = ElemTy == MVT::i8 ? 
Hexagon::V6_vmpybv : Hexagon::V6_vmpyhv; - SDValue M = getNode(MpyOpc, dl, ExtTy, {Vs, Vt}, DAG); + SDValue M = getInstr(MpyOpc, dl, ExtTy, {Vs, Vt}, DAG); // Discard high halves of the resulting values, collect the low halves. for (unsigned I = 0; I < VecLen; I += 2) { @@ -442,18 +1220,24 @@ HexagonTargetLowering::LowerHvxMul(SDValue Op, SelectionDAG &DAG) const { ShuffMask.push_back(I+VecLen); // Pick odd element. } VectorPair P = opSplit(opCastElem(M, ElemTy, DAG), dl, DAG); - return getByteShuffle(dl, P.first, P.second, ShuffMask, DAG); + SDValue BS = getByteShuffle(dl, P.first, P.second, ShuffMask, DAG); + return DAG.getBitcast(ResTy, BS); } + case MVT::i16: + // For i16 there is V6_vmpyih, which acts exactly like the MUL opcode. + // (There is also V6_vmpyhv, which behaves in an analogous way to + // V6_vmpybv.) + return getInstr(Hexagon::V6_vmpyih, dl, ResTy, {Vs, Vt}, DAG); case MVT::i32: { // Use the following sequence for signed word multiply: // T0 = V6_vmpyiowh Vs, Vt // T1 = V6_vaslw T0, 16 // T2 = V6_vmpyiewuh_acc T1, Vs, Vt SDValue S16 = DAG.getConstant(16, dl, MVT::i32); - SDValue T0 = getNode(Hexagon::V6_vmpyiowh, dl, ResTy, {Vs, Vt}, DAG); - SDValue T1 = getNode(Hexagon::V6_vaslw, dl, ResTy, {T0, S16}, DAG); - SDValue T2 = getNode(Hexagon::V6_vmpyiewuh_acc, dl, ResTy, - {T1, Vs, Vt}, DAG); + SDValue T0 = getInstr(Hexagon::V6_vmpyiowh, dl, ResTy, {Vs, Vt}, DAG); + SDValue T1 = getInstr(Hexagon::V6_vaslw, dl, ResTy, {T0, S16}, DAG); + SDValue T2 = getInstr(Hexagon::V6_vmpyiewuh_acc, dl, ResTy, + {T1, Vs, Vt}, DAG); return T2; } default: @@ -463,78 +1247,109 @@ HexagonTargetLowering::LowerHvxMul(SDValue Op, SelectionDAG &DAG) const { } SDValue -HexagonTargetLowering::LowerHvxSetCC(SDValue Op, SelectionDAG &DAG) const { - MVT VecTy = ty(Op.getOperand(0)); - assert(VecTy == ty(Op.getOperand(1))); - - SDValue Cmp = Op.getOperand(2); - ISD::CondCode CC = cast<CondCodeSDNode>(Cmp)->get(); - bool Negate = false, Swap = false; - - // HVX has instructions for SETEQ, SETGT, SETUGT. The other comparisons - // can be arranged as operand-swapped/negated versions of these. Since - // the generated code will have the original CC expressed as - // (negate (swap-op NewCmp)), - // the condition code for the NewCmp should be calculated from the original - // CC by applying these operations in the reverse order. - // - // This could also be done through setCondCodeAction, but for negation it - // uses a xor with a vector of -1s, which it obtains from BUILD_VECTOR. - // That is far too expensive for what can be done with a single instruction. - - switch (CC) { - case ISD::SETNE: // !eq - case ISD::SETLE: // !gt - case ISD::SETGE: // !lt - case ISD::SETULE: // !ugt - case ISD::SETUGE: // !ult - CC = ISD::getSetCCInverse(CC, true); - Negate = true; - break; - default: - break; +HexagonTargetLowering::LowerHvxMulh(SDValue Op, SelectionDAG &DAG) const { + MVT ResTy = ty(Op); + assert(ResTy.isVector()); + const SDLoc &dl(Op); + SmallVector<int,256> ShuffMask; + + MVT ElemTy = ResTy.getVectorElementType(); + unsigned VecLen = ResTy.getVectorNumElements(); + SDValue Vs = Op.getOperand(0); + SDValue Vt = Op.getOperand(1); + bool IsSigned = Op.getOpcode() == ISD::MULHS; + + if (ElemTy == MVT::i8 || ElemTy == MVT::i16) { + // For i8 vectors Vs = (a0, a1, ...), Vt = (b0, b1, ...), + // V6_vmpybv Vs, Vt produces a pair of i16 vectors Hi:Lo, + // where Lo = (a0*b0, a2*b2, ...), Hi = (a1*b1, a3*b3, ...). 
+ // For i16, use V6_vmpyhv, which behaves in an analogous way to + // V6_vmpybv: results Lo and Hi are products of even/odd elements + // respectively. + MVT ExtTy = typeExtElem(ResTy, 2); + unsigned MpyOpc = ElemTy == MVT::i8 + ? (IsSigned ? Hexagon::V6_vmpybv : Hexagon::V6_vmpyubv) + : (IsSigned ? Hexagon::V6_vmpyhv : Hexagon::V6_vmpyuhv); + SDValue M = getInstr(MpyOpc, dl, ExtTy, {Vs, Vt}, DAG); + + // Discard low halves of the resulting values, collect the high halves. + for (unsigned I = 0; I < VecLen; I += 2) { + ShuffMask.push_back(I+1); // Pick even element. + ShuffMask.push_back(I+VecLen+1); // Pick odd element. + } + VectorPair P = opSplit(opCastElem(M, ElemTy, DAG), dl, DAG); + SDValue BS = getByteShuffle(dl, P.first, P.second, ShuffMask, DAG); + return DAG.getBitcast(ResTy, BS); } - switch (CC) { - case ISD::SETLT: // swap gt - case ISD::SETULT: // swap ugt - CC = ISD::getSetCCSwappedOperands(CC); - Swap = true; - break; - default: - break; + assert(ElemTy == MVT::i32); + SDValue S16 = DAG.getConstant(16, dl, MVT::i32); + + if (IsSigned) { + // mulhs(Vs,Vt) = + // = [(Hi(Vs)*2^16 + Lo(Vs)) *s (Hi(Vt)*2^16 + Lo(Vt))] >> 32 + // = [Hi(Vs)*2^16 *s Hi(Vt)*2^16 + Hi(Vs) *su Lo(Vt)*2^16 + // + Lo(Vs) *us (Hi(Vt)*2^16 + Lo(Vt))] >> 32 + // = [Hi(Vs) *s Hi(Vt)*2^32 + Hi(Vs) *su Lo(Vt)*2^16 + // + Lo(Vs) *us Vt] >> 32 + // The low half of Lo(Vs)*Lo(Vt) will be discarded (it's not added to + // anything, so it cannot produce any carry over to higher bits), + // so everything in [] can be shifted by 16 without loss of precision. + // = [Hi(Vs) *s Hi(Vt)*2^16 + Hi(Vs)*su Lo(Vt) + Lo(Vs)*Vt >> 16] >> 16 + // = [Hi(Vs) *s Hi(Vt)*2^16 + Hi(Vs)*su Lo(Vt) + V6_vmpyewuh(Vs,Vt)] >> 16 + // Denote Hi(Vs) = Vs': + // = [Vs'*s Hi(Vt)*2^16 + Vs' *su Lo(Vt) + V6_vmpyewuh(Vt,Vs)] >> 16 + // = Vs'*s Hi(Vt) + (V6_vmpyiewuh(Vs',Vt) + V6_vmpyewuh(Vt,Vs)) >> 16 + SDValue T0 = getInstr(Hexagon::V6_vmpyewuh, dl, ResTy, {Vt, Vs}, DAG); + // Get Vs': + SDValue S0 = getInstr(Hexagon::V6_vasrw, dl, ResTy, {Vs, S16}, DAG); + SDValue T1 = getInstr(Hexagon::V6_vmpyiewuh_acc, dl, ResTy, + {T0, S0, Vt}, DAG); + // Shift by 16: + SDValue S2 = getInstr(Hexagon::V6_vasrw, dl, ResTy, {T1, S16}, DAG); + // Get Vs'*Hi(Vt): + SDValue T2 = getInstr(Hexagon::V6_vmpyiowh, dl, ResTy, {S0, Vt}, DAG); + // Add: + SDValue T3 = DAG.getNode(ISD::ADD, dl, ResTy, {S2, T2}); + return T3; } - assert(CC == ISD::SETEQ || CC == ISD::SETGT || CC == ISD::SETUGT); + // Unsigned mulhw. (Would expansion using signed mulhw be better?) 
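[Editor's note: illustration only, not part of this commit. A scalar model of the halfword decomposition that the unsigned path below implements with V6_vmpyuhv, V6_vdelta and V6_vadduhw; variable names are the sketch's own. It also shows why every intermediate fits in 32 bits: the discarded low half of Al*Bl cannot produce a carry, and the mixed products are summed halfword-wise before being widened.]

#include <cassert>
#include <cstdint>

static uint32_t mulhu32(uint32_t A, uint32_t B) {
  uint32_t Al = A & 0xFFFF, Ah = A >> 16;
  uint32_t Bl = B & 0xFFFF, Bh = B >> 16;
  uint32_t T1 = (Al * Bl) >> 16;           // low product, low half dropped
  uint32_t X0 = Al * Bh, X1 = Ah * Bl;     // the two mixed products
  uint32_t LoSum = (X0 & 0xFFFF) + (X1 & 0xFFFF);  // halfword-wise sums
  uint32_t HiSum = (X0 >> 16) + (X1 >> 16);        // cannot overflow a word
  uint32_t Carry = (T1 + LoSum) >> 16;     // carries out of bit 15
  return Ah * Bh + HiSum + Carry;
}

int main() {
  uint32_t Tests[][2] = {{0xFFFFFFFFu, 0xFFFFFFFFu},
                         {0xDEADBEEFu, 0x12345678u},
                         {0x00010000u, 0x00010000u},
                         {7u, 9u}};
  for (auto &T : Tests)
    assert(mulhu32(T[0], T[1]) == uint32_t((uint64_t(T[0]) * T[1]) >> 32));
  return 0;
}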
- MVT ElemTy = VecTy.getVectorElementType(); - unsigned ElemWidth = ElemTy.getSizeInBits(); - assert(isPowerOf2_32(ElemWidth)); - - auto getIdx = [] (unsigned Code) { - static const unsigned Idx[] = { ISD::SETEQ, ISD::SETGT, ISD::SETUGT }; - for (unsigned I = 0, E = array_lengthof(Idx); I != E; ++I) - if (Code == Idx[I]) - return I; - llvm_unreachable("Unhandled CondCode"); + auto LoVec = [&DAG,ResTy,dl] (SDValue Pair) { + return DAG.getTargetExtractSubreg(Hexagon::vsub_lo, dl, ResTy, Pair); }; - - static unsigned OpcTable[3][3] = { - // SETEQ SETGT, SETUGT - /* Byte */ { Hexagon::V6_veqb, Hexagon::V6_vgtb, Hexagon::V6_vgtub }, - /* Half */ { Hexagon::V6_veqh, Hexagon::V6_vgth, Hexagon::V6_vgtuh }, - /* Word */ { Hexagon::V6_veqw, Hexagon::V6_vgtw, Hexagon::V6_vgtuw } + auto HiVec = [&DAG,ResTy,dl] (SDValue Pair) { + return DAG.getTargetExtractSubreg(Hexagon::vsub_hi, dl, ResTy, Pair); }; - unsigned CmpOpc = OpcTable[Log2_32(ElemWidth)-3][getIdx(CC)]; - - MVT ResTy = ty(Op); - const SDLoc &dl(Op); - SDValue OpL = Swap ? Op.getOperand(1) : Op.getOperand(0); - SDValue OpR = Swap ? Op.getOperand(0) : Op.getOperand(1); - SDValue CmpV = getNode(CmpOpc, dl, ResTy, {OpL, OpR}, DAG); - return Negate ? getNode(Hexagon::V6_pred_not, dl, ResTy, {CmpV}, DAG) - : CmpV; + MVT PairTy = typeJoin({ResTy, ResTy}); + SDValue P = getInstr(Hexagon::V6_lvsplatw, dl, ResTy, + {DAG.getConstant(0x02020202, dl, MVT::i32)}, DAG); + // Multiply-unsigned halfwords: + // LoVec = Vs.uh[2i] * Vt.uh[2i], + // HiVec = Vs.uh[2i+1] * Vt.uh[2i+1] + SDValue T0 = getInstr(Hexagon::V6_vmpyuhv, dl, PairTy, {Vs, Vt}, DAG); + // The low halves in the LoVec of the pair can be discarded. They are + // not added to anything (in the full-precision product), so they cannot + // produce a carry into the higher bits. + SDValue T1 = getInstr(Hexagon::V6_vlsrw, dl, ResTy, {LoVec(T0), S16}, DAG); + // Swap low and high halves in Vt, and do the halfword multiplication + // to get products Vs.uh[2i] * Vt.uh[2i+1] and Vs.uh[2i+1] * Vt.uh[2i]. + SDValue D0 = getInstr(Hexagon::V6_vdelta, dl, ResTy, {Vt, P}, DAG); + SDValue T2 = getInstr(Hexagon::V6_vmpyuhv, dl, PairTy, {Vs, D0}, DAG); + // T2 has mixed products of halfwords: Lo(Vt)*Hi(Vs) and Hi(Vt)*Lo(Vs). + // These products are words, but cannot be added directly because the + // sums could overflow. Add these products, by halfwords, where each sum + // of a pair of halfwords gives a word. + SDValue T3 = getInstr(Hexagon::V6_vadduhw, dl, PairTy, + {LoVec(T2), HiVec(T2)}, DAG); + // Add the high halfwords from the products of the low halfwords. 
+ SDValue T4 = DAG.getNode(ISD::ADD, dl, ResTy, {T1, LoVec(T3)}); + SDValue T5 = getInstr(Hexagon::V6_vlsrw, dl, ResTy, {T4, S16}, DAG); + SDValue T6 = DAG.getNode(ISD::ADD, dl, ResTy, {HiVec(T0), HiVec(T3)}); + SDValue T7 = DAG.getNode(ISD::ADD, dl, ResTy, {T5, T6}); + return T7; } SDValue @@ -543,3 +1358,163 @@ HexagonTargetLowering::LowerHvxExtend(SDValue Op, SelectionDAG &DAG) const { assert(Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG); return DAG.getZeroExtendVectorInReg(Op.getOperand(0), SDLoc(Op), ty(Op)); } + +SDValue +HexagonTargetLowering::LowerHvxShift(SDValue Op, SelectionDAG &DAG) const { + if (SDValue S = getVectorShiftByInt(Op, DAG)) + return S; + return Op; +} + +SDValue +HexagonTargetLowering::SplitHvxPairOp(SDValue Op, SelectionDAG &DAG) const { + assert(!Op.isMachineOpcode()); + SmallVector<SDValue,2> OpsL, OpsH; + const SDLoc &dl(Op); + + auto SplitVTNode = [&DAG,this] (const VTSDNode *N) { + MVT Ty = typeSplit(N->getVT().getSimpleVT()).first; + SDValue TV = DAG.getValueType(Ty); + return std::make_pair(TV, TV); + }; + + for (SDValue A : Op.getNode()->ops()) { + VectorPair P = Subtarget.isHVXVectorType(ty(A), true) + ? opSplit(A, dl, DAG) + : std::make_pair(A, A); + // Special case for type operand. + if (Op.getOpcode() == ISD::SIGN_EXTEND_INREG) { + if (const auto *N = dyn_cast<const VTSDNode>(A.getNode())) + P = SplitVTNode(N); + } + OpsL.push_back(P.first); + OpsH.push_back(P.second); + } + + MVT ResTy = ty(Op); + MVT HalfTy = typeSplit(ResTy).first; + SDValue L = DAG.getNode(Op.getOpcode(), dl, HalfTy, OpsL); + SDValue H = DAG.getNode(Op.getOpcode(), dl, HalfTy, OpsH); + SDValue S = DAG.getNode(ISD::CONCAT_VECTORS, dl, ResTy, L, H); + return S; +} + +SDValue +HexagonTargetLowering::SplitHvxMemOp(SDValue Op, SelectionDAG &DAG) const { + LSBaseSDNode *BN = cast<LSBaseSDNode>(Op.getNode()); + assert(BN->isUnindexed()); + MVT MemTy = BN->getMemoryVT().getSimpleVT(); + if (!isHvxPairTy(MemTy)) + return Op; + + const SDLoc &dl(Op); + unsigned HwLen = Subtarget.getVectorLength(); + MVT SingleTy = typeSplit(MemTy).first; + SDValue Chain = BN->getChain(); + SDValue Base0 = BN->getBasePtr(); + SDValue Base1 = DAG.getMemBasePlusOffset(Base0, HwLen, dl); + + MachineMemOperand *MOp0 = nullptr, *MOp1 = nullptr; + if (MachineMemOperand *MMO = BN->getMemOperand()) { + MachineFunction &MF = DAG.getMachineFunction(); + MOp0 = MF.getMachineMemOperand(MMO, 0, HwLen); + MOp1 = MF.getMachineMemOperand(MMO, HwLen, HwLen); + } + + unsigned MemOpc = BN->getOpcode(); + SDValue NewOp; + + if (MemOpc == ISD::LOAD) { + SDValue Load0 = DAG.getLoad(SingleTy, dl, Chain, Base0, MOp0); + SDValue Load1 = DAG.getLoad(SingleTy, dl, Chain, Base1, MOp1); + NewOp = DAG.getMergeValues( + { DAG.getNode(ISD::CONCAT_VECTORS, dl, MemTy, Load0, Load1), + DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + Load0.getValue(1), Load1.getValue(1)) }, dl); + } else { + assert(MemOpc == ISD::STORE); + VectorPair Vals = opSplit(cast<StoreSDNode>(Op)->getValue(), dl, DAG); + SDValue Store0 = DAG.getStore(Chain, dl, Vals.first, Base0, MOp0); + SDValue Store1 = DAG.getStore(Chain, dl, Vals.second, Base1, MOp1); + NewOp = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store0, Store1); + } + + return NewOp; +} + +SDValue +HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const { + unsigned Opc = Op.getOpcode(); + bool IsPairOp = isHvxPairTy(ty(Op)) || + llvm::any_of(Op.getNode()->ops(), [this] (SDValue V) { + return isHvxPairTy(ty(V)); + }); + + if (IsPairOp) { + switch (Opc) { + default: + break; + case 
ISD::LOAD: + case ISD::STORE: + return SplitHvxMemOp(Op, DAG); + case ISD::CTPOP: + case ISD::CTLZ: + case ISD::CTTZ: + case ISD::MUL: + case ISD::MULHS: + case ISD::MULHU: + case ISD::AND: + case ISD::OR: + case ISD::XOR: + case ISD::SRA: + case ISD::SHL: + case ISD::SRL: + case ISD::SETCC: + case ISD::VSELECT: + case ISD::SIGN_EXTEND_INREG: + return SplitHvxPairOp(Op, DAG); + } + } + + switch (Opc) { + default: + break; + case ISD::BUILD_VECTOR: return LowerHvxBuildVector(Op, DAG); + case ISD::CONCAT_VECTORS: return LowerHvxConcatVectors(Op, DAG); + case ISD::INSERT_SUBVECTOR: return LowerHvxInsertSubvector(Op, DAG); + case ISD::INSERT_VECTOR_ELT: return LowerHvxInsertElement(Op, DAG); + case ISD::EXTRACT_SUBVECTOR: return LowerHvxExtractSubvector(Op, DAG); + case ISD::EXTRACT_VECTOR_ELT: return LowerHvxExtractElement(Op, DAG); + + case ISD::ANY_EXTEND: return LowerHvxAnyExt(Op, DAG); + case ISD::SIGN_EXTEND: return LowerHvxSignExt(Op, DAG); + case ISD::ZERO_EXTEND: return LowerHvxZeroExt(Op, DAG); + case ISD::CTTZ: return LowerHvxCttz(Op, DAG); + case ISD::SRA: + case ISD::SHL: + case ISD::SRL: return LowerHvxShift(Op, DAG); + case ISD::MUL: return LowerHvxMul(Op, DAG); + case ISD::MULHS: + case ISD::MULHU: return LowerHvxMulh(Op, DAG); + case ISD::ANY_EXTEND_VECTOR_INREG: return LowerHvxExtend(Op, DAG); + case ISD::SETCC: + case ISD::INTRINSIC_VOID: return Op; + // Unaligned loads will be handled by the default lowering. + case ISD::LOAD: return SDValue(); + } +#ifndef NDEBUG + Op.dumpr(&DAG); +#endif + llvm_unreachable("Unhandled HVX operation"); +} + +bool +HexagonTargetLowering::isHvxOperation(SDValue Op) const { + // If the type of the result, or any operand type are HVX vector types, + // this is an HVX operation. + return Subtarget.isHVXVectorType(ty(Op), true) || + llvm::any_of(Op.getNode()->ops(), + [this] (SDValue V) { + return Subtarget.isHVXVectorType(ty(V), true); + }); +} diff --git a/lib/Target/Hexagon/HexagonInstrFormatsV60.td b/lib/Target/Hexagon/HexagonInstrFormatsV60.td index 14bda0e0107d..1347a655353f 100644 --- a/lib/Target/Hexagon/HexagonInstrFormatsV60.td +++ b/lib/Target/Hexagon/HexagonInstrFormatsV60.td @@ -19,4 +19,4 @@ class CVI_VA_Resource<dag outs, dag ins, string asmstr, list<dag> pattern = [], string cstr = "", InstrItinClass itin = CVI_VA> : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VA>, - OpcodeHexagon, Requires<[HasV60T, UseHVX]>; + OpcodeHexagon, Requires<[HasV60, UseHVX]>; diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp index b82a0157e81f..6019c7c5d024 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -34,7 +34,6 @@ #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetOpcodes.h" @@ -49,6 +48,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MachineValueType.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" @@ -134,7 +134,7 @@ static unsigned nonDbgMICount(MachineBasicBlock::const_instr_iterator MIB, MachineBasicBlock::const_instr_iterator MIE) { unsigned Count = 0; for (; MIB != MIE; ++MIB) { - if (!MIB->isDebugValue()) + if (!MIB->isDebugInstr()) ++Count; 
} return Count; @@ -144,9 +144,9 @@ static unsigned nonDbgMICount(MachineBasicBlock::const_instr_iterator MIB, /// On Hexagon, we have two instructions used to set-up the hardware loop /// (LOOP0, LOOP1) with corresponding endloop (ENDLOOP0, ENDLOOP1) instructions /// to indicate the end of a loop. -static MachineInstr *findLoopInstr(MachineBasicBlock *BB, unsigned EndLoopOp, - MachineBasicBlock *TargetBB, - SmallPtrSet<MachineBasicBlock *, 8> &Visited) { +MachineInstr *HexagonInstrInfo::findLoopInstr(MachineBasicBlock *BB, + unsigned EndLoopOp, MachineBasicBlock *TargetBB, + SmallPtrSet<MachineBasicBlock *, 8> &Visited) const { unsigned LOOPi; unsigned LOOPr; if (EndLoopOp == Hexagon::ENDLOOP0) { @@ -240,41 +240,41 @@ static bool isDuplexPairMatch(unsigned Ga, unsigned Gb) { unsigned HexagonInstrInfo::isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const { switch (MI.getOpcode()) { - default: - break; - case Hexagon::L2_loadri_io: - case Hexagon::L2_loadrd_io: - case Hexagon::V6_vL32b_ai: - case Hexagon::V6_vL32b_nt_ai: - case Hexagon::V6_vL32Ub_ai: - case Hexagon::LDriw_pred: - case Hexagon::LDriw_mod: - case Hexagon::PS_vloadrq_ai: - case Hexagon::PS_vloadrw_ai: - case Hexagon::PS_vloadrw_nt_ai: { - const MachineOperand OpFI = MI.getOperand(1); - if (!OpFI.isFI()) - return 0; - const MachineOperand OpOff = MI.getOperand(2); - if (!OpOff.isImm() || OpOff.getImm() != 0) - return 0; - FrameIndex = OpFI.getIndex(); - return MI.getOperand(0).getReg(); - } + default: + break; + case Hexagon::L2_loadri_io: + case Hexagon::L2_loadrd_io: + case Hexagon::V6_vL32b_ai: + case Hexagon::V6_vL32b_nt_ai: + case Hexagon::V6_vL32Ub_ai: + case Hexagon::LDriw_pred: + case Hexagon::LDriw_ctr: + case Hexagon::PS_vloadrq_ai: + case Hexagon::PS_vloadrw_ai: + case Hexagon::PS_vloadrw_nt_ai: { + const MachineOperand OpFI = MI.getOperand(1); + if (!OpFI.isFI()) + return 0; + const MachineOperand OpOff = MI.getOperand(2); + if (!OpOff.isImm() || OpOff.getImm() != 0) + return 0; + FrameIndex = OpFI.getIndex(); + return MI.getOperand(0).getReg(); + } - case Hexagon::L2_ploadrit_io: - case Hexagon::L2_ploadrif_io: - case Hexagon::L2_ploadrdt_io: - case Hexagon::L2_ploadrdf_io: { - const MachineOperand OpFI = MI.getOperand(2); - if (!OpFI.isFI()) - return 0; - const MachineOperand OpOff = MI.getOperand(3); - if (!OpOff.isImm() || OpOff.getImm() != 0) - return 0; - FrameIndex = OpFI.getIndex(); - return MI.getOperand(0).getReg(); - } + case Hexagon::L2_ploadrit_io: + case Hexagon::L2_ploadrif_io: + case Hexagon::L2_ploadrdt_io: + case Hexagon::L2_ploadrdf_io: { + const MachineOperand OpFI = MI.getOperand(2); + if (!OpFI.isFI()) + return 0; + const MachineOperand OpOff = MI.getOperand(3); + if (!OpOff.isImm() || OpOff.getImm() != 0) + return 0; + FrameIndex = OpFI.getIndex(); + return MI.getOperand(0).getReg(); + } } return 0; @@ -288,48 +288,84 @@ unsigned HexagonInstrInfo::isLoadFromStackSlot(const MachineInstr &MI, unsigned HexagonInstrInfo::isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const { switch (MI.getOpcode()) { - default: - break; - case Hexagon::S2_storerb_io: - case Hexagon::S2_storerh_io: - case Hexagon::S2_storeri_io: - case Hexagon::S2_storerd_io: - case Hexagon::V6_vS32b_ai: - case Hexagon::V6_vS32Ub_ai: - case Hexagon::STriw_pred: - case Hexagon::STriw_mod: - case Hexagon::PS_vstorerq_ai: - case Hexagon::PS_vstorerw_ai: { - const MachineOperand &OpFI = MI.getOperand(0); - if (!OpFI.isFI()) - return 0; - const MachineOperand &OpOff = MI.getOperand(1); - if (!OpOff.isImm() || 
OpOff.getImm() != 0) - return 0; - FrameIndex = OpFI.getIndex(); - return MI.getOperand(2).getReg(); + default: + break; + case Hexagon::S2_storerb_io: + case Hexagon::S2_storerh_io: + case Hexagon::S2_storeri_io: + case Hexagon::S2_storerd_io: + case Hexagon::V6_vS32b_ai: + case Hexagon::V6_vS32Ub_ai: + case Hexagon::STriw_pred: + case Hexagon::STriw_ctr: + case Hexagon::PS_vstorerq_ai: + case Hexagon::PS_vstorerw_ai: { + const MachineOperand &OpFI = MI.getOperand(0); + if (!OpFI.isFI()) + return 0; + const MachineOperand &OpOff = MI.getOperand(1); + if (!OpOff.isImm() || OpOff.getImm() != 0) + return 0; + FrameIndex = OpFI.getIndex(); + return MI.getOperand(2).getReg(); + } + + case Hexagon::S2_pstorerbt_io: + case Hexagon::S2_pstorerbf_io: + case Hexagon::S2_pstorerht_io: + case Hexagon::S2_pstorerhf_io: + case Hexagon::S2_pstorerit_io: + case Hexagon::S2_pstorerif_io: + case Hexagon::S2_pstorerdt_io: + case Hexagon::S2_pstorerdf_io: { + const MachineOperand &OpFI = MI.getOperand(1); + if (!OpFI.isFI()) + return 0; + const MachineOperand &OpOff = MI.getOperand(2); + if (!OpOff.isImm() || OpOff.getImm() != 0) + return 0; + FrameIndex = OpFI.getIndex(); + return MI.getOperand(3).getReg(); + } } - case Hexagon::S2_pstorerbt_io: - case Hexagon::S2_pstorerbf_io: - case Hexagon::S2_pstorerht_io: - case Hexagon::S2_pstorerhf_io: - case Hexagon::S2_pstorerit_io: - case Hexagon::S2_pstorerif_io: - case Hexagon::S2_pstorerdt_io: - case Hexagon::S2_pstorerdf_io: { - const MachineOperand &OpFI = MI.getOperand(1); - if (!OpFI.isFI()) - return 0; - const MachineOperand &OpOff = MI.getOperand(2); - if (!OpOff.isImm() || OpOff.getImm() != 0) - return 0; - FrameIndex = OpFI.getIndex(); - return MI.getOperand(3).getReg(); + return 0; +} + +/// This function checks if the instruction or bundle of instructions +/// has load from stack slot and returns frameindex and machine memory +/// operand of that instruction if true. +bool HexagonInstrInfo::hasLoadFromStackSlot(const MachineInstr &MI, + const MachineMemOperand *&MMO, + int &FrameIndex) const { + if (MI.isBundle()) { + const MachineBasicBlock *MBB = MI.getParent(); + MachineBasicBlock::const_instr_iterator MII = MI.getIterator(); + for (++MII; MII != MBB->instr_end() && MII->isInsideBundle(); ++MII) + if (TargetInstrInfo::hasLoadFromStackSlot(*MII, MMO, FrameIndex)) + return true; + return false; } + + return TargetInstrInfo::hasLoadFromStackSlot(MI, MMO, FrameIndex); +} + +/// This function checks if the instruction or bundle of instructions +/// has store to stack slot and returns frameindex and machine memory +/// operand of that instruction if true. 
+bool HexagonInstrInfo::hasStoreToStackSlot(const MachineInstr &MI, + const MachineMemOperand *&MMO, + int &FrameIndex) const { + if (MI.isBundle()) { + const MachineBasicBlock *MBB = MI.getParent(); + MachineBasicBlock::const_instr_iterator MII = MI.getIterator(); + for (++MII; MII != MBB->instr_end() && MII->isInsideBundle(); ++MII) + if (TargetInstrInfo::hasStoreToStackSlot(*MII, MMO, FrameIndex)) + return true; + return false; } - return 0; + return TargetInstrInfo::hasStoreToStackSlot(MI, MMO, FrameIndex); } /// This function can analyze one/two way branching only and should (mostly) be @@ -383,7 +419,7 @@ bool HexagonInstrInfo::analyzeBranch(MachineBasicBlock &MBB, I = MBB.instr_end(); --I; - while (I->isDebugValue()) { + while (I->isDebugInstr()) { if (I == MBB.instr_begin()) return false; --I; @@ -394,7 +430,7 @@ bool HexagonInstrInfo::analyzeBranch(MachineBasicBlock &MBB, // Delete the J2_jump if it's equivalent to a fall-through. if (AllowModify && JumpToBlock && MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) { - DEBUG(dbgs() << "\nErasing the jump to successor block\n";); + LLVM_DEBUG(dbgs() << "\nErasing the jump to successor block\n";); I->eraseFromParent(); I = MBB.instr_end(); if (I == MBB.instr_begin()) @@ -463,8 +499,8 @@ bool HexagonInstrInfo::analyzeBranch(MachineBasicBlock &MBB, Cond.push_back(LastInst->getOperand(1)); return false; } - DEBUG(dbgs() << "\nCant analyze " << printMBBReference(MBB) - << " with one jump\n";); + LLVM_DEBUG(dbgs() << "\nCant analyze " << printMBBReference(MBB) + << " with one jump\n";); // Otherwise, don't know what this is. return true; } @@ -511,8 +547,8 @@ bool HexagonInstrInfo::analyzeBranch(MachineBasicBlock &MBB, FBB = LastInst->getOperand(0).getMBB(); return false; } - DEBUG(dbgs() << "\nCant analyze " << printMBBReference(MBB) - << " with two jumps";); + LLVM_DEBUG(dbgs() << "\nCant analyze " << printMBBReference(MBB) + << " with two jumps";); // Otherwise, can't handle this. return true; } @@ -521,12 +557,12 @@ unsigned HexagonInstrInfo::removeBranch(MachineBasicBlock &MBB, int *BytesRemoved) const { assert(!BytesRemoved && "code size not handled"); - DEBUG(dbgs() << "\nRemoving branches out of " << printMBBReference(MBB)); + LLVM_DEBUG(dbgs() << "\nRemoving branches out of " << printMBBReference(MBB)); MachineBasicBlock::iterator I = MBB.end(); unsigned Count = 0; while (I != MBB.begin()) { --I; - if (I->isDebugValue()) + if (I->isDebugInstr()) continue; // Only removing branches from end of MBB. if (!I->isBranch()) @@ -593,7 +629,8 @@ unsigned HexagonInstrInfo::insertBranch(MachineBasicBlock &MBB, // (ins IntRegs:$src1, IntRegs:$src2, brtarget:$offset) // (ins IntRegs:$src1, u5Imm:$src2, brtarget:$offset) unsigned Flags1 = getUndefRegState(Cond[1].isUndef()); - DEBUG(dbgs() << "\nInserting NVJump for " << printMBBReference(MBB);); + LLVM_DEBUG(dbgs() << "\nInserting NVJump for " + << printMBBReference(MBB);); if (Cond[2].isReg()) { unsigned Flags2 = getUndefRegState(Cond[2].isUndef()); BuildMI(&MBB, DL, get(BccOpc)).addReg(Cond[1].getReg(), Flags1). 
@@ -864,7 +901,7 @@ void HexagonInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, .addFrameIndex(FI).addImm(0) .addReg(SrcReg, KillFlag).addMemOperand(MMO); } else if (Hexagon::ModRegsRegClass.hasSubClassEq(RC)) { - BuildMI(MBB, I, DL, get(Hexagon::STriw_mod)) + BuildMI(MBB, I, DL, get(Hexagon::STriw_ctr)) .addFrameIndex(FI).addImm(0) .addReg(SrcReg, KillFlag).addMemOperand(MMO); } else if (Hexagon::HvxQRRegClass.hasSubClassEq(RC)) { @@ -926,7 +963,7 @@ void HexagonInstrInfo::loadRegFromStackSlot( BuildMI(MBB, I, DL, get(Hexagon::LDriw_pred), DestReg) .addFrameIndex(FI).addImm(0).addMemOperand(MMO); } else if (Hexagon::ModRegsRegClass.hasSubClassEq(RC)) { - BuildMI(MBB, I, DL, get(Hexagon::LDriw_mod), DestReg) + BuildMI(MBB, I, DL, get(Hexagon::LDriw_ctr), DestReg) .addFrameIndex(FI).addImm(0).addMemOperand(MMO); } else if (Hexagon::HvxQRRegClass.hasSubClassEq(RC)) { BuildMI(MBB, I, DL, get(Hexagon::PS_vloadrq_ai), DestReg) @@ -980,6 +1017,20 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { DebugLoc DL = MI.getDebugLoc(); unsigned Opc = MI.getOpcode(); + auto RealCirc = [&](unsigned Opc, bool HasImm, unsigned MxOp) { + unsigned Mx = MI.getOperand(MxOp).getReg(); + unsigned CSx = (Mx == Hexagon::M0 ? Hexagon::CS0 : Hexagon::CS1); + BuildMI(MBB, MI, DL, get(Hexagon::A2_tfrrcr), CSx) + .add(MI.getOperand((HasImm ? 5 : 4))); + auto MIB = BuildMI(MBB, MI, DL, get(Opc)).add(MI.getOperand(0)) + .add(MI.getOperand(1)).add(MI.getOperand(2)).add(MI.getOperand(3)); + if (HasImm) + MIB.add(MI.getOperand(4)); + MIB.addReg(CSx, RegState::Implicit); + MBB.erase(MI); + return true; + }; + switch (Opc) { case TargetOpcode::COPY: { MachineOperand &MD = MI.getOperand(0); @@ -1088,6 +1139,28 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { MBB.erase(MI); return true; } + case Hexagon::PS_qtrue: { + BuildMI(MBB, MI, DL, get(Hexagon::V6_veqw), MI.getOperand(0).getReg()) + .addReg(Hexagon::V0, RegState::Undef) + .addReg(Hexagon::V0, RegState::Undef); + MBB.erase(MI); + return true; + } + case Hexagon::PS_qfalse: { + BuildMI(MBB, MI, DL, get(Hexagon::V6_vgtw), MI.getOperand(0).getReg()) + .addReg(Hexagon::V0, RegState::Undef) + .addReg(Hexagon::V0, RegState::Undef); + MBB.erase(MI); + return true; + } + case Hexagon::PS_vdd0: { + unsigned Vd = MI.getOperand(0).getReg(); + BuildMI(MBB, MI, DL, get(Hexagon::V6_vsubw_dv), Vd) + .addReg(Vd, RegState::Undef) + .addReg(Vd, RegState::Undef); + MBB.erase(MI); + return true; + } case Hexagon::PS_vmulw: { // Expand a 64-bit vector multiply into 2 32-bit scalar multiplies. 
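[Editor's note: illustration only, not part of this commit. Element-wise semantics of the expansion announced in the comment above: the 64-bit value is treated as two independent 32-bit lanes, and each lane keeps only the low 32 bits of its product, matching the two scalar multiplies emitted below.]

#include <cassert>
#include <cstdint>

static uint64_t vmulw(uint64_t A, uint64_t B) {
  uint32_t Lo = uint32_t(A) * uint32_t(B);             // lane 0
  uint32_t Hi = uint32_t(A >> 32) * uint32_t(B >> 32); // lane 1
  return (uint64_t(Hi) << 32) | Lo;
}

int main() {
  uint64_t A = 0x0000000300000005ull;   // lanes {3, 5}
  uint64_t B = 0x000000070000000Bull;   // lanes {7, 11}
  assert(vmulw(A, B) == 0x0000001500000037ull);  // lanes {21, 55}
  return 0;
}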
unsigned DstReg = MI.getOperand(0).getReg(); @@ -1344,6 +1417,50 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { MBB.erase(MI); return true; + case Hexagon::PS_loadrub_pci: + return RealCirc(Hexagon::L2_loadrub_pci, /*HasImm*/true, /*MxOp*/4); + case Hexagon::PS_loadrb_pci: + return RealCirc(Hexagon::L2_loadrb_pci, /*HasImm*/true, /*MxOp*/4); + case Hexagon::PS_loadruh_pci: + return RealCirc(Hexagon::L2_loadruh_pci, /*HasImm*/true, /*MxOp*/4); + case Hexagon::PS_loadrh_pci: + return RealCirc(Hexagon::L2_loadrh_pci, /*HasImm*/true, /*MxOp*/4); + case Hexagon::PS_loadri_pci: + return RealCirc(Hexagon::L2_loadri_pci, /*HasImm*/true, /*MxOp*/4); + case Hexagon::PS_loadrd_pci: + return RealCirc(Hexagon::L2_loadrd_pci, /*HasImm*/true, /*MxOp*/4); + case Hexagon::PS_loadrub_pcr: + return RealCirc(Hexagon::L2_loadrub_pcr, /*HasImm*/false, /*MxOp*/3); + case Hexagon::PS_loadrb_pcr: + return RealCirc(Hexagon::L2_loadrb_pcr, /*HasImm*/false, /*MxOp*/3); + case Hexagon::PS_loadruh_pcr: + return RealCirc(Hexagon::L2_loadruh_pcr, /*HasImm*/false, /*MxOp*/3); + case Hexagon::PS_loadrh_pcr: + return RealCirc(Hexagon::L2_loadrh_pcr, /*HasImm*/false, /*MxOp*/3); + case Hexagon::PS_loadri_pcr: + return RealCirc(Hexagon::L2_loadri_pcr, /*HasImm*/false, /*MxOp*/3); + case Hexagon::PS_loadrd_pcr: + return RealCirc(Hexagon::L2_loadrd_pcr, /*HasImm*/false, /*MxOp*/3); + case Hexagon::PS_storerb_pci: + return RealCirc(Hexagon::S2_storerb_pci, /*HasImm*/true, /*MxOp*/3); + case Hexagon::PS_storerh_pci: + return RealCirc(Hexagon::S2_storerh_pci, /*HasImm*/true, /*MxOp*/3); + case Hexagon::PS_storerf_pci: + return RealCirc(Hexagon::S2_storerf_pci, /*HasImm*/true, /*MxOp*/3); + case Hexagon::PS_storeri_pci: + return RealCirc(Hexagon::S2_storeri_pci, /*HasImm*/true, /*MxOp*/3); + case Hexagon::PS_storerd_pci: + return RealCirc(Hexagon::S2_storerd_pci, /*HasImm*/true, /*MxOp*/3); + case Hexagon::PS_storerb_pcr: + return RealCirc(Hexagon::S2_storerb_pcr, /*HasImm*/false, /*MxOp*/2); + case Hexagon::PS_storerh_pcr: + return RealCirc(Hexagon::S2_storerh_pcr, /*HasImm*/false, /*MxOp*/2); + case Hexagon::PS_storerf_pcr: + return RealCirc(Hexagon::S2_storerf_pcr, /*HasImm*/false, /*MxOp*/2); + case Hexagon::PS_storeri_pcr: + return RealCirc(Hexagon::S2_storeri_pcr, /*HasImm*/false, /*MxOp*/2); + case Hexagon::PS_storerd_pcr: + return RealCirc(Hexagon::S2_storerd_pcr, /*HasImm*/false, /*MxOp*/2); } return false; @@ -1393,7 +1510,7 @@ bool HexagonInstrInfo::PredicateInstruction( MachineInstr &MI, ArrayRef<MachineOperand> Cond) const { if (Cond.empty() || isNewValueJump(Cond[0].getImm()) || isEndLoopN(Cond[0].getImm())) { - DEBUG(dbgs() << "\nCannot predicate:"; MI.dump();); + LLVM_DEBUG(dbgs() << "\nCannot predicate:"; MI.dump();); return false; } int Opc = MI.getOpcode(); @@ -1483,7 +1600,7 @@ bool HexagonInstrInfo::isPredicable(const MachineInstr &MI) const { } // HVX loads are not predicable on v60, but are on v62. - if (!Subtarget.hasV62TOps()) { + if (!Subtarget.hasV62Ops()) { switch (MI.getOpcode()) { case Hexagon::V6_vL32b_ai: case Hexagon::V6_vL32b_pi: @@ -1518,7 +1635,7 @@ bool HexagonInstrInfo::isSchedulingBoundary(const MachineInstr &MI, // considered a scheduling hazard, which is wrong. It should be the actual // instruction preceding the dbg_value instruction(s), just like it is // when debug info is not present. - if (MI.isDebugValue()) + if (MI.isDebugInstr()) return false; // Throwing call is a boundary. 
@@ -1586,7 +1703,7 @@ HexagonInstrInfo::CreateTargetPostRAHazardRecognizer(
return TargetInstrInfo::CreateTargetPostRAHazardRecognizer(II, DAG);
}
-/// \brief For a comparison instruction, return the source registers in
+/// For a comparison instruction, return the source registers in
/// \p SrcReg and \p SrcReg2 if having two register operands, and the value it
/// compares against in CmpValue. Return true if the comparison instruction
/// can be analyzed.
@@ -1836,6 +1953,10 @@ bool HexagonInstrInfo::isAccumulator(const MachineInstr &MI) const {
return((F >> HexagonII::AccumulatorPos) & HexagonII::AccumulatorMask);
}
+bool HexagonInstrInfo::isBaseImmOffset(const MachineInstr &MI) const {
+  return getAddrMode(MI) == HexagonII::BaseImmOffset;
+}
+
bool HexagonInstrInfo::isComplex(const MachineInstr &MI) const {
return !isTC1(MI) && !isTC2Early(MI) && !MI.getDesc().mayLoad() &&
!MI.getDesc().mayStore() &&
@@ -2139,13 +2260,13 @@ bool HexagonInstrInfo::isLateInstrFeedsEarlyInstr(const MachineInstr &LRMI,
bool isLate = isLateResultInstr(LRMI);
bool isEarly = isEarlySourceInstr(ESMI);
- DEBUG(dbgs() << "V60" << (isLate ? "-LR " : " -- "));
- DEBUG(LRMI.dump());
- DEBUG(dbgs() << "V60" << (isEarly ? "-ES " : " -- "));
- DEBUG(ESMI.dump());
+ LLVM_DEBUG(dbgs() << "V60" << (isLate ? "-LR " : " -- "));
+ LLVM_DEBUG(LRMI.dump());
+ LLVM_DEBUG(dbgs() << "V60" << (isEarly ? "-ES " : " -- "));
+ LLVM_DEBUG(ESMI.dump());
if (isLate && isEarly) {
- DEBUG(dbgs() << "++Is Late Result feeding Early Source\n");
+ LLVM_DEBUG(dbgs() << "++Is Late Result feeding Early Source\n");
return true;
}
@@ -2472,6 +2593,13 @@ bool HexagonInstrInfo::isValidAutoIncImm(const EVT VT, int Offset) const {
case MVT::i16:
case MVT::i32:
case MVT::i64:
+ case MVT::f32:
+ case MVT::f64:
+ case MVT::v2i16:
+ case MVT::v2i32:
+ case MVT::v4i8:
+ case MVT::v4i16:
+ case MVT::v8i8:
return isInt<4>(Count);
// For HVX vectors the auto-inc is s3
case MVT::v64i8:
@@ -2599,8 +2727,8 @@ bool HexagonInstrInfo::isValidOffset(unsigned Opcode, int Offset,
// any size. Later pass knows how to handle it.
case Hexagon::STriw_pred:
case Hexagon::LDriw_pred:
- case Hexagon::STriw_mod:
- case Hexagon::LDriw_mod:
+ case Hexagon::STriw_ctr:
+ case Hexagon::LDriw_ctr:
return true;
case Hexagon::PS_fi:
@@ -2754,7 +2882,7 @@ bool HexagonInstrInfo::addLatencyToSchedule(const MachineInstr &MI1,
return false;
}
-/// \brief Get the base register and byte offset of a load/store instr.
+/// Get the base register and byte offset of a load/store instr.
bool HexagonInstrInfo::getMemOpBaseRegImmOfs(MachineInstr &LdSt,
unsigned &BaseReg, int64_t &Offset, const TargetRegisterInfo *TRI)
const {
@@ -2765,7 +2893,7 @@ bool HexagonInstrInfo::getMemOpBaseRegImmOfs(MachineInstr &LdSt,
return BaseReg != 0;
}
-/// \brief Can these instructions execute at the same time in a bundle.
+/// Can these instructions execute at the same time in a bundle.
bool HexagonInstrInfo::canExecuteInBundle(const MachineInstr &First,
const MachineInstr &Second) const {
if (Second.mayStore() && First.getOpcode() == Hexagon::S2_allocframe) {
@@ -2860,11 +2988,14 @@ bool HexagonInstrInfo::hasUncondBranch(const MachineBasicBlock *B)
bool HexagonInstrInfo::mayBeCurLoad(const MachineInstr &MI) const {
const uint64_t F = MI.getDesc().TSFlags;
return ((F >> HexagonII::mayCVLoadPos) & HexagonII::mayCVLoadMask) &&
- Subtarget.hasV60TOps();
+ Subtarget.hasV60Ops();
}
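// The isValidAutoIncImm hunk above extends the signed 4-bit auto-increment
// range to FP and short-vector types that also live in scalar registers. A
// standalone sketch of the rule as it reads from that switch (the scaling by
// access size is an assumption made for illustration, not quoted code):

#include <cstdint>

static bool fitsSigned(int64_t V, unsigned Bits) {
  return V >= -(int64_t(1) << (Bits - 1)) && V < (int64_t(1) << (Bits - 1));
}

// Scalar classes allow a signed 4-bit scaled count; HVX vectors only s3.
static bool isValidAutoInc(int64_t OffsetBytes, int64_t AccessBytes, bool IsHVX) {
  if (AccessBytes <= 0 || OffsetBytes % AccessBytes != 0)
    return false;
  return fitsSigned(OffsetBytes / AccessBytes, IsHVX ? 3 : 4);
}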
// Returns true if a ST insn can be promoted to a new-value store.
bool HexagonInstrInfo::mayBeNewStore(const MachineInstr &MI) const {
+ if (MI.mayStore() && !Subtarget.useNewValueStores())
+ return false;
+
const uint64_t F = MI.getDesc().TSFlags;
return (F >> HexagonII::mayNVStorePos) & HexagonII::mayNVStoreMask;
}
@@ -2917,10 +3048,29 @@ bool HexagonInstrInfo::predCanBeUsedAsDotNew(const MachineInstr &MI,
return false;
}
- // Hexagon Programmer's Reference says that decbin, memw_locked, and
- // memd_locked cannot be used as .new as well,
- // but we don't seem to have these instructions defined.
- return MI.getOpcode() != Hexagon::A4_tlbmatch;
+ // Instructions that produce a late predicate cannot be used as sources of
+ // dot-new.
+ switch (MI.getOpcode()) {
+ case Hexagon::A4_addp_c:
+ case Hexagon::A4_subp_c:
+ case Hexagon::A4_tlbmatch:
+ case Hexagon::A5_ACS:
+ case Hexagon::F2_sfinvsqrta:
+ case Hexagon::F2_sfrecipa:
+ case Hexagon::J2_endloop0:
+ case Hexagon::J2_endloop01:
+ case Hexagon::J2_ploop1si:
+ case Hexagon::J2_ploop1sr:
+ case Hexagon::J2_ploop2si:
+ case Hexagon::J2_ploop2sr:
+ case Hexagon::J2_ploop3si:
+ case Hexagon::J2_ploop3sr:
+ case Hexagon::S2_cabacdecbin:
+ case Hexagon::S2_storew_locked:
+ case Hexagon::S4_stored_locked:
+ return false;
+ }
+ return true;
}
bool HexagonInstrInfo::PredOpcodeHasJMP_c(unsigned Opcode) const {
@@ -3047,7 +3197,7 @@ SmallVector<MachineInstr*, 2> HexagonInstrInfo::getBranchingInstrs(
I = MBB.instr_end();
--I;
- while (I->isDebugValue()) {
+ while (I->isDebugInstr()) {
if (I == MBB.instr_begin())
return Jumpers;
--I;
@@ -3496,7 +3646,7 @@ int HexagonInstrInfo::getDotOldOp(const MachineInstr &MI) const {
assert(NewOp >= 0 &&
"Couldn't change new-value store to its old form.");
}
- if (Subtarget.hasV60TOps())
+ if (Subtarget.hasV60Ops())
return NewOp;
// Subtargets prior to V60 didn't support 'taken' forms of predicated jumps.
@@ -3893,9 +4043,9 @@ int HexagonInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo();
// Get DefIdx and UseIdx for super registers.
- MachineOperand DefMO = DefMI.getOperand(DefIdx);
+ const MachineOperand &DefMO = DefMI.getOperand(DefIdx);
- if (HRI.isPhysicalRegister(DefMO.getReg())) {
+ if (DefMO.isReg() && HRI.isPhysicalRegister(DefMO.getReg())) {
if (DefMO.isImplicit()) {
for (MCSuperRegIterator SR(DefMO.getReg(), &HRI); SR.isValid(); ++SR) {
int Idx = DefMI.findRegisterDefOperandIdx(*SR, false, false, &HRI);
@@ -3906,7 +4056,7 @@ int HexagonInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
}
}
- MachineOperand UseMO = UseMI.getOperand(UseIdx);
+ const MachineOperand &UseMO = UseMI.getOperand(UseIdx);
if (UseMO.isImplicit()) {
for (MCSuperRegIterator SR(UseMO.getReg(), &HRI); SR.isValid(); ++SR) {
int Idx = UseMI.findRegisterUseOperandIdx(*SR, false, &HRI);
@@ -4057,7 +4207,7 @@ bool HexagonInstrInfo::getPredReg(ArrayRef<MachineOperand> Cond,
return false;
assert(Cond.size() == 2);
if (isNewValueJump(Cond[0].getImm()) || Cond[1].isMBB()) {
- DEBUG(dbgs() << "No predregs for new-value jumps/endloop");
+ LLVM_DEBUG(dbgs() << "No predregs for new-value jumps/endloop");
return false;
}
PredReg = Cond[1].getReg();
@@ -4084,7 +4234,7 @@ short HexagonInstrInfo::getRegForm(const MachineInstr &MI) const {
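// The deny list added to predCanBeUsedAsDotNew above generalizes the old
// single-opcode check: any instruction whose predicate result is produced
// late in the cycle (carry ops, viterbi add-compare-select, hardware-loop
// setup and endloop, CABAC bin decode, locked stores) cannot feed a .new
// predicated consumer. A minimal model of the rule with a hypothetical
// opcode enum (the real code switches over Hexagon:: opcodes):

enum HexOpc { A5_ACS, J2_endloop0, S2_cabacdecbin, S2_storew_locked, A2_tfrt };

static bool producesLatePredicate(HexOpc Opc) {
  switch (Opc) {
  case A5_ACS:           // add-compare-select, predicate written late
  case J2_endloop0:      // hardware loop end
  case S2_cabacdecbin:   // CABAC bin decode
  case S2_storew_locked: // locked store; predicate reports success
    return true;
  default:
    return false;
  }
}

// use a constant extender, which requires another 4 bytes.
// For debug instructions and prolog labels, return 0.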
unsigned HexagonInstrInfo::getSize(const MachineInstr &MI) const { - if (MI.isDebugValue() || MI.isPosition()) + if (MI.isDebugInstr() || MI.isPosition()) return 0; unsigned Size = MI.getDesc().getSize(); @@ -4159,9 +4309,9 @@ void HexagonInstrInfo::immediateExtend(MachineInstr &MI) const { bool HexagonInstrInfo::invertAndChangeJumpTarget( MachineInstr &MI, MachineBasicBlock *NewTarget) const { - DEBUG(dbgs() << "\n[invertAndChangeJumpTarget] to " - << printMBBReference(*NewTarget); - MI.dump();); + LLVM_DEBUG(dbgs() << "\n[invertAndChangeJumpTarget] to " + << printMBBReference(*NewTarget); + MI.dump();); assert(MI.isBranch()); unsigned NewOpcode = getInvertedPredicatedOpcode(MI.getOpcode()); int TargetPos = MI.getNumOperands() - 1; @@ -4189,8 +4339,9 @@ void HexagonInstrInfo::genAllInsnTimingClasses(MachineFunction &MF) const { for (unsigned insn = TargetOpcode::GENERIC_OP_END+1; insn < Hexagon::INSTRUCTION_LIST_END; ++insn) { NewMI = BuildMI(B, I, DL, get(insn)); - DEBUG(dbgs() << "\n" << getName(NewMI->getOpcode()) << - " Class: " << NewMI->getDesc().getSchedClass()); + LLVM_DEBUG(dbgs() << "\n" + << getName(NewMI->getOpcode()) + << " Class: " << NewMI->getDesc().getSchedClass()); NewMI->eraseFromParent(); } /* --- The code above is used to generate complete set of Hexagon Insn --- */ @@ -4200,7 +4351,7 @@ void HexagonInstrInfo::genAllInsnTimingClasses(MachineFunction &MF) const { // p -> NotP // NotP -> P bool HexagonInstrInfo::reversePredSense(MachineInstr &MI) const { - DEBUG(dbgs() << "\nTrying to reverse pred. sense of:"; MI.dump()); + LLVM_DEBUG(dbgs() << "\nTrying to reverse pred. sense of:"; MI.dump()); MI.setDesc(get(getInvertedPredicatedOpcode(MI.getOpcode()))); return true; } diff --git a/lib/Target/Hexagon/HexagonInstrInfo.h b/lib/Target/Hexagon/HexagonInstrInfo.h index 4530d3b999cc..96b4ffaba02f 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.h +++ b/lib/Target/Hexagon/HexagonInstrInfo.h @@ -18,9 +18,9 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/ValueTypes.h" +#include "llvm/Support/MachineValueType.h" #include <cstdint> #include <vector> @@ -66,6 +66,20 @@ public: unsigned isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override; + /// Check if the instruction or the bundle of instructions has + /// load from stack slots. Return the frameindex and machine memory operand + /// if true. + bool hasLoadFromStackSlot(const MachineInstr &MI, + const MachineMemOperand *&MMO, + int &FrameIndex) const override; + + /// Check if the instruction or the bundle of instructions has + /// store to stack slots. Return the frameindex and machine memory operand + /// if true. + bool hasStoreToStackSlot(const MachineInstr &MI, + const MachineMemOperand *&MMO, + int &FrameIndex) const override; + /// Analyze the branching code at the end of MBB, returning /// true if it cannot be understood (e.g. it's a switch dispatch or isn't /// implemented for a target). Upon success, this returns false and returns @@ -122,8 +136,8 @@ public: bool analyzeLoop(MachineLoop &L, MachineInstr *&IndVarInst, MachineInstr *&CmpInst) const override; - /// Generate code to reduce the loop iteration by one and check if the loop is - /// finished. Return the value/register of the the new loop count. We need + /// Generate code to reduce the loop iteration by one and check if the loop + /// is finished. 
Return the value/register of the new loop count. We need /// this function when peeling off one or more iterations of a loop. This /// function assumes the nth iteration is peeled first. unsigned reduceLoopCount(MachineBasicBlock &MBB, @@ -201,7 +215,7 @@ public: /// anything was changed. bool expandPostRAPseudo(MachineInstr &MI) const override; - /// \brief Get the base register and byte offset of a load/store instr. + /// Get the base register and byte offset of a load/store instr. bool getMemOpBaseRegImmOfs(MachineInstr &LdSt, unsigned &BaseReg, int64_t &Offset, const TargetRegisterInfo *TRI) const override; @@ -332,7 +346,11 @@ public: /// HexagonInstrInfo specifics. unsigned createVR(MachineFunction *MF, MVT VT) const; + MachineInstr *findLoopInstr(MachineBasicBlock *BB, unsigned EndLoopOp, + MachineBasicBlock *TargetBB, + SmallPtrSet<MachineBasicBlock *, 8> &Visited) const; + bool isBaseImmOffset(const MachineInstr &MI) const; bool isAbsoluteSet(const MachineInstr &MI) const; bool isAccumulator(const MachineInstr &MI) const; bool isAddrModeWithOffset(const MachineInstr &MI) const; diff --git a/lib/Target/Hexagon/HexagonIntrinsics.td b/lib/Target/Hexagon/HexagonIntrinsics.td index 1df143de6e80..b25e316709c5 100644 --- a/lib/Target/Hexagon/HexagonIntrinsics.td +++ b/lib/Target/Hexagon/HexagonIntrinsics.td @@ -807,7 +807,6 @@ def : T_QII_pat<C2_muxii, int_hexagon_C2_muxii, s32_0ImmPred, s8_0ImmPred>; // Shift halfword def : T_R_pat<A2_aslh, int_hexagon_A2_aslh>; def : T_R_pat<A2_asrh, int_hexagon_A2_asrh>; -def : T_R_pat<A2_asrh, int_hexagon_SI_to_SXTHI_asrh>; // Sign/zero extend def : T_R_pat<A2_sxth, int_hexagon_A2_sxth>; @@ -1353,11 +1352,11 @@ class T_stb_pat <InstHexagon MI, Intrinsic IntID, PatLeaf Val> : Pat<(IntID I32:$Rs, Val:$Rt, I32:$Ru), (MI I32:$Rs, I32:$Ru, Val:$Rt)>; -def : T_stb_pat <S2_storerh_pbr, int_hexagon_brev_sth, I32>; -def : T_stb_pat <S2_storerb_pbr, int_hexagon_brev_stb, I32>; -def : T_stb_pat <S2_storeri_pbr, int_hexagon_brev_stw, I32>; -def : T_stb_pat <S2_storerf_pbr, int_hexagon_brev_sthhi, I32>; -def : T_stb_pat <S2_storerd_pbr, int_hexagon_brev_std, I64>; +def : T_stb_pat <S2_storerh_pbr, int_hexagon_S2_storerh_pbr, I32>; +def : T_stb_pat <S2_storerb_pbr, int_hexagon_S2_storerb_pbr, I32>; +def : T_stb_pat <S2_storeri_pbr, int_hexagon_S2_storeri_pbr, I32>; +def : T_stb_pat <S2_storerf_pbr, int_hexagon_S2_storerf_pbr, I32>; +def : T_stb_pat <S2_storerd_pbr, int_hexagon_S2_storerd_pbr, I64>; class T_stc_pat <InstHexagon MI, Intrinsic IntID, PatLeaf Imm, PatLeaf Val> : Pat<(IntID I32:$Rs, Val:$Rt, I32:$Ru, Imm:$s), diff --git a/lib/Target/Hexagon/HexagonIntrinsicsV5.td b/lib/Target/Hexagon/HexagonIntrinsicsV5.td index f27a63e20e61..29f67cffcf89 100644 --- a/lib/Target/Hexagon/HexagonIntrinsicsV5.td +++ b/lib/Target/Hexagon/HexagonIntrinsicsV5.td @@ -9,7 +9,7 @@ //Rdd[+]=vrmpybsu(Rss,Rtt) //Rdd[+]=vrmpybuu(Rss,Rtt) -let Predicates = [HasV5T] in { +let Predicates = [HasV5] in { def : T_PP_pat <M5_vrmpybsu, int_hexagon_M5_vrmpybsu>; def : T_PP_pat <M5_vrmpybuu, int_hexagon_M5_vrmpybuu>; diff --git a/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp b/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp index fd602257934a..f9ed03909233 100644 --- a/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp +++ b/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp @@ -26,6 +26,7 @@ #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Transforms/Utils/Local.h" 
#include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" @@ -56,7 +57,7 @@ #include "llvm/Support/KnownBits.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils.h" #include <algorithm> #include <array> #include <cassert> @@ -243,8 +244,8 @@ namespace { const Value *V; }; - raw_ostream &operator<< (raw_ostream &OS, const PE &P) LLVM_ATTRIBUTE_USED; - raw_ostream &operator<< (raw_ostream &OS, const PE &P) { + LLVM_ATTRIBUTE_USED + raw_ostream &operator<<(raw_ostream &OS, const PE &P) { P.C.print(OS, P.V ? P.V : P.C.Root); return OS; } @@ -608,9 +609,9 @@ namespace { unsigned getInverseMxN(unsigned QP); Value *generate(BasicBlock::iterator At, ParsedValues &PV); - void setupSimplifier(); + void setupPreSimplifier(Simplifier &S); + void setupPostSimplifier(Simplifier &S); - Simplifier Simp; Loop *CurLoop; const DataLayout &DL; const DominatorTree &DT; @@ -985,6 +986,7 @@ bool PolynomialMultiplyRecognize::isPromotableTo(Value *Val, case Instruction::Xor: case Instruction::LShr: // Shift right is ok. case Instruction::Select: + case Instruction::Trunc: return true; case Instruction::ICmp: if (CmpInst *CI = cast<CmpInst>(In)) @@ -998,6 +1000,8 @@ bool PolynomialMultiplyRecognize::isPromotableTo(Value *Val, void PolynomialMultiplyRecognize::promoteTo(Instruction *In, IntegerType *DestTy, BasicBlock *LoopB) { + Type *OrigTy = In->getType(); + // Leave boolean values alone. if (!In->getType()->isIntegerTy(1)) In->mutateType(DestTy); @@ -1028,6 +1032,14 @@ void PolynomialMultiplyRecognize::promoteTo(Instruction *In, Z->eraseFromParent(); return; } + if (TruncInst *T = dyn_cast<TruncInst>(In)) { + IntegerType *TruncTy = cast<IntegerType>(OrigTy); + Value *Mask = ConstantInt::get(DestTy, (1u << TruncTy->getBitWidth()) - 1); + Value *And = IRBuilder<>(In).CreateAnd(T->getOperand(0), Mask); + T->replaceAllUsesWith(And); + T->eraseFromParent(); + return; + } // Promote immediates. for (unsigned i = 0, n = In->getNumOperands(); i != n; ++i) { @@ -1050,14 +1062,11 @@ bool PolynomialMultiplyRecognize::promoteTypes(BasicBlock *LoopB, // Check if the exit values have types that are no wider than the type // that we want to promote to. unsigned DestBW = DestTy->getBitWidth(); - for (Instruction &In : *ExitB) { - PHINode *P = dyn_cast<PHINode>(&In); - if (!P) - break; - if (P->getNumIncomingValues() != 1) + for (PHINode &P : ExitB->phis()) { + if (P.getNumIncomingValues() != 1) return false; - assert(P->getIncomingBlock(0) == LoopB); - IntegerType *T = dyn_cast<IntegerType>(P->getType()); + assert(P.getIncomingBlock(0) == LoopB); + IntegerType *T = dyn_cast<IntegerType>(P.getType()); if (!T || T->getBitWidth() > DestBW) return false; } @@ -1572,8 +1581,8 @@ static bool hasZeroSignBit(const Value *V) { return false; } -void PolynomialMultiplyRecognize::setupSimplifier() { - Simp.addRule("sink-zext", +void PolynomialMultiplyRecognize::setupPreSimplifier(Simplifier &S) { + S.addRule("sink-zext", // Sink zext past bitwise operations. 
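// (A sketch in comments, not part of the patch: the Trunc handling added to
// promoteTo above rewrites a truncate as a bit mask once all values have been
// widened to the promoted type. Trunc-to-N-bits becomes AND with the mask
// (1 << N) - 1, keeping exactly the low N bits the trunc would have kept,
// assuming N is smaller than the promoted width:
//
//   uint32_t truncAsMask(uint32_t Widened, unsigned NarrowBits) {
//     return Widened & ((1u << NarrowBits) - 1);  // N == 8 -> mask 0xFF
//   }
//
// This is the same computation the CreateAnd call above performs in IR.)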
[](Instruction *I, LLVMContext &Ctx) -> Value* { if (I->getOpcode() != Instruction::ZExt) @@ -1594,7 +1603,7 @@ void PolynomialMultiplyRecognize::setupSimplifier() { B.CreateZExt(T->getOperand(0), I->getType()), B.CreateZExt(T->getOperand(1), I->getType())); }); - Simp.addRule("xor/and -> and/xor", + S.addRule("xor/and -> and/xor", // (xor (and x a) (and y a)) -> (and (xor x y) a) [](Instruction *I, LLVMContext &Ctx) -> Value* { if (I->getOpcode() != Instruction::Xor) @@ -1612,7 +1621,7 @@ void PolynomialMultiplyRecognize::setupSimplifier() { return B.CreateAnd(B.CreateXor(And0->getOperand(0), And1->getOperand(0)), And0->getOperand(1)); }); - Simp.addRule("sink binop into select", + S.addRule("sink binop into select", // (Op (select c x y) z) -> (select c (Op x z) (Op y z)) // (Op x (select c y z)) -> (select c (Op x y) (Op x z)) [](Instruction *I, LLVMContext &Ctx) -> Value* { @@ -1638,7 +1647,7 @@ void PolynomialMultiplyRecognize::setupSimplifier() { } return nullptr; }); - Simp.addRule("fold select-select", + S.addRule("fold select-select", // (select c (select c x y) z) -> (select c x z) // (select c x (select c y z)) -> (select c x z) [](Instruction *I, LLVMContext &Ctx) -> Value* { @@ -1657,7 +1666,7 @@ void PolynomialMultiplyRecognize::setupSimplifier() { } return nullptr; }); - Simp.addRule("or-signbit -> xor-signbit", + S.addRule("or-signbit -> xor-signbit", // (or (lshr x 1) 0x800.0) -> (xor (lshr x 1) 0x800.0) [](Instruction *I, LLVMContext &Ctx) -> Value* { if (I->getOpcode() != Instruction::Or) @@ -1669,7 +1678,7 @@ void PolynomialMultiplyRecognize::setupSimplifier() { return nullptr; return IRBuilder<>(Ctx).CreateXor(I->getOperand(0), Msb); }); - Simp.addRule("sink lshr into binop", + S.addRule("sink lshr into binop", // (lshr (BitOp x y) c) -> (BitOp (lshr x c) (lshr y c)) [](Instruction *I, LLVMContext &Ctx) -> Value* { if (I->getOpcode() != Instruction::LShr) @@ -1691,7 +1700,7 @@ void PolynomialMultiplyRecognize::setupSimplifier() { B.CreateLShr(BitOp->getOperand(0), S), B.CreateLShr(BitOp->getOperand(1), S)); }); - Simp.addRule("expose bitop-const", + S.addRule("expose bitop-const", // (BitOp1 (BitOp2 x a) b) -> (BitOp2 x (BitOp1 a b)) [](Instruction *I, LLVMContext &Ctx) -> Value* { auto IsBitOp = [](unsigned Op) -> bool { @@ -1720,16 +1729,44 @@ void PolynomialMultiplyRecognize::setupSimplifier() { }); } +void PolynomialMultiplyRecognize::setupPostSimplifier(Simplifier &S) { + S.addRule("(and (xor (and x a) y) b) -> (and (xor x y) b), if b == b&a", + [](Instruction *I, LLVMContext &Ctx) -> Value* { + if (I->getOpcode() != Instruction::And) + return nullptr; + Instruction *Xor = dyn_cast<Instruction>(I->getOperand(0)); + ConstantInt *C0 = dyn_cast<ConstantInt>(I->getOperand(1)); + if (!Xor || !C0) + return nullptr; + if (Xor->getOpcode() != Instruction::Xor) + return nullptr; + Instruction *And0 = dyn_cast<Instruction>(Xor->getOperand(0)); + Instruction *And1 = dyn_cast<Instruction>(Xor->getOperand(1)); + // Pick the first non-null and. 
+ if (!And0 || And0->getOpcode() != Instruction::And) + std::swap(And0, And1); + ConstantInt *C1 = dyn_cast<ConstantInt>(And0->getOperand(1)); + if (!C1) + return nullptr; + uint32_t V0 = C0->getZExtValue(); + uint32_t V1 = C1->getZExtValue(); + if (V0 != (V0 & V1)) + return nullptr; + IRBuilder<> B(Ctx); + return B.CreateAnd(B.CreateXor(And0->getOperand(0), And1), C0); + }); +} + bool PolynomialMultiplyRecognize::recognize() { - DEBUG(dbgs() << "Starting PolynomialMultiplyRecognize on loop\n" - << *CurLoop << '\n'); + LLVM_DEBUG(dbgs() << "Starting PolynomialMultiplyRecognize on loop\n" + << *CurLoop << '\n'); // Restrictions: // - The loop must consist of a single block. // - The iteration count must be known at compile-time. // - The loop must have an induction variable starting from 0, and // incremented in each iteration of the loop. BasicBlock *LoopB = CurLoop->getHeader(); - DEBUG(dbgs() << "Loop header:\n" << *LoopB); + LLVM_DEBUG(dbgs() << "Loop header:\n" << *LoopB); if (LoopB != CurLoop->getLoopLatch()) return false; @@ -1749,10 +1786,12 @@ bool PolynomialMultiplyRecognize::recognize() { Value *CIV = getCountIV(LoopB); ParsedValues PV; + Simplifier PreSimp; PV.IterCount = IterCount; - DEBUG(dbgs() << "Loop IV: " << *CIV << "\nIterCount: " << IterCount << '\n'); + LLVM_DEBUG(dbgs() << "Loop IV: " << *CIV << "\nIterCount: " << IterCount + << '\n'); - setupSimplifier(); + setupPreSimplifier(PreSimp); // Perform a preliminary scan of select instructions to see if any of them // looks like a generator of the polynomial multiply steps. Assume that a @@ -1775,9 +1814,9 @@ bool PolynomialMultiplyRecognize::recognize() { continue; Simplifier::Context C(SI); - Value *T = Simp.simplify(C); + Value *T = PreSimp.simplify(C); SelectInst *SelI = (T && isa<SelectInst>(T)) ? cast<SelectInst>(T) : SI; - DEBUG(dbgs() << "scanSelect(pre-scan): " << PE(C, SelI) << '\n'); + LLVM_DEBUG(dbgs() << "scanSelect(pre-scan): " << PE(C, SelI) << '\n'); if (scanSelect(SelI, LoopB, EntryB, CIV, PV, true)) { FoundPreScan = true; if (SelI != SI) { @@ -1790,7 +1829,7 @@ bool PolynomialMultiplyRecognize::recognize() { } if (!FoundPreScan) { - DEBUG(dbgs() << "Have not found candidates for pmpy\n"); + LLVM_DEBUG(dbgs() << "Have not found candidates for pmpy\n"); return false; } @@ -1801,6 +1840,24 @@ bool PolynomialMultiplyRecognize::recognize() { // wide as the target's pmpy instruction. if (!promoteTypes(LoopB, ExitB)) return false; + // Run post-promotion simplifications. + Simplifier PostSimp; + setupPostSimplifier(PostSimp); + for (Instruction &In : *LoopB) { + SelectInst *SI = dyn_cast<SelectInst>(&In); + if (!SI || !FeedsPHI(SI)) + continue; + Simplifier::Context C(SI); + Value *T = PostSimp.simplify(C); + SelectInst *SelI = dyn_cast_or_null<SelectInst>(T); + if (SelI != SI) { + Value *NewSel = C.materialize(LoopB, SI->getIterator()); + SI->replaceAllUsesWith(NewSel); + RecursivelyDeleteTriviallyDeadInstructions(SI, &TLI); + } + break; + } + if (!convertShiftsToLeft(LoopB, ExitB, IterCount)) return false; cleanupLoopBody(LoopB); @@ -1812,14 +1869,14 @@ bool PolynomialMultiplyRecognize::recognize() { SelectInst *SelI = dyn_cast<SelectInst>(&In); if (!SelI) continue; - DEBUG(dbgs() << "scanSelect: " << *SelI << '\n'); + LLVM_DEBUG(dbgs() << "scanSelect: " << *SelI << '\n'); FoundScan = scanSelect(SelI, LoopB, EntryB, CIV, PV, false); if (FoundScan) break; } assert(FoundScan); - DEBUG({ + LLVM_DEBUG({ StringRef PP = (PV.M ? 
"(P+M)" : "P"); if (!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP << ".Q\n"; @@ -1913,7 +1970,7 @@ mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L, // Get the location that may be stored across the loop. Since the access // is strided positively through memory, we say that the modified location // starts at the pointer and has infinite size. - uint64_t AccessSize = MemoryLocation::UnknownSize; + LocationSize AccessSize = MemoryLocation::UnknownSize; // If the loop iterates a fixed number of times, we can refine the access // size to be exactly the size of the memset, which is (BECount+1)*StoreSize @@ -2083,7 +2140,6 @@ CleanupAndExit: // pointer size if it isn't already. LLVMContext &Ctx = SI->getContext(); BECount = SE->getTruncateOrZeroExtend(BECount, IntPtrTy); - unsigned Alignment = std::min(SI->getAlignment(), LI->getAlignment()); DebugLoc DLoc = SI->getDebugLoc(); const SCEV *NumBytesS = @@ -2217,12 +2273,14 @@ CleanupAndExit: : CondBuilder.CreateBitCast(LoadBasePtr, Int32PtrTy); NewCall = CondBuilder.CreateCall(Fn, {Op0, Op1, NumWords}); } else { - NewCall = CondBuilder.CreateMemMove(StoreBasePtr, LoadBasePtr, - NumBytes, Alignment); + NewCall = CondBuilder.CreateMemMove(StoreBasePtr, SI->getAlignment(), + LoadBasePtr, LI->getAlignment(), + NumBytes); } } else { - NewCall = Builder.CreateMemCpy(StoreBasePtr, LoadBasePtr, - NumBytes, Alignment); + NewCall = Builder.CreateMemCpy(StoreBasePtr, SI->getAlignment(), + LoadBasePtr, LI->getAlignment(), + NumBytes); // Okay, the memcpy has been formed. Zap the original store and // anything that feeds into it. RecursivelyDeleteTriviallyDeadInstructions(SI, TLI); @@ -2230,15 +2288,16 @@ CleanupAndExit: NewCall->setDebugLoc(DLoc); - DEBUG(dbgs() << " Formed " << (Overlap ? "memmove: " : "memcpy: ") - << *NewCall << "\n" - << " from load ptr=" << *LoadEv << " at: " << *LI << "\n" - << " from store ptr=" << *StoreEv << " at: " << *SI << "\n"); + LLVM_DEBUG(dbgs() << " Formed " << (Overlap ? "memmove: " : "memcpy: ") + << *NewCall << "\n" + << " from load ptr=" << *LoadEv << " at: " << *LI << "\n" + << " from store ptr=" << *StoreEv << " at: " << *SI + << "\n"); return true; } -// \brief Check if the instructions in Insts, together with their dependencies +// Check if the instructions in Insts, together with their dependencies // cover the loop in the sense that the loop could be safely eliminated once // the instructions in Insts are removed. 
bool HexagonLoopIdiomRecognize::coverLoop(Loop *L,
diff --git a/lib/Target/Hexagon/HexagonMachineScheduler.cpp b/lib/Target/Hexagon/HexagonMachineScheduler.cpp
index b1c549aa13fa..74c550ce8226 100644
--- a/lib/Target/Hexagon/HexagonMachineScheduler.cpp
+++ b/lib/Target/Hexagon/HexagonMachineScheduler.cpp
@@ -21,6 +21,7 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/RegisterPressure.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
@@ -47,26 +48,46 @@ using namespace llvm;
static cl::opt<bool> IgnoreBBRegPressure("ignore-bb-reg-pressure",
cl::Hidden, cl::ZeroOrMore, cl::init(false));
+static cl::opt<bool> UseNewerCandidate("use-newer-candidate",
+ cl::Hidden, cl::ZeroOrMore, cl::init(true));
+
static cl::opt<unsigned> SchedDebugVerboseLevel("misched-verbose-level",
cl::Hidden, cl::ZeroOrMore, cl::init(1));
-static cl::opt<bool> TopUseShorterTie("top-use-shorter-tie",
- cl::Hidden, cl::ZeroOrMore, cl::init(false));
-
-static cl::opt<bool> BotUseShorterTie("bot-use-shorter-tie",
- cl::Hidden, cl::ZeroOrMore, cl::init(false));
-
-static cl::opt<bool> DisableTCTie("disable-tc-tie",
- cl::Hidden, cl::ZeroOrMore, cl::init(false));
-
// Check if the scheduler should penalize instructions that are available too
// early due to a zero-latency dependence.
static cl::opt<bool> CheckEarlyAvail("check-early-avail", cl::Hidden,
cl::ZeroOrMore, cl::init(true));
-/// Save the last formed packet
-void VLIWResourceModel::savePacket() {
- OldPacket = Packet;
+// This value is used to determine if a register class is a high pressure set.
+// We compute the maximum number of registers needed and divide it by the total
+// available. Then, we compare the result to this value.
+static cl::opt<float> RPThreshold("hexagon-reg-pressure", cl::Hidden,
+ cl::init(0.75f), cl::desc("High register pressure threshold."));
+
+/// Return true if there is a dependence between SUd and SUu.
+static bool hasDependence(const SUnit *SUd, const SUnit *SUu,
+ const HexagonInstrInfo &QII) {
+ if (SUd->Succs.size() == 0)
+ return false;
+
+ // Enable .cur formation.
+ if (QII.mayBeCurLoad(*SUd->getInstr()))
+ return false;
+
+ if (QII.canExecuteInBundle(*SUd->getInstr(), *SUu->getInstr()))
+ return false;
+
+ for (const auto &S : SUd->Succs) {
+ // Since we do not add pseudos to packets, might as well
+ // ignore order dependencies.
+ if (S.isCtrl())
+ continue;
+
+ if (S.getSUnit() == SUu && S.getLatency() > 0)
+ return true;
+ }
+ return false;
}
/// Check if scheduling of this SU is possible.
/// It is _not_ precise (stateful); it is more like
/// another heuristic. Many corner cases are figured
/// empirically.
-bool VLIWResourceModel::isResourceAvailable(SUnit *SU) {
+bool VLIWResourceModel::isResourceAvailable(SUnit *SU, bool IsTop) {
if (!SU || !SU->getInstr())
return false;
@@ -94,49 +115,39 @@
break;
}
- MachineFunction &MF = *SU->getInstr()->getParent()->getParent();
- auto &QII = *MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
+ MachineBasicBlock *MBB = SU->getInstr()->getParent();
+ auto &QST = MBB->getParent()->getSubtarget<HexagonSubtarget>();
+ const auto &QII = *QST.getInstrInfo();
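// (Sketch in comments, not from the patch: the new IsTop flag makes the
// dependence test direction-aware. Filling top-down, the instructions already
// in the packet are potential producers of SU; bottom-up they are potential
// consumers, so the operands swap:
//
//   for (const SUnit *P : Packet)
//     if (IsTop ? hasDependence(P, SU, QII) : hasDependence(SU, P, QII))
//       return false; // SU cannot join the current packet
//
// which is exactly the loop pair that follows.)
// Now see if there are no other dependencies to instructions already
// in the packet.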
- for (unsigned i = 0, e = Packet.size(); i != e; ++i) { - if (Packet[i]->Succs.size() == 0) - continue; - - // Enable .cur formation. - if (QII.mayBeCurLoad(*Packet[i]->getInstr())) - continue; - - for (SUnit::const_succ_iterator I = Packet[i]->Succs.begin(), - E = Packet[i]->Succs.end(); I != E; ++I) { - // Since we do not add pseudos to packets, might as well - // ignore order dependencies. - if (I->isCtrl()) - continue; - - if (I->getSUnit() == SU) + if (IsTop) { + for (unsigned i = 0, e = Packet.size(); i != e; ++i) + if (hasDependence(Packet[i], SU, QII)) + return false; + } else { + for (unsigned i = 0, e = Packet.size(); i != e; ++i) + if (hasDependence(SU, Packet[i], QII)) return false; - } } return true; } /// Keep track of available resources. -bool VLIWResourceModel::reserveResources(SUnit *SU) { +bool VLIWResourceModel::reserveResources(SUnit *SU, bool IsTop) { bool startNewCycle = false; // Artificially reset state. if (!SU) { ResourcesModel->clearResources(); - savePacket(); Packet.clear(); TotalPackets++; return false; } - // If this SU does not fit in the packet + // If this SU does not fit in the packet or the packet is now full // start a new one. - if (!isResourceAvailable(SU)) { + if (!isResourceAvailable(SU, IsTop) || + Packet.size() >= SchedModel->getIssueWidth()) { ResourcesModel->clearResources(); - savePacket(); Packet.clear(); TotalPackets++; startNewCycle = true; @@ -161,24 +172,14 @@ bool VLIWResourceModel::reserveResources(SUnit *SU) { Packet.push_back(SU); #ifndef NDEBUG - DEBUG(dbgs() << "Packet[" << TotalPackets << "]:\n"); + LLVM_DEBUG(dbgs() << "Packet[" << TotalPackets << "]:\n"); for (unsigned i = 0, e = Packet.size(); i != e; ++i) { - DEBUG(dbgs() << "\t[" << i << "] SU("); - DEBUG(dbgs() << Packet[i]->NodeNum << ")\t"); - DEBUG(Packet[i]->getInstr()->dump()); + LLVM_DEBUG(dbgs() << "\t[" << i << "] SU("); + LLVM_DEBUG(dbgs() << Packet[i]->NodeNum << ")\t"); + LLVM_DEBUG(Packet[i]->getInstr()->dump()); } #endif - // If packet is now full, reset the state so in the next cycle - // we start fresh. - if (Packet.size() >= SchedModel->getIssueWidth()) { - ResourcesModel->clearResources(); - savePacket(); - Packet.clear(); - TotalPackets++; - startNewCycle = true; - } - return startNewCycle; } @@ -186,37 +187,43 @@ bool VLIWResourceModel::reserveResources(SUnit *SU) { /// after setting up the current scheduling region. [RegionBegin, RegionEnd) /// only includes instructions that have DAG nodes, not scheduling boundaries. void VLIWMachineScheduler::schedule() { - DEBUG(dbgs() << "********** MI Converging Scheduling VLIW " - << printMBBReference(*BB) << " " << BB->getName() << " in_func " - << BB->getParent()->getName() << " at loop depth " - << MLI->getLoopDepth(BB) << " \n"); + LLVM_DEBUG(dbgs() << "********** MI Converging Scheduling VLIW " + << printMBBReference(*BB) << " " << BB->getName() + << " in_func " << BB->getParent()->getName() + << " at loop depth " << MLI->getLoopDepth(BB) << " \n"); buildDAGWithRegPressure(); + Topo.InitDAGTopologicalSorting(); + + // Postprocess the DAG to add platform-specific artificial dependencies. + postprocessDAG(); + SmallVector<SUnit*, 8> TopRoots, BotRoots; findRootsAndBiasEdges(TopRoots, BotRoots); // Initialize the strategy before modifying the DAG. 
SchedImpl->initialize(this); - DEBUG(unsigned maxH = 0; - for (unsigned su = 0, e = SUnits.size(); su != e; ++su) - if (SUnits[su].getHeight() > maxH) - maxH = SUnits[su].getHeight(); - dbgs() << "Max Height " << maxH << "\n";); - DEBUG(unsigned maxD = 0; - for (unsigned su = 0, e = SUnits.size(); su != e; ++su) - if (SUnits[su].getDepth() > maxD) - maxD = SUnits[su].getDepth(); - dbgs() << "Max Depth " << maxD << "\n";); - DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) - SUnits[su].dumpAll(this)); + LLVM_DEBUG(unsigned maxH = 0; + for (unsigned su = 0, e = SUnits.size(); su != e; + ++su) if (SUnits[su].getHeight() > maxH) maxH = + SUnits[su].getHeight(); + dbgs() << "Max Height " << maxH << "\n";); + LLVM_DEBUG(unsigned maxD = 0; + for (unsigned su = 0, e = SUnits.size(); su != e; + ++su) if (SUnits[su].getDepth() > maxD) maxD = + SUnits[su].getDepth(); + dbgs() << "Max Depth " << maxD << "\n";); + LLVM_DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) SUnits[su] + .dumpAll(this)); initQueues(TopRoots, BotRoots); bool IsTopNode = false; while (true) { - DEBUG(dbgs() << "** VLIWMachineScheduler::schedule picking next node\n"); + LLVM_DEBUG( + dbgs() << "** VLIWMachineScheduler::schedule picking next node\n"); SUnit *SU = SchedImpl->pickNode(IsTopNode); if (!SU) break; @@ -225,16 +232,16 @@ void VLIWMachineScheduler::schedule() { scheduleMI(SU, IsTopNode); - updateQueues(SU, IsTopNode); - // Notify the scheduling strategy after updating the DAG. SchedImpl->schedNode(SU, IsTopNode); + + updateQueues(SU, IsTopNode); } assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone."); placeDebugValues(); - DEBUG({ + LLVM_DEBUG({ dbgs() << "*** Final schedule for " << printMBBReference(*begin()->getParent()) << " ***\n"; dumpSchedule(); @@ -264,6 +271,15 @@ void ConvergingVLIWScheduler::initialize(ScheduleDAGMI *dag) { Top.ResourceModel = new VLIWResourceModel(STI, DAG->getSchedModel()); Bot.ResourceModel = new VLIWResourceModel(STI, DAG->getSchedModel()); + const std::vector<unsigned> &MaxPressure = + DAG->getRegPressure().MaxSetPressure; + HighPressureSets.assign(MaxPressure.size(), 0); + for (unsigned i = 0, e = MaxPressure.size(); i < e; ++i) { + unsigned Limit = DAG->getRegClassInfo()->getRegPressureSetLimit(i); + HighPressureSets[i] = + ((float) MaxPressure[i] > ((float) Limit * RPThreshold)); + } + assert((!ForceTopDown || !ForceBottomUp) && "-misched-topdown incompatible with -misched-bottomup"); } @@ -364,8 +380,8 @@ void ConvergingVLIWScheduler::VLIWSchedBoundary::bumpCycle() { } CheckPending = true; - DEBUG(dbgs() << "*** Next cycle " << Available.getName() << " cycle " - << CurrCycle << '\n'); + LLVM_DEBUG(dbgs() << "*** Next cycle " << Available.getName() << " cycle " + << CurrCycle << '\n'); } /// Move the boundary of scheduled code by one SUnit. @@ -383,18 +399,18 @@ void ConvergingVLIWScheduler::VLIWSchedBoundary::bumpNode(SUnit *SU) { } // Update DFA model. - startNewCycle = ResourceModel->reserveResources(SU); + startNewCycle = ResourceModel->reserveResources(SU, isTop()); // Check the instruction group dispatch limit. // TODO: Check if this SU must end a dispatch group. 
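// (Worked example in comments, not from the patch: the HighPressureSets setup
// in initialize() above marks a pressure set "high" when the region's maximum
// pressure exceeds RPThreshold, default 0.75, of the set's limit:
//
//   High[i] = float(MaxPressure[i]) > float(Limit[i]) * 0.75f;
//
// With a limit of 32 registers, a region peaking at 25 is flagged, since
// 25 > 32 * 0.75 = 24, which enables the spill-avoidance penalty used in
// SchedulingCost below.)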
IssueCount += SchedModel->getNumMicroOps(SU->getInstr());
if (startNewCycle) {
- DEBUG(dbgs() << "*** Max instrs at cycle " << CurrCycle << '\n');
+ LLVM_DEBUG(dbgs() << "*** Max instrs at cycle " << CurrCycle << '\n');
bumpCycle();
} else
- DEBUG(dbgs() << "*** IssueCount " << IssueCount
- << " at cycle " << CurrCycle << '\n');
+ LLVM_DEBUG(dbgs() << "*** IssueCount " << IssueCount << " at cycle "
+ << CurrCycle << '\n');
}
/// Release pending ready nodes into the available queue. This makes them
@@ -443,10 +459,18 @@ SUnit *ConvergingVLIWScheduler::VLIWSchedBoundary::pickOnlyChoice() {
if (CheckPending)
releasePending();
- for (unsigned i = 0; Available.empty(); ++i) {
+ auto AdvanceCycle = [this]() {
+ if (Available.empty())
+ return true;
+ if (Available.size() == 1 && Pending.size() > 0)
+ return !ResourceModel->isResourceAvailable(*Available.begin(), isTop()) ||
+ getWeakLeft(*Available.begin(), isTop()) != 0;
+ return false;
+ };
+ for (unsigned i = 0; AdvanceCycle(); ++i) {
assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) &&
"permanent hazard"); (void)i;
- ResourceModel->reserveResources(nullptr);
+ ResourceModel->reserveResources(nullptr, isTop());
bumpCycle();
releasePending();
}
@@ -520,13 +544,31 @@ static inline bool isSingleUnscheduledSucc(SUnit *SU, SUnit *SU2) {
return true;
}
+/// Check if the instruction changes the register pressure of a register in the
+/// high pressure set. The function returns a negative value if the pressure
+/// decreases and a positive value if the pressure increases. If the instruction
+/// doesn't use a high pressure register or doesn't change the register
+/// pressure, then return 0.
+int ConvergingVLIWScheduler::pressureChange(const SUnit *SU, bool isBotUp) {
+ PressureDiff &PD = DAG->getPressureDiff(SU);
+ for (auto &P : PD) {
+ if (!P.isValid())
+ continue;
+ // The pressure differences are computed bottom-up, so the comparison for
+ // an increase is positive in the bottom direction, but negative in the
+ // top-down direction.
+ if (HighPressureSets[P.getPSet()])
+ return (isBotUp ? P.getUnitInc() : -P.getUnitInc());
+ }
+ return 0;
+}
+
// Constants used to denote relative importance of
// heuristic components for cost computation.
static const unsigned PriorityOne = 200;
static const unsigned PriorityTwo = 50;
static const unsigned PriorityThree = 75;
static const unsigned ScaleTwo = 10;
-static const unsigned FactorOne = 2;
/// Single point to compute overall scheduling cost.
/// TODO: More heuristics will be used soon.
@@ -541,20 +583,23 @@ int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU,
if (!SU || SU->isScheduled)
return ResCount;
- MachineInstr &Instr = *SU->getInstr();
-
- DEBUG(if (verbose) dbgs() << ((Q.getID() == TopQID) ? "(top|" : "(bot|"));
+ LLVM_DEBUG(if (verbose) dbgs()
+ << ((Q.getID() == TopQID) ? "(top|" : "(bot|"));
// Forced priority is high.
if (SU->isScheduleHigh) {
ResCount += PriorityOne;
- DEBUG(dbgs() << "H|");
+ LLVM_DEBUG(dbgs() << "H|");
}
+ unsigned IsAvailableAmt = 0;
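// (Note, not from the patch: pressureChange above leans on the PressureDiff
// sign convention. Unit increments are computed bottom-up, so a top-down
// caller must negate them for a positive result to keep meaning "pressure
// increases":
//
//   int Oriented = isBotUp ? P.getUnitInc() : -P.getUnitInc();
//
// Callers therefore pass isBotUp = (Q.getID() != TopQID), as seen in
// SchedulingCost below.)
// Critical path first.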
if (Q.getID() == TopQID) { - ResCount += (SU->getHeight() * ScaleTwo); + if (Top.isLatencyBound(SU)) { + LLVM_DEBUG(if (verbose) dbgs() << "LB|"); + ResCount += (SU->getHeight() * ScaleTwo); + } - DEBUG(if (verbose) { + LLVM_DEBUG(if (verbose) { std::stringstream dbgstr; dbgstr << "h" << std::setw(3) << SU->getHeight() << "|"; dbgs() << dbgstr.str(); @@ -562,16 +607,19 @@ int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU, // If resources are available for it, multiply the // chance of scheduling. - if (Top.ResourceModel->isResourceAvailable(SU)) { - ResCount <<= FactorOne; - ResCount += PriorityThree; - DEBUG(if (verbose) dbgs() << "A|"); + if (Top.ResourceModel->isResourceAvailable(SU, true)) { + IsAvailableAmt = (PriorityTwo + PriorityThree); + ResCount += IsAvailableAmt; + LLVM_DEBUG(if (verbose) dbgs() << "A|"); } else - DEBUG(if (verbose) dbgs() << " |"); + LLVM_DEBUG(if (verbose) dbgs() << " |"); } else { - ResCount += (SU->getDepth() * ScaleTwo); + if (Bot.isLatencyBound(SU)) { + LLVM_DEBUG(if (verbose) dbgs() << "LB|"); + ResCount += (SU->getDepth() * ScaleTwo); + } - DEBUG(if (verbose) { + LLVM_DEBUG(if (verbose) { std::stringstream dbgstr; dbgstr << "d" << std::setw(3) << SU->getDepth() << "|"; dbgs() << dbgstr.str(); @@ -579,12 +627,12 @@ int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU, // If resources are available for it, multiply the // chance of scheduling. - if (Bot.ResourceModel->isResourceAvailable(SU)) { - ResCount <<= FactorOne; - ResCount += PriorityThree; - DEBUG(if (verbose) dbgs() << "A|"); + if (Bot.ResourceModel->isResourceAvailable(SU, false)) { + IsAvailableAmt = (PriorityTwo + PriorityThree); + ResCount += IsAvailableAmt; + LLVM_DEBUG(if (verbose) dbgs() << "A|"); } else - DEBUG(if (verbose) dbgs() << " |"); + LLVM_DEBUG(if (verbose) dbgs() << " |"); } unsigned NumNodesBlocking = 0; @@ -593,18 +641,20 @@ int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU, // Look at all of the successors of this node. // Count the number of nodes that // this node is the sole unscheduled node for. - for (const SDep &SI : SU->Succs) - if (isSingleUnscheduledPred(SI.getSUnit(), SU)) - ++NumNodesBlocking; + if (Top.isLatencyBound(SU)) + for (const SDep &SI : SU->Succs) + if (isSingleUnscheduledPred(SI.getSUnit(), SU)) + ++NumNodesBlocking; } else { // How many unscheduled predecessors block this node? - for (const SDep &PI : SU->Preds) - if (isSingleUnscheduledSucc(PI.getSUnit(), SU)) - ++NumNodesBlocking; + if (Bot.isLatencyBound(SU)) + for (const SDep &PI : SU->Preds) + if (isSingleUnscheduledSucc(PI.getSUnit(), SU)) + ++NumNodesBlocking; } ResCount += (NumNodesBlocking * ScaleTwo); - DEBUG(if (verbose) { + LLVM_DEBUG(if (verbose) { std::stringstream dbgstr; dbgstr << "blk " << std::setw(2) << NumNodesBlocking << ")|"; dbgs() << dbgstr.str(); @@ -619,10 +669,17 @@ int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU, // Decrease priority slightly if register pressure would increase over the // current maximum. ResCount -= (Delta.CurrentMax.getUnitInc()*PriorityTwo); - DEBUG(if (verbose) { - dbgs() << "RP " << Delta.Excess.getUnitInc() << "/" - << Delta.CriticalMax.getUnitInc() <<"/" - << Delta.CurrentMax.getUnitInc() << ")|"; + // If there are register pressure issues, then we remove the value added for + // the instruction being available. The rationale is that we really don't + // want to schedule an instruction that causes a spill. 
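// (Worked example in comments, using the constants defined above: a
// latency-bound node of height 7 in the top queue with resources available
// scores ResCount = 7 * ScaleTwo + PriorityTwo + PriorityThree
//                 = 70 + 50 + 75 = 195.
// If scheduling it would push a high-pressure set past the current maximum,
// the availability bonus of 125 is taken back below, leaving 70, so an
// instruction that risks a spill no longer looks cheap merely because it
// fits in the packet.)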
+ if (IsAvailableAmt && pressureChange(SU, Q.getID() != TopQID) > 0 && + (Delta.Excess.getUnitInc() || Delta.CriticalMax.getUnitInc() || + Delta.CurrentMax.getUnitInc())) + ResCount -= IsAvailableAmt; + LLVM_DEBUG(if (verbose) { + dbgs() << "RP " << Delta.Excess.getUnitInc() << "/" + << Delta.CriticalMax.getUnitInc() << "/" + << Delta.CurrentMax.getUnitInc() << ")|"; }); } @@ -631,53 +688,39 @@ int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU, auto &QST = DAG->MF.getSubtarget<HexagonSubtarget>(); auto &QII = *QST.getInstrInfo(); if (SU->isInstr() && QII.mayBeCurLoad(*SU->getInstr())) { - if (Q.getID() == TopQID && Top.ResourceModel->isResourceAvailable(SU)) { + if (Q.getID() == TopQID && + Top.ResourceModel->isResourceAvailable(SU, true)) { ResCount += PriorityTwo; - DEBUG(if (verbose) dbgs() << "C|"); + LLVM_DEBUG(if (verbose) dbgs() << "C|"); } else if (Q.getID() == BotQID && - Bot.ResourceModel->isResourceAvailable(SU)) { + Bot.ResourceModel->isResourceAvailable(SU, false)) { ResCount += PriorityTwo; - DEBUG(if (verbose) dbgs() << "C|"); + LLVM_DEBUG(if (verbose) dbgs() << "C|"); } } // Give preference to a zero latency instruction if the dependent // instruction is in the current packet. - if (Q.getID() == TopQID) { + if (Q.getID() == TopQID && getWeakLeft(SU, true) == 0) { for (const SDep &PI : SU->Preds) { if (!PI.getSUnit()->getInstr()->isPseudo() && PI.isAssignedRegDep() && PI.getLatency() == 0 && Top.ResourceModel->isInPacket(PI.getSUnit())) { ResCount += PriorityThree; - DEBUG(if (verbose) dbgs() << "Z|"); + LLVM_DEBUG(if (verbose) dbgs() << "Z|"); } } - } else { + } else if (Q.getID() == BotQID && getWeakLeft(SU, false) == 0) { for (const SDep &SI : SU->Succs) { if (!SI.getSUnit()->getInstr()->isPseudo() && SI.isAssignedRegDep() && SI.getLatency() == 0 && Bot.ResourceModel->isInPacket(SI.getSUnit())) { ResCount += PriorityThree; - DEBUG(if (verbose) dbgs() << "Z|"); + LLVM_DEBUG(if (verbose) dbgs() << "Z|"); } } } - // Give less preference to an instruction that will cause a stall with - // an instruction in the previous packet. - if (QII.isHVXVec(Instr)) { - // Check for stalls in the previous packet. - if (Q.getID() == TopQID) { - for (auto J : Top.ResourceModel->OldPacket) - if (QII.producesStall(*J->getInstr(), Instr)) - ResCount -= PriorityOne; - } else { - for (auto J : Bot.ResourceModel->OldPacket) - if (QII.producesStall(Instr, *J->getInstr())) - ResCount -= PriorityOne; - } - } - // If the instruction has a non-zero latency dependence with an instruction in // the current packet, then it should not be scheduled yet. The case occurs // when the dependent instruction is scheduled in a new packet, so the @@ -689,7 +732,7 @@ int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU, if (PI.getLatency() > 0 && Top.ResourceModel->isInPacket(PI.getSUnit())) { ResCount -= PriorityOne; - DEBUG(if (verbose) dbgs() << "D|"); + LLVM_DEBUG(if (verbose) dbgs() << "D|"); } } } else { @@ -697,13 +740,13 @@ int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU, if (SI.getLatency() > 0 && Bot.ResourceModel->isInPacket(SI.getSUnit())) { ResCount -= PriorityOne; - DEBUG(if (verbose) dbgs() << "D|"); + LLVM_DEBUG(if (verbose) dbgs() << "D|"); } } } } - DEBUG(if (verbose) { + LLVM_DEBUG(if (verbose) { std::stringstream dbgstr; dbgstr << "Total " << std::setw(4) << ResCount << ")"; dbgs() << dbgstr.str(); @@ -718,11 +761,12 @@ int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU, /// DAG building. 
To adjust for the current scheduling location we need to /// maintain the number of vreg uses remaining to be top-scheduled. ConvergingVLIWScheduler::CandResult ConvergingVLIWScheduler:: -pickNodeFromQueue(ReadyQueue &Q, const RegPressureTracker &RPTracker, +pickNodeFromQueue(VLIWSchedBoundary &Zone, const RegPressureTracker &RPTracker, SchedCandidate &Candidate) { - DEBUG(if (SchedDebugVerboseLevel > 1) - readyQueueVerboseDump(RPTracker, Candidate, Q); - else Q.dump();); + ReadyQueue &Q = Zone.Available; + LLVM_DEBUG(if (SchedDebugVerboseLevel > 1) + readyQueueVerboseDump(RPTracker, Candidate, Q); + else Q.dump();); // getMaxPressureDelta temporarily modifies the tracker. RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker); @@ -739,7 +783,7 @@ pickNodeFromQueue(ReadyQueue &Q, const RegPressureTracker &RPTracker, // Initialize the candidate if needed. if (!Candidate.SU) { - DEBUG(traceCandidate("DCAND", Q, *I, CurrentCost)); + LLVM_DEBUG(traceCandidate("DCAND", Q, *I, CurrentCost)); Candidate.SU = *I; Candidate.RPDelta = RPDelta; Candidate.SCost = CurrentCost; @@ -747,9 +791,23 @@ pickNodeFromQueue(ReadyQueue &Q, const RegPressureTracker &RPTracker, continue; } + // Choose node order for negative cost candidates. There is no good + // candidate in this case. + if (CurrentCost < 0 && Candidate.SCost < 0) { + if ((Q.getID() == TopQID && (*I)->NodeNum < Candidate.SU->NodeNum) + || (Q.getID() == BotQID && (*I)->NodeNum > Candidate.SU->NodeNum)) { + LLVM_DEBUG(traceCandidate("NCAND", Q, *I, CurrentCost)); + Candidate.SU = *I; + Candidate.RPDelta = RPDelta; + Candidate.SCost = CurrentCost; + FoundCandidate = NodeOrder; + } + continue; + } + // Best cost. if (CurrentCost > Candidate.SCost) { - DEBUG(traceCandidate("CCAND", Q, *I, CurrentCost)); + LLVM_DEBUG(traceCandidate("CCAND", Q, *I, CurrentCost)); Candidate.SU = *I; Candidate.RPDelta = RPDelta; Candidate.SCost = CurrentCost; @@ -757,65 +815,53 @@ pickNodeFromQueue(ReadyQueue &Q, const RegPressureTracker &RPTracker, continue; } - // Tie breaker using Timing Class. 
- if (!DisableTCTie) { - auto &QST = DAG->MF.getSubtarget<HexagonSubtarget>(); - auto &QII = *QST.getInstrInfo(); - - const MachineInstr *MI = (*I)->getInstr(); - const MachineInstr *CandI = Candidate.SU->getInstr(); - const InstrItineraryData *InstrItins = QST.getInstrItineraryData(); - - unsigned InstrLatency = QII.getInstrTimingClassLatency(InstrItins, *MI); - unsigned CandLatency = QII.getInstrTimingClassLatency(InstrItins, *CandI); - DEBUG(dbgs() << "TC Tie Breaker Cand: " - << CandLatency << " Instr:" << InstrLatency << "\n" - << *MI << *CandI << "\n"); - if (Q.getID() == TopQID && CurrentCost == Candidate.SCost) { - if (InstrLatency < CandLatency && TopUseShorterTie) { - Candidate.SU = *I; - Candidate.RPDelta = RPDelta; - Candidate.SCost = CurrentCost; - FoundCandidate = BestCost; - DEBUG(dbgs() << "Used top shorter tie breaker\n"); - continue; - } else if (InstrLatency > CandLatency && !TopUseShorterTie) { - Candidate.SU = *I; - Candidate.RPDelta = RPDelta; - Candidate.SCost = CurrentCost; - FoundCandidate = BestCost; - DEBUG(dbgs() << "Used top longer tie breaker\n"); - continue; - } - } else if (Q.getID() == BotQID && CurrentCost == Candidate.SCost) { - if (InstrLatency < CandLatency && BotUseShorterTie) { - Candidate.SU = *I; - Candidate.RPDelta = RPDelta; - Candidate.SCost = CurrentCost; - FoundCandidate = BestCost; - DEBUG(dbgs() << "Used Bot shorter tie breaker\n"); - continue; - } else if (InstrLatency > CandLatency && !BotUseShorterTie) { - Candidate.SU = *I; - Candidate.RPDelta = RPDelta; - Candidate.SCost = CurrentCost; - FoundCandidate = BestCost; - DEBUG(dbgs() << "Used Bot longer tie breaker\n"); - continue; - } + // Choose an instruction that does not depend on an artificial edge. + unsigned CurrWeak = getWeakLeft(*I, (Q.getID() == TopQID)); + unsigned CandWeak = getWeakLeft(Candidate.SU, (Q.getID() == TopQID)); + if (CurrWeak != CandWeak) { + if (CurrWeak < CandWeak) { + LLVM_DEBUG(traceCandidate("WCAND", Q, *I, CurrentCost)); + Candidate.SU = *I; + Candidate.RPDelta = RPDelta; + Candidate.SCost = CurrentCost; + FoundCandidate = Weak; } + continue; } - if (CurrentCost == Candidate.SCost) { - if ((Q.getID() == TopQID && - (*I)->Succs.size() > Candidate.SU->Succs.size()) || - (Q.getID() == BotQID && - (*I)->Preds.size() < Candidate.SU->Preds.size())) { - DEBUG(traceCandidate("SPCAND", Q, *I, CurrentCost)); + if (CurrentCost == Candidate.SCost && Zone.isLatencyBound(*I)) { + unsigned CurrSize, CandSize; + if (Q.getID() == TopQID) { + CurrSize = (*I)->Succs.size(); + CandSize = Candidate.SU->Succs.size(); + } else { + CurrSize = (*I)->Preds.size(); + CandSize = Candidate.SU->Preds.size(); + } + if (CurrSize > CandSize) { + LLVM_DEBUG(traceCandidate("SPCAND", Q, *I, CurrentCost)); Candidate.SU = *I; Candidate.RPDelta = RPDelta; Candidate.SCost = CurrentCost; FoundCandidate = BestCost; + } + // Keep the old candidate if it's a better candidate. That is, don't use + // the subsequent tie breaker. + if (CurrSize != CandSize) + continue; + } + + // Tie breaker. + // To avoid scheduling indeterminism, we need a tie breaker + // for the case when cost is identical for two nodes. 
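// (Note, not from the patch: the replacement tie-break chain below runs in
// order: fewest unsatisfied weak edges first, then, for latency-bound nodes,
// the larger fan-out (successors when scheduling top-down, predecessors
// bottom-up), then plain node order:
//
//   TopQID: prefer *I when (*I)->NodeNum < Candidate.SU->NodeNum
//   BotQID: prefer *I when (*I)->NodeNum > Candidate.SU->NodeNum
//
// so equal-cost picks stay deterministic instead of depending on queue
// iteration order.)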
+ if (UseNewerCandidate && CurrentCost == Candidate.SCost) { + if ((Q.getID() == TopQID && (*I)->NodeNum < Candidate.SU->NodeNum) + || (Q.getID() == BotQID && (*I)->NodeNum > Candidate.SU->NodeNum)) { + LLVM_DEBUG(traceCandidate("TCAND", Q, *I, CurrentCost)); + Candidate.SU = *I; + Candidate.RPDelta = RPDelta; + Candidate.SCost = CurrentCost; + FoundCandidate = NodeOrder; continue; } } @@ -833,18 +879,18 @@ SUnit *ConvergingVLIWScheduler::pickNodeBidrectional(bool &IsTopNode) { // Schedule as far as possible in the direction of no choice. This is most // efficient, but also provides the best heuristics for CriticalPSets. if (SUnit *SU = Bot.pickOnlyChoice()) { - DEBUG(dbgs() << "Picked only Bottom\n"); + LLVM_DEBUG(dbgs() << "Picked only Bottom\n"); IsTopNode = false; return SU; } if (SUnit *SU = Top.pickOnlyChoice()) { - DEBUG(dbgs() << "Picked only Top\n"); + LLVM_DEBUG(dbgs() << "Picked only Top\n"); IsTopNode = true; return SU; } SchedCandidate BotCand; // Prefer bottom scheduling when heuristics are silent. - CandResult BotResult = pickNodeFromQueue(Bot.Available, + CandResult BotResult = pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand); assert(BotResult != NoCand && "failed to find the first candidate"); @@ -856,40 +902,40 @@ SUnit *ConvergingVLIWScheduler::pickNodeBidrectional(bool &IsTopNode) { // increase pressure for one of the excess PSets, then schedule in that // direction first to provide more freedom in the other direction. if (BotResult == SingleExcess || BotResult == SingleCritical) { - DEBUG(dbgs() << "Prefered Bottom Node\n"); + LLVM_DEBUG(dbgs() << "Prefered Bottom Node\n"); IsTopNode = false; return BotCand.SU; } // Check if the top Q has a better candidate. SchedCandidate TopCand; - CandResult TopResult = pickNodeFromQueue(Top.Available, + CandResult TopResult = pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand); assert(TopResult != NoCand && "failed to find the first candidate"); if (TopResult == SingleExcess || TopResult == SingleCritical) { - DEBUG(dbgs() << "Prefered Top Node\n"); + LLVM_DEBUG(dbgs() << "Prefered Top Node\n"); IsTopNode = true; return TopCand.SU; } // If either Q has a single candidate that minimizes pressure above the // original region's pressure pick it. if (BotResult == SingleMax) { - DEBUG(dbgs() << "Prefered Bottom Node SingleMax\n"); + LLVM_DEBUG(dbgs() << "Prefered Bottom Node SingleMax\n"); IsTopNode = false; return BotCand.SU; } if (TopResult == SingleMax) { - DEBUG(dbgs() << "Prefered Top Node SingleMax\n"); + LLVM_DEBUG(dbgs() << "Prefered Top Node SingleMax\n"); IsTopNode = true; return TopCand.SU; } if (TopCand.SCost > BotCand.SCost) { - DEBUG(dbgs() << "Prefered Top Node Cost\n"); + LLVM_DEBUG(dbgs() << "Prefered Top Node Cost\n"); IsTopNode = true; return TopCand.SU; } // Otherwise prefer the bottom candidate in node order. 
- DEBUG(dbgs() << "Prefered Bottom in Node order\n"); + LLVM_DEBUG(dbgs() << "Prefered Bottom in Node order\n"); IsTopNode = false; return BotCand.SU; } @@ -907,7 +953,7 @@ SUnit *ConvergingVLIWScheduler::pickNode(bool &IsTopNode) { if (!SU) { SchedCandidate TopCand; CandResult TopResult = - pickNodeFromQueue(Top.Available, DAG->getTopRPTracker(), TopCand); + pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand); assert(TopResult != NoCand && "failed to find the first candidate"); (void)TopResult; SU = TopCand.SU; @@ -918,7 +964,7 @@ SUnit *ConvergingVLIWScheduler::pickNode(bool &IsTopNode) { if (!SU) { SchedCandidate BotCand; CandResult BotResult = - pickNodeFromQueue(Bot.Available, DAG->getBotRPTracker(), BotCand); + pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand); assert(BotResult != NoCand && "failed to find the first candidate"); (void)BotResult; SU = BotCand.SU; @@ -932,10 +978,11 @@ SUnit *ConvergingVLIWScheduler::pickNode(bool &IsTopNode) { if (SU->isBottomReady()) Bot.removeReady(SU); - DEBUG(dbgs() << "*** " << (IsTopNode ? "Top" : "Bottom") - << " Scheduling Instruction in cycle " - << (IsTopNode ? Top.CurrCycle : Bot.CurrCycle) << '\n'; - SU->dump(DAG)); + LLVM_DEBUG(dbgs() << "*** " << (IsTopNode ? "Top" : "Bottom") + << " Scheduling instruction in cycle " + << (IsTopNode ? Top.CurrCycle : Bot.CurrCycle) << " (" + << reportPackets() << ")\n"; + SU->dump(DAG)); return SU; } @@ -945,10 +992,10 @@ SUnit *ConvergingVLIWScheduler::pickNode(bool &IsTopNode) { /// does. void ConvergingVLIWScheduler::schedNode(SUnit *SU, bool IsTopNode) { if (IsTopNode) { - SU->TopReadyCycle = Top.CurrCycle; Top.bumpNode(SU); + SU->TopReadyCycle = Top.CurrCycle; } else { - SU->BotReadyCycle = Bot.CurrCycle; Bot.bumpNode(SU); + SU->BotReadyCycle = Bot.CurrCycle; } } diff --git a/lib/Target/Hexagon/HexagonMachineScheduler.h b/lib/Target/Hexagon/HexagonMachineScheduler.h index bf7fe2d484a2..585a7858ad2b 100644 --- a/lib/Target/Hexagon/HexagonMachineScheduler.h +++ b/lib/Target/Hexagon/HexagonMachineScheduler.h @@ -49,9 +49,6 @@ class VLIWResourceModel { unsigned TotalPackets = 0; public: - /// Save the last formed packet. - std::vector<SUnit *> OldPacket; - VLIWResourceModel(const TargetSubtargetInfo &STI, const TargetSchedModel *SM) : SchedModel(SM) { ResourcesModel = STI.getInstrInfo()->CreateTargetScheduleState(STI); @@ -62,8 +59,6 @@ public: Packet.resize(SchedModel->getIssueWidth()); Packet.clear(); - OldPacket.resize(SchedModel->getIssueWidth()); - OldPacket.clear(); ResourcesModel->clearResources(); } @@ -84,9 +79,8 @@ public: ResourcesModel->clearResources(); } - bool isResourceAvailable(SUnit *SU); - bool reserveResources(SUnit *SU); - void savePacket(); + bool isResourceAvailable(SUnit *SU, bool IsTop); + bool reserveResources(SUnit *SU, bool IsTop); unsigned getTotalPackets() const { return TotalPackets; } bool isInPacket(SUnit *SU) const { return is_contained(Packet, SU); } }; @@ -102,6 +96,9 @@ public: /// Schedule - This is called back from ScheduleDAGInstrs::Run() when it's /// time to do some work. void schedule() override; + + RegisterClassInfo *getRegClassInfo() { return RegClassInfo; } + int getBBSize() { return BB->size(); } }; //===----------------------------------------------------------------------===// @@ -129,7 +126,7 @@ class ConvergingVLIWScheduler : public MachineSchedStrategy { /// Represent the type of SchedCandidate found within a single queue. 
enum CandResult { NoCand, NodeOrder, SingleExcess, SingleCritical, SingleMax, MultiPressure, - BestCost}; + BestCost, Weak}; /// Each Scheduling boundary is associated with ready queues. It tracks the /// current cycle in whichever direction it has moved, and maintains the state @@ -147,6 +144,7 @@ class ConvergingVLIWScheduler : public MachineSchedStrategy { unsigned CurrCycle = 0; unsigned IssueCount = 0; + unsigned CriticalPathLength = 0; /// MinReadyCycle - Cycle of the soonest available instruction. unsigned MinReadyCycle = std::numeric_limits<unsigned>::max(); @@ -168,7 +166,27 @@ class ConvergingVLIWScheduler : public MachineSchedStrategy { void init(VLIWMachineScheduler *dag, const TargetSchedModel *smodel) { DAG = dag; SchedModel = smodel; + CurrCycle = 0; IssueCount = 0; + // Initialize the critical path length limit, which is used by the scheduling + // cost model to determine the value for scheduling an instruction. We use + // a slightly different heuristic for small and large functions. For small + // functions, it's important to use the height/depth of the instruction. + // For large functions, prioritizing by height or depth increases spills. + CriticalPathLength = DAG->getBBSize() / SchedModel->getIssueWidth(); + if (DAG->getBBSize() < 50) + // We divide by two as a cheap and simple heuristic to reduce the + // critical path length, which increases the priority of using the graph + // height/depth in the scheduler's cost computation. + CriticalPathLength >>= 1; + else { + // For large basic blocks, we prefer a larger critical path length to + // decrease the priority of using the graph height/depth. + unsigned MaxPath = 0; + for (auto &SU : DAG->SUnits) + MaxPath = std::max(MaxPath, isTop() ? SU.getHeight() : SU.getDepth()); + CriticalPathLength = std::max(CriticalPathLength, MaxPath) + 1; + } } bool isTop() const { @@ -188,6 +206,13 @@ class ConvergingVLIWScheduler : public MachineSchedStrategy { void removeReady(SUnit *SU); SUnit *pickOnlyChoice(); + + bool isLatencyBound(SUnit *SU) { + if (CurrCycle >= CriticalPathLength) + return true; + unsigned PathLength = isTop() ? SU->getHeight() : SU->getDepth(); + return CriticalPathLength - CurrCycle <= PathLength; + } }; VLIWMachineScheduler *DAG = nullptr; @@ -197,6 +222,9 @@ class ConvergingVLIWScheduler : public MachineSchedStrategy { VLIWSchedBoundary Top; VLIWSchedBoundary Bot; + /// List of pressure sets that have a high pressure level in the region.
+ std::vector<bool> HighPressureSets; + public: /// SUnit::NodeQueueId: 0 (none), 1 (top), 2 (bot), 3 (both) enum { @@ -217,7 +245,7 @@ public: void releaseBottomNode(SUnit *SU) override; - unsigned ReportPackets() { + unsigned reportPackets() { return Top.ResourceModel->getTotalPackets() + Bot.ResourceModel->getTotalPackets(); } @@ -225,11 +253,13 @@ public: protected: SUnit *pickNodeBidrectional(bool &IsTopNode); + int pressureChange(const SUnit *SU, bool isBotUp); + int SchedulingCost(ReadyQueue &Q, SUnit *SU, SchedCandidate &Candidate, RegPressureDelta &Delta, bool verbose); - CandResult pickNodeFromQueue(ReadyQueue &Q, + CandResult pickNodeFromQueue(VLIWSchedBoundary &Zone, const RegPressureTracker &RPTracker, SchedCandidate &Candidate); #ifndef NDEBUG diff --git a/lib/Target/Hexagon/HexagonMapAsm2IntrinV65.gen.td b/lib/Target/Hexagon/HexagonMapAsm2IntrinV65.gen.td index 718d3ac7d45a..c29a75e6fe74 100644 --- a/lib/Target/Hexagon/HexagonMapAsm2IntrinV65.gen.td +++ b/lib/Target/Hexagon/HexagonMapAsm2IntrinV65.gen.td @@ -7,80 +7,80 @@ // //===----------------------------------------------------------------------===// -def: Pat<(int_hexagon_A6_vcmpbeq_notany DoubleRegs:$src1, DoubleRegs:$src2), (A6_vcmpbeq_notany DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV65T]>; -def: Pat<(int_hexagon_V6_vasruwuhsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3), (V6_vasruwuhsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV65T, UseHVX]>; -def: Pat<(int_hexagon_V6_vasruwuhsat_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3), (V6_vasruwuhsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV65T, UseHVX]>; -def: Pat<(int_hexagon_V6_vasruhubsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3), (V6_vasruhubsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV65T, UseHVX]>; -def: Pat<(int_hexagon_V6_vasruhubsat_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3), (V6_vasruhubsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV65T, UseHVX]>; -def: Pat<(int_hexagon_V6_vasruhubrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3), (V6_vasruhubrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV65T, UseHVX]>; -def: Pat<(int_hexagon_V6_vasruhubrndsat_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3), (V6_vasruhubrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV65T, UseHVX]>; -def: Pat<(int_hexagon_V6_vabsb HvxVR:$src1), (V6_vabsb HvxVR:$src1)>, Requires<[HasV65T, UseHVX]>; -def: Pat<(int_hexagon_V6_vabsb_128B HvxVR:$src1), (V6_vabsb HvxVR:$src1)>, Requires<[HasV65T, UseHVX]>; -def: Pat<(int_hexagon_V6_vabsb_sat HvxVR:$src1), (V6_vabsb_sat HvxVR:$src1)>, Requires<[HasV65T, UseHVX]>; -def: Pat<(int_hexagon_V6_vabsb_sat_128B HvxVR:$src1), (V6_vabsb_sat HvxVR:$src1)>, Requires<[HasV65T, UseHVX]>; -def: Pat<(int_hexagon_V6_vaslh_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3), (V6_vaslh_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV65T, UseHVX]>; -def: Pat<(int_hexagon_V6_vaslh_acc_128B HvxVR:$src1, HvxVR:$src2, IntRegs:$src3), (V6_vaslh_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV65T, UseHVX]>; -def: Pat<(int_hexagon_V6_vasrh_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3), (V6_vasrh_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV65T, UseHVX]>; -def: Pat<(int_hexagon_V6_vasrh_acc_128B HvxVR:$src1, HvxVR:$src2, IntRegs:$src3), (V6_vasrh_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV65T, UseHVX]>; -def: Pat<(int_hexagon_V6_vavguw HvxVR:$src1, HvxVR:$src2), (V6_vavguw HvxVR:$src1, 
HvxVR:$src2)>, Requires<[HasV65T, UseHVX]>; -def: Pat<(int_hexagon_V6_vavguw_128B HvxVR:$src1, HvxVR:$src2), (V6_vavguw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV65T, UseHVX]>; -def: Pat<(int_hexagon_V6_vavguwrnd HvxVR:$src1, HvxVR:$src2), (V6_vavguwrnd HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV65T, UseHVX]>; -def: Pat<(int_hexagon_V6_vavguwrnd_128B HvxVR:$src1, HvxVR:$src2), (V6_vavguwrnd HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV65T, UseHVX]>; -def: Pat<(int_hexagon_V6_vavgb HvxVR:$src1, HvxVR:$src2), (V6_vavgb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV65T, UseHVX]>; -def: Pat<(int_hexagon_V6_vavgb_128B HvxVR:$src1, HvxVR:$src2), (V6_vavgb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV65T, UseHVX]>; -def: Pat<(int_hexagon_V6_vavgbrnd HvxVR:$src1, HvxVR:$src2), (V6_vavgbrnd HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV65T, UseHVX]>; -def: Pat<(int_hexagon_V6_vavgbrnd_128B HvxVR:$src1, HvxVR:$src2), (V6_vavgbrnd HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV65T, UseHVX]>; -def: Pat<(int_hexagon_V6_vlut4 HvxVR:$src1, DoubleRegs:$src2), (V6_vlut4 HvxVR:$src1, DoubleRegs:$src2)>, Requires<[HasV65T, UseHVX]>; -def: Pat<(int_hexagon_V6_vlut4_128B HvxVR:$src1, DoubleRegs:$src2), (V6_vlut4 HvxVR:$src1, DoubleRegs:$src2)>, Requires<[HasV65T, UseHVX]>; -def: Pat<(int_hexagon_V6_vnavgb HvxVR:$src1, HvxVR:$src2), (V6_vnavgb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV65T, UseHVX]>; -def: Pat<(int_hexagon_V6_vnavgb_128B HvxVR:$src1, HvxVR:$src2), (V6_vnavgb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV65T, UseHVX]>; -def: Pat<(int_hexagon_V6_vmpabuu HvxWR:$src1, IntRegs:$src2), (V6_vmpabuu HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV65T, UseHVX]>; -def: Pat<(int_hexagon_V6_vmpabuu_128B HvxWR:$src1, IntRegs:$src2), (V6_vmpabuu HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV65T, UseHVX]>; -def: Pat<(int_hexagon_V6_vmpabuu_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3), (V6_vmpabuu_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV65T, UseHVX]>; -def: Pat<(int_hexagon_V6_vmpabuu_acc_128B HvxWR:$src1, HvxWR:$src2, IntRegs:$src3), (V6_vmpabuu_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV65T, UseHVX]>; -def: Pat<(int_hexagon_V6_vmpahhsat HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3), (V6_vmpahhsat HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3)>, Requires<[HasV65T, UseHVX]>; -def: Pat<(int_hexagon_V6_vmpahhsat_128B HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3), (V6_vmpahhsat HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3)>, Requires<[HasV65T, UseHVX]>; -def: Pat<(int_hexagon_V6_vmpauhuhsat HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3), (V6_vmpauhuhsat HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3)>, Requires<[HasV65T, UseHVX]>; -def: Pat<(int_hexagon_V6_vmpauhuhsat_128B HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3), (V6_vmpauhuhsat HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3)>, Requires<[HasV65T, UseHVX]>; -def: Pat<(int_hexagon_V6_vmpsuhuhsat HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3), (V6_vmpsuhuhsat HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3)>, Requires<[HasV65T, UseHVX]>; -def: Pat<(int_hexagon_V6_vmpsuhuhsat_128B HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3), (V6_vmpsuhuhsat HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3)>, Requires<[HasV65T, UseHVX]>; -def: Pat<(int_hexagon_V6_vmpyh_acc HvxWR:$src1, HvxVR:$src2, IntRegs:$src3), (V6_vmpyh_acc HvxWR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV65T, UseHVX]>; -def: Pat<(int_hexagon_V6_vmpyh_acc_128B HvxWR:$src1, HvxVR:$src2, IntRegs:$src3), (V6_vmpyh_acc HvxWR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV65T, UseHVX]>; -def: Pat<(int_hexagon_V6_vmpyuhe 
HvxVR:$src1, IntRegs:$src2), (V6_vmpyuhe HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV65T, UseHVX]>; -def: Pat<(int_hexagon_V6_vmpyuhe_128B HvxVR:$src1, IntRegs:$src2), (V6_vmpyuhe HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV65T, UseHVX]>; -def: Pat<(int_hexagon_V6_vmpyuhe_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3), (V6_vmpyuhe_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV65T, UseHVX]>; -def: Pat<(int_hexagon_V6_vmpyuhe_acc_128B HvxVR:$src1, HvxVR:$src2, IntRegs:$src3), (V6_vmpyuhe_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV65T, UseHVX]>; -def: Pat<(int_hexagon_V6_vprefixqb HvxQR:$src1), (V6_vprefixqb HvxQR:$src1)>, Requires<[HasV65T, UseHVX]>; -def: Pat<(int_hexagon_V6_vprefixqb_128B HvxQR:$src1), (V6_vprefixqb HvxQR:$src1)>, Requires<[HasV65T, UseHVX]>; -def: Pat<(int_hexagon_V6_vprefixqh HvxQR:$src1), (V6_vprefixqh HvxQR:$src1)>, Requires<[HasV65T, UseHVX]>; -def: Pat<(int_hexagon_V6_vprefixqh_128B HvxQR:$src1), (V6_vprefixqh HvxQR:$src1)>, Requires<[HasV65T, UseHVX]>; -def: Pat<(int_hexagon_V6_vprefixqw HvxQR:$src1), (V6_vprefixqw HvxQR:$src1)>, Requires<[HasV65T, UseHVX]>; -def: Pat<(int_hexagon_V6_vprefixqw_128B HvxQR:$src1), (V6_vprefixqw HvxQR:$src1)>, Requires<[HasV65T, UseHVX]>; -def: Pat<(int_hexagon_V6_vrmpyub_rtt HvxVR:$src1, DoubleRegs:$src2), (V6_vrmpyub_rtt HvxVR:$src1, DoubleRegs:$src2)>, Requires<[HasV65T, UseHVX]>; -def: Pat<(int_hexagon_V6_vrmpyub_rtt_128B HvxVR:$src1, DoubleRegs:$src2), (V6_vrmpyub_rtt HvxVR:$src1, DoubleRegs:$src2)>, Requires<[HasV65T, UseHVX]>; -def: Pat<(int_hexagon_V6_vrmpyub_rtt_acc HvxWR:$src1, HvxVR:$src2, DoubleRegs:$src3), (V6_vrmpyub_rtt_acc HvxWR:$src1, HvxVR:$src2, DoubleRegs:$src3)>, Requires<[HasV65T, UseHVX]>; -def: Pat<(int_hexagon_V6_vrmpyub_rtt_acc_128B HvxWR:$src1, HvxVR:$src2, DoubleRegs:$src3), (V6_vrmpyub_rtt_acc HvxWR:$src1, HvxVR:$src2, DoubleRegs:$src3)>, Requires<[HasV65T, UseHVX]>; -def: Pat<(int_hexagon_V6_vrmpybub_rtt HvxVR:$src1, DoubleRegs:$src2), (V6_vrmpybub_rtt HvxVR:$src1, DoubleRegs:$src2)>, Requires<[HasV65T, UseHVX]>; -def: Pat<(int_hexagon_V6_vrmpybub_rtt_128B HvxVR:$src1, DoubleRegs:$src2), (V6_vrmpybub_rtt HvxVR:$src1, DoubleRegs:$src2)>, Requires<[HasV65T, UseHVX]>; -def: Pat<(int_hexagon_V6_vrmpybub_rtt_acc HvxWR:$src1, HvxVR:$src2, DoubleRegs:$src3), (V6_vrmpybub_rtt_acc HvxWR:$src1, HvxVR:$src2, DoubleRegs:$src3)>, Requires<[HasV65T, UseHVX]>; -def: Pat<(int_hexagon_V6_vrmpybub_rtt_acc_128B HvxWR:$src1, HvxVR:$src2, DoubleRegs:$src3), (V6_vrmpybub_rtt_acc HvxWR:$src1, HvxVR:$src2, DoubleRegs:$src3)>, Requires<[HasV65T, UseHVX]>; -def: Pat<(int_hexagon_V6_vscattermw IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4), (V6_vscattermw IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4)>, Requires<[HasV65T, UseHVX]>; -def: Pat<(int_hexagon_V6_vscattermh IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4), (V6_vscattermh IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4)>, Requires<[HasV65T, UseHVX]>; -def: Pat<(int_hexagon_V6_vscattermw_add IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4), (V6_vscattermw_add IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4)>, Requires<[HasV65T, UseHVX]>; -def: Pat<(int_hexagon_V6_vscattermh_add IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4), (V6_vscattermh_add IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4)>, Requires<[HasV65T, UseHVX]>; -def: Pat<(int_hexagon_V6_vscattermwq HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxVR:$src4, HvxVR:$src5), (V6_vscattermwq HvxQR:$src1, IntRegs:$src2, 
ModRegs:$src3, HvxVR:$src4, HvxVR:$src5)>, Requires<[HasV65T, UseHVX]>; -def: Pat<(int_hexagon_V6_vscattermhq HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxVR:$src4, HvxVR:$src5), (V6_vscattermhq HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxVR:$src4, HvxVR:$src5)>, Requires<[HasV65T, UseHVX]>; -def: Pat<(int_hexagon_V6_vscattermhw IntRegs:$src1, ModRegs:$src2, HvxWR:$src3, HvxVR:$src4), (V6_vscattermhw IntRegs:$src1, ModRegs:$src2, HvxWR:$src3, HvxVR:$src4)>, Requires<[HasV65T, UseHVX]>; -def: Pat<(int_hexagon_V6_vscattermhw_add IntRegs:$src1, ModRegs:$src2, HvxWR:$src3, HvxVR:$src4), (V6_vscattermhw_add IntRegs:$src1, ModRegs:$src2, HvxWR:$src3, HvxVR:$src4)>, Requires<[HasV65T, UseHVX]>; -def: Pat<(int_hexagon_V6_vscattermhwq HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxWR:$src4, HvxVR:$src5), (V6_vscattermhwq HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxWR:$src4, HvxVR:$src5)>, Requires<[HasV65T, UseHVX]>; -def: Pat<(int_hexagon_V6_vscattermw_128B IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4), (V6_vscattermw IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4)>, Requires<[HasV65T, UseHVX]>; -def: Pat<(int_hexagon_V6_vscattermh_128B IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4), (V6_vscattermh IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4)>, Requires<[HasV65T, UseHVX]>; -def: Pat<(int_hexagon_V6_vscattermw_add_128B IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4), (V6_vscattermw_add IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4)>, Requires<[HasV65T, UseHVX]>; -def: Pat<(int_hexagon_V6_vscattermh_add_128B IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4), (V6_vscattermh_add IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4)>, Requires<[HasV65T, UseHVX]>; -def: Pat<(int_hexagon_V6_vscattermwq_128B HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxVR:$src4, HvxVR:$src5), (V6_vscattermwq HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxVR:$src4, HvxVR:$src5)>, Requires<[HasV65T, UseHVX]>; -def: Pat<(int_hexagon_V6_vscattermhq_128B HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxVR:$src4, HvxVR:$src5), (V6_vscattermhq HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxVR:$src4, HvxVR:$src5)>, Requires<[HasV65T, UseHVX]>; -def: Pat<(int_hexagon_V6_vscattermhw_128B IntRegs:$src1, ModRegs:$src2, HvxWR:$src3, HvxVR:$src4), (V6_vscattermhw IntRegs:$src1, ModRegs:$src2, HvxWR:$src3, HvxVR:$src4)>, Requires<[HasV65T, UseHVX]>; -def: Pat<(int_hexagon_V6_vscattermhw_add_128B IntRegs:$src1, ModRegs:$src2, HvxWR:$src3, HvxVR:$src4), (V6_vscattermhw_add IntRegs:$src1, ModRegs:$src2, HvxWR:$src3, HvxVR:$src4)>, Requires<[HasV65T, UseHVX]>; -def: Pat<(int_hexagon_V6_vscattermhwq_128B HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxWR:$src4, HvxVR:$src5), (V6_vscattermhwq HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxWR:$src4, HvxVR:$src5)>, Requires<[HasV65T, UseHVX]>; -def: Pat<(int_hexagon_V6_vdd0), (V6_vdd0)>, Requires<[HasV65T, UseHVX]>; -def: Pat<(int_hexagon_V6_vdd0_128B), (V6_vdd0)>, Requires<[HasV65T, UseHVX]>; +def: Pat<(int_hexagon_A6_vcmpbeq_notany DoubleRegs:$src1, DoubleRegs:$src2), (A6_vcmpbeq_notany DoubleRegs:$src1, DoubleRegs:$src2)>, Requires<[HasV65]>; +def: Pat<(int_hexagon_V6_vasruwuhsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3), (V6_vasruwuhsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV65, UseHVX]>; +def: Pat<(int_hexagon_V6_vasruwuhsat_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3), (V6_vasruwuhsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV65, UseHVX]>; +def: Pat<(int_hexagon_V6_vasruhubsat 
HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3), (V6_vasruhubsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV65, UseHVX]>; +def: Pat<(int_hexagon_V6_vasruhubsat_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3), (V6_vasruhubsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV65, UseHVX]>; +def: Pat<(int_hexagon_V6_vasruhubrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3), (V6_vasruhubrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV65, UseHVX]>; +def: Pat<(int_hexagon_V6_vasruhubrndsat_128B HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3), (V6_vasruhubrndsat HvxVR:$src1, HvxVR:$src2, IntRegsLow8:$src3)>, Requires<[HasV65, UseHVX]>; +def: Pat<(int_hexagon_V6_vabsb HvxVR:$src1), (V6_vabsb HvxVR:$src1)>, Requires<[HasV65, UseHVX]>; +def: Pat<(int_hexagon_V6_vabsb_128B HvxVR:$src1), (V6_vabsb HvxVR:$src1)>, Requires<[HasV65, UseHVX]>; +def: Pat<(int_hexagon_V6_vabsb_sat HvxVR:$src1), (V6_vabsb_sat HvxVR:$src1)>, Requires<[HasV65, UseHVX]>; +def: Pat<(int_hexagon_V6_vabsb_sat_128B HvxVR:$src1), (V6_vabsb_sat HvxVR:$src1)>, Requires<[HasV65, UseHVX]>; +def: Pat<(int_hexagon_V6_vaslh_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3), (V6_vaslh_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV65, UseHVX]>; +def: Pat<(int_hexagon_V6_vaslh_acc_128B HvxVR:$src1, HvxVR:$src2, IntRegs:$src3), (V6_vaslh_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV65, UseHVX]>; +def: Pat<(int_hexagon_V6_vasrh_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3), (V6_vasrh_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV65, UseHVX]>; +def: Pat<(int_hexagon_V6_vasrh_acc_128B HvxVR:$src1, HvxVR:$src2, IntRegs:$src3), (V6_vasrh_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV65, UseHVX]>; +def: Pat<(int_hexagon_V6_vavguw HvxVR:$src1, HvxVR:$src2), (V6_vavguw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV65, UseHVX]>; +def: Pat<(int_hexagon_V6_vavguw_128B HvxVR:$src1, HvxVR:$src2), (V6_vavguw HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV65, UseHVX]>; +def: Pat<(int_hexagon_V6_vavguwrnd HvxVR:$src1, HvxVR:$src2), (V6_vavguwrnd HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV65, UseHVX]>; +def: Pat<(int_hexagon_V6_vavguwrnd_128B HvxVR:$src1, HvxVR:$src2), (V6_vavguwrnd HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV65, UseHVX]>; +def: Pat<(int_hexagon_V6_vavgb HvxVR:$src1, HvxVR:$src2), (V6_vavgb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV65, UseHVX]>; +def: Pat<(int_hexagon_V6_vavgb_128B HvxVR:$src1, HvxVR:$src2), (V6_vavgb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV65, UseHVX]>; +def: Pat<(int_hexagon_V6_vavgbrnd HvxVR:$src1, HvxVR:$src2), (V6_vavgbrnd HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV65, UseHVX]>; +def: Pat<(int_hexagon_V6_vavgbrnd_128B HvxVR:$src1, HvxVR:$src2), (V6_vavgbrnd HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV65, UseHVX]>; +def: Pat<(int_hexagon_V6_vlut4 HvxVR:$src1, DoubleRegs:$src2), (V6_vlut4 HvxVR:$src1, DoubleRegs:$src2)>, Requires<[HasV65, UseHVX]>; +def: Pat<(int_hexagon_V6_vlut4_128B HvxVR:$src1, DoubleRegs:$src2), (V6_vlut4 HvxVR:$src1, DoubleRegs:$src2)>, Requires<[HasV65, UseHVX]>; +def: Pat<(int_hexagon_V6_vnavgb HvxVR:$src1, HvxVR:$src2), (V6_vnavgb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV65, UseHVX]>; +def: Pat<(int_hexagon_V6_vnavgb_128B HvxVR:$src1, HvxVR:$src2), (V6_vnavgb HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV65, UseHVX]>; +def: Pat<(int_hexagon_V6_vmpabuu HvxWR:$src1, IntRegs:$src2), (V6_vmpabuu HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV65, UseHVX]>; +def: Pat<(int_hexagon_V6_vmpabuu_128B HvxWR:$src1, IntRegs:$src2), 
(V6_vmpabuu HvxWR:$src1, IntRegs:$src2)>, Requires<[HasV65, UseHVX]>; +def: Pat<(int_hexagon_V6_vmpabuu_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3), (V6_vmpabuu_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV65, UseHVX]>; +def: Pat<(int_hexagon_V6_vmpabuu_acc_128B HvxWR:$src1, HvxWR:$src2, IntRegs:$src3), (V6_vmpabuu_acc HvxWR:$src1, HvxWR:$src2, IntRegs:$src3)>, Requires<[HasV65, UseHVX]>; +def: Pat<(int_hexagon_V6_vmpahhsat HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3), (V6_vmpahhsat HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3)>, Requires<[HasV65, UseHVX]>; +def: Pat<(int_hexagon_V6_vmpahhsat_128B HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3), (V6_vmpahhsat HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3)>, Requires<[HasV65, UseHVX]>; +def: Pat<(int_hexagon_V6_vmpauhuhsat HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3), (V6_vmpauhuhsat HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3)>, Requires<[HasV65, UseHVX]>; +def: Pat<(int_hexagon_V6_vmpauhuhsat_128B HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3), (V6_vmpauhuhsat HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3)>, Requires<[HasV65, UseHVX]>; +def: Pat<(int_hexagon_V6_vmpsuhuhsat HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3), (V6_vmpsuhuhsat HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3)>, Requires<[HasV65, UseHVX]>; +def: Pat<(int_hexagon_V6_vmpsuhuhsat_128B HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3), (V6_vmpsuhuhsat HvxVR:$src1, HvxVR:$src2, DoubleRegs:$src3)>, Requires<[HasV65, UseHVX]>; +def: Pat<(int_hexagon_V6_vmpyh_acc HvxWR:$src1, HvxVR:$src2, IntRegs:$src3), (V6_vmpyh_acc HvxWR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV65, UseHVX]>; +def: Pat<(int_hexagon_V6_vmpyh_acc_128B HvxWR:$src1, HvxVR:$src2, IntRegs:$src3), (V6_vmpyh_acc HvxWR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV65, UseHVX]>; +def: Pat<(int_hexagon_V6_vmpyuhe HvxVR:$src1, IntRegs:$src2), (V6_vmpyuhe HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV65, UseHVX]>; +def: Pat<(int_hexagon_V6_vmpyuhe_128B HvxVR:$src1, IntRegs:$src2), (V6_vmpyuhe HvxVR:$src1, IntRegs:$src2)>, Requires<[HasV65, UseHVX]>; +def: Pat<(int_hexagon_V6_vmpyuhe_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3), (V6_vmpyuhe_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV65, UseHVX]>; +def: Pat<(int_hexagon_V6_vmpyuhe_acc_128B HvxVR:$src1, HvxVR:$src2, IntRegs:$src3), (V6_vmpyuhe_acc HvxVR:$src1, HvxVR:$src2, IntRegs:$src3)>, Requires<[HasV65, UseHVX]>; +def: Pat<(int_hexagon_V6_vprefixqb HvxQR:$src1), (V6_vprefixqb HvxQR:$src1)>, Requires<[HasV65, UseHVX]>; +def: Pat<(int_hexagon_V6_vprefixqb_128B HvxQR:$src1), (V6_vprefixqb HvxQR:$src1)>, Requires<[HasV65, UseHVX]>; +def: Pat<(int_hexagon_V6_vprefixqh HvxQR:$src1), (V6_vprefixqh HvxQR:$src1)>, Requires<[HasV65, UseHVX]>; +def: Pat<(int_hexagon_V6_vprefixqh_128B HvxQR:$src1), (V6_vprefixqh HvxQR:$src1)>, Requires<[HasV65, UseHVX]>; +def: Pat<(int_hexagon_V6_vprefixqw HvxQR:$src1), (V6_vprefixqw HvxQR:$src1)>, Requires<[HasV65, UseHVX]>; +def: Pat<(int_hexagon_V6_vprefixqw_128B HvxQR:$src1), (V6_vprefixqw HvxQR:$src1)>, Requires<[HasV65, UseHVX]>; +def: Pat<(int_hexagon_V6_vrmpyub_rtt HvxVR:$src1, DoubleRegs:$src2), (V6_vrmpyub_rtt HvxVR:$src1, DoubleRegs:$src2)>, Requires<[HasV65, UseHVX]>; +def: Pat<(int_hexagon_V6_vrmpyub_rtt_128B HvxVR:$src1, DoubleRegs:$src2), (V6_vrmpyub_rtt HvxVR:$src1, DoubleRegs:$src2)>, Requires<[HasV65, UseHVX]>; +def: Pat<(int_hexagon_V6_vrmpyub_rtt_acc HvxWR:$src1, HvxVR:$src2, DoubleRegs:$src3), (V6_vrmpyub_rtt_acc HvxWR:$src1, HvxVR:$src2, DoubleRegs:$src3)>, Requires<[HasV65, UseHVX]>; +def: 
Pat<(int_hexagon_V6_vrmpyub_rtt_acc_128B HvxWR:$src1, HvxVR:$src2, DoubleRegs:$src3), (V6_vrmpyub_rtt_acc HvxWR:$src1, HvxVR:$src2, DoubleRegs:$src3)>, Requires<[HasV65, UseHVX]>; +def: Pat<(int_hexagon_V6_vrmpybub_rtt HvxVR:$src1, DoubleRegs:$src2), (V6_vrmpybub_rtt HvxVR:$src1, DoubleRegs:$src2)>, Requires<[HasV65, UseHVX]>; +def: Pat<(int_hexagon_V6_vrmpybub_rtt_128B HvxVR:$src1, DoubleRegs:$src2), (V6_vrmpybub_rtt HvxVR:$src1, DoubleRegs:$src2)>, Requires<[HasV65, UseHVX]>; +def: Pat<(int_hexagon_V6_vrmpybub_rtt_acc HvxWR:$src1, HvxVR:$src2, DoubleRegs:$src3), (V6_vrmpybub_rtt_acc HvxWR:$src1, HvxVR:$src2, DoubleRegs:$src3)>, Requires<[HasV65, UseHVX]>; +def: Pat<(int_hexagon_V6_vrmpybub_rtt_acc_128B HvxWR:$src1, HvxVR:$src2, DoubleRegs:$src3), (V6_vrmpybub_rtt_acc HvxWR:$src1, HvxVR:$src2, DoubleRegs:$src3)>, Requires<[HasV65, UseHVX]>; +def: Pat<(int_hexagon_V6_vscattermw IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4), (V6_vscattermw IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4)>, Requires<[HasV65, UseHVX]>; +def: Pat<(int_hexagon_V6_vscattermh IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4), (V6_vscattermh IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4)>, Requires<[HasV65, UseHVX]>; +def: Pat<(int_hexagon_V6_vscattermw_add IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4), (V6_vscattermw_add IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4)>, Requires<[HasV65, UseHVX]>; +def: Pat<(int_hexagon_V6_vscattermh_add IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4), (V6_vscattermh_add IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4)>, Requires<[HasV65, UseHVX]>; +def: Pat<(int_hexagon_V6_vscattermwq HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxVR:$src4, HvxVR:$src5), (V6_vscattermwq HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxVR:$src4, HvxVR:$src5)>, Requires<[HasV65, UseHVX]>; +def: Pat<(int_hexagon_V6_vscattermhq HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxVR:$src4, HvxVR:$src5), (V6_vscattermhq HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxVR:$src4, HvxVR:$src5)>, Requires<[HasV65, UseHVX]>; +def: Pat<(int_hexagon_V6_vscattermhw IntRegs:$src1, ModRegs:$src2, HvxWR:$src3, HvxVR:$src4), (V6_vscattermhw IntRegs:$src1, ModRegs:$src2, HvxWR:$src3, HvxVR:$src4)>, Requires<[HasV65, UseHVX]>; +def: Pat<(int_hexagon_V6_vscattermhw_add IntRegs:$src1, ModRegs:$src2, HvxWR:$src3, HvxVR:$src4), (V6_vscattermhw_add IntRegs:$src1, ModRegs:$src2, HvxWR:$src3, HvxVR:$src4)>, Requires<[HasV65, UseHVX]>; +def: Pat<(int_hexagon_V6_vscattermhwq HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxWR:$src4, HvxVR:$src5), (V6_vscattermhwq HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxWR:$src4, HvxVR:$src5)>, Requires<[HasV65, UseHVX]>; +def: Pat<(int_hexagon_V6_vscattermw_128B IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4), (V6_vscattermw IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4)>, Requires<[HasV65, UseHVX]>; +def: Pat<(int_hexagon_V6_vscattermh_128B IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4), (V6_vscattermh IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4)>, Requires<[HasV65, UseHVX]>; +def: Pat<(int_hexagon_V6_vscattermw_add_128B IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4), (V6_vscattermw_add IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4)>, Requires<[HasV65, UseHVX]>; +def: Pat<(int_hexagon_V6_vscattermh_add_128B IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4), (V6_vscattermh_add IntRegs:$src1, ModRegs:$src2, HvxVR:$src3, HvxVR:$src4)>, Requires<[HasV65, UseHVX]>; +def: 
Pat<(int_hexagon_V6_vscattermwq_128B HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxVR:$src4, HvxVR:$src5), (V6_vscattermwq HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxVR:$src4, HvxVR:$src5)>, Requires<[HasV65, UseHVX]>; +def: Pat<(int_hexagon_V6_vscattermhq_128B HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxVR:$src4, HvxVR:$src5), (V6_vscattermhq HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxVR:$src4, HvxVR:$src5)>, Requires<[HasV65, UseHVX]>; +def: Pat<(int_hexagon_V6_vscattermhw_128B IntRegs:$src1, ModRegs:$src2, HvxWR:$src3, HvxVR:$src4), (V6_vscattermhw IntRegs:$src1, ModRegs:$src2, HvxWR:$src3, HvxVR:$src4)>, Requires<[HasV65, UseHVX]>; +def: Pat<(int_hexagon_V6_vscattermhw_add_128B IntRegs:$src1, ModRegs:$src2, HvxWR:$src3, HvxVR:$src4), (V6_vscattermhw_add IntRegs:$src1, ModRegs:$src2, HvxWR:$src3, HvxVR:$src4)>, Requires<[HasV65, UseHVX]>; +def: Pat<(int_hexagon_V6_vscattermhwq_128B HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxWR:$src4, HvxVR:$src5), (V6_vscattermhwq HvxQR:$src1, IntRegs:$src2, ModRegs:$src3, HvxWR:$src4, HvxVR:$src5)>, Requires<[HasV65, UseHVX]>; +def: Pat<(int_hexagon_V6_vdd0), (V6_vdd0)>, Requires<[HasV65, UseHVX]>; +def: Pat<(int_hexagon_V6_vdd0_128B), (V6_vdd0)>, Requires<[HasV65, UseHVX]>; diff --git a/lib/Target/Hexagon/HexagonNewValueJump.cpp b/lib/Target/Hexagon/HexagonNewValueJump.cpp index ffa447cc1311..f2a6627c99be 100644 --- a/lib/Target/Hexagon/HexagonNewValueJump.cpp +++ b/lib/Target/Hexagon/HexagonNewValueJump.cpp @@ -16,7 +16,7 @@ // The basic approach looks for a sequence of predicated jump, compare instruction // that generates the predicate, and the feeder to the predicate. Once it finds -// all, it collapses compare and jump instruction into a new valu jump +// all, it collapses compare and jump instruction into a new value jump // instructions. // //===----------------------------------------------------------------------===// @@ -24,6 +24,7 @@ #include "Hexagon.h" #include "HexagonInstrInfo.h" #include "HexagonRegisterInfo.h" +#include "HexagonSubtarget.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" @@ -95,7 +96,7 @@ namespace { const HexagonInstrInfo *QII; const HexagonRegisterInfo *QRI; - /// \brief A handle to the branch probability pass. + /// A handle to the branch probability pass. const MachineBranchProbabilityInfo *MBPI; bool isNewValueJumpCandidate(const MachineInstr &MI) const; @@ -142,8 +143,24 @@ static bool canBeFeederToNewValueJump(const HexagonInstrInfo *QII, if (QII->isSolo(*II)) return false; - // Make sure there there is no 'def' or 'use' of any of the uses of - // feeder insn between it's definition, this MI and jump, jmpInst + if (QII->isFloat(*II)) + return false; + + // Make sure that the (unique) def operand is a register from IntRegs. + bool HadDef = false; + for (const MachineOperand &Op : II->operands()) { + if (!Op.isReg() || !Op.isDef()) + continue; + if (HadDef) + return false; + HadDef = true; + if (!Hexagon::IntRegsRegClass.contains(Op.getReg())) + return false; + } + assert(HadDef); + + // Make sure there is no 'def' or 'use' of any of the uses of + // feeder insn between its definition, this MI and jump, jmpInst + // skipping compare, cmpInst. // Here's the example.
// r21=memub(r22+r24<<#0) @@ -270,8 +287,8 @@ static bool canCompareBeNewValueJump(const HexagonInstrInfo *QII, if (cmpReg1 == cmpOp2) return false; - // Make sure that that second register is not from COPY - // At machine code level, we don't need this, but if we decide + // Make sure that the second register is not from COPY + // at machine code level, we don't need this, but if we decide // to move new value jump prior to RA, we would be needing this. MachineRegisterInfo &MRI = MF.getRegInfo(); if (secondReg && !TargetRegisterInfo::isPhysicalRegister(cmpOp2)) { @@ -285,7 +302,7 @@ static bool canCompareBeNewValueJump(const HexagonInstrInfo *QII, // and satisfy the following conditions. ++II; for (MachineBasicBlock::iterator localII = II; localII != end; ++localII) { - if (localII->isDebugValue()) + if (localII->isDebugInstr()) continue; // Check 1. @@ -431,8 +448,8 @@ bool HexagonNewValueJump::isNewValueJumpCandidate( } bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) { - DEBUG(dbgs() << "********** Hexagon New Value Jump **********\n" - << "********** Function: " << MF.getName() << "\n"); + LLVM_DEBUG(dbgs() << "********** Hexagon New Value Jump **********\n" + << "********** Function: " << MF.getName() << "\n"); if (skipFunction(MF.getFunction())) return false; @@ -445,9 +462,9 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) { MF.getSubtarget().getRegisterInfo()); MBPI = &getAnalysis<MachineBranchProbabilityInfo>(); - if (DisableNewValueJumps) { + if (DisableNewValueJumps || + !MF.getSubtarget<HexagonSubtarget>().useNewValueJumps()) return false; - } int nvjCount = DbgNVJCount; int nvjGenerated = 0; @@ -457,9 +474,10 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) { MBBb != MBBe; ++MBBb) { MachineBasicBlock *MBB = &*MBBb; - DEBUG(dbgs() << "** dumping bb ** " << MBB->getNumber() << "\n"); - DEBUG(MBB->dump()); - DEBUG(dbgs() << "\n" << "********** dumping instr bottom up **********\n"); + LLVM_DEBUG(dbgs() << "** dumping bb ** " << MBB->getNumber() << "\n"); + LLVM_DEBUG(MBB->dump()); + LLVM_DEBUG(dbgs() << "\n" + << "********** dumping instr bottom up **********\n"); bool foundJump = false; bool foundCompare = false; bool invertPredicate = false; @@ -477,14 +495,14 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) { for (MachineBasicBlock::iterator MII = MBB->end(), E = MBB->begin(); MII != E;) { MachineInstr &MI = *--MII; - if (MI.isDebugValue()) { + if (MI.isDebugInstr()) { continue; } if ((nvjCount == 0) || (nvjCount > -1 && nvjCount <= nvjGenerated)) break; - DEBUG(dbgs() << "Instr: "; MI.dump(); dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "Instr: "; MI.dump(); dbgs() << "\n"); if (!foundJump && (MI.getOpcode() == Hexagon::J2_jumpt || MI.getOpcode() == Hexagon::J2_jumptpt || @@ -505,7 +523,7 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) { // operands, the following check on the kill flag would suffice. // if(!jmpInstr->getOperand(0).isKill()) break; - // This predicate register is live out out of BB + // This predicate register is live out of BB // this would only work if we can actually use Live // variable analysis on phy regs - but LLVM does not // provide LV analysis on phys regs. 
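The new feeder legality check above is easy to miss in the hunk: besides rejecting solo and floating-point instructions, a feeder is now accepted only if it defines exactly one register and that register is in IntRegs. A minimal, self-contained C++ sketch of that unique-def check; the Operand struct and hasUniqueIntRegsDef are hypothetical simplified stand-ins for LLVM's MachineOperand iteration and Hexagon::IntRegsRegClass.contains(), shown for illustration only:

    #include <cassert>
    #include <vector>

    // Hypothetical, simplified stand-in for llvm::MachineOperand; the real
    // pass iterates MachineInstr::operands() and queries the register class.
    struct Operand {
      bool IsReg;     // operand is a register (not an immediate, etc.)
      bool IsDef;     // operand is a definition rather than a use
      bool InIntRegs; // stand-in for Hexagon::IntRegsRegClass.contains(Reg)
    };

    // Mirrors the check added to canBeFeederToNewValueJump: accept the feeder
    // only if it has exactly one register def, and that def is in IntRegs.
    bool hasUniqueIntRegsDef(const std::vector<Operand> &Ops) {
      bool HadDef = false;
      for (const Operand &Op : Ops) {
        if (!Op.IsReg || !Op.IsDef)
          continue;
        if (HadDef) // a second def disqualifies the feeder
          return false;
        HadDef = true;
        if (!Op.InIntRegs) // def must be a 32-bit integer register
          return false;
      }
      return HadDef; // the real code asserts that a def was found
    }

    int main() {
      // r21 = memub(r22+r24<<#0): one IntRegs def plus two uses -> accepted.
      assert(hasUniqueIntRegsDef({{true, true, true},
                                  {true, false, true},
                                  {true, false, true}}));
      // A def outside IntRegs (e.g. a 64-bit DoubleRegs pair) is rejected.
      assert(!hasUniqueIntRegsDef({{true, true, false}}));
      // Two defs are rejected even if both are in IntRegs.
      assert(!hasUniqueIntRegsDef({{true, true, true}, {true, true, true}}));
      return 0;
    }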
diff --git a/lib/Target/Hexagon/HexagonOptAddrMode.cpp b/lib/Target/Hexagon/HexagonOptAddrMode.cpp index 4738a4d32409..29c044b3b729 100644 --- a/lib/Target/Hexagon/HexagonOptAddrMode.cpp +++ b/lib/Target/Hexagon/HexagonOptAddrMode.cpp @@ -27,6 +27,7 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/Pass.h" @@ -78,7 +79,9 @@ private: using MISetType = DenseSet<MachineInstr *>; using InstrEvalMap = DenseMap<MachineInstr *, bool>; + MachineRegisterInfo *MRI = nullptr; const HexagonInstrInfo *HII = nullptr; + const HexagonRegisterInfo *HRI = nullptr; MachineDominatorTree *MDT = nullptr; DataFlowGraph *DFG = nullptr; DataFlowGraph::DefStackMap DefM; @@ -88,11 +91,16 @@ private: bool processBlock(NodeAddr<BlockNode *> BA); bool xformUseMI(MachineInstr *TfrMI, MachineInstr *UseMI, NodeAddr<UseNode *> UseN, unsigned UseMOnum); + bool processAddUses(NodeAddr<StmtNode *> AddSN, MachineInstr *AddMI, + const NodeList &UNodeList); + bool updateAddUses(MachineInstr *AddMI, MachineInstr *UseMI); bool analyzeUses(unsigned DefR, const NodeList &UNodeList, InstrEvalMap &InstrEvalResult, short &SizeInc); bool hasRepForm(MachineInstr &MI, unsigned TfrDefR); bool canRemoveAddasl(NodeAddr<StmtNode *> AddAslSN, MachineInstr &MI, const NodeList &UNodeList); + bool isSafeToExtLR(NodeAddr<StmtNode *> SN, MachineInstr *MI, + unsigned LRExtReg, const NodeList &UNodeList); void getAllRealUses(NodeAddr<StmtNode *> SN, NodeList &UNodeList); bool allValidCandidates(NodeAddr<StmtNode *> SA, NodeList &UNodeList); short getBaseWithLongOffset(const MachineInstr &MI) const; @@ -101,6 +109,7 @@ private: bool changeLoad(MachineInstr *OldMI, MachineOperand ImmOp, unsigned ImmOpNum); bool changeAddAsl(NodeAddr<UseNode *> AddAslUN, MachineInstr *AddAslMI, const MachineOperand &ImmOp, unsigned ImmOpNum); + bool isValidOffset(MachineInstr *MI, int Offset); }; } // end anonymous namespace @@ -208,7 +217,7 @@ bool HexagonOptAddrMode::allValidCandidates(NodeAddr<StmtNode *> SA, NodeSet Visited, Defs; const auto &P = LV->getAllReachingDefsRec(UR, UN, Visited, Defs); if (!P.second) { - DEBUG({ + LLVM_DEBUG({ dbgs() << "*** Unable to collect all reaching defs for use ***\n" << PrintNode<UseNode*>(UN, *DFG) << '\n' << "The program's complexity may exceed the limits.\n"; @@ -217,7 +226,7 @@ bool HexagonOptAddrMode::allValidCandidates(NodeAddr<StmtNode *> SA, } const auto &ReachingDefs = P.first; if (ReachingDefs.size() > 1) { - DEBUG({ + LLVM_DEBUG({ dbgs() << "*** Multiple Reaching Defs found!!! 
***\n"; for (auto DI : ReachingDefs) { NodeAddr<UseNode *> DA = DFG->addr<UseNode *>(DI); @@ -235,15 +244,15 @@ bool HexagonOptAddrMode::allValidCandidates(NodeAddr<StmtNode *> SA, void HexagonOptAddrMode::getAllRealUses(NodeAddr<StmtNode *> SA, NodeList &UNodeList) { for (NodeAddr<DefNode *> DA : SA.Addr->members_if(DFG->IsDef, *DFG)) { - DEBUG(dbgs() << "\t\t[DefNode]: " << Print<NodeAddr<DefNode *>>(DA, *DFG) - << "\n"); + LLVM_DEBUG(dbgs() << "\t\t[DefNode]: " + << Print<NodeAddr<DefNode *>>(DA, *DFG) << "\n"); RegisterRef DR = DFG->getPRI().normalize(DA.Addr->getRegRef(*DFG)); auto UseSet = LV->getAllReachedUses(DR, DA); for (auto UI : UseSet) { NodeAddr<UseNode *> UA = DFG->addr<UseNode *>(UI); - DEBUG({ + LLVM_DEBUG({ NodeAddr<StmtNode *> TempIA = UA.Addr->getOwner(*DFG); dbgs() << "\t\t\t[Reached Use]: " << Print<NodeAddr<InstrNode *>>(TempIA, *DFG) << "\n"; @@ -253,8 +262,8 @@ void HexagonOptAddrMode::getAllRealUses(NodeAddr<StmtNode *> SA, NodeAddr<PhiNode *> PA = UA.Addr->getOwner(*DFG); NodeId id = PA.Id; const Liveness::RefMap &phiUse = LV->getRealUses(id); - DEBUG(dbgs() << "\t\t\t\tphi real Uses" - << Print<Liveness::RefMap>(phiUse, *DFG) << "\n"); + LLVM_DEBUG(dbgs() << "\t\t\t\tphi real Uses" + << Print<Liveness::RefMap>(phiUse, *DFG) << "\n"); if (!phiUse.empty()) { for (auto I : phiUse) { if (!DFG->getPRI().alias(RegisterRef(I.first), DR)) @@ -272,6 +281,153 @@ void HexagonOptAddrMode::getAllRealUses(NodeAddr<StmtNode *> SA, } } +bool HexagonOptAddrMode::isSafeToExtLR(NodeAddr<StmtNode *> SN, + MachineInstr *MI, unsigned LRExtReg, + const NodeList &UNodeList) { + RegisterRef LRExtRR; + NodeId LRExtRegRD = 0; + // Iterate through all the UseNodes in SN and find the reaching def + // for the LRExtReg. + for (NodeAddr<UseNode *> UA : SN.Addr->members_if(DFG->IsUse, *DFG)) { + RegisterRef RR = UA.Addr->getRegRef(*DFG); + if (LRExtReg == RR.Reg) { + LRExtRR = RR; + LRExtRegRD = UA.Addr->getReachingDef(); + } + } + + for (auto I = UNodeList.rbegin(), E = UNodeList.rend(); I != E; ++I) { + NodeAddr<UseNode *> UA = *I; + NodeAddr<InstrNode *> IA = UA.Addr->getOwner(*DFG); + // The reaching def of LRExtRR at load/store node should be same as the + // one reaching at the SN. + if (UA.Addr->getFlags() & NodeAttrs::PhiRef) + return false; + NodeAddr<RefNode*> AA = LV->getNearestAliasedRef(LRExtRR, IA); + if ((DFG->IsDef(AA) && AA.Id != LRExtRegRD) || + AA.Addr->getReachingDef() != LRExtRegRD) { + LLVM_DEBUG( + dbgs() << "isSafeToExtLR: Returning false; another reaching def\n"); + return false; + } + + MachineInstr *UseMI = NodeAddr<StmtNode *>(IA).Addr->getCode(); + NodeAddr<DefNode *> LRExtRegDN = DFG->addr<DefNode *>(LRExtRegRD); + // Reaching Def to LRExtReg can't be a phi. 
+ if ((LRExtRegDN.Addr->getFlags() & NodeAttrs::PhiRef) && + MI->getParent() != UseMI->getParent()) + return false; + } + return true; +} + +bool HexagonOptAddrMode::isValidOffset(MachineInstr *MI, int Offset) { + unsigned AlignMask = 0; + switch (HII->getMemAccessSize(*MI)) { + case HexagonII::MemAccessSize::DoubleWordAccess: + AlignMask = 0x7; + break; + case HexagonII::MemAccessSize::WordAccess: + AlignMask = 0x3; + break; + case HexagonII::MemAccessSize::HalfWordAccess: + AlignMask = 0x1; + break; + case HexagonII::MemAccessSize::ByteAccess: + AlignMask = 0x0; + break; + default: + return false; + } + + if ((AlignMask & Offset) != 0) + return false; + return HII->isValidOffset(MI->getOpcode(), Offset, HRI, false); +} + +bool HexagonOptAddrMode::processAddUses(NodeAddr<StmtNode *> AddSN, + MachineInstr *AddMI, + const NodeList &UNodeList) { + + unsigned AddDefR = AddMI->getOperand(0).getReg(); + for (auto I = UNodeList.rbegin(), E = UNodeList.rend(); I != E; ++I) { + NodeAddr<UseNode *> UN = *I; + NodeAddr<StmtNode *> SN = UN.Addr->getOwner(*DFG); + MachineInstr *MI = SN.Addr->getCode(); + const MCInstrDesc &MID = MI->getDesc(); + if ((!MID.mayLoad() && !MID.mayStore()) || + HII->getAddrMode(*MI) != HexagonII::BaseImmOffset || + HII->isHVXVec(*MI)) + return false; + + MachineOperand BaseOp = MID.mayLoad() ? MI->getOperand(1) : MI->getOperand(0); + + if (!BaseOp.isReg() || BaseOp.getReg() != AddDefR) + return false; + + MachineOperand OffsetOp = MID.mayLoad() ? MI->getOperand(2) : MI->getOperand(1); + if (!OffsetOp.isImm()) + return false; + + int64_t newOffset = OffsetOp.getImm() + AddMI->getOperand(2).getImm(); + if (!isValidOffset(MI, newOffset)) + return false; + + // Since we'll be extending the live range of Rt in the following example, + // make sure that it is safe: another definition of Rt must not exist + // between 'add' and the load/store instruction. + // + // Ex: Rx= add(Rt,#10) + // memw(Rx+#0) = Rs + // will be replaced with => memw(Rt+#10) = Rs + unsigned BaseReg = AddMI->getOperand(1).getReg(); + if (!isSafeToExtLR(AddSN, AddMI, BaseReg, UNodeList)) + return false; + } + + // Update all the uses of 'add' with the appropriate base and offset + // values. + bool Changed = false; + for (auto I = UNodeList.rbegin(), E = UNodeList.rend(); I != E; ++I) { + NodeAddr<UseNode *> UseN = *I; + assert(!(UseN.Addr->getFlags() & NodeAttrs::PhiRef) && + "Found a PhiRef node as a real reached use!!"); + + NodeAddr<StmtNode *> OwnerN = UseN.Addr->getOwner(*DFG); + MachineInstr *UseMI = OwnerN.Addr->getCode(); + LLVM_DEBUG(dbgs() << "\t\t[MI <BB#" << UseMI->getParent()->getNumber() + << ">]: " << *UseMI << "\n"); + Changed |= updateAddUses(AddMI, UseMI); + } + + if (Changed) + Deleted.insert(AddMI); + + return Changed; +} + +bool HexagonOptAddrMode::updateAddUses(MachineInstr *AddMI, + MachineInstr *UseMI) { + const MachineOperand ImmOp = AddMI->getOperand(2); + const MachineOperand AddRegOp = AddMI->getOperand(1); + unsigned newReg = AddRegOp.getReg(); + const MCInstrDesc &MID = UseMI->getDesc(); + + MachineOperand &BaseOp = MID.mayLoad() ? UseMI->getOperand(1) : UseMI->getOperand(0); + MachineOperand &OffsetOp = MID.mayLoad() ?
UseMI->getOperand(2) + : UseMI->getOperand(1); + BaseOp.setReg(newReg); + BaseOp.setIsUndef(AddRegOp.isUndef()); + BaseOp.setImplicit(AddRegOp.isImplicit()); + OffsetOp.setImm(ImmOp.getImm() + OffsetOp.getImm()); + MRI->clearKillFlags(newReg); + + return true; +} + bool HexagonOptAddrMode::analyzeUses(unsigned tfrDefR, const NodeList &UNodeList, InstrEvalMap &InstrEvalResult, @@ -296,7 +452,7 @@ bool HexagonOptAddrMode::analyzeUses(unsigned tfrDefR, } else if (MI.getOpcode() == Hexagon::S2_addasl_rrri) { NodeList AddaslUseList; - DEBUG(dbgs() << "\nGetting ReachedUses for === " << MI << "\n"); + LLVM_DEBUG(dbgs() << "\nGetting ReachedUses for === " << MI << "\n"); getAllRealUses(SN, AddaslUseList); // Process phi nodes. if (allValidCandidates(SN, AddaslUseList) && @@ -360,8 +516,8 @@ bool HexagonOptAddrMode::changeLoad(MachineInstr *OldMI, MachineOperand ImmOp, } else Changed = false; - DEBUG(dbgs() << "[Changing]: " << *OldMI << "\n"); - DEBUG(dbgs() << "[TO]: " << *MIB << "\n"); + LLVM_DEBUG(dbgs() << "[Changing]: " << *OldMI << "\n"); + LLVM_DEBUG(dbgs() << "[TO]: " << *MIB << "\n"); } else if (ImmOpNum == 2 && OldMI->getOperand(3).getImm() == 0) { short NewOpCode = HII->changeAddrMode_rr_io(*OldMI); assert(NewOpCode >= 0 && "Invalid New opcode\n"); @@ -371,8 +527,8 @@ bool HexagonOptAddrMode::changeLoad(MachineInstr *OldMI, MachineOperand ImmOp, MIB.add(ImmOp); OpStart = 4; Changed = true; - DEBUG(dbgs() << "[Changing]: " << *OldMI << "\n"); - DEBUG(dbgs() << "[TO]: " << *MIB << "\n"); + LLVM_DEBUG(dbgs() << "[Changing]: " << *OldMI << "\n"); + LLVM_DEBUG(dbgs() << "[TO]: " << *MIB << "\n"); } if (Changed) @@ -413,8 +569,8 @@ bool HexagonOptAddrMode::changeStore(MachineInstr *OldMI, MachineOperand ImmOp, OpStart = 3; } Changed = true; - DEBUG(dbgs() << "[Changing]: " << *OldMI << "\n"); - DEBUG(dbgs() << "[TO]: " << *MIB << "\n"); + LLVM_DEBUG(dbgs() << "[Changing]: " << *OldMI << "\n"); + LLVM_DEBUG(dbgs() << "[TO]: " << *MIB << "\n"); } else if (ImmOpNum == 1 && OldMI->getOperand(2).getImm() == 0) { short NewOpCode = HII->changeAddrMode_rr_io(*OldMI); assert(NewOpCode >= 0 && "Invalid New opcode\n"); @@ -423,8 +579,8 @@ bool HexagonOptAddrMode::changeStore(MachineInstr *OldMI, MachineOperand ImmOp, MIB.add(ImmOp); OpStart = 3; Changed = true; - DEBUG(dbgs() << "[Changing]: " << *OldMI << "\n"); - DEBUG(dbgs() << "[TO]: " << *MIB << "\n"); + LLVM_DEBUG(dbgs() << "[Changing]: " << *OldMI << "\n"); + LLVM_DEBUG(dbgs() << "[TO]: " << *MIB << "\n"); } if (Changed) for (unsigned i = OpStart; i < OpEnd; ++i) @@ -447,7 +603,7 @@ bool HexagonOptAddrMode::changeAddAsl(NodeAddr<UseNode *> AddAslUN, unsigned ImmOpNum) { NodeAddr<StmtNode *> SA = AddAslUN.Addr->getOwner(*DFG); - DEBUG(dbgs() << "Processing addasl :" << *AddAslMI << "\n"); + LLVM_DEBUG(dbgs() << "Processing addasl :" << *AddAslMI << "\n"); NodeList UNodeList; getAllRealUses(SA, UNodeList); @@ -458,11 +614,11 @@ bool HexagonOptAddrMode::changeAddAsl(NodeAddr<UseNode *> AddAslUN, "Can't transform this 'AddAsl' instruction!"); NodeAddr<StmtNode *> UseIA = UseUN.Addr->getOwner(*DFG); - DEBUG(dbgs() << "[InstrNode]: " << Print<NodeAddr<InstrNode *>>(UseIA, *DFG) - << "\n"); + LLVM_DEBUG(dbgs() << "[InstrNode]: " + << Print<NodeAddr<InstrNode *>>(UseIA, *DFG) << "\n"); MachineInstr *UseMI = UseIA.Addr->getCode(); - DEBUG(dbgs() << "[MI <" << printMBBReference(*UseMI->getParent()) - << ">]: " << *UseMI << "\n"); + LLVM_DEBUG(dbgs() << "[MI <" << printMBBReference(*UseMI->getParent()) + << ">]: " << *UseMI << "\n"); const MCInstrDesc 
&UseMID = UseMI->getDesc(); assert(HII->getAddrMode(*UseMI) == HexagonII::BaseImmOffset); @@ -534,13 +690,15 @@ bool HexagonOptAddrMode::processBlock(NodeAddr<BlockNode *> BA) { NodeAddr<StmtNode *> SA = IA; MachineInstr *MI = SA.Addr->getCode(); - if (MI->getOpcode() != Hexagon::A2_tfrsi || - !MI->getOperand(1).isGlobal()) - continue; + if ((MI->getOpcode() != Hexagon::A2_tfrsi || + !MI->getOperand(1).isGlobal()) && + (MI->getOpcode() != Hexagon::A2_addi || + !MI->getOperand(2).isImm() || HII->isConstExtended(*MI))) + continue; - DEBUG(dbgs() << "[Analyzing " << HII->getName(MI->getOpcode()) << "]: " - << *MI << "\n\t[InstrNode]: " - << Print<NodeAddr<InstrNode *>>(IA, *DFG) << '\n'); + LLVM_DEBUG(dbgs() << "[Analyzing " << HII->getName(MI->getOpcode()) + << "]: " << *MI << "\n\t[InstrNode]: " + << Print<NodeAddr<InstrNode *>>(IA, *DFG) << '\n'); NodeList UNodeList; getAllRealUses(SA, UNodeList); @@ -548,6 +706,21 @@ bool HexagonOptAddrMode::processBlock(NodeAddr<BlockNode *> BA) { if (!allValidCandidates(SA, UNodeList)) continue; + // Analyze all uses of 'add'. If the output of 'add' is used as an address + // in base+immediate addressing mode load/store instructions, see if + // they can be updated to use the immediate value as an offset, thus + // providing us the opportunity to eliminate 'add'. + // Ex: Rx= add(Rt,#12) + // memw(Rx+#0) = Rs + // This can be replaced with memw(Rt+#12) = Rs + // + // This transformation is only performed if all uses can be updated and + // the offset isn't required to be constant extended. + if (MI->getOpcode() == Hexagon::A2_addi) { + Changed |= processAddUses(SA, MI, UNodeList); + continue; + } + short SizeInc = 0; unsigned DefR = MI->getOperand(0).getReg(); InstrEvalMap InstrEvalResult; @@ -561,8 +734,9 @@ bool HexagonOptAddrMode::processBlock(NodeAddr<BlockNode *> BA) { bool KeepTfr = false; - DEBUG(dbgs() << "\t[Total reached uses] : " << UNodeList.size() << "\n"); - DEBUG(dbgs() << "\t[Processing Reached Uses] ===\n"); + LLVM_DEBUG(dbgs() << "\t[Total reached uses] : " << UNodeList.size() + << "\n"); + LLVM_DEBUG(dbgs() << "\t[Processing Reached Uses] ===\n"); for (auto I = UNodeList.rbegin(), E = UNodeList.rend(); I != E; ++I) { NodeAddr<UseNode *> UseN = *I; assert(!(UseN.Addr->getFlags() & NodeAttrs::PhiRef) && @@ -570,8 +744,8 @@ bool HexagonOptAddrMode::processBlock(NodeAddr<BlockNode *> BA) { NodeAddr<StmtNode *> OwnerN = UseN.Addr->getOwner(*DFG); MachineInstr *UseMI = OwnerN.Addr->getCode(); - DEBUG(dbgs() << "\t\t[MI <" << printMBBReference(*UseMI->getParent()) - << ">]: " << *UseMI << "\n"); + LLVM_DEBUG(dbgs() << "\t\t[MI <" << printMBBReference(*UseMI->getParent()) + << ">]: " << *UseMI << "\n"); int UseMOnum = -1; unsigned NumOperands = UseMI->getNumOperands(); @@ -580,9 +754,11 @@ bool HexagonOptAddrMode::processBlock(NodeAddr<BlockNode *> BA) { if (op.isReg() && op.isUse() && DefR == op.getReg()) UseMOnum = j; } - assert(UseMOnum >= 0 && "Invalid reached use!"); + // It is possible that the register will not be found in any operand. + // This could happen, for example, when DefR = R4, but the used + // register is D2. - if (InstrEvalResult[UseMI]) + if (UseMOnum >= 0 && InstrEvalResult[UseMI]) // Change UseMI if replacement is possible.
Changed |= xformUseMI(MI, UseMI, UseN, UseMOnum); else @@ -600,27 +776,27 @@ bool HexagonOptAddrMode::runOnMachineFunction(MachineFunction &MF) { bool Changed = false; auto &HST = MF.getSubtarget<HexagonSubtarget>(); - auto &MRI = MF.getRegInfo(); + MRI = &MF.getRegInfo(); HII = HST.getInstrInfo(); + HRI = HST.getRegisterInfo(); const auto &MDF = getAnalysis<MachineDominanceFrontier>(); MDT = &getAnalysis<MachineDominatorTree>(); - const auto &TRI = *MF.getSubtarget().getRegisterInfo(); const TargetOperandInfo TOI(*HII); - DataFlowGraph G(MF, *HII, TRI, *MDT, MDF, TOI); + DataFlowGraph G(MF, *HII, *HRI, *MDT, MDF, TOI); // Need to keep dead phis because we can propagate uses of registers into // nodes dominated by those would-be phis. G.build(BuildOptions::KeepDeadPhis); DFG = &G; - Liveness L(MRI, *DFG); + Liveness L(*MRI, *DFG); L.computePhiInfo(); LV = &L; Deleted.clear(); NodeAddr<FuncNode *> FA = DFG->getFunc(); - DEBUG(dbgs() << "==== [RefMap#]=====:\n " - << Print<NodeAddr<FuncNode *>>(FA, *DFG) << "\n"); + LLVM_DEBUG(dbgs() << "==== [RefMap#]=====:\n " + << Print<NodeAddr<FuncNode *>>(FA, *DFG) << "\n"); for (NodeAddr<BlockNode *> BA : FA.Addr->members(*DFG)) Changed |= processBlock(BA); diff --git a/lib/Target/Hexagon/HexagonPatterns.td b/lib/Target/Hexagon/HexagonPatterns.td index cdc2085986a5..384fda4ce39a 100644 --- a/lib/Target/Hexagon/HexagonPatterns.td +++ b/lib/Target/Hexagon/HexagonPatterns.td @@ -100,6 +100,17 @@ def HWI8: PatLeaf<(VecPI8 HvxWR:$R)>; def HWI16: PatLeaf<(VecPI16 HvxWR:$R)>; def HWI32: PatLeaf<(VecPI32 HvxWR:$R)>; +def SDTVecVecIntOp: + SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisVec<1>, SDTCisSameAs<1,2>, + SDTCisVT<3,i32>]>; + +def HexagonVALIGN: SDNode<"HexagonISD::VALIGN", SDTVecVecIntOp>; +def HexagonVALIGNADDR: SDNode<"HexagonISD::VALIGNADDR", SDTIntUnaryOp>; + +def valign: PatFrag<(ops node:$Vt, node:$Vs, node:$Ru), + (HexagonVALIGN node:$Vt, node:$Vs, node:$Ru)>; +def valignaddr: PatFrag<(ops node:$Addr), (HexagonVALIGNADDR node:$Addr)>; + // Pattern fragments to extract the low and high subregisters from a // 64-bit value. def LoReg: OutPatFrag<(ops node:$Rs), (EXTRACT_SUBREG (i64 $Rs), isub_lo)>; @@ -109,16 +120,6 @@ def IsOrAdd: PatFrag<(ops node:$A, node:$B), (or node:$A, node:$B), [{ return isOrEquivalentToAdd(N); }]>; -def IsVecOff : PatLeaf<(i32 imm), [{ - int32_t V = N->getSExtValue(); - int32_t VecSize = HRI->getSpillSize(Hexagon::HvxVRRegClass); - assert(isPowerOf2_32(VecSize)); - if ((uint32_t(V) & (uint32_t(VecSize)-1)) != 0) - return false; - int32_t L = Log2_32(VecSize); - return isInt<4>(V >> L); -}]>; - def IsPow2_32: PatLeaf<(i32 imm), [{ uint32_t V = N->getZExtValue(); return isPowerOf2_32(V); @@ -214,7 +215,7 @@ def NegImm32: SDNodeXForm<imm, [{ // Helpers for type promotions/contractions. 
def I1toI32: OutPatFrag<(ops node:$Rs), (C2_muxii (i1 $Rs), 1, 0)>; -def I32toI1: OutPatFrag<(ops node:$Rs), (i1 (C2_tfrrp (i32 $Rs)))>; +def I32toI1: OutPatFrag<(ops node:$Rs), (i1 (C2_cmpgtui (i32 $Rs), (i32 0)))>; def ToZext64: OutPatFrag<(ops node:$Rs), (i64 (A4_combineir 0, (i32 $Rs)))>; def ToSext64: OutPatFrag<(ops node:$Rs), (i64 (A2_sxtw (i32 $Rs)))>; @@ -249,23 +250,6 @@ def: Pat<(IsOrAdd (i32 AddrFI:$Rs), s32_0ImmPred:$off), (PS_fi (i32 AddrFI:$Rs), imm:$off)>; -def alignedload: PatFrag<(ops node:$a), (load $a), [{ - return isAlignedMemNode(dyn_cast<MemSDNode>(N)); -}]>; - -def unalignedload: PatFrag<(ops node:$a), (load $a), [{ - return !isAlignedMemNode(dyn_cast<MemSDNode>(N)); -}]>; - -def alignedstore: PatFrag<(ops node:$v, node:$a), (store $v, $a), [{ - return isAlignedMemNode(dyn_cast<MemSDNode>(N)); -}]>; - -def unalignedstore: PatFrag<(ops node:$v, node:$a), (store $v, $a), [{ - return !isAlignedMemNode(dyn_cast<MemSDNode>(N)); -}]>; - - // Converters from unary/binary SDNode to PatFrag. class pf1<SDNode Op> : PatFrag<(ops node:$a), (Op node:$a)>; class pf2<SDNode Op> : PatFrag<(ops node:$a, node:$b), (Op node:$a, node:$b)>; @@ -274,7 +258,7 @@ class Not2<PatFrag P> : PatFrag<(ops node:$A, node:$B), (P node:$A, (not node:$B))>; class Su<PatFrag Op> - : PatFrag<Op.Operands, Op.Fragment, [{ return hasOneUse(N); }], + : PatFrag<Op.Operands, !head(Op.Fragments), [{ return hasOneUse(N); }], Op.OperandTransform>; // Main selection macros. @@ -298,9 +282,9 @@ class AccRRI_pat<InstHexagon MI, PatFrag AccOp, PatFrag Op, (MI RegPred:$Rx, RegPred:$Rs, imm:$I)>; class AccRRR_pat<InstHexagon MI, PatFrag AccOp, PatFrag Op, - PatFrag RsPred, PatFrag RtPred> - : Pat<(AccOp RsPred:$Rx, (Op RsPred:$Rs, RtPred:$Rt)), - (MI RsPred:$Rx, RsPred:$Rs, RtPred:$Rt)>; + PatFrag RxPred, PatFrag RsPred, PatFrag RtPred> + : Pat<(AccOp RxPred:$Rx, (Op RsPred:$Rs, RtPred:$Rt)), + (MI RxPred:$Rx, RsPred:$Rs, RtPred:$Rt)>; multiclass SelMinMax_pats<PatFrag CmpOp, PatFrag Val, InstHexagon InstA, InstHexagon InstB> { @@ -316,6 +300,7 @@ def Add: pf2<add>; def And: pf2<and>; def Sra: pf2<sra>; def Sub: pf2<sub>; def Or: pf2<or>; def Srl: pf2<srl>; def Mul: pf2<mul>; def Xor: pf2<xor>; def Shl: pf2<shl>; +def Rol: pf2<rotl>; // --(1) Immediate ------------------------------------------------------- // @@ -363,7 +348,7 @@ def ToI32: OutPatFrag<(ops node:$V), (A2_tfrsi $V)>; // --(2) Type cast ------------------------------------------------------- // -let Predicates = [HasV5T] in { +let Predicates = [HasV5] in { def: OpR_R_pat<F2_conv_sf2df, pf1<fpextend>, f64, F32>; def: OpR_R_pat<F2_conv_df2sf, pf1<fpround>, f32, F64>; @@ -389,7 +374,7 @@ let Predicates = [HasV5T] in { } // Bitcast is different than [fp|sint|uint]_to_[sint|uint|fp]. 
-let Predicates = [HasV5T] in { +let Predicates = [HasV5] in { def: Pat<(i32 (bitconvert F32:$v)), (I32:$v)>; def: Pat<(f32 (bitconvert I32:$v)), (F32:$v)>; def: Pat<(i64 (bitconvert F64:$v)), (I64:$v)>; @@ -422,9 +407,14 @@ def: Pat<(i64 (sext I1:$Pu)), (Combinew (C2_muxii PredRegs:$Pu, -1, 0), (C2_muxii PredRegs:$Pu, -1, 0))>; -def: Pat<(i32 (sext I1:$Pu)), (C2_muxii I1:$Pu, -1, 0)>; -def: Pat<(i32 (zext I1:$Pu)), (C2_muxii I1:$Pu, 1, 0)>; -def: Pat<(i64 (zext I1:$Pu)), (ToZext64 (C2_muxii I1:$Pu, 1, 0))>; +def: Pat<(i32 (sext I1:$Pu)), (C2_muxii I1:$Pu, -1, 0)>; +def: Pat<(i32 (zext I1:$Pu)), (C2_muxii I1:$Pu, 1, 0)>; +def: Pat<(i64 (zext I1:$Pu)), (ToZext64 (C2_muxii I1:$Pu, 1, 0))>; +def: Pat<(v2i16 (sext V2I1:$Pu)), (S2_vtrunehb (C2_mask V2I1:$Pu))>; +def: Pat<(v2i32 (sext V2I1:$Pu)), (C2_mask V2I1:$Pu)>; +def: Pat<(v4i8 (sext V4I1:$Pu)), (S2_vtrunehb (C2_mask V4I1:$Pu))>; +def: Pat<(v4i16 (sext V4I1:$Pu)), (C2_mask V4I1:$Pu)>; +def: Pat<(v8i8 (sext V8I1:$Pu)), (C2_mask V8I1:$Pu)>; def: Pat<(i64 (sext I32:$Rs)), (A2_sxtw I32:$Rs)>; def: Pat<(Zext64 I32:$Rs), (ToZext64 $Rs)>; @@ -441,6 +431,20 @@ let AddedComplexity = 20 in { def: Pat<(i32 (anyext I1:$Pu)), (C2_muxii I1:$Pu, 1, 0)>; def: Pat<(i64 (anyext I1:$Pu)), (ToZext64 (C2_muxii I1:$Pu, 1, 0))>; +def Vsplatpi: OutPatFrag<(ops node:$V), + (Combinew (A2_tfrsi $V), (A2_tfrsi $V))>; +def: Pat<(v8i8 (zext V8I1:$Pu)), + (A2_andp (C2_mask V8I1:$Pu), (Vsplatpi (i32 0x01010101)))>; +def: Pat<(v4i16 (zext V4I1:$Pu)), + (A2_andp (C2_mask V4I1:$Pu), (Vsplatpi (i32 0x00010001)))>; +def: Pat<(v2i32 (zext V2I1:$Pu)), + (A2_andp (C2_mask V2I1:$Pu), (A2_combineii (i32 1), (i32 1)))>; + +def: Pat<(v4i8 (zext V4I1:$Pu)), + (A2_andir (LoReg (C2_mask V4I1:$Pu)), (i32 0x01010101))>; +def: Pat<(v2i16 (zext V2I1:$Pu)), + (A2_andir (LoReg (C2_mask V2I1:$Pu)), (i32 0x00010001))>; + def: Pat<(v4i16 (zext V4I8:$Rs)), (S2_vzxtbh V4I8:$Rs)>; def: Pat<(v2i32 (zext V2I16:$Rs)), (S2_vzxthw V2I16:$Rs)>; def: Pat<(v4i16 (anyext V4I8:$Rs)), (S2_vzxtbh V4I8:$Rs)>; @@ -475,25 +479,40 @@ def: Pat<(v2i16 (trunc V2I32:$Rs)), // def: Pat<(not I1:$Ps), (C2_not I1:$Ps)>; +def: Pat<(not V8I1:$Ps), (C2_not V8I1:$Ps)>; def: Pat<(add I1:$Ps, -1), (C2_not I1:$Ps)>; -def: OpR_RR_pat<C2_and, And, i1, I1>; -def: OpR_RR_pat<C2_or, Or, i1, I1>; -def: OpR_RR_pat<C2_xor, Xor, i1, I1>; -def: OpR_RR_pat<C2_andn, Not2<And>, i1, I1>; -def: OpR_RR_pat<C2_orn, Not2<Or>, i1, I1>; +multiclass BoolOpR_RR_pat<InstHexagon MI, PatFrag Op> { + def: OpR_RR_pat<MI, Op, i1, I1>; + def: OpR_RR_pat<MI, Op, v2i1, V2I1>; + def: OpR_RR_pat<MI, Op, v4i1, V4I1>; + def: OpR_RR_pat<MI, Op, v8i1, V8I1>; +} + +multiclass BoolAccRRR_pat<InstHexagon MI, PatFrag AccOp, PatFrag Op> { + def: AccRRR_pat<MI, AccOp, Op, I1, I1, I1>; + def: AccRRR_pat<MI, AccOp, Op, V2I1, V2I1, V2I1>; + def: AccRRR_pat<MI, AccOp, Op, V4I1, V4I1, V4I1>; + def: AccRRR_pat<MI, AccOp, Op, V8I1, V8I1, V8I1>; +} + +defm: BoolOpR_RR_pat<C2_and, And>; +defm: BoolOpR_RR_pat<C2_or, Or>; +defm: BoolOpR_RR_pat<C2_xor, Xor>; +defm: BoolOpR_RR_pat<C2_andn, Not2<And>>; +defm: BoolOpR_RR_pat<C2_orn, Not2<Or>>; // op(Ps, op(Pt, Pu)) -def: AccRRR_pat<C4_and_and, And, Su<And>, I1, I1>; -def: AccRRR_pat<C4_and_or, And, Su<Or>, I1, I1>; -def: AccRRR_pat<C4_or_and, Or, Su<And>, I1, I1>; -def: AccRRR_pat<C4_or_or, Or, Su<Or>, I1, I1>; +defm: BoolAccRRR_pat<C4_and_and, And, Su<And>>; +defm: BoolAccRRR_pat<C4_and_or, And, Su<Or>>; +defm: BoolAccRRR_pat<C4_or_and, Or, Su<And>>; +defm: BoolAccRRR_pat<C4_or_or, Or, Su<Or>>; // op(Ps, op(Pt, ~Pu)) -def: 
AccRRR_pat<C4_and_andn, And, Su<Not2<And>>, I1, I1>; -def: AccRRR_pat<C4_and_orn, And, Su<Not2<Or>>, I1, I1>; -def: AccRRR_pat<C4_or_andn, Or, Su<Not2<And>>, I1, I1>; -def: AccRRR_pat<C4_or_orn, Or, Su<Not2<Or>>, I1, I1>; +defm: BoolAccRRR_pat<C4_and_andn, And, Su<Not2<And>>>; +defm: BoolAccRRR_pat<C4_and_orn, And, Su<Not2<Or>>>; +defm: BoolAccRRR_pat<C4_or_andn, Or, Su<Not2<And>>>; +defm: BoolAccRRR_pat<C4_or_orn, Or, Su<Not2<Or>>>; // --(5) Compare --------------------------------------------------------- @@ -519,7 +538,7 @@ def: Pat<(i1 (setult I32:$Rs, u32_0ImmPred:$u9)), // Patfrag to convert the usual comparison patfrags (e.g. setlt) to ones // that reverse the order of the operands. class RevCmp<PatFrag F> - : PatFrag<(ops node:$rhs, node:$lhs), F.Fragment, F.PredicateCode, + : PatFrag<(ops node:$rhs, node:$lhs), !head(F.Fragments), F.PredicateCode, F.OperandTransform>; def: OpR_RR_pat<C2_cmpeq, seteq, i1, I32>; @@ -563,7 +582,7 @@ def: OpR_RR_pat<A2_vcmpwgtu, RevCmp<setult>, v2i1, V2I32>; def: OpR_RR_pat<A2_vcmpwgtu, setugt, i1, V2I32>; def: OpR_RR_pat<A2_vcmpwgtu, setugt, v2i1, V2I32>; -let Predicates = [HasV5T] in { +let Predicates = [HasV5] in { def: OpR_RR_pat<F2_sfcmpeq, seteq, i1, F32>; def: OpR_RR_pat<F2_sfcmpgt, setgt, i1, F32>; def: OpR_RR_pat<F2_sfcmpge, setge, i1, F32>; @@ -598,27 +617,40 @@ def: Pat<(i1 (setle I32:$Rs, anyimm:$u5)), def: Pat<(i1 (setule I32:$Rs, anyimm:$u5)), (C2_not (C2_cmpgtui I32:$Rs, imm:$u5))>; -def: Pat<(i1 (setne I32:$Rs, I32:$Rt)), - (C2_not (C2_cmpeq I32:$Rs, I32:$Rt))>; -def: Pat<(i1 (setle I32:$Rs, I32:$Rt)), - (C2_not (C2_cmpgt I32:$Rs, I32:$Rt))>; -def: Pat<(i1 (setule I32:$Rs, I32:$Rt)), - (C2_not (C2_cmpgtu I32:$Rs, I32:$Rt))>; -def: Pat<(i1 (setge I32:$Rs, I32:$Rt)), - (C2_not (C2_cmpgt I32:$Rt, I32:$Rs))>; -def: Pat<(i1 (setuge I32:$Rs, I32:$Rt)), - (C2_not (C2_cmpgtu I32:$Rt, I32:$Rs))>; - -def: Pat<(i1 (setle I64:$Rs, I64:$Rt)), - (C2_not (C2_cmpgtp I64:$Rs, I64:$Rt))>; -def: Pat<(i1 (setne I64:$Rs, I64:$Rt)), - (C2_not (C2_cmpeqp I64:$Rs, I64:$Rt))>; -def: Pat<(i1 (setge I64:$Rs, I64:$Rt)), - (C2_not (C2_cmpgtp I64:$Rt, I64:$Rs))>; -def: Pat<(i1 (setuge I64:$Rs, I64:$Rt)), - (C2_not (C2_cmpgtup I64:$Rt, I64:$Rs))>; -def: Pat<(i1 (setule I64:$Rs, I64:$Rt)), - (C2_not (C2_cmpgtup I64:$Rs, I64:$Rt))>; +class OpmR_RR_pat<PatFrag Output, PatFrag Op, ValueType ResType, + PatFrag RsPred, PatFrag RtPred = RsPred> + : Pat<(ResType (Op RsPred:$Rs, RtPred:$Rt)), + (Output RsPred:$Rs, RtPred:$Rt)>; + +class Outn<InstHexagon MI> + : OutPatFrag<(ops node:$Rs, node:$Rt), + (C2_not (MI $Rs, $Rt))>; + +def: OpmR_RR_pat<Outn<C2_cmpeq>, setne, i1, I32>; +def: OpmR_RR_pat<Outn<C2_cmpgt>, setle, i1, I32>; +def: OpmR_RR_pat<Outn<C2_cmpgtu>, setule, i1, I32>; +def: OpmR_RR_pat<Outn<C2_cmpgt>, RevCmp<setge>, i1, I32>; +def: OpmR_RR_pat<Outn<C2_cmpgtu>, RevCmp<setuge>, i1, I32>; +def: OpmR_RR_pat<Outn<C2_cmpeqp>, setne, i1, I64>; +def: OpmR_RR_pat<Outn<C2_cmpgtp>, setle, i1, I64>; +def: OpmR_RR_pat<Outn<C2_cmpgtup>, setule, i1, I64>; +def: OpmR_RR_pat<Outn<C2_cmpgtp>, RevCmp<setge>, i1, I64>; +def: OpmR_RR_pat<Outn<C2_cmpgtup>, RevCmp<setuge>, i1, I64>; +def: OpmR_RR_pat<Outn<A2_vcmpbeq>, setne, v8i1, V8I8>; +def: OpmR_RR_pat<Outn<A4_vcmpbgt>, setle, v8i1, V8I8>; +def: OpmR_RR_pat<Outn<A2_vcmpbgtu>, setule, v8i1, V8I8>; +def: OpmR_RR_pat<Outn<A4_vcmpbgt>, RevCmp<setge>, v8i1, V8I8>; +def: OpmR_RR_pat<Outn<A2_vcmpbgtu>, RevCmp<setuge>, v8i1, V8I8>; +def: OpmR_RR_pat<Outn<A2_vcmpheq>, setne, v4i1, V4I16>; +def: OpmR_RR_pat<Outn<A2_vcmphgt>, setle, v4i1, 
V4I16>; +def: OpmR_RR_pat<Outn<A2_vcmphgtu>, setule, v4i1, V4I16>; +def: OpmR_RR_pat<Outn<A2_vcmphgt>, RevCmp<setge>, v4i1, V4I16>; +def: OpmR_RR_pat<Outn<A2_vcmphgtu>, RevCmp<setuge>, v4i1, V4I16>; +def: OpmR_RR_pat<Outn<A2_vcmpweq>, setne, v2i1, V2I32>; +def: OpmR_RR_pat<Outn<A2_vcmpwgt>, setle, v2i1, V2I32>; +def: OpmR_RR_pat<Outn<A2_vcmpwgtu>, setule, v2i1, V2I32>; +def: OpmR_RR_pat<Outn<A2_vcmpwgt>, RevCmp<setge>, v2i1, V2I32>; +def: OpmR_RR_pat<Outn<A2_vcmpwgtu>, RevCmp<setuge>, v2i1, V2I32>; let AddedComplexity = 100 in { def: Pat<(i1 (seteq (and (xor I32:$Rs, I32:$Rt), 255), 0)), @@ -680,25 +712,10 @@ def: Pat<(i32 (zext (i1 (seteq I32:$Rs, anyimm:$s8)))), def: Pat<(i32 (zext (i1 (setne I32:$Rs, anyimm:$s8)))), (A4_rcmpneqi I32:$Rs, imm:$s8)>; -def: Pat<(i1 (setne I1:$Ps, I1:$Pt)), - (C2_xor I1:$Ps, I1:$Pt)>; - -def: Pat<(i1 (seteq V4I8:$Rs, V4I8:$Rt)), - (A2_vcmpbeq (ToZext64 $Rs), (ToZext64 $Rt))>; -def: Pat<(i1 (setgt V4I8:$Rs, V4I8:$Rt)), - (A4_vcmpbgt (ToZext64 $Rs), (ToZext64 $Rt))>; -def: Pat<(i1 (setugt V4I8:$Rs, V4I8:$Rt)), - (A2_vcmpbgtu (ToZext64 $Rs), (ToZext64 $Rt))>; - -def: Pat<(i1 (seteq V2I16:$Rs, V2I16:$Rt)), - (A2_vcmpheq (ToZext64 $Rs), (ToZext64 $Rt))>; -def: Pat<(i1 (setgt V2I16:$Rs, V2I16:$Rt)), - (A2_vcmphgt (ToZext64 $Rs), (ToZext64 $Rt))>; -def: Pat<(i1 (setugt V2I16:$Rs, V2I16:$Rt)), - (A2_vcmphgtu (ToZext64 $Rs), (ToZext64 $Rt))>; - -def: Pat<(v2i1 (setne V2I32:$Rs, V2I32:$Rt)), - (C2_not (v2i1 (A2_vcmpbeq V2I32:$Rs, V2I32:$Rt)))>; +def: Pat<(i1 (seteq I1:$Ps, (i1 -1))), (I1:$Ps)>; +def: Pat<(i1 (setne I1:$Ps, (i1 -1))), (C2_not I1:$Ps)>; +def: Pat<(i1 (seteq I1:$Ps, I1:$Pt)), (C2_xor I1:$Ps, (C2_not I1:$Pt))>; +def: Pat<(i1 (setne I1:$Ps, I1:$Pt)), (C2_xor I1:$Ps, I1:$Pt)>; // Floating-point comparisons with checks for ordered/unordered status. 
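The ordered/unordered handling that follows composes each base compare with an explicit unordered check. A host-side C++ sketch of the "unordered or equal" predicate being built this way (a minimal illustration of the composition, not the selected code itself):

    #include <cassert>
    #include <cmath>

    // ueq = "unordered or equal": an explicit unordered test OR'ed with
    // the ordered equality compare.
    bool setueq(float A, float B) {
      return (std::isnan(A) || std::isnan(B)) || (A == B);
    }

    int main() {
      float QNaN = std::nanf("");
      assert(setueq(1.0f, 1.0f));   // ordered and equal
      assert(!setueq(1.0f, 2.0f));  // ordered, not equal
      assert(setueq(QNaN, 2.0f));   // unordered, so "ueq" holds
      return 0;
    }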
@@ -706,18 +723,13 @@ class T3<InstHexagon MI1, InstHexagon MI2, InstHexagon MI3> : OutPatFrag<(ops node:$Rs, node:$Rt), (MI1 (MI2 $Rs, $Rt), (MI3 $Rs, $Rt))>; -class OpmR_RR_pat<PatFrag Output, PatFrag Op, ValueType ResType, - PatFrag RsPred, PatFrag RtPred = RsPred> - : Pat<(ResType (Op RsPred:$Rs, RtPred:$Rt)), - (Output RsPred:$Rs, RtPred:$Rt)>; - class Cmpuf<InstHexagon MI>: T3<C2_or, F2_sfcmpuo, MI>; class Cmpud<InstHexagon MI>: T3<C2_or, F2_dfcmpuo, MI>; class Cmpufn<InstHexagon MI>: T3<C2_orn, F2_sfcmpuo, MI>; class Cmpudn<InstHexagon MI>: T3<C2_orn, F2_dfcmpuo, MI>; -let Predicates = [HasV5T] in { +let Predicates = [HasV5] in { def: OpmR_RR_pat<Cmpuf<F2_sfcmpeq>, setueq, i1, F32>; def: OpmR_RR_pat<Cmpuf<F2_sfcmpge>, setuge, i1, F32>; def: OpmR_RR_pat<Cmpuf<F2_sfcmpgt>, setugt, i1, F32>; @@ -733,11 +745,7 @@ let Predicates = [HasV5T] in { def: OpmR_RR_pat<Cmpudn<F2_dfcmpeq>, setune, i1, F64>; } -class Outn<InstHexagon MI> - : OutPatFrag<(ops node:$Rs, node:$Rt), - (C2_not (MI $Rs, $Rt))>; - -let Predicates = [HasV5T] in { +let Predicates = [HasV5] in { def: OpmR_RR_pat<Outn<F2_sfcmpeq>, setone, i1, F32>; def: OpmR_RR_pat<Outn<F2_sfcmpeq>, setne, i1, F32>; @@ -776,7 +784,7 @@ def: Pat<(select I1:$Pu, I64:$Rs, I64:$Rt), (Combinew (C2_mux I1:$Pu, (HiReg $Rs), (HiReg $Rt)), (C2_mux I1:$Pu, (LoReg $Rs), (LoReg $Rt)))>; -let Predicates = [HasV5T] in { +let Predicates = [HasV5] in { def: Pat<(select I1:$Pu, F32:$Rs, f32ImmPred:$I), (C2_muxir I1:$Pu, F32:$Rs, (ftoi $I))>; def: Pat<(select I1:$Pu, f32ImmPred:$I, F32:$Rt), @@ -813,20 +821,6 @@ def: Pat<(vselect V4I1:$Pu, V4I16:$Rs, V4I16:$Rt), def: Pat<(vselect V2I1:$Pu, V2I32:$Rs, V2I32:$Rt), (C2_vmux V2I1:$Pu, V2I32:$Rs, V2I32:$Rt)>; - -class HvxSel_pat<InstHexagon MI, PatFrag RegPred> - : Pat<(select I1:$Pu, RegPred:$Vs, RegPred:$Vt), - (MI I1:$Pu, RegPred:$Vs, RegPred:$Vt)>; - -let Predicates = [HasV60T,UseHVX] in { - def: HvxSel_pat<PS_vselect, HVI8>; - def: HvxSel_pat<PS_vselect, HVI16>; - def: HvxSel_pat<PS_vselect, HVI32>; - def: HvxSel_pat<PS_wselect, HWI8>; - def: HvxSel_pat<PS_wselect, HWI16>; - def: HvxSel_pat<PS_wselect, HWI32>; -} - // From LegalizeDAG.cpp: (Pu ? Pv : Pw) <=> (Pu & Pv) | (!Pu & Pw). 
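That select-to-logic identity holds by simple case analysis; a brute-force C++ check over all eight input combinations (standalone, just to make the equivalence concrete):

    #include <cassert>

    int main() {
      for (bool Pu : {false, true})
        for (bool Pv : {false, true})
          for (bool Pw : {false, true}) {
            bool Sel = Pu ? Pv : Pw;               // (Pu ? Pv : Pw)
            bool Ops = (Pu && Pv) || (!Pu && Pw);  // (Pu & Pv) | (!Pu & Pw)
            assert(Sel == Ops);                    // identical on all inputs
          }
      return 0;
    }

The pattern that follows is the direct instruction rendering of the right-hand side.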
def: Pat<(select I1:$Pu, I1:$Pv, I1:$Pw), (C2_or (C2_and I1:$Pu, I1:$Pv), @@ -878,7 +872,7 @@ let AddedComplexity = 200 in { defm: SelMinMax_pats<setult, I64, A2_minup, A2_maxup>; } -let AddedComplexity = 100, Predicates = [HasV5T] in { +let AddedComplexity = 100, Predicates = [HasV5] in { defm: SelMinMax_pats<setolt, F32, F2_sfmin, F2_sfmax>; defm: SelMinMax_pats<setole, F32, F2_sfmin, F2_sfmax>; defm: SelMinMax_pats<setogt, F32, F2_sfmax, F2_sfmin>; @@ -892,40 +886,34 @@ let AddedComplexity = 100, Predicates = [HasV5T] in { def SDTHexagonINSERT: SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<0>, SDTCisVT<3, i32>, SDTCisVT<4, i32>]>; -def SDTHexagonINSERTRP: - SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, - SDTCisInt<0>, SDTCisVT<3, i64>]>; - def HexagonINSERT: SDNode<"HexagonISD::INSERT", SDTHexagonINSERT>; -def HexagonINSERTRP: SDNode<"HexagonISD::INSERTRP", SDTHexagonINSERTRP>; -def: Pat<(HexagonINSERT I32:$Rs, I32:$Rt, u5_0ImmPred:$u1, u5_0ImmPred:$u2), - (S2_insert I32:$Rs, I32:$Rt, imm:$u1, imm:$u2)>; -def: Pat<(HexagonINSERT I64:$Rs, I64:$Rt, u6_0ImmPred:$u1, u6_0ImmPred:$u2), - (S2_insertp I64:$Rs, I64:$Rt, imm:$u1, imm:$u2)>; -def: Pat<(HexagonINSERTRP I32:$Rs, I32:$Rt, I64:$Ru), - (S2_insert_rp I32:$Rs, I32:$Rt, I64:$Ru)>; -def: Pat<(HexagonINSERTRP I64:$Rs, I64:$Rt, I64:$Ru), - (S2_insertp_rp I64:$Rs, I64:$Rt, I64:$Ru)>; +let AddedComplexity = 10 in { + def: Pat<(HexagonINSERT I32:$Rs, I32:$Rt, u5_0ImmPred:$u1, u5_0ImmPred:$u2), + (S2_insert I32:$Rs, I32:$Rt, imm:$u1, imm:$u2)>; + def: Pat<(HexagonINSERT I64:$Rs, I64:$Rt, u6_0ImmPred:$u1, u6_0ImmPred:$u2), + (S2_insertp I64:$Rs, I64:$Rt, imm:$u1, imm:$u2)>; +} +def: Pat<(HexagonINSERT I32:$Rs, I32:$Rt, I32:$Width, I32:$Off), + (S2_insert_rp I32:$Rs, I32:$Rt, (Combinew $Width, $Off))>; +def: Pat<(HexagonINSERT I64:$Rs, I64:$Rt, I32:$Width, I32:$Off), + (S2_insertp_rp I64:$Rs, I64:$Rt, (Combinew $Width, $Off))>; def SDTHexagonEXTRACTU : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisInt<0>, SDTCisInt<1>, SDTCisVT<2, i32>, SDTCisVT<3, i32>]>; -def SDTHexagonEXTRACTURP - : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, SDTCisInt<0>, SDTCisInt<1>, - SDTCisVT<2, i64>]>; - def HexagonEXTRACTU: SDNode<"HexagonISD::EXTRACTU", SDTHexagonEXTRACTU>; -def HexagonEXTRACTURP: SDNode<"HexagonISD::EXTRACTURP", SDTHexagonEXTRACTURP>; -def: Pat<(HexagonEXTRACTU I32:$Rs, u5_0ImmPred:$u5, u5_0ImmPred:$U5), - (S2_extractu I32:$Rs, imm:$u5, imm:$U5)>; -def: Pat<(HexagonEXTRACTU I64:$Rs, u6_0ImmPred:$u6, u6_0ImmPred:$U6), - (S2_extractup I64:$Rs, imm:$u6, imm:$U6)>; -def: Pat<(HexagonEXTRACTURP I32:$Rs, I64:$Rt), - (S2_extractu_rp I32:$Rs, I64:$Rt)>; -def: Pat<(HexagonEXTRACTURP I64:$Rs, I64:$Rt), - (S2_extractup_rp I64:$Rs, I64:$Rt)>; +let AddedComplexity = 10 in { + def: Pat<(HexagonEXTRACTU I32:$Rs, u5_0ImmPred:$u5, u5_0ImmPred:$U5), + (S2_extractu I32:$Rs, imm:$u5, imm:$U5)>; + def: Pat<(HexagonEXTRACTU I64:$Rs, u6_0ImmPred:$u6, u6_0ImmPred:$U6), + (S2_extractup I64:$Rs, imm:$u6, imm:$U6)>; +} +def: Pat<(HexagonEXTRACTU I32:$Rs, I32:$Width, I32:$Off), + (S2_extractu_rp I32:$Rs, (Combinew $Width, $Off))>; +def: Pat<(HexagonEXTRACTU I64:$Rs, I32:$Width, I32:$Off), + (S2_extractup_rp I64:$Rs, (Combinew $Width, $Off))>; def SDTHexagonVSPLAT: SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>; @@ -938,20 +926,20 @@ def: Pat<(v2i32 (HexagonVSPLAT s8_0ImmPred:$s8)), (A2_combineii imm:$s8, imm:$s8)>; def: Pat<(v2i32 (HexagonVSPLAT I32:$Rs)), (Combinew I32:$Rs, I32:$Rs)>; +let AddedComplexity = 10 in +def: Pat<(v8i8 
(HexagonVSPLAT I32:$Rs)), (S6_vsplatrbp I32:$Rs)>, + Requires<[HasV62]>; +def: Pat<(v8i8 (HexagonVSPLAT I32:$Rs)), + (Combinew (S2_vsplatrb I32:$Rs), (S2_vsplatrb I32:$Rs))>; + // --(8) Shift/permute --------------------------------------------------- // def SDTHexagonI64I32I32: SDTypeProfile<1, 2, [SDTCisVT<0, i64>, SDTCisVT<1, i32>, SDTCisSameAs<1, 2>]>; -def SDTHexagonVCOMBINE: SDTypeProfile<1, 2, [SDTCisSameAs<1, 2>, - SDTCisSubVecOfVec<1, 0>]>; -def SDTHexagonVPACK: SDTypeProfile<1, 2, [SDTCisSameAs<1, 2>, SDTCisVec<1>]>; def HexagonCOMBINE: SDNode<"HexagonISD::COMBINE", SDTHexagonI64I32I32>; -def HexagonVCOMBINE: SDNode<"HexagonISD::VCOMBINE", SDTHexagonVCOMBINE>; -def HexagonVPACKE: SDNode<"HexagonISD::VPACKE", SDTHexagonVPACK>; -def HexagonVPACKO: SDNode<"HexagonISD::VPACKO", SDTHexagonVPACK>; def: Pat<(HexagonCOMBINE I32:$Rs, I32:$Rt), (Combinew $Rs, $Rt)>; @@ -1001,11 +989,15 @@ def: OpR_RR_pat<S2_asr_r_p, Sra, i64, I64, I32>; def: OpR_RR_pat<S2_lsr_r_p, Srl, i64, I64, I32>; def: OpR_RR_pat<S2_asl_r_p, Shl, i64, I64, I32>; +let Predicates = [HasV60] in { + def: OpR_RI_pat<S6_rol_i_r, Rol, i32, I32, u5_0ImmPred>; + def: OpR_RI_pat<S6_rol_i_p, Rol, i64, I64, u6_0ImmPred>; +} def: Pat<(sra (add (sra I32:$Rs, u5_0ImmPred:$u5), 1), (i32 1)), (S2_asr_i_r_rnd I32:$Rs, imm:$u5)>; def: Pat<(sra (add (sra I64:$Rs, u6_0ImmPred:$u6), 1), (i32 1)), - (S2_asr_i_p_rnd I64:$Rs, imm:$u6)>, Requires<[HasV5T]>; + (S2_asr_i_p_rnd I64:$Rs, imm:$u6)>, Requires<[HasV5]>; // Prefer S2_addasl_rrri over S2_asl_i_r_acc. let AddedComplexity = 120 in @@ -1046,41 +1038,55 @@ let AddedComplexity = 100 in { def: AccRRI_pat<S2_asl_i_p_and, And, Su<Shl>, I64, u6_0ImmPred>; def: AccRRI_pat<S2_asl_i_p_or, Or, Su<Shl>, I64, u6_0ImmPred>; def: AccRRI_pat<S2_asl_i_p_xacc, Xor, Su<Shl>, I64, u6_0ImmPred>; + + let Predicates = [HasV60] in { + def: AccRRI_pat<S6_rol_i_r_acc, Add, Su<Rol>, I32, u5_0ImmPred>; + def: AccRRI_pat<S6_rol_i_r_nac, Sub, Su<Rol>, I32, u5_0ImmPred>; + def: AccRRI_pat<S6_rol_i_r_and, And, Su<Rol>, I32, u5_0ImmPred>; + def: AccRRI_pat<S6_rol_i_r_or, Or, Su<Rol>, I32, u5_0ImmPred>; + def: AccRRI_pat<S6_rol_i_r_xacc, Xor, Su<Rol>, I32, u5_0ImmPred>; + + def: AccRRI_pat<S6_rol_i_p_acc, Add, Su<Rol>, I64, u6_0ImmPred>; + def: AccRRI_pat<S6_rol_i_p_nac, Sub, Su<Rol>, I64, u6_0ImmPred>; + def: AccRRI_pat<S6_rol_i_p_and, And, Su<Rol>, I64, u6_0ImmPred>; + def: AccRRI_pat<S6_rol_i_p_or, Or, Su<Rol>, I64, u6_0ImmPred>; + def: AccRRI_pat<S6_rol_i_p_xacc, Xor, Su<Rol>, I64, u6_0ImmPred>; + } } let AddedComplexity = 100 in { - def: AccRRR_pat<S2_asr_r_r_acc, Add, Su<Sra>, I32, I32>; - def: AccRRR_pat<S2_asr_r_r_nac, Sub, Su<Sra>, I32, I32>; - def: AccRRR_pat<S2_asr_r_r_and, And, Su<Sra>, I32, I32>; - def: AccRRR_pat<S2_asr_r_r_or, Or, Su<Sra>, I32, I32>; + def: AccRRR_pat<S2_asr_r_r_acc, Add, Su<Sra>, I32, I32, I32>; + def: AccRRR_pat<S2_asr_r_r_nac, Sub, Su<Sra>, I32, I32, I32>; + def: AccRRR_pat<S2_asr_r_r_and, And, Su<Sra>, I32, I32, I32>; + def: AccRRR_pat<S2_asr_r_r_or, Or, Su<Sra>, I32, I32, I32>; - def: AccRRR_pat<S2_asr_r_p_acc, Add, Su<Sra>, I64, I32>; - def: AccRRR_pat<S2_asr_r_p_nac, Sub, Su<Sra>, I64, I32>; - def: AccRRR_pat<S2_asr_r_p_and, And, Su<Sra>, I64, I32>; - def: AccRRR_pat<S2_asr_r_p_or, Or, Su<Sra>, I64, I32>; - def: AccRRR_pat<S2_asr_r_p_xor, Xor, Su<Sra>, I64, I32>; + def: AccRRR_pat<S2_asr_r_p_acc, Add, Su<Sra>, I64, I64, I32>; + def: AccRRR_pat<S2_asr_r_p_nac, Sub, Su<Sra>, I64, I64, I32>; + def: AccRRR_pat<S2_asr_r_p_and, And, Su<Sra>, I64, I64, I32>; + def: AccRRR_pat<S2_asr_r_p_or, 
Or, Su<Sra>, I64, I64, I32>; + def: AccRRR_pat<S2_asr_r_p_xor, Xor, Su<Sra>, I64, I64, I32>; - def: AccRRR_pat<S2_lsr_r_r_acc, Add, Su<Srl>, I32, I32>; - def: AccRRR_pat<S2_lsr_r_r_nac, Sub, Su<Srl>, I32, I32>; - def: AccRRR_pat<S2_lsr_r_r_and, And, Su<Srl>, I32, I32>; - def: AccRRR_pat<S2_lsr_r_r_or, Or, Su<Srl>, I32, I32>; + def: AccRRR_pat<S2_lsr_r_r_acc, Add, Su<Srl>, I32, I32, I32>; + def: AccRRR_pat<S2_lsr_r_r_nac, Sub, Su<Srl>, I32, I32, I32>; + def: AccRRR_pat<S2_lsr_r_r_and, And, Su<Srl>, I32, I32, I32>; + def: AccRRR_pat<S2_lsr_r_r_or, Or, Su<Srl>, I32, I32, I32>; - def: AccRRR_pat<S2_lsr_r_p_acc, Add, Su<Srl>, I64, I32>; - def: AccRRR_pat<S2_lsr_r_p_nac, Sub, Su<Srl>, I64, I32>; - def: AccRRR_pat<S2_lsr_r_p_and, And, Su<Srl>, I64, I32>; - def: AccRRR_pat<S2_lsr_r_p_or, Or, Su<Srl>, I64, I32>; - def: AccRRR_pat<S2_lsr_r_p_xor, Xor, Su<Srl>, I64, I32>; + def: AccRRR_pat<S2_lsr_r_p_acc, Add, Su<Srl>, I64, I64, I32>; + def: AccRRR_pat<S2_lsr_r_p_nac, Sub, Su<Srl>, I64, I64, I32>; + def: AccRRR_pat<S2_lsr_r_p_and, And, Su<Srl>, I64, I64, I32>; + def: AccRRR_pat<S2_lsr_r_p_or, Or, Su<Srl>, I64, I64, I32>; + def: AccRRR_pat<S2_lsr_r_p_xor, Xor, Su<Srl>, I64, I64, I32>; - def: AccRRR_pat<S2_asl_r_r_acc, Add, Su<Shl>, I32, I32>; - def: AccRRR_pat<S2_asl_r_r_nac, Sub, Su<Shl>, I32, I32>; - def: AccRRR_pat<S2_asl_r_r_and, And, Su<Shl>, I32, I32>; - def: AccRRR_pat<S2_asl_r_r_or, Or, Su<Shl>, I32, I32>; + def: AccRRR_pat<S2_asl_r_r_acc, Add, Su<Shl>, I32, I32, I32>; + def: AccRRR_pat<S2_asl_r_r_nac, Sub, Su<Shl>, I32, I32, I32>; + def: AccRRR_pat<S2_asl_r_r_and, And, Su<Shl>, I32, I32, I32>; + def: AccRRR_pat<S2_asl_r_r_or, Or, Su<Shl>, I32, I32, I32>; - def: AccRRR_pat<S2_asl_r_p_acc, Add, Su<Shl>, I64, I32>; - def: AccRRR_pat<S2_asl_r_p_nac, Sub, Su<Shl>, I64, I32>; - def: AccRRR_pat<S2_asl_r_p_and, And, Su<Shl>, I64, I32>; - def: AccRRR_pat<S2_asl_r_p_or, Or, Su<Shl>, I64, I32>; - def: AccRRR_pat<S2_asl_r_p_xor, Xor, Su<Shl>, I64, I32>; + def: AccRRR_pat<S2_asl_r_p_acc, Add, Su<Shl>, I64, I64, I32>; + def: AccRRR_pat<S2_asl_r_p_nac, Sub, Su<Shl>, I64, I64, I32>; + def: AccRRR_pat<S2_asl_r_p_and, And, Su<Shl>, I64, I64, I32>; + def: AccRRR_pat<S2_asl_r_p_or, Or, Su<Shl>, I64, I64, I32>; + def: AccRRR_pat<S2_asl_r_p_xor, Xor, Su<Shl>, I64, I64, I32>; } @@ -1170,11 +1176,13 @@ def: Pat<(shl V4I16:$b, (v4i16 (HexagonVSPLAT u4_0ImmPred:$c))), // --(9) Arithmetic/bitwise ---------------------------------------------- // -def: Pat<(abs I32:$Rs), (A2_abs I32:$Rs)>; -def: Pat<(not I32:$Rs), (A2_subri -1, I32:$Rs)>; -def: Pat<(not I64:$Rs), (A2_notp I64:$Rs)>; +def: Pat<(abs I32:$Rs), (A2_abs I32:$Rs)>; +def: Pat<(abs I64:$Rs), (A2_absp I64:$Rs)>; +def: Pat<(not I32:$Rs), (A2_subri -1, I32:$Rs)>; +def: Pat<(not I64:$Rs), (A2_notp I64:$Rs)>; +def: Pat<(ineg I64:$Rs), (A2_negp I64:$Rs)>; -let Predicates = [HasV5T] in { +let Predicates = [HasV5] in { def: Pat<(fabs F32:$Rs), (S2_clrbit_i F32:$Rs, 31)>; def: Pat<(fneg F32:$Rs), (S2_togglebit_i F32:$Rs, 31)>; @@ -1186,13 +1194,6 @@ let Predicates = [HasV5T] in { (i32 (LoReg $Rs)))>; } -let AddedComplexity = 50 in -def: Pat<(xor (add (sra I32:$Rs, (i32 31)), - I32:$Rs), - (sra I32:$Rs, (i32 31))), - (A2_abs I32:$Rs)>; - - def: Pat<(add I32:$Rs, anyimm:$s16), (A2_addi I32:$Rs, imm:$s16)>; def: Pat<(or I32:$Rs, anyimm:$s10), (A2_orir I32:$Rs, imm:$s10)>; def: Pat<(and I32:$Rs, anyimm:$s10), (A2_andir I32:$Rs, imm:$s10)>; @@ -1221,18 +1222,20 @@ def: OpR_RR_pat<A2_vsubub, Sub, v8i8, V8I8>; def: OpR_RR_pat<A2_vsubh, Sub, v4i16, V4I16>; def: 
OpR_RR_pat<A2_vsubw, Sub, v2i32, V2I32>; +def: OpR_RR_pat<A2_and, And, v4i8, V4I8>; +def: OpR_RR_pat<A2_xor, Xor, v4i8, V4I8>; +def: OpR_RR_pat<A2_or, Or, v4i8, V4I8>; def: OpR_RR_pat<A2_and, And, v2i16, V2I16>; def: OpR_RR_pat<A2_xor, Xor, v2i16, V2I16>; def: OpR_RR_pat<A2_or, Or, v2i16, V2I16>; - def: OpR_RR_pat<A2_andp, And, v8i8, V8I8>; -def: OpR_RR_pat<A2_andp, And, v4i16, V4I16>; -def: OpR_RR_pat<A2_andp, And, v2i32, V2I32>; def: OpR_RR_pat<A2_orp, Or, v8i8, V8I8>; -def: OpR_RR_pat<A2_orp, Or, v4i16, V4I16>; -def: OpR_RR_pat<A2_orp, Or, v2i32, V2I32>; def: OpR_RR_pat<A2_xorp, Xor, v8i8, V8I8>; +def: OpR_RR_pat<A2_andp, And, v4i16, V4I16>; +def: OpR_RR_pat<A2_orp, Or, v4i16, V4I16>; def: OpR_RR_pat<A2_xorp, Xor, v4i16, V4I16>; +def: OpR_RR_pat<A2_andp, And, v2i32, V2I32>; +def: OpR_RR_pat<A2_orp, Or, v2i32, V2I32>; def: OpR_RR_pat<A2_xorp, Xor, v2i32, V2I32>; def: OpR_RR_pat<M2_mpyi, Mul, i32, I32>; @@ -1255,7 +1258,7 @@ def: OpR_RR_pat<C2_and, Mul, v2i1, V2I1>; def: OpR_RR_pat<C2_and, Mul, v4i1, V4I1>; def: OpR_RR_pat<C2_and, Mul, v8i1, V8I1>; -let Predicates = [HasV5T] in { +let Predicates = [HasV5] in { def: OpR_RR_pat<F2_sfadd, pf2<fadd>, f32, F32>; def: OpR_RR_pat<F2_sfsub, pf2<fsub>, f32, F32>; def: OpR_RR_pat<F2_sfmpy, pf2<fmul>, f32, F32>; @@ -1268,12 +1271,62 @@ let Predicates = [HasV5T] in { let AddedComplexity = 10 in { def: AccRRI_pat<M2_macsip, Add, Su<Mul>, I32, u32_0ImmPred>; def: AccRRI_pat<M2_macsin, Sub, Su<Mul>, I32, u32_0ImmPred>; - def: AccRRR_pat<M2_maci, Add, Su<Mul>, I32, I32>; + def: AccRRR_pat<M2_maci, Add, Su<Mul>, I32, I32, I32>; } def: AccRRI_pat<M2_naccii, Sub, Su<Add>, I32, s32_0ImmPred>; def: AccRRI_pat<M2_accii, Add, Su<Add>, I32, s32_0ImmPred>; -def: AccRRR_pat<M2_acci, Add, Su<Add>, I32, I32>; +def: AccRRR_pat<M2_acci, Add, Su<Add>, I32, I32, I32>; + +// Mulh for vectors +// +def: Pat<(v2i32 (mulhu V2I32:$Rss, V2I32:$Rtt)), + (Combinew (M2_mpyu_up (HiReg $Rss), (HiReg $Rtt)), + (M2_mpyu_up (LoReg $Rss), (LoReg $Rtt)))>; + +def: Pat<(v2i32 (mulhs V2I32:$Rs, V2I32:$Rt)), + (Combinew (M2_mpy_up (HiReg $Rs), (HiReg $Rt)), + (M2_mpy_up (LoReg $Rs), (LoReg $Rt)))>; + +def Mulhub: + OutPatFrag<(ops node:$Rss, node:$Rtt), + (Combinew (S2_vtrunohb (M5_vmpybuu (HiReg $Rss), (HiReg $Rtt))), + (S2_vtrunohb (M5_vmpybuu (LoReg $Rss), (LoReg $Rtt))))>; + +// Equivalent of byte-wise arithmetic shift right by 7 in v8i8.
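Such a shift yields 0xFF exactly in the negative byte lanes, which is what lets the signed high-byte multiply further below be derived from the unsigned Mulhub above. A scalar C++ check of the per-lane identity the v8i8 mulhs pattern appears to rely on (one byte lane; hypothetical helper names, and the algebra is the same in every lane):

    #include <cassert>
    #include <cstdint>

    // Byte-wise arithmetic shift right by 7: all-ones for negative bytes.
    uint8_t asr7(uint8_t V) { return (int8_t)V < 0 ? 0xFF : 0x00; }

    // Unsigned high byte of the 8x8-bit product.
    uint8_t mulhu8(uint8_t A, uint8_t B) { return (unsigned(A) * B) >> 8; }

    // mulhs(a,b) == mulhu(a,b) - (a & asr7(b)) - (b & asr7(a))  (mod 256)
    uint8_t mulhs8(uint8_t A, uint8_t B) {
      return uint8_t(mulhu8(A, B) - (A & asr7(B)) - (B & asr7(A)));
    }

    int main() {
      for (int A = 0; A < 256; ++A)
        for (int B = 0; B < 256; ++B) {
          // Arithmetic shift of the signed product gives the reference.
          int Ref = ((int8_t)A * (int8_t)B) >> 8;
          assert(mulhs8(A, B) == uint8_t(Ref));
        }
      return 0;
    }

The subtraction wraps modulo 256, matching the non-saturating A2_vsubub/A2_vaddub used in the pattern.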
+def Asr7: + OutPatFrag<(ops node:$Rss), (C2_mask (C2_not (A4_vcmpbgti $Rss, 0)))>; + +def: Pat<(v8i8 (mulhu V8I8:$Rss, V8I8:$Rtt)), + (Mulhub $Rss, $Rtt)>; + +def: Pat<(v8i8 (mulhs V8I8:$Rss, V8I8:$Rtt)), + (A2_vsubub + (Mulhub $Rss, $Rtt), + (A2_vaddub (A2_andp V8I8:$Rss, (Asr7 $Rtt)), + (A2_andp V8I8:$Rtt, (Asr7 $Rss))))>; + +def Mpysh: + OutPatFrag<(ops node:$Rs, node:$Rt), (M2_vmpy2s_s0 $Rs, $Rt)>; +def Mpyshh: + OutPatFrag<(ops node:$Rss, node:$Rtt), (Mpysh (HiReg $Rss), (HiReg $Rtt))>; +def Mpyshl: + OutPatFrag<(ops node:$Rss, node:$Rtt), (Mpysh (LoReg $Rss), (LoReg $Rtt))>; + +def Mulhsh: + OutPatFrag<(ops node:$Rss, node:$Rtt), + (Combinew (A2_combine_hh (HiReg (Mpyshh $Rss, $Rtt)), + (LoReg (Mpyshh $Rss, $Rtt))), + (A2_combine_hh (HiReg (Mpyshl $Rss, $Rtt)), + (LoReg (Mpyshl $Rss, $Rtt))))>; + +def: Pat<(v4i16 (mulhs V4I16:$Rss, V4I16:$Rtt)), (Mulhsh $Rss, $Rtt)>; + +def: Pat<(v4i16 (mulhu V4I16:$Rss, V4I16:$Rtt)), + (A2_vaddh + (Mulhsh $Rss, $Rtt), + (A2_vaddh (A2_andp V4I16:$Rss, (S2_asr_i_vh $Rtt, 15)), + (A2_andp V4I16:$Rtt, (S2_asr_i_vh $Rss, 15))))>; def: Pat<(ineg (mul I32:$Rs, u8_0ImmPred:$u8)), @@ -1291,24 +1344,24 @@ def: Pat<(mul I32:$Rs, n8_0ImmPred:$n8), def: Pat<(add Sext64:$Rs, I64:$Rt), (A2_addsp (LoReg Sext64:$Rs), I64:$Rt)>; -def: AccRRR_pat<M4_and_and, And, Su<And>, I32, I32>; -def: AccRRR_pat<M4_and_or, And, Su<Or>, I32, I32>; -def: AccRRR_pat<M4_and_xor, And, Su<Xor>, I32, I32>; -def: AccRRR_pat<M4_or_and, Or, Su<And>, I32, I32>; -def: AccRRR_pat<M4_or_or, Or, Su<Or>, I32, I32>; -def: AccRRR_pat<M4_or_xor, Or, Su<Xor>, I32, I32>; -def: AccRRR_pat<M4_xor_and, Xor, Su<And>, I32, I32>; -def: AccRRR_pat<M4_xor_or, Xor, Su<Or>, I32, I32>; -def: AccRRR_pat<M2_xor_xacc, Xor, Su<Xor>, I32, I32>; -def: AccRRR_pat<M4_xor_xacc, Xor, Su<Xor>, I64, I64>; +def: AccRRR_pat<M4_and_and, And, Su<And>, I32, I32, I32>; +def: AccRRR_pat<M4_and_or, And, Su<Or>, I32, I32, I32>; +def: AccRRR_pat<M4_and_xor, And, Su<Xor>, I32, I32, I32>; +def: AccRRR_pat<M4_or_and, Or, Su<And>, I32, I32, I32>; +def: AccRRR_pat<M4_or_or, Or, Su<Or>, I32, I32, I32>; +def: AccRRR_pat<M4_or_xor, Or, Su<Xor>, I32, I32, I32>; +def: AccRRR_pat<M4_xor_and, Xor, Su<And>, I32, I32, I32>; +def: AccRRR_pat<M4_xor_or, Xor, Su<Or>, I32, I32, I32>; +def: AccRRR_pat<M2_xor_xacc, Xor, Su<Xor>, I32, I32, I32>; +def: AccRRR_pat<M4_xor_xacc, Xor, Su<Xor>, I64, I64, I64>; // For dags like (or (and (not _), _), (shl _, _)) where the "or" with // one argument matches the patterns below, and with the other argument // matches S2_asl_r_r_or, etc, prefer the patterns below. let AddedComplexity = 110 in { // greater than S2_asl_r_r_and/or/xor. - def: AccRRR_pat<M4_and_andn, And, Su<Not2<And>>, I32, I32>; - def: AccRRR_pat<M4_or_andn, Or, Su<Not2<And>>, I32, I32>; - def: AccRRR_pat<M4_xor_andn, Xor, Su<Not2<And>>, I32, I32>; + def: AccRRR_pat<M4_and_andn, And, Su<Not2<And>>, I32, I32, I32>; + def: AccRRR_pat<M4_or_andn, Or, Su<Not2<And>>, I32, I32, I32>; + def: AccRRR_pat<M4_xor_andn, Xor, Su<Not2<And>>, I32, I32, I32>; } // S4_addaddi and S4_subaddi don't have tied operands, so give them @@ -1444,7 +1497,7 @@ def: Pat<(add I32:$Ru, (Su<Mul> I32:$Ry, I32:$Rs)), (M4_mpyrr_addr IntRegs:$Ru, IntRegs:$Ry, IntRegs:$Rs)>; -let Predicates = [HasV5T] in { +let Predicates = [HasV5] in { def: Pat<(fma F32:$Rs, F32:$Rt, F32:$Rx), (F2_sffma F32:$Rx, F32:$Rs, F32:$Rt)>; def: Pat<(fma (fneg F32:$Rs), F32:$Rt, F32:$Rx), @@ -1479,13 +1532,13 @@ def: Pat<(v4i16 (mul V4I16:$Rs, V4I16:$Rt)), // Multiplies two v4i8 vectors. 
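For this plain (truncating) multiply, the signedness of the widening step does not matter, so the unsigned M5_vmpybuu followed by keeping the low (even) bytes is enough. A scalar C++ check of that lane identity:

    #include <cassert>
    #include <cstdint>

    int main() {
      // Per lane: the low byte of the 8x8->16 product is the same
      // whether the widening multiply is done signed or unsigned.
      for (int A = 0; A < 256; ++A)
        for (int B = 0; B < 256; ++B) {
          uint16_t WideU = uint16_t(unsigned(A) * unsigned(B));
          int16_t  WideS = int16_t(int8_t(A) * int8_t(B));
          assert(uint8_t(WideU) == uint8_t(WideS));
        }
      return 0;
    }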
def: Pat<(v4i8 (mul V4I8:$Rs, V4I8:$Rt)), (S2_vtrunehb (M5_vmpybuu V4I8:$Rs, V4I8:$Rt))>, - Requires<[HasV5T]>; + Requires<[HasV5]>; // Multiplies two v8i8 vectors. def: Pat<(v8i8 (mul V8I8:$Rs, V8I8:$Rt)), (Combinew (S2_vtrunehb (M5_vmpybuu (HiReg $Rs), (HiReg $Rt))), (S2_vtrunehb (M5_vmpybuu (LoReg $Rs), (LoReg $Rt))))>, - Requires<[HasV5T]>; + Requires<[HasV5]>; // --(10) Bit ------------------------------------------------------------ @@ -1519,7 +1572,6 @@ def: Pat<(i32 (ctpop I32:$Rs)), (S5_popcountp (A4_combineir 0, I32:$Rs))>; def: Pat<(bitreverse I32:$Rs), (S2_brev I32:$Rs)>; def: Pat<(bitreverse I64:$Rss), (S2_brevp I64:$Rss)>; - let AddedComplexity = 20 in { // Complexity greater than and/or/xor def: Pat<(and I32:$Rs, IsNPow2_32:$V), (S2_clrbit_i IntRegs:$Rs, (LogN2_32 $V))>; @@ -1582,6 +1634,15 @@ let AddedComplexity = 10 in // Complexity greater than compare reg-reg. def: Pat<(i1 (seteq (and I32:$Rs, I32:$Rt), IntRegs:$Rt)), (C2_bitsset IntRegs:$Rs, IntRegs:$Rt)>; +def SDTTestBit: + SDTypeProfile<1, 2, [SDTCisVT<0, i1>, SDTCisVT<1, i32>, SDTCisVT<2, i32>]>; +def HexagonTSTBIT: SDNode<"HexagonISD::TSTBIT", SDTTestBit>; + +def: Pat<(HexagonTSTBIT I32:$Rs, u5_0ImmPred:$u5), + (S2_tstbit_i I32:$Rs, imm:$u5)>; +def: Pat<(HexagonTSTBIT I32:$Rs, I32:$Rt), + (S2_tstbit_r I32:$Rs, I32:$Rt)>; + let AddedComplexity = 20 in { // Complexity greater than cmp reg-imm. def: Pat<(i1 (seteq (and (shl 1, u5_0ImmPred:$u5), I32:$Rs), 0)), (S4_ntstbit_i I32:$Rs, imm:$u5)>; @@ -1790,7 +1851,12 @@ let AddedComplexity = 20 in { defm: Loadxi_pat<zextloadv2i8, v2i16, anyimm1, L2_loadbzw2_io>; defm: Loadxi_pat<zextloadv4i8, v4i16, anyimm2, L2_loadbzw4_io>; defm: Loadxi_pat<load, i32, anyimm2, L2_loadri_io>; + defm: Loadxi_pat<load, v2i16, anyimm2, L2_loadri_io>; + defm: Loadxi_pat<load, v4i8, anyimm2, L2_loadri_io>; defm: Loadxi_pat<load, i64, anyimm3, L2_loadrd_io>; + defm: Loadxi_pat<load, v2i32, anyimm3, L2_loadrd_io>; + defm: Loadxi_pat<load, v4i16, anyimm3, L2_loadrd_io>; + defm: Loadxi_pat<load, v8i8, anyimm3, L2_loadrd_io>; defm: Loadxi_pat<load, f32, anyimm2, L2_loadri_io>; defm: Loadxi_pat<load, f64, anyimm3, L2_loadrd_io>; // No sextloadi1. 
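The HexagonTSTBIT node added above has plain shift-and-mask semantics, and the S4_ntstbit_i rewrite is its negation; a small C++ rendering (the helper name is made up):

    #include <cassert>
    #include <cstdint>

    // tstbit(Rs, b): true iff bit b of Rs is set, as S2_tstbit_i/_r test.
    bool tstbit(uint32_t Rs, unsigned B) { return (Rs >> B) & 1u; }

    int main() {
      uint32_t Rs = 0x00000010;  // only bit 4 set
      assert(tstbit(Rs, 4) && !tstbit(Rs, 3));
      // "seteq (and (shl 1, b), Rs), 0" is exactly !tstbit(Rs, b):
      for (unsigned B = 0; B < 32; ++B)
        assert((((1u << B) & Rs) == 0) == !tstbit(Rs, B));
      return 0;
    }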
@@ -1828,10 +1894,15 @@ let AddedComplexity = 60 in { def: Loadxu_pat<zextloadi16, i32, anyimm1, L4_loadruh_ur>; def: Loadxu_pat<zextloadv2i8, v2i16, anyimm1, L4_loadbzw2_ur>; def: Loadxu_pat<zextloadv4i8, v4i16, anyimm2, L4_loadbzw4_ur>; - def: Loadxu_pat<load, f32, anyimm2, L4_loadri_ur>; - def: Loadxu_pat<load, f64, anyimm3, L4_loadrd_ur>; def: Loadxu_pat<load, i32, anyimm2, L4_loadri_ur>; + def: Loadxu_pat<load, v2i16, anyimm2, L4_loadri_ur>; + def: Loadxu_pat<load, v4i8, anyimm2, L4_loadri_ur>; def: Loadxu_pat<load, i64, anyimm3, L4_loadrd_ur>; + def: Loadxu_pat<load, v2i32, anyimm3, L4_loadrd_ur>; + def: Loadxu_pat<load, v4i16, anyimm3, L4_loadrd_ur>; + def: Loadxu_pat<load, v8i8, anyimm3, L4_loadrd_ur>; + def: Loadxu_pat<load, f32, anyimm2, L4_loadri_ur>; + def: Loadxu_pat<load, f64, anyimm3, L4_loadrd_ur>; def: Loadxum_pat<sextloadi8, i64, anyimm0, ToSext64, L4_loadrb_ur>; def: Loadxum_pat<zextloadi8, i64, anyimm0, ToZext64, L4_loadrub_ur>; @@ -1845,29 +1916,39 @@ let AddedComplexity = 60 in { } let AddedComplexity = 40 in { - def: Loadxr_shl_pat<extloadi8, i32, L4_loadrub_rr>; - def: Loadxr_shl_pat<zextloadi8, i32, L4_loadrub_rr>; - def: Loadxr_shl_pat<sextloadi8, i32, L4_loadrb_rr>; - def: Loadxr_shl_pat<extloadi16, i32, L4_loadruh_rr>; - def: Loadxr_shl_pat<zextloadi16, i32, L4_loadruh_rr>; - def: Loadxr_shl_pat<sextloadi16, i32, L4_loadrh_rr>; - def: Loadxr_shl_pat<load, i32, L4_loadri_rr>; - def: Loadxr_shl_pat<load, i64, L4_loadrd_rr>; - def: Loadxr_shl_pat<load, f32, L4_loadri_rr>; - def: Loadxr_shl_pat<load, f64, L4_loadrd_rr>; + def: Loadxr_shl_pat<extloadi8, i32, L4_loadrub_rr>; + def: Loadxr_shl_pat<zextloadi8, i32, L4_loadrub_rr>; + def: Loadxr_shl_pat<sextloadi8, i32, L4_loadrb_rr>; + def: Loadxr_shl_pat<extloadi16, i32, L4_loadruh_rr>; + def: Loadxr_shl_pat<zextloadi16, i32, L4_loadruh_rr>; + def: Loadxr_shl_pat<sextloadi16, i32, L4_loadrh_rr>; + def: Loadxr_shl_pat<load, i32, L4_loadri_rr>; + def: Loadxr_shl_pat<load, v2i16, L4_loadri_rr>; + def: Loadxr_shl_pat<load, v4i8, L4_loadri_rr>; + def: Loadxr_shl_pat<load, i64, L4_loadrd_rr>; + def: Loadxr_shl_pat<load, v2i32, L4_loadrd_rr>; + def: Loadxr_shl_pat<load, v4i16, L4_loadrd_rr>; + def: Loadxr_shl_pat<load, v8i8, L4_loadrd_rr>; + def: Loadxr_shl_pat<load, f32, L4_loadri_rr>; + def: Loadxr_shl_pat<load, f64, L4_loadrd_rr>; } let AddedComplexity = 20 in { - def: Loadxr_add_pat<extloadi8, i32, L4_loadrub_rr>; - def: Loadxr_add_pat<zextloadi8, i32, L4_loadrub_rr>; - def: Loadxr_add_pat<sextloadi8, i32, L4_loadrb_rr>; - def: Loadxr_add_pat<extloadi16, i32, L4_loadruh_rr>; - def: Loadxr_add_pat<zextloadi16, i32, L4_loadruh_rr>; - def: Loadxr_add_pat<sextloadi16, i32, L4_loadrh_rr>; - def: Loadxr_add_pat<load, i32, L4_loadri_rr>; - def: Loadxr_add_pat<load, i64, L4_loadrd_rr>; - def: Loadxr_add_pat<load, f32, L4_loadri_rr>; - def: Loadxr_add_pat<load, f64, L4_loadrd_rr>; + def: Loadxr_add_pat<extloadi8, i32, L4_loadrub_rr>; + def: Loadxr_add_pat<zextloadi8, i32, L4_loadrub_rr>; + def: Loadxr_add_pat<sextloadi8, i32, L4_loadrb_rr>; + def: Loadxr_add_pat<extloadi16, i32, L4_loadruh_rr>; + def: Loadxr_add_pat<zextloadi16, i32, L4_loadruh_rr>; + def: Loadxr_add_pat<sextloadi16, i32, L4_loadrh_rr>; + def: Loadxr_add_pat<load, i32, L4_loadri_rr>; + def: Loadxr_add_pat<load, v2i16, L4_loadri_rr>; + def: Loadxr_add_pat<load, v4i8, L4_loadri_rr>; + def: Loadxr_add_pat<load, i64, L4_loadrd_rr>; + def: Loadxr_add_pat<load, v2i32, L4_loadrd_rr>; + def: Loadxr_add_pat<load, v4i16, L4_loadrd_rr>; + def: Loadxr_add_pat<load, v8i8, 
L4_loadrd_rr>; + def: Loadxr_add_pat<load, f32, L4_loadri_rr>; + def: Loadxr_add_pat<load, f64, L4_loadrd_rr>; } let AddedComplexity = 40 in { @@ -1897,17 +1978,22 @@ let AddedComplexity = 20 in { // Absolute address let AddedComplexity = 60 in { - def: Loada_pat<zextloadi1, i32, anyimm0, PS_loadrubabs>; - def: Loada_pat<sextloadi8, i32, anyimm0, PS_loadrbabs>; - def: Loada_pat<extloadi8, i32, anyimm0, PS_loadrubabs>; - def: Loada_pat<zextloadi8, i32, anyimm0, PS_loadrubabs>; - def: Loada_pat<sextloadi16, i32, anyimm1, PS_loadrhabs>; - def: Loada_pat<extloadi16, i32, anyimm1, PS_loadruhabs>; - def: Loada_pat<zextloadi16, i32, anyimm1, PS_loadruhabs>; - def: Loada_pat<load, i32, anyimm2, PS_loadriabs>; - def: Loada_pat<load, i64, anyimm3, PS_loadrdabs>; - def: Loada_pat<load, f32, anyimm2, PS_loadriabs>; - def: Loada_pat<load, f64, anyimm3, PS_loadrdabs>; + def: Loada_pat<zextloadi1, i32, anyimm0, PS_loadrubabs>; + def: Loada_pat<sextloadi8, i32, anyimm0, PS_loadrbabs>; + def: Loada_pat<extloadi8, i32, anyimm0, PS_loadrubabs>; + def: Loada_pat<zextloadi8, i32, anyimm0, PS_loadrubabs>; + def: Loada_pat<sextloadi16, i32, anyimm1, PS_loadrhabs>; + def: Loada_pat<extloadi16, i32, anyimm1, PS_loadruhabs>; + def: Loada_pat<zextloadi16, i32, anyimm1, PS_loadruhabs>; + def: Loada_pat<load, i32, anyimm2, PS_loadriabs>; + def: Loada_pat<load, v2i16, anyimm2, PS_loadriabs>; + def: Loada_pat<load, v4i8, anyimm2, PS_loadriabs>; + def: Loada_pat<load, i64, anyimm3, PS_loadrdabs>; + def: Loada_pat<load, v2i32, anyimm3, PS_loadrdabs>; + def: Loada_pat<load, v4i16, anyimm3, PS_loadrdabs>; + def: Loada_pat<load, v8i8, anyimm3, PS_loadrdabs>; + def: Loada_pat<load, f32, anyimm2, PS_loadriabs>; + def: Loada_pat<load, f64, anyimm3, PS_loadrdabs>; def: Loada_pat<atomic_load_8, i32, anyimm0, PS_loadrubabs>; def: Loada_pat<atomic_load_16, i32, anyimm1, PS_loadruhabs>; @@ -1933,18 +2019,23 @@ let AddedComplexity = 30 in { // GP-relative address let AddedComplexity = 100 in { - def: Loada_pat<extloadi1, i32, addrgp, L2_loadrubgp>; - def: Loada_pat<zextloadi1, i32, addrgp, L2_loadrubgp>; - def: Loada_pat<extloadi8, i32, addrgp, L2_loadrubgp>; - def: Loada_pat<sextloadi8, i32, addrgp, L2_loadrbgp>; - def: Loada_pat<zextloadi8, i32, addrgp, L2_loadrubgp>; - def: Loada_pat<extloadi16, i32, addrgp, L2_loadruhgp>; - def: Loada_pat<sextloadi16, i32, addrgp, L2_loadrhgp>; - def: Loada_pat<zextloadi16, i32, addrgp, L2_loadruhgp>; - def: Loada_pat<load, i32, addrgp, L2_loadrigp>; - def: Loada_pat<load, i64, addrgp, L2_loadrdgp>; - def: Loada_pat<load, f32, addrgp, L2_loadrigp>; - def: Loada_pat<load, f64, addrgp, L2_loadrdgp>; + def: Loada_pat<extloadi1, i32, addrgp, L2_loadrubgp>; + def: Loada_pat<zextloadi1, i32, addrgp, L2_loadrubgp>; + def: Loada_pat<extloadi8, i32, addrgp, L2_loadrubgp>; + def: Loada_pat<sextloadi8, i32, addrgp, L2_loadrbgp>; + def: Loada_pat<zextloadi8, i32, addrgp, L2_loadrubgp>; + def: Loada_pat<extloadi16, i32, addrgp, L2_loadruhgp>; + def: Loada_pat<sextloadi16, i32, addrgp, L2_loadrhgp>; + def: Loada_pat<zextloadi16, i32, addrgp, L2_loadruhgp>; + def: Loada_pat<load, i32, addrgp, L2_loadrigp>; + def: Loada_pat<load, v2i16, addrgp, L2_loadrigp>; + def: Loada_pat<load, v4i8, addrgp, L2_loadrigp>; + def: Loada_pat<load, i64, addrgp, L2_loadrdgp>; + def: Loada_pat<load, v2i32, addrgp, L2_loadrdgp>; + def: Loada_pat<load, v4i16, addrgp, L2_loadrdgp>; + def: Loada_pat<load, v8i8, addrgp, L2_loadrdgp>; + def: Loada_pat<load, f32, addrgp, L2_loadrigp>; + def: Loada_pat<load, f64, addrgp, L2_loadrdgp>; def: 
Loada_pat<atomic_load_8, i32, addrgp, L2_loadrubgp>; def: Loada_pat<atomic_load_16, i32, addrgp, L2_loadruhgp>; @@ -1983,46 +2074,10 @@ def: Pat<(i1 (load (add I32:$Rs, anyimm0:$Off))), def: Pat<(i1 (load I32:$Rs)), (C2_tfrrp (L2_loadrub_io IntRegs:$Rs, 0))>; -// HVX loads - -multiclass HvxLd_pat<InstHexagon MI, PatFrag Load, ValueType VT, - PatFrag ImmPred> { - def: Pat<(VT (Load I32:$Rt)), (MI I32:$Rt, 0)>; - def: Pat<(VT (Load (add I32:$Rt, ImmPred:$s))), (MI I32:$Rt, imm:$s)>; - // The HVX selection code for shuffles can generate vector constants. - // Calling "Select" on the resulting loads from CP fails without these - // patterns. - def: Pat<(VT (Load (HexagonCP tconstpool:$A))), (MI (A2_tfrsi imm:$A), 0)>; - def: Pat<(VT (Load (HexagonAtPcrel tconstpool:$A))), - (MI (C4_addipc imm:$A), 0)>; -} - - -let Predicates = [UseHVX] in { - multiclass HvxLdVs_pat<InstHexagon MI, PatFrag Load> { - defm: HvxLd_pat<MI, Load, VecI8, IsVecOff>; - defm: HvxLd_pat<MI, Load, VecI16, IsVecOff>; - defm: HvxLd_pat<MI, Load, VecI32, IsVecOff>; - } - defm: HvxLdVs_pat<V6_vL32b_nt_ai, alignednontemporalload>; - defm: HvxLdVs_pat<V6_vL32b_ai, alignedload>; - defm: HvxLdVs_pat<V6_vL32Ub_ai, unalignedload>; - - multiclass HvxLdWs_pat<InstHexagon MI, PatFrag Load> { - defm: HvxLd_pat<MI, Load, VecPI8, IsVecOff>; - defm: HvxLd_pat<MI, Load, VecPI16, IsVecOff>; - defm: HvxLd_pat<MI, Load, VecPI32, IsVecOff>; - } - defm: HvxLdWs_pat<PS_vloadrw_nt_ai, alignednontemporalload>; - defm: HvxLdWs_pat<PS_vloadrw_ai, alignedload>; - defm: HvxLdWs_pat<PS_vloadrwu_ai, unalignedload>; -} - // --(13) Store ---------------------------------------------------------- // - class Storepi_pat<PatFrag Store, PatFrag Value, PatFrag Offset, InstHexagon MI> : Pat<(Store Value:$Rt, I32:$Rx, Offset:$s4), (MI I32:$Rx, imm:$s4, Value:$Rt)>; @@ -2135,7 +2190,7 @@ class Stoream_pat<PatFrag Store, PatFrag Value, PatFrag Addr, PatFrag ValueMod, // swapped. This relies on the knowledge that the F.Fragment uses names // "ptr" and "val". 
class AtomSt<PatFrag F> - : PatFrag<(ops node:$val, node:$ptr), F.Fragment, F.PredicateCode, + : PatFrag<(ops node:$val, node:$ptr), !head(F.Fragments), F.PredicateCode, F.OperandTransform> { let IsAtomic = F.IsAtomic; let MemoryVT = F.MemoryVT; @@ -2459,36 +2514,6 @@ let AddedComplexity = 10 in { def: Storexi_base_pat<AtomSt<atomic_store_64>, I64, S2_storerd_io>; } -// HVX stores - -multiclass HvxSt_pat<InstHexagon MI, PatFrag Store, PatFrag ImmPred, - PatFrag Value> { - def: Pat<(Store Value:$Vs, I32:$Rt), - (MI I32:$Rt, 0, Value:$Vs)>; - def: Pat<(Store Value:$Vs, (add I32:$Rt, ImmPred:$s)), - (MI I32:$Rt, imm:$s, Value:$Vs)>; -} - -let Predicates = [UseHVX] in { - multiclass HvxStVs_pat<InstHexagon MI, PatFrag Store> { - defm: HvxSt_pat<MI, Store, IsVecOff, HVI8>; - defm: HvxSt_pat<MI, Store, IsVecOff, HVI16>; - defm: HvxSt_pat<MI, Store, IsVecOff, HVI32>; - } - defm: HvxStVs_pat<V6_vS32b_nt_ai, alignednontemporalstore>; - defm: HvxStVs_pat<V6_vS32b_ai, alignedstore>; - defm: HvxStVs_pat<V6_vS32Ub_ai, unalignedstore>; - - multiclass HvxStWs_pat<InstHexagon MI, PatFrag Store> { - defm: HvxSt_pat<MI, Store, IsVecOff, HWI8>; - defm: HvxSt_pat<MI, Store, IsVecOff, HWI16>; - defm: HvxSt_pat<MI, Store, IsVecOff, HWI32>; - } - defm: HvxStWs_pat<PS_vstorerw_nt_ai, alignednontemporalstore>; - defm: HvxStWs_pat<PS_vstorerw_ai, alignedstore>; - defm: HvxStWs_pat<PS_vstorerwu_ai, unalignedstore>; -} - // --(14) Memop ---------------------------------------------------------- // @@ -2570,8 +2595,10 @@ multiclass Memopxr_add_pat<PatFrag Load, PatFrag Store, PatFrag ImmPred, multiclass Memopxr_pat<PatFrag Load, PatFrag Store, PatFrag ImmPred, SDNode Oper, InstHexagon MI> { - defm: Memopxr_base_pat <Load, Store, Oper, MI>; - defm: Memopxr_add_pat <Load, Store, ImmPred, Oper, MI>; + let Predicates = [UseMEMOPS] in { + defm: Memopxr_base_pat <Load, Store, Oper, MI>; + defm: Memopxr_add_pat <Load, Store, ImmPred, Oper, MI>; + } } let AddedComplexity = 200 in { @@ -2669,8 +2696,10 @@ multiclass Memopxi_add_pat<PatFrag Load, PatFrag Store, PatFrag ImmPred, multiclass Memopxi_pat<PatFrag Load, PatFrag Store, PatFrag ImmPred, SDNode Oper, PatFrag Arg, SDNodeXForm ArgMod, InstHexagon MI> { - defm: Memopxi_base_pat <Load, Store, Oper, Arg, ArgMod, MI>; - defm: Memopxi_add_pat <Load, Store, ImmPred, Oper, Arg, ArgMod, MI>; + let Predicates = [UseMEMOPS] in { + defm: Memopxi_base_pat <Load, Store, Oper, Arg, ArgMod, MI>; + defm: Memopxi_add_pat <Load, Store, ImmPred, Oper, Arg, ArgMod, MI>; + } } let AddedComplexity = 220 in { @@ -2829,6 +2858,8 @@ def: Pat<(brcond (not I1:$Pu), bb:$dst), (J2_jumpf I1:$Pu, bb:$dst)>; def: Pat<(brcond (i1 (setne I1:$Pu, -1)), bb:$dst), (J2_jumpf I1:$Pu, bb:$dst)>; +def: Pat<(brcond (i1 (seteq I1:$Pu, 0)), bb:$dst), + (J2_jumpf I1:$Pu, bb:$dst)>; def: Pat<(brcond (i1 (setne I1:$Pu, 0)), bb:$dst), (J2_jumpt I1:$Pu, bb:$dst)>; @@ -2898,97 +2929,17 @@ def HexagonREADCYCLE: SDNode<"HexagonISD::READCYCLE", SDTInt64Leaf, def: Pat<(HexagonREADCYCLE), (A4_tfrcpp UPCYCLE)>; - -def SDTVecLeaf: SDTypeProfile<1, 0, [SDTCisVec<0>]>; - -def SDTHexagonVEXTRACTW: SDTypeProfile<1, 2, - [SDTCisVT<0, i32>, SDTCisVec<1>, SDTCisVT<2, i32>]>; -def HexagonVEXTRACTW : SDNode<"HexagonISD::VEXTRACTW", SDTHexagonVEXTRACTW>; - -def SDTHexagonVINSERTW0: SDTypeProfile<1, 2, - [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisVT<2, i32>]>; -def HexagonVINSERTW0 : SDNode<"HexagonISD::VINSERTW0", SDTHexagonVINSERTW0>; - -def Combinev: OutPatFrag<(ops node:$Rs, node:$Rt), - (REG_SEQUENCE HvxWR, $Rs, vsub_hi, $Rt, 
vsub_lo)>; - -def LoVec: OutPatFrag<(ops node:$Vs), (EXTRACT_SUBREG $Vs, vsub_lo)>; -def HiVec: OutPatFrag<(ops node:$Vs), (EXTRACT_SUBREG $Vs, vsub_hi)>; - -let Predicates = [UseHVX] in { - def: OpR_RR_pat<V6_vpackeb, pf2<HexagonVPACKE>, VecI8, HVI8>; - def: OpR_RR_pat<V6_vpackob, pf2<HexagonVPACKO>, VecI8, HVI8>; - def: OpR_RR_pat<V6_vpackeh, pf2<HexagonVPACKE>, VecI16, HVI16>; - def: OpR_RR_pat<V6_vpackoh, pf2<HexagonVPACKO>, VecI16, HVI16>; -} - -def HexagonVZERO: SDNode<"HexagonISD::VZERO", SDTVecLeaf>; -def vzero: PatFrag<(ops), (HexagonVZERO)>; - -let Predicates = [UseHVX] in { - def: Pat<(VecI8 vzero), (V6_vd0)>; - def: Pat<(VecI16 vzero), (V6_vd0)>; - def: Pat<(VecI32 vzero), (V6_vd0)>; - - def: Pat<(VecPI8 (concat_vectors HVI8:$Vs, HVI8:$Vt)), - (Combinev HvxVR:$Vt, HvxVR:$Vs)>; - def: Pat<(VecPI16 (concat_vectors HVI16:$Vs, HVI16:$Vt)), - (Combinev HvxVR:$Vt, HvxVR:$Vs)>; - def: Pat<(VecPI32 (concat_vectors HVI32:$Vs, HVI32:$Vt)), - (Combinev HvxVR:$Vt, HvxVR:$Vs)>; - - def: Pat<(HexagonVEXTRACTW HVI8:$Vu, I32:$Rs), - (V6_extractw HvxVR:$Vu, I32:$Rs)>; - def: Pat<(HexagonVEXTRACTW HVI16:$Vu, I32:$Rs), - (V6_extractw HvxVR:$Vu, I32:$Rs)>; - def: Pat<(HexagonVEXTRACTW HVI32:$Vu, I32:$Rs), - (V6_extractw HvxVR:$Vu, I32:$Rs)>; - - def: Pat<(HexagonVINSERTW0 HVI8:$Vu, I32:$Rt), - (V6_vinsertwr HvxVR:$Vu, I32:$Rt)>; - def: Pat<(HexagonVINSERTW0 HVI16:$Vu, I32:$Rt), - (V6_vinsertwr HvxVR:$Vu, I32:$Rt)>; - def: Pat<(HexagonVINSERTW0 HVI32:$Vu, I32:$Rt), - (V6_vinsertwr HvxVR:$Vu, I32:$Rt)>; - - def: Pat<(add HVI8:$Vs, HVI8:$Vt), (V6_vaddb HvxVR:$Vs, HvxVR:$Vt)>; - def: Pat<(add HVI16:$Vs, HVI16:$Vt), (V6_vaddh HvxVR:$Vs, HvxVR:$Vt)>; - def: Pat<(add HVI32:$Vs, HVI32:$Vt), (V6_vaddw HvxVR:$Vs, HvxVR:$Vt)>; - - def: Pat<(sub HVI8:$Vs, HVI8:$Vt), (V6_vsubb HvxVR:$Vs, HvxVR:$Vt)>; - def: Pat<(sub HVI16:$Vs, HVI16:$Vt), (V6_vsubh HvxVR:$Vs, HvxVR:$Vt)>; - def: Pat<(sub HVI32:$Vs, HVI32:$Vt), (V6_vsubw HvxVR:$Vs, HvxVR:$Vt)>; - - def: Pat<(and HVI8:$Vs, HVI8:$Vt), (V6_vand HvxVR:$Vs, HvxVR:$Vt)>; - def: Pat<(or HVI8:$Vs, HVI8:$Vt), (V6_vor HvxVR:$Vs, HvxVR:$Vt)>; - def: Pat<(xor HVI8:$Vs, HVI8:$Vt), (V6_vxor HvxVR:$Vs, HvxVR:$Vt)>; - - def: Pat<(vselect HQ8:$Qu, HVI8:$Vs, HVI8:$Vt), - (V6_vmux HvxQR:$Qu, HvxVR:$Vs, HvxVR:$Vt)>; - def: Pat<(vselect HQ16:$Qu, HVI16:$Vs, HVI16:$Vt), - (V6_vmux HvxQR:$Qu, HvxVR:$Vs, HvxVR:$Vt)>; - def: Pat<(vselect HQ32:$Qu, HVI32:$Vs, HVI32:$Vt), - (V6_vmux HvxQR:$Qu, HvxVR:$Vs, HvxVR:$Vt)>; - - def: Pat<(VecPI16 (sext HVI8:$Vs)), (V6_vsb HvxVR:$Vs)>; - def: Pat<(VecPI32 (sext HVI16:$Vs)), (V6_vsh HvxVR:$Vs)>; - def: Pat<(VecPI16 (zext HVI8:$Vs)), (V6_vzb HvxVR:$Vs)>; - def: Pat<(VecPI32 (zext HVI16:$Vs)), (V6_vzh HvxVR:$Vs)>; - - def: Pat<(sext_inreg HVI32:$Vs, v16i16), - (V6_vpackeb (LoVec (V6_vsh HvxVR:$Vs)), - (HiVec (V6_vsh HvxVR:$Vs)))>; - def: Pat<(sext_inreg HVI32:$Vs, v32i16), - (V6_vpackeb (LoVec (V6_vsh HvxVR:$Vs)), - (HiVec (V6_vsh HvxVR:$Vs)))>; - - def: Pat<(VecI16 (sext_invec HVI8:$Vs)), (LoVec (V6_vsb HvxVR:$Vs))>; - def: Pat<(VecI32 (sext_invec HVI16:$Vs)), (LoVec (V6_vsh HvxVR:$Vs))>; - def: Pat<(VecI32 (sext_invec HVI8:$Vs)), - (LoVec (V6_vsh (LoVec (V6_vsb HvxVR:$Vs))))>; - - def: Pat<(VecI16 (zext_invec HVI8:$Vs)), (LoVec (V6_vzb HvxVR:$Vs))>; - def: Pat<(VecI32 (zext_invec HVI16:$Vs)), (LoVec (V6_vzh HvxVR:$Vs))>; - def: Pat<(VecI32 (zext_invec HVI8:$Vs)), - (LoVec (V6_vzh (LoVec (V6_vzb HvxVR:$Vs))))>; +// The declared return value of the store-locked intrinsics is i32, but +// the instructions actually define i1. 
To avoid register copies from +// IntRegs to PredRegs and back, fold the entire pattern checking the +// result against true/false. +let AddedComplexity = 100 in { + def: Pat<(i1 (setne (int_hexagon_S2_storew_locked I32:$Rs, I32:$Rt), 0)), + (S2_storew_locked I32:$Rs, I32:$Rt)>; + def: Pat<(i1 (seteq (int_hexagon_S2_storew_locked I32:$Rs, I32:$Rt), 0)), + (C2_not (S2_storew_locked I32:$Rs, I32:$Rt))>; + def: Pat<(i1 (setne (int_hexagon_S4_stored_locked I32:$Rs, I64:$Rt), 0)), + (S4_stored_locked I32:$Rs, I64:$Rt)>; + def: Pat<(i1 (seteq (int_hexagon_S4_stored_locked I32:$Rs, I64:$Rt), 0)), + (C2_not (S4_stored_locked I32:$Rs, I64:$Rt))>; } diff --git a/lib/Target/Hexagon/HexagonPatternsHVX.td b/lib/Target/Hexagon/HexagonPatternsHVX.td new file mode 100644 index 000000000000..a4cfca9ac7d7 --- /dev/null +++ b/lib/Target/Hexagon/HexagonPatternsHVX.td @@ -0,0 +1,497 @@ +def SDTVecLeaf: + SDTypeProfile<1, 0, [SDTCisVec<0>]>; +def SDTVecBinOp: + SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, SDTCisSameAs<1,2>]>; + +def SDTHexagonVEXTRACTW: SDTypeProfile<1, 2, + [SDTCisVT<0, i32>, SDTCisVec<1>, SDTCisVT<2, i32>]>; +def HexagonVEXTRACTW : SDNode<"HexagonISD::VEXTRACTW", SDTHexagonVEXTRACTW>; + +def SDTHexagonVINSERTW0: SDTypeProfile<1, 2, + [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisVT<2, i32>]>; +def HexagonVINSERTW0: SDNode<"HexagonISD::VINSERTW0", SDTHexagonVINSERTW0>; + +def SDTHexagonVSPLATW: SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>; +def HexagonVSPLATW: SDNode<"HexagonISD::VSPLATW", SDTHexagonVSPLATW>; + +def HwLen2: SDNodeXForm<imm, [{ + const auto &ST = static_cast<const HexagonSubtarget&>(CurDAG->getSubtarget()); + return CurDAG->getTargetConstant(ST.getVectorLength()/2, SDLoc(N), MVT::i32); +}]>; + +def Q2V: OutPatFrag<(ops node:$Qs), (V6_vandqrt $Qs, (A2_tfrsi -1))>; + +def Combinev: OutPatFrag<(ops node:$Vs, node:$Vt), + (REG_SEQUENCE HvxWR, $Vs, vsub_hi, $Vt, vsub_lo)>; + +def Combineq: OutPatFrag<(ops node:$Qs, node:$Qt), + (V6_vandvrt + (V6_vor + (V6_vror (V6_vpackeb (V6_vd0), (Q2V $Qs)), + (A2_tfrsi (HwLen2 (i32 0)))), // Half the vector length + (V6_vpackeb (V6_vd0), (Q2V $Qt))), + (A2_tfrsi -1))>; + +def LoVec: OutPatFrag<(ops node:$Vs), (EXTRACT_SUBREG $Vs, vsub_lo)>; +def HiVec: OutPatFrag<(ops node:$Vs), (EXTRACT_SUBREG $Vs, vsub_hi)>; + +def HexagonVZERO: SDNode<"HexagonISD::VZERO", SDTVecLeaf>; +def HexagonQCAT: SDNode<"HexagonISD::QCAT", SDTVecBinOp>; +def HexagonQTRUE: SDNode<"HexagonISD::QTRUE", SDTVecLeaf>; +def HexagonQFALSE: SDNode<"HexagonISD::QFALSE", SDTVecLeaf>; + +def vzero: PatFrag<(ops), (HexagonVZERO)>; +def qtrue: PatFrag<(ops), (HexagonQTRUE)>; +def qfalse: PatFrag<(ops), (HexagonQFALSE)>; +def qcat: PatFrag<(ops node:$Qs, node:$Qt), + (HexagonQCAT node:$Qs, node:$Qt)>; + +def qnot: PatFrag<(ops node:$Qs), (xor node:$Qs, qtrue)>; + +def VSxtb: OutPatFrag<(ops node:$Vs), (V6_vunpackb $Vs)>; +def VSxth: OutPatFrag<(ops node:$Vs), (V6_vunpackh $Vs)>; +def VZxtb: OutPatFrag<(ops node:$Vs), (V6_vunpackub $Vs)>; +def VZxth: OutPatFrag<(ops node:$Vs), (V6_vunpackuh $Vs)>; + +def SplatB: SDNodeXForm<imm, [{ + uint32_t V = N->getZExtValue(); + assert(isUInt<8>(V)); + uint32_t S = V << 24 | V << 16 | V << 8 | V; + return CurDAG->getTargetConstant(S, SDLoc(N), MVT::i32); +}]>; + +def SplatH: SDNodeXForm<imm, [{ + uint32_t V = N->getZExtValue(); + assert(isUInt<16>(V)); + return CurDAG->getTargetConstant(V << 16 | V, SDLoc(N), MVT::i32); +}]>; + +def IsVecOff : PatLeaf<(i32 imm), [{ + int32_t V = N->getSExtValue(); + int32_t VecSize = 
HRI->getSpillSize(Hexagon::HvxVRRegClass); + assert(isPowerOf2_32(VecSize)); + if ((uint32_t(V) & (uint32_t(VecSize)-1)) != 0) + return false; + int32_t L = Log2_32(VecSize); + return isInt<4>(V >> L); +}]>; + + +def alignedload: PatFrag<(ops node:$a), (load $a), [{ + return isAlignedMemNode(dyn_cast<MemSDNode>(N)); +}]>; + +def unalignedload: PatFrag<(ops node:$a), (load $a), [{ + return !isAlignedMemNode(dyn_cast<MemSDNode>(N)); +}]>; + +def alignedstore: PatFrag<(ops node:$v, node:$a), (store $v, $a), [{ + return isAlignedMemNode(dyn_cast<MemSDNode>(N)); +}]>; + +def unalignedstore: PatFrag<(ops node:$v, node:$a), (store $v, $a), [{ + return !isAlignedMemNode(dyn_cast<MemSDNode>(N)); +}]>; + + +// HVX loads + +multiclass HvxLd_pat<InstHexagon MI, PatFrag Load, ValueType ResType, + PatFrag ImmPred> { + def: Pat<(ResType (Load I32:$Rt)), + (MI I32:$Rt, 0)>; + def: Pat<(ResType (Load (add I32:$Rt, ImmPred:$s))), + (MI I32:$Rt, imm:$s)>; + // The HVX selection code for shuffles can generate vector constants. + // Calling "Select" on the resulting loads from CP fails without these + // patterns. + def: Pat<(ResType (Load (HexagonCP tconstpool:$A))), + (MI (A2_tfrsi imm:$A), 0)>; + def: Pat<(ResType (Load (HexagonAtPcrel tconstpool:$A))), + (MI (C4_addipc imm:$A), 0)>; +} + +multiclass HvxLda_pat<InstHexagon MI, PatFrag Load, ValueType ResType, + PatFrag ImmPred> { + let AddedComplexity = 50 in { + def: Pat<(ResType (Load (valignaddr I32:$Rt))), + (MI I32:$Rt, 0)>; + def: Pat<(ResType (Load (add (valignaddr I32:$Rt), ImmPred:$Off))), + (MI I32:$Rt, imm:$Off)>; + } + defm: HvxLd_pat<MI, Load, ResType, ImmPred>; +} + +let Predicates = [UseHVX] in { + defm: HvxLda_pat<V6_vL32b_nt_ai, alignednontemporalload, VecI8, IsVecOff>; + defm: HvxLda_pat<V6_vL32b_nt_ai, alignednontemporalload, VecI16, IsVecOff>; + defm: HvxLda_pat<V6_vL32b_nt_ai, alignednontemporalload, VecI32, IsVecOff>; + + defm: HvxLda_pat<V6_vL32b_ai, alignedload, VecI8, IsVecOff>; + defm: HvxLda_pat<V6_vL32b_ai, alignedload, VecI16, IsVecOff>; + defm: HvxLda_pat<V6_vL32b_ai, alignedload, VecI32, IsVecOff>; + + defm: HvxLd_pat<V6_vL32Ub_ai, unalignedload, VecI8, IsVecOff>; + defm: HvxLd_pat<V6_vL32Ub_ai, unalignedload, VecI16, IsVecOff>; + defm: HvxLd_pat<V6_vL32Ub_ai, unalignedload, VecI32, IsVecOff>; +} + +// HVX stores + +multiclass HvxSt_pat<InstHexagon MI, PatFrag Store, PatFrag ImmPred, + PatFrag Value> { + def: Pat<(Store Value:$Vs, I32:$Rt), + (MI I32:$Rt, 0, Value:$Vs)>; + def: Pat<(Store Value:$Vs, (add I32:$Rt, ImmPred:$s)), + (MI I32:$Rt, imm:$s, Value:$Vs)>; +} + +let Predicates = [UseHVX] in { + defm: HvxSt_pat<V6_vS32b_nt_ai, alignednontemporalstore, IsVecOff, HVI8>; + defm: HvxSt_pat<V6_vS32b_nt_ai, alignednontemporalstore, IsVecOff, HVI16>; + defm: HvxSt_pat<V6_vS32b_nt_ai, alignednontemporalstore, IsVecOff, HVI32>; + + defm: HvxSt_pat<V6_vS32b_ai, alignedstore, IsVecOff, HVI8>; + defm: HvxSt_pat<V6_vS32b_ai, alignedstore, IsVecOff, HVI16>; + defm: HvxSt_pat<V6_vS32b_ai, alignedstore, IsVecOff, HVI32>; + + defm: HvxSt_pat<V6_vS32Ub_ai, unalignedstore, IsVecOff, HVI8>; + defm: HvxSt_pat<V6_vS32Ub_ai, unalignedstore, IsVecOff, HVI16>; + defm: HvxSt_pat<V6_vS32Ub_ai, unalignedstore, IsVecOff, HVI32>; +} + +// Bitcasts between same-size vector types are no-ops, except for the +// actual type change. 
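Since the bits stay put, only the lane boundaries move. A host-side C++ illustration of one 32-bit value viewed as four byte lanes or two halfword lanes (little-endian lane order, as on Hexagon; this is an analogy for the register reinterpretation, not generated code):

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    int main() {
      uint32_t Bits = 0x04030201;  // one register's worth of bits
      uint8_t  L8[4];
      uint16_t L16[2];
      std::memcpy(L8, &Bits, sizeof(Bits));   // view as 4 x i8
      std::memcpy(L16, &Bits, sizeof(Bits));  // view as 2 x i16
      assert(L8[0] == 0x01 && L8[3] == 0x04);
      assert(L16[0] == 0x0201 && L16[1] == 0x0403);
      return 0;
    }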
+class Bitcast<ValueType ResTy, ValueType InpTy, RegisterClass RC> + : Pat<(ResTy (bitconvert (InpTy RC:$Val))), (ResTy RC:$Val)>; + +let Predicates = [UseHVX] in { + def: Bitcast<VecI8, VecI16, HvxVR>; + def: Bitcast<VecI8, VecI32, HvxVR>; + def: Bitcast<VecI16, VecI8, HvxVR>; + def: Bitcast<VecI16, VecI32, HvxVR>; + def: Bitcast<VecI32, VecI8, HvxVR>; + def: Bitcast<VecI32, VecI16, HvxVR>; + + def: Bitcast<VecPI8, VecPI16, HvxWR>; + def: Bitcast<VecPI8, VecPI32, HvxWR>; + def: Bitcast<VecPI16, VecPI8, HvxWR>; + def: Bitcast<VecPI16, VecPI32, HvxWR>; + def: Bitcast<VecPI32, VecPI8, HvxWR>; + def: Bitcast<VecPI32, VecPI16, HvxWR>; +} + +let Predicates = [UseHVX] in { + def: Pat<(VecI8 vzero), (V6_vd0)>; + def: Pat<(VecI16 vzero), (V6_vd0)>; + def: Pat<(VecI32 vzero), (V6_vd0)>; + def: Pat<(VecPI8 vzero), (PS_vdd0)>; + def: Pat<(VecPI16 vzero), (PS_vdd0)>; + def: Pat<(VecPI32 vzero), (PS_vdd0)>; + + def: Pat<(concat_vectors (VecI8 vzero), (VecI8 vzero)), (PS_vdd0)>; + def: Pat<(concat_vectors (VecI16 vzero), (VecI16 vzero)), (PS_vdd0)>; + def: Pat<(concat_vectors (VecI32 vzero), (VecI32 vzero)), (PS_vdd0)>; + + def: Pat<(VecPI8 (concat_vectors HVI8:$Vs, HVI8:$Vt)), + (Combinev HvxVR:$Vt, HvxVR:$Vs)>; + def: Pat<(VecPI16 (concat_vectors HVI16:$Vs, HVI16:$Vt)), + (Combinev HvxVR:$Vt, HvxVR:$Vs)>; + def: Pat<(VecPI32 (concat_vectors HVI32:$Vs, HVI32:$Vt)), + (Combinev HvxVR:$Vt, HvxVR:$Vs)>; + + def: Pat<(VecQ8 (qcat HQ16:$Qs, HQ16:$Qt)), (Combineq $Qt, $Qs)>; + def: Pat<(VecQ16 (qcat HQ32:$Qs, HQ32:$Qt)), (Combineq $Qt, $Qs)>; + + def: Pat<(HexagonVEXTRACTW HVI8:$Vu, I32:$Rs), + (V6_extractw HvxVR:$Vu, I32:$Rs)>; + def: Pat<(HexagonVEXTRACTW HVI16:$Vu, I32:$Rs), + (V6_extractw HvxVR:$Vu, I32:$Rs)>; + def: Pat<(HexagonVEXTRACTW HVI32:$Vu, I32:$Rs), + (V6_extractw HvxVR:$Vu, I32:$Rs)>; + + def: Pat<(HexagonVINSERTW0 HVI8:$Vu, I32:$Rt), + (V6_vinsertwr HvxVR:$Vu, I32:$Rt)>; + def: Pat<(HexagonVINSERTW0 HVI16:$Vu, I32:$Rt), + (V6_vinsertwr HvxVR:$Vu, I32:$Rt)>; + def: Pat<(HexagonVINSERTW0 HVI32:$Vu, I32:$Rt), + (V6_vinsertwr HvxVR:$Vu, I32:$Rt)>; +} + +def Vsplatib: OutPatFrag<(ops node:$V), (V6_lvsplatw (ToI32 (SplatB $V)))>; +def Vsplatih: OutPatFrag<(ops node:$V), (V6_lvsplatw (ToI32 (SplatH $V)))>; +def Vsplatiw: OutPatFrag<(ops node:$V), (V6_lvsplatw (ToI32 $V))>; + +def Vsplatrb: OutPatFrag<(ops node:$Rs), (V6_lvsplatw (S2_vsplatrb $Rs))>; +def Vsplatrh: OutPatFrag<(ops node:$Rs), + (V6_lvsplatw (A2_combine_ll $Rs, $Rs))>; +def Vsplatrw: OutPatFrag<(ops node:$Rs), (V6_lvsplatw $Rs)>; + +def Rep: OutPatFrag<(ops node:$N), (Combinev $N, $N)>; + +let Predicates = [UseHVX] in { + let AddedComplexity = 10 in { + def: Pat<(VecI8 (HexagonVSPLAT u8_0ImmPred:$V)), (Vsplatib $V)>; + def: Pat<(VecI16 (HexagonVSPLAT u16_0ImmPred:$V)), (Vsplatih $V)>; + def: Pat<(VecI32 (HexagonVSPLAT anyimm:$V)), (Vsplatiw $V)>; + def: Pat<(VecPI8 (HexagonVSPLAT u8_0ImmPred:$V)), (Rep (Vsplatib $V))>; + def: Pat<(VecPI16 (HexagonVSPLAT u16_0ImmPred:$V)), (Rep (Vsplatih $V))>; + def: Pat<(VecPI32 (HexagonVSPLAT anyimm:$V)), (Rep (Vsplatiw $V))>; + } + def: Pat<(VecI8 (HexagonVSPLAT I32:$Rs)), (Vsplatrb $Rs)>; + def: Pat<(VecI16 (HexagonVSPLAT I32:$Rs)), (Vsplatrh $Rs)>; + def: Pat<(VecI32 (HexagonVSPLAT I32:$Rs)), (Vsplatrw $Rs)>; + def: Pat<(VecPI8 (HexagonVSPLAT I32:$Rs)), (Rep (Vsplatrb $Rs))>; + def: Pat<(VecPI16 (HexagonVSPLAT I32:$Rs)), (Rep (Vsplatrh $Rs))>; + def: Pat<(VecPI32 (HexagonVSPLAT I32:$Rs)), (Rep (Vsplatrw $Rs))>; + + def: Pat<(VecI8 (HexagonVSPLATW I32:$Rs)), (Vsplatrw $Rs)>; + def: Pat<(VecI16 
(HexagonVSPLATW I32:$Rs)), (Vsplatrw $Rs)>; + def: Pat<(VecI32 (HexagonVSPLATW I32:$Rs)), (Vsplatrw $Rs)>; + def: Pat<(VecPI8 (HexagonVSPLATW I32:$Rs)), (Rep (Vsplatrw $Rs))>; + def: Pat<(VecPI16 (HexagonVSPLATW I32:$Rs)), (Rep (Vsplatrw $Rs))>; + def: Pat<(VecPI32 (HexagonVSPLATW I32:$Rs)), (Rep (Vsplatrw $Rs))>; +} + +class Vneg1<ValueType VecTy> + : PatFrag<(ops), (VecTy (HexagonVSPLATW (i32 -1)))>; + +class Vnot<ValueType VecTy> + : PatFrag<(ops node:$Vs), (xor $Vs, Vneg1<VecTy>)>; + +let Predicates = [UseHVX] in { + let AddedComplexity = 200 in { + def: Pat<(Vnot<VecI8> HVI8:$Vs), (V6_vnot HvxVR:$Vs)>; + def: Pat<(Vnot<VecI16> HVI16:$Vs), (V6_vnot HvxVR:$Vs)>; + def: Pat<(Vnot<VecI32> HVI32:$Vs), (V6_vnot HvxVR:$Vs)>; + } + + def: OpR_RR_pat<V6_vaddb, Add, VecI8, HVI8>; + def: OpR_RR_pat<V6_vaddh, Add, VecI16, HVI16>; + def: OpR_RR_pat<V6_vaddw, Add, VecI32, HVI32>; + def: OpR_RR_pat<V6_vaddb_dv, Add, VecPI8, HWI8>; + def: OpR_RR_pat<V6_vaddh_dv, Add, VecPI16, HWI16>; + def: OpR_RR_pat<V6_vaddw_dv, Add, VecPI32, HWI32>; + def: OpR_RR_pat<V6_vsubb, Sub, VecI8, HVI8>; + def: OpR_RR_pat<V6_vsubh, Sub, VecI16, HVI16>; + def: OpR_RR_pat<V6_vsubw, Sub, VecI32, HVI32>; + def: OpR_RR_pat<V6_vsubb_dv, Sub, VecPI8, HWI8>; + def: OpR_RR_pat<V6_vsubh_dv, Sub, VecPI16, HWI16>; + def: OpR_RR_pat<V6_vsubw_dv, Sub, VecPI32, HWI32>; + def: OpR_RR_pat<V6_vand, And, VecI8, HVI8>; + def: OpR_RR_pat<V6_vand, And, VecI16, HVI16>; + def: OpR_RR_pat<V6_vand, And, VecI32, HVI32>; + def: OpR_RR_pat<V6_vor, Or, VecI8, HVI8>; + def: OpR_RR_pat<V6_vor, Or, VecI16, HVI16>; + def: OpR_RR_pat<V6_vor, Or, VecI32, HVI32>; + def: OpR_RR_pat<V6_vxor, Xor, VecI8, HVI8>; + def: OpR_RR_pat<V6_vxor, Xor, VecI16, HVI16>; + def: OpR_RR_pat<V6_vxor, Xor, VecI32, HVI32>; + + def: Pat<(vselect HQ8:$Qu, HVI8:$Vs, HVI8:$Vt), + (V6_vmux HvxQR:$Qu, HvxVR:$Vs, HvxVR:$Vt)>; + def: Pat<(vselect HQ16:$Qu, HVI16:$Vs, HVI16:$Vt), + (V6_vmux HvxQR:$Qu, HvxVR:$Vs, HvxVR:$Vt)>; + def: Pat<(vselect HQ32:$Qu, HVI32:$Vs, HVI32:$Vt), + (V6_vmux HvxQR:$Qu, HvxVR:$Vs, HvxVR:$Vt)>; + + def: Pat<(vselect (qnot HQ8:$Qu), HVI8:$Vs, HVI8:$Vt), + (V6_vmux HvxQR:$Qu, HvxVR:$Vt, HvxVR:$Vs)>; + def: Pat<(vselect (qnot HQ16:$Qu), HVI16:$Vs, HVI16:$Vt), + (V6_vmux HvxQR:$Qu, HvxVR:$Vt, HvxVR:$Vs)>; + def: Pat<(vselect (qnot HQ32:$Qu), HVI32:$Vs, HVI32:$Vt), + (V6_vmux HvxQR:$Qu, HvxVR:$Vt, HvxVR:$Vs)>; +} + +let Predicates = [UseHVX] in { + def: Pat<(VecPI16 (sext HVI8:$Vs)), (VSxtb $Vs)>; + def: Pat<(VecPI32 (sext HVI16:$Vs)), (VSxth $Vs)>; + def: Pat<(VecPI16 (zext HVI8:$Vs)), (VZxtb $Vs)>; + def: Pat<(VecPI32 (zext HVI16:$Vs)), (VZxth $Vs)>; + + def: Pat<(VecI16 (sext_invec HVI8:$Vs)), (LoVec (VSxtb $Vs))>; + def: Pat<(VecI32 (sext_invec HVI16:$Vs)), (LoVec (VSxth $Vs))>; + def: Pat<(VecI32 (sext_invec HVI8:$Vs)), + (LoVec (VSxth (LoVec (VSxtb $Vs))))>; + def: Pat<(VecPI16 (sext_invec HWI8:$Vss)), (VSxtb (LoVec $Vss))>; + def: Pat<(VecPI32 (sext_invec HWI16:$Vss)), (VSxth (LoVec $Vss))>; + def: Pat<(VecPI32 (sext_invec HWI8:$Vss)), + (VSxth (LoVec (VSxtb (LoVec $Vss))))>; + + def: Pat<(VecI16 (zext_invec HVI8:$Vs)), (LoVec (VZxtb $Vs))>; + def: Pat<(VecI32 (zext_invec HVI16:$Vs)), (LoVec (VZxth $Vs))>; + def: Pat<(VecI32 (zext_invec HVI8:$Vs)), + (LoVec (VZxth (LoVec (VZxtb $Vs))))>; + def: Pat<(VecPI16 (zext_invec HWI8:$Vss)), (VZxtb (LoVec $Vss))>; + def: Pat<(VecPI32 (zext_invec HWI16:$Vss)), (VZxth (LoVec $Vss))>; + def: Pat<(VecPI32 (zext_invec HWI8:$Vss)), + (VZxth (LoVec (VZxtb (LoVec $Vss))))>; + + def: Pat<(VecI8 (trunc HWI16:$Vss)), + 
(V6_vpackeb (HiVec $Vss), (LoVec $Vss))>; + def: Pat<(VecI16 (trunc HWI32:$Vss)), + (V6_vpackeh (HiVec $Vss), (LoVec $Vss))>; + + def: Pat<(VecQ8 (trunc HVI8:$Vs)), + (V6_vandvrt HvxVR:$Vs, (A2_tfrsi 0x01010101))>; + def: Pat<(VecQ16 (trunc HVI16:$Vs)), + (V6_vandvrt HvxVR:$Vs, (A2_tfrsi 0x01010101))>; + def: Pat<(VecQ32 (trunc HVI32:$Vs)), + (V6_vandvrt HvxVR:$Vs, (A2_tfrsi 0x01010101))>; +} + +let Predicates = [UseHVX] in { + // The "source" types are not legal, and there are no parameterized + // definitions for them, but they are length-specific. + let Predicates = [UseHVX,UseHVX64B] in { + def: Pat<(VecI16 (sext_inreg HVI16:$Vs, v32i8)), + (V6_vasrh (V6_vaslh HVI16:$Vs, (A2_tfrsi 8)), (A2_tfrsi 8))>; + def: Pat<(VecI32 (sext_inreg HVI32:$Vs, v16i8)), + (V6_vasrw (V6_vaslw HVI32:$Vs, (A2_tfrsi 24)), (A2_tfrsi 24))>; + def: Pat<(VecI32 (sext_inreg HVI32:$Vs, v16i16)), + (V6_vasrw (V6_vaslw HVI32:$Vs, (A2_tfrsi 16)), (A2_tfrsi 16))>; + } + let Predicates = [UseHVX,UseHVX128B] in { + def: Pat<(VecI16 (sext_inreg HVI16:$Vs, v64i8)), + (V6_vasrh (V6_vaslh HVI16:$Vs, (A2_tfrsi 8)), (A2_tfrsi 8))>; + def: Pat<(VecI32 (sext_inreg HVI32:$Vs, v32i8)), + (V6_vasrw (V6_vaslw HVI32:$Vs, (A2_tfrsi 24)), (A2_tfrsi 24))>; + def: Pat<(VecI32 (sext_inreg HVI32:$Vs, v32i16)), + (V6_vasrw (V6_vaslw HVI32:$Vs, (A2_tfrsi 16)), (A2_tfrsi 16))>; + } + + def: Pat<(HexagonVASL HVI8:$Vs, I32:$Rt), + (V6_vpackeb (V6_vaslh (HiVec (VZxtb HvxVR:$Vs)), I32:$Rt), + (V6_vaslh (LoVec (VZxtb HvxVR:$Vs)), I32:$Rt))>; + def: Pat<(HexagonVASR HVI8:$Vs, I32:$Rt), + (V6_vpackeb (V6_vasrh (HiVec (VSxtb HvxVR:$Vs)), I32:$Rt), + (V6_vasrh (LoVec (VSxtb HvxVR:$Vs)), I32:$Rt))>; + def: Pat<(HexagonVLSR HVI8:$Vs, I32:$Rt), + (V6_vpackeb (V6_vlsrh (HiVec (VZxtb HvxVR:$Vs)), I32:$Rt), + (V6_vlsrh (LoVec (VZxtb HvxVR:$Vs)), I32:$Rt))>; + + def: Pat<(HexagonVASL HVI16:$Vs, I32:$Rt), (V6_vaslh HvxVR:$Vs, I32:$Rt)>; + def: Pat<(HexagonVASL HVI32:$Vs, I32:$Rt), (V6_vaslw HvxVR:$Vs, I32:$Rt)>; + def: Pat<(HexagonVASR HVI16:$Vs, I32:$Rt), (V6_vasrh HvxVR:$Vs, I32:$Rt)>; + def: Pat<(HexagonVASR HVI32:$Vs, I32:$Rt), (V6_vasrw HvxVR:$Vs, I32:$Rt)>; + def: Pat<(HexagonVLSR HVI16:$Vs, I32:$Rt), (V6_vlsrh HvxVR:$Vs, I32:$Rt)>; + def: Pat<(HexagonVLSR HVI32:$Vs, I32:$Rt), (V6_vlsrw HvxVR:$Vs, I32:$Rt)>; + + def: Pat<(add HVI32:$Vx, (HexagonVASL HVI32:$Vu, I32:$Rt)), + (V6_vaslw_acc HvxVR:$Vx, HvxVR:$Vu, I32:$Rt)>; + def: Pat<(add HVI32:$Vx, (HexagonVASR HVI32:$Vu, I32:$Rt)), + (V6_vasrw_acc HvxVR:$Vx, HvxVR:$Vu, I32:$Rt)>; + + def: Pat<(shl HVI16:$Vs, HVI16:$Vt), (V6_vaslhv HvxVR:$Vs, HvxVR:$Vt)>; + def: Pat<(shl HVI32:$Vs, HVI32:$Vt), (V6_vaslwv HvxVR:$Vs, HvxVR:$Vt)>; + def: Pat<(sra HVI16:$Vs, HVI16:$Vt), (V6_vasrhv HvxVR:$Vs, HvxVR:$Vt)>; + def: Pat<(sra HVI32:$Vs, HVI32:$Vt), (V6_vasrwv HvxVR:$Vs, HvxVR:$Vt)>; + def: Pat<(srl HVI16:$Vs, HVI16:$Vt), (V6_vlsrhv HvxVR:$Vs, HvxVR:$Vt)>; + def: Pat<(srl HVI32:$Vs, HVI32:$Vt), (V6_vlsrwv HvxVR:$Vs, HvxVR:$Vt)>; + + def: Pat<(VecI16 (bswap HVI16:$Vs)), + (V6_vdelta HvxVR:$Vs, (V6_lvsplatw (A2_tfrsi 0x01010101)))>; + def: Pat<(VecI32 (bswap HVI32:$Vs)), + (V6_vdelta HvxVR:$Vs, (V6_lvsplatw (A2_tfrsi 0x03030303)))>; + + def: Pat<(VecI8 (ctpop HVI8:$Vs)), + (V6_vpackeb (V6_vpopcounth (HiVec (V6_vunpackub HvxVR:$Vs))), + (V6_vpopcounth (LoVec (V6_vunpackub HvxVR:$Vs))))>; + def: Pat<(VecI16 (ctpop HVI16:$Vs)), (V6_vpopcounth HvxVR:$Vs)>; + def: Pat<(VecI32 (ctpop HVI32:$Vs)), + (V6_vaddw (LoVec (V6_vzh (V6_vpopcounth HvxVR:$Vs))), + (HiVec (V6_vzh (V6_vpopcounth HvxVR:$Vs))))>; + + def: Pat<(VecI8 (ctlz 
HVI8:$Vs)), + (V6_vsubb (V6_vpackeb (V6_vcl0h (HiVec (V6_vunpackub HvxVR:$Vs))), + (V6_vcl0h (LoVec (V6_vunpackub HvxVR:$Vs)))), + (V6_lvsplatw (A2_tfrsi 0x08080808)))>; + def: Pat<(VecI16 (ctlz HVI16:$Vs)), (V6_vcl0h HvxVR:$Vs)>; + def: Pat<(VecI32 (ctlz HVI32:$Vs)), (V6_vcl0w HvxVR:$Vs)>; +} + +class HvxSel_pat<InstHexagon MI, PatFrag RegPred> + : Pat<(select I1:$Pu, RegPred:$Vs, RegPred:$Vt), + (MI I1:$Pu, RegPred:$Vs, RegPred:$Vt)>; + +let Predicates = [UseHVX] in { + def: HvxSel_pat<PS_vselect, HVI8>; + def: HvxSel_pat<PS_vselect, HVI16>; + def: HvxSel_pat<PS_vselect, HVI32>; + def: HvxSel_pat<PS_wselect, HWI8>; + def: HvxSel_pat<PS_wselect, HWI16>; + def: HvxSel_pat<PS_wselect, HWI32>; +} + +let Predicates = [UseHVX] in { + def: Pat<(VecQ8 (qtrue)), (PS_qtrue)>; + def: Pat<(VecQ16 (qtrue)), (PS_qtrue)>; + def: Pat<(VecQ32 (qtrue)), (PS_qtrue)>; + def: Pat<(VecQ8 (qfalse)), (PS_qfalse)>; + def: Pat<(VecQ16 (qfalse)), (PS_qfalse)>; + def: Pat<(VecQ32 (qfalse)), (PS_qfalse)>; + + def: Pat<(vnot HQ8:$Qs), (V6_pred_not HvxQR:$Qs)>; + def: Pat<(vnot HQ16:$Qs), (V6_pred_not HvxQR:$Qs)>; + def: Pat<(vnot HQ32:$Qs), (V6_pred_not HvxQR:$Qs)>; + def: Pat<(qnot HQ8:$Qs), (V6_pred_not HvxQR:$Qs)>; + def: Pat<(qnot HQ16:$Qs), (V6_pred_not HvxQR:$Qs)>; + def: Pat<(qnot HQ32:$Qs), (V6_pred_not HvxQR:$Qs)>; + + def: OpR_RR_pat<V6_pred_and, And, VecQ8, HQ8>; + def: OpR_RR_pat<V6_pred_and, And, VecQ16, HQ16>; + def: OpR_RR_pat<V6_pred_and, And, VecQ32, HQ32>; + def: OpR_RR_pat<V6_pred_or, Or, VecQ8, HQ8>; + def: OpR_RR_pat<V6_pred_or, Or, VecQ16, HQ16>; + def: OpR_RR_pat<V6_pred_or, Or, VecQ32, HQ32>; + def: OpR_RR_pat<V6_pred_xor, Xor, VecQ8, HQ8>; + def: OpR_RR_pat<V6_pred_xor, Xor, VecQ16, HQ16>; + def: OpR_RR_pat<V6_pred_xor, Xor, VecQ32, HQ32>; + + def: OpR_RR_pat<V6_pred_and_n, Not2<And>, VecQ8, HQ8>; + def: OpR_RR_pat<V6_pred_and_n, Not2<And>, VecQ16, HQ16>; + def: OpR_RR_pat<V6_pred_and_n, Not2<And>, VecQ32, HQ32>; + def: OpR_RR_pat<V6_pred_or_n, Not2<Or>, VecQ8, HQ8>; + def: OpR_RR_pat<V6_pred_or_n, Not2<Or>, VecQ16, HQ16>; + def: OpR_RR_pat<V6_pred_or_n, Not2<Or>, VecQ32, HQ32>; + + def: OpR_RR_pat<V6_veqb, seteq, VecQ8, HVI8>; + def: OpR_RR_pat<V6_veqh, seteq, VecQ16, HVI16>; + def: OpR_RR_pat<V6_veqw, seteq, VecQ32, HVI32>; + def: OpR_RR_pat<V6_vgtb, setgt, VecQ8, HVI8>; + def: OpR_RR_pat<V6_vgth, setgt, VecQ16, HVI16>; + def: OpR_RR_pat<V6_vgtw, setgt, VecQ32, HVI32>; + def: OpR_RR_pat<V6_vgtub, setugt, VecQ8, HVI8>; + def: OpR_RR_pat<V6_vgtuh, setugt, VecQ16, HVI16>; + def: OpR_RR_pat<V6_vgtuw, setugt, VecQ32, HVI32>; + + def: AccRRR_pat<V6_veqb_and, And, seteq, HQ8, HVI8, HVI8>; + def: AccRRR_pat<V6_veqb_or, Or, seteq, HQ8, HVI8, HVI8>; + def: AccRRR_pat<V6_veqb_xor, Xor, seteq, HQ8, HVI8, HVI8>; + def: AccRRR_pat<V6_veqh_and, And, seteq, HQ16, HVI16, HVI16>; + def: AccRRR_pat<V6_veqh_or, Or, seteq, HQ16, HVI16, HVI16>; + def: AccRRR_pat<V6_veqh_xor, Xor, seteq, HQ16, HVI16, HVI16>; + def: AccRRR_pat<V6_veqw_and, And, seteq, HQ32, HVI32, HVI32>; + def: AccRRR_pat<V6_veqw_or, Or, seteq, HQ32, HVI32, HVI32>; + def: AccRRR_pat<V6_veqw_xor, Xor, seteq, HQ32, HVI32, HVI32>; + + def: AccRRR_pat<V6_vgtb_and, And, setgt, HQ8, HVI8, HVI8>; + def: AccRRR_pat<V6_vgtb_or, Or, setgt, HQ8, HVI8, HVI8>; + def: AccRRR_pat<V6_vgtb_xor, Xor, setgt, HQ8, HVI8, HVI8>; + def: AccRRR_pat<V6_vgth_and, And, setgt, HQ16, HVI16, HVI16>; + def: AccRRR_pat<V6_vgth_or, Or, setgt, HQ16, HVI16, HVI16>; + def: AccRRR_pat<V6_vgth_xor, Xor, setgt, HQ16, HVI16, HVI16>; + def: AccRRR_pat<V6_vgtw_and, And, setgt, HQ32, 
HVI32, HVI32>; + def: AccRRR_pat<V6_vgtw_or, Or, setgt, HQ32, HVI32, HVI32>; + def: AccRRR_pat<V6_vgtw_xor, Xor, setgt, HQ32, HVI32, HVI32>; + + def: AccRRR_pat<V6_vgtub_and, And, setugt, HQ8, HVI8, HVI8>; + def: AccRRR_pat<V6_vgtub_or, Or, setugt, HQ8, HVI8, HVI8>; + def: AccRRR_pat<V6_vgtub_xor, Xor, setugt, HQ8, HVI8, HVI8>; + def: AccRRR_pat<V6_vgtuh_and, And, setugt, HQ16, HVI16, HVI16>; + def: AccRRR_pat<V6_vgtuh_or, Or, setugt, HQ16, HVI16, HVI16>; + def: AccRRR_pat<V6_vgtuh_xor, Xor, setugt, HQ16, HVI16, HVI16>; + def: AccRRR_pat<V6_vgtuw_and, And, setugt, HQ32, HVI32, HVI32>; + def: AccRRR_pat<V6_vgtuw_or, Or, setugt, HQ32, HVI32, HVI32>; + def: AccRRR_pat<V6_vgtuw_xor, Xor, setugt, HQ32, HVI32, HVI32>; +} diff --git a/lib/Target/Hexagon/HexagonPseudo.td b/lib/Target/Hexagon/HexagonPseudo.td index b2d66317b66e..fd7466349ecd 100644 --- a/lib/Target/Hexagon/HexagonPseudo.td +++ b/lib/Target/Hexagon/HexagonPseudo.td @@ -24,7 +24,7 @@ let PrintMethod = "printGlobalOperand" in { let isPseudo = 1 in { let isCodeGenOnly = 0 in def A2_iconst : Pseudo<(outs IntRegs:$Rd32), - (ins s27_2Imm:$Ii), "${Rd32}=iconst(#${Ii})">; + (ins s27_2Imm:$Ii), "${Rd32} = iconst(#${Ii})">; def DUPLEX_Pseudo : InstHexagon<(outs), (ins s32_0Imm:$offset), "DUPLEX", [], "", DUPLEX, TypePSEUDO>; @@ -34,7 +34,7 @@ let isExtendable = 1, opExtendable = 1, opExtentBits = 6, isAsmParserOnly = 1 in def TFRI64_V2_ext : InstHexagon<(outs DoubleRegs:$dst), (ins s32_0Imm:$src1, s8_0Imm:$src2), - "$dst=combine(#$src1,#$src2)", [], "", + "$dst = combine(#$src1,#$src2)", [], "", A2_combineii.Itinerary, TypeALU32_2op>, OpcodeHexagon; // HI/LO Instructions @@ -44,7 +44,7 @@ class REG_IMMED<string RegHalf, bit Rs, bits<3> MajOp, bit MinOp, InstHexagon rootInst> : InstHexagon<(outs IntRegs:$dst), (ins u16_0Imm:$imm_value), - "$dst"#RegHalf#"=#$imm_value", [], "", + "$dst"#RegHalf#" = #$imm_value", [], "", rootInst.Itinerary, rootInst.Type>, OpcodeHexagon { bits<5> dst; bits<32> imm_value; @@ -102,6 +102,13 @@ def ENDLOOP1 : Endloop<(outs), (ins b30_2Imm:$offset), []>; } +let isBranch = 1, isTerminator = 1, hasSideEffects = 0, + Defs = [PC, LC0, LC1], Uses = [SA0, SA1, LC0, LC1] in { +def ENDLOOP01 : Endloop<(outs), (ins b30_2Imm:$offset), + ":endloop01", + []>; +} + let isExtendable = 1, isExtentSigned = 1, opExtentBits = 9, opExtentAlign = 2, opExtendable = 0, hasSideEffects = 0 in class LOOP_iBase<string mnemonic, InstHexagon rootInst> @@ -316,7 +323,7 @@ def LDriw_pred : LDInst<(outs PredRegs:$dst), // Load modifier. 
let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 13, isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 0 in -def LDriw_mod : LDInst<(outs ModRegs:$dst), +def LDriw_ctr : LDInst<(outs CtrRegs:$dst), (ins IntRegs:$addr, s32_0Imm:$off), ".error \"should not emit\"", []>; @@ -399,42 +406,42 @@ let isCall = 1, Uses = [R29, R31], isAsmParserOnly = 1 in { } // Vector store pseudos -let Predicates = [HasV60T, UseHVX], isPseudo = 1, isCodeGenOnly = 1, +let Predicates = [HasV60,UseHVX], isPseudo = 1, isCodeGenOnly = 1, mayStore = 1, accessSize = HVXVectorAccess, hasSideEffects = 0 in class STrivv_template<RegisterClass RC, InstHexagon rootInst> : InstHexagon<(outs), (ins IntRegs:$addr, s32_0Imm:$off, RC:$src), "", [], "", rootInst.Itinerary, rootInst.Type>; def PS_vstorerw_ai: STrivv_template<HvxWR, V6_vS32b_ai>, - Requires<[HasV60T,UseHVX]>; + Requires<[HasV60,UseHVX]>; def PS_vstorerw_nt_ai: STrivv_template<HvxWR, V6_vS32b_nt_ai>, - Requires<[HasV60T,UseHVX]>; + Requires<[HasV60,UseHVX]>; def PS_vstorerwu_ai: STrivv_template<HvxWR, V6_vS32Ub_ai>, - Requires<[HasV60T,UseHVX]>; + Requires<[HasV60,UseHVX]>; let isPseudo = 1, isCodeGenOnly = 1, mayStore = 1, hasSideEffects = 0 in def PS_vstorerq_ai: Pseudo<(outs), (ins IntRegs:$Rs, s32_0Imm:$Off, HvxQR:$Qt), "", []>, - Requires<[HasV60T,UseHVX]>; + Requires<[HasV60,UseHVX]>; // Vector load pseudos -let Predicates = [HasV60T, UseHVX], isPseudo = 1, isCodeGenOnly = 1, +let Predicates = [HasV60, UseHVX], isPseudo = 1, isCodeGenOnly = 1, mayLoad = 1, accessSize = HVXVectorAccess, hasSideEffects = 0 in class LDrivv_template<RegisterClass RC, InstHexagon rootInst> : InstHexagon<(outs RC:$dst), (ins IntRegs:$addr, s32_0Imm:$off), "", [], "", rootInst.Itinerary, rootInst.Type>; def PS_vloadrw_ai: LDrivv_template<HvxWR, V6_vL32b_ai>, - Requires<[HasV60T,UseHVX]>; + Requires<[HasV60,UseHVX]>; def PS_vloadrw_nt_ai: LDrivv_template<HvxWR, V6_vL32b_nt_ai>, - Requires<[HasV60T,UseHVX]>; + Requires<[HasV60,UseHVX]>; def PS_vloadrwu_ai: LDrivv_template<HvxWR, V6_vL32Ub_ai>, - Requires<[HasV60T,UseHVX]>; + Requires<[HasV60,UseHVX]>; let isPseudo = 1, isCodeGenOnly = 1, mayLoad = 1, hasSideEffects = 0 in def PS_vloadrq_ai: Pseudo<(outs HvxQR:$Qd), (ins IntRegs:$Rs, s32_0Imm:$Off), "", []>, - Requires<[HasV60T,UseHVX]>; + Requires<[HasV60,UseHVX]>; let isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 0 in @@ -443,10 +450,20 @@ class VSELInst<dag outs, dag ins, InstHexagon rootInst> def PS_vselect: VSELInst<(outs HvxVR:$dst), (ins PredRegs:$src1, HvxVR:$src2, HvxVR:$src3), V6_vcmov>, - Requires<[HasV60T,UseHVX]>; + Requires<[HasV60,UseHVX]>; def PS_wselect: VSELInst<(outs HvxWR:$dst), (ins PredRegs:$src1, HvxWR:$src2, HvxWR:$src3), V6_vccombine>, - Requires<[HasV60T,UseHVX]>; + Requires<[HasV60,UseHVX]>; + +let hasSideEffects = 0, isReMaterializable = 1, isPseudo = 1, + isCodeGenOnly = 1 in { + def PS_qtrue: InstHexagon<(outs HvxQR:$Qd), (ins), "", [], "", + V6_veqw.Itinerary, TypeCVI_VA>; + def PS_qfalse: InstHexagon<(outs HvxQR:$Qd), (ins), "", [], "", + V6_vgtw.Itinerary, TypeCVI_VA>; + def PS_vdd0: InstHexagon<(outs HvxWR:$Vd), (ins), "", [], "", + V6_vsubw_dv.Itinerary, TypeCVI_VA_DV>; +} // Store predicate. let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 13, @@ -457,8 +474,8 @@ def STriw_pred : STInst<(outs), // Store modifier. 
let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 13, isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 0 in -def STriw_mod : STInst<(outs), - (ins IntRegs:$addr, s32_0Imm:$off, ModRegs:$src1), +def STriw_ctr : STInst<(outs), + (ins IntRegs:$addr, s32_0Imm:$off, CtrRegs:$src1), ".error \"should not emit\"", []>; let isExtendable = 1, opExtendable = 1, opExtentBits = 6, @@ -499,3 +516,46 @@ def DuplexIClassC: InstDuplex < 0xC >; def DuplexIClassD: InstDuplex < 0xD >; def DuplexIClassE: InstDuplex < 0xE >; def DuplexIClassF: InstDuplex < 0xF >; + +// Pseudos for circular buffer instructions. These are needed in order to +// allocate the correct pair of CSx and Mx registers. +multiclass NewCircularLoad<RegisterClass RC, MemAccessSize MS> { + +let isCodeGenOnly = 1, isPseudo = 1, Defs = [CS], Uses = [CS], + addrMode = PostInc, accessSize = MS, hasSideEffects = 0 in { + def NAME#_pci : LDInst<(outs RC:$Rd32, IntRegs:$Rx32), + (ins IntRegs:$Rx32in, s4_0Imm:$Ii, ModRegs:$Mu2, IntRegs:$Cs), + ".error \"should not emit\" ", [], "$Rx32 = $Rx32in", tc_4403ca65>; + + def NAME#_pcr : LDInst<(outs RC:$Rd32, IntRegs:$Rx32), + (ins IntRegs:$Rx32in, ModRegs:$Mu2, IntRegs:$Cs), + ".error \"should not emit\" ", [], "$Rx32 = $Rx32in", tc_2fc0c436>; +} +} + +defm PS_loadrub : NewCircularLoad<IntRegs, ByteAccess>; +defm PS_loadrb : NewCircularLoad<IntRegs, ByteAccess>; +defm PS_loadruh : NewCircularLoad<IntRegs, HalfWordAccess>; +defm PS_loadrh : NewCircularLoad<IntRegs, HalfWordAccess>; +defm PS_loadri : NewCircularLoad<IntRegs, WordAccess>; +defm PS_loadrd : NewCircularLoad<DoubleRegs, DoubleWordAccess>; + +multiclass NewCircularStore<RegisterClass RC, MemAccessSize MS> { + +let isCodeGenOnly = 1, isPseudo = 1, Defs = [CS], Uses = [CS], + addrMode = PostInc, accessSize = MS, hasSideEffects = 0 in { + def NAME#_pci : STInst<(outs IntRegs:$Rx32), + (ins IntRegs:$Rx32in, s4_0Imm:$Ii, ModRegs:$Mu2, RC:$Rt32, IntRegs:$Cs), + ".error \"should not emit\" ", [], "$Rx32 = $Rx32in", tc_9fdb5406>; + + def NAME#_pcr : STInst<(outs IntRegs:$Rx32), + (ins IntRegs:$Rx32in, ModRegs:$Mu2, RC:$Rt32, IntRegs:$Cs), + ".error \"should not emit\" ", [], "$Rx32 = $Rx32in", tc_f86c328a>; +} +} + +defm PS_storerb : NewCircularStore<IntRegs, ByteAccess>; +defm PS_storerh : NewCircularStore<IntRegs, HalfWordAccess>; +defm PS_storerf : NewCircularStore<IntRegs, HalfWordAccess>; +defm PS_storeri : NewCircularStore<IntRegs, WordAccess>; +defm PS_storerd : NewCircularStore<DoubleRegs, WordAccess>; diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/lib/Target/Hexagon/HexagonRegisterInfo.cpp index 85d6a6b4089e..2e11f875c0f9 100644 --- a/lib/Target/Hexagon/HexagonRegisterInfo.cpp +++ b/lib/Target/Hexagon/HexagonRegisterInfo.cpp @@ -19,6 +19,7 @@ #include "HexagonTargetMachine.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -145,6 +146,13 @@ BitVector HexagonRegisterInfo::getReservedRegs(const MachineFunction &MF) Reserved.set(Hexagon::R30); Reserved.set(Hexagon::R31); Reserved.set(Hexagon::VTMP); + + // Guest registers. + Reserved.set(Hexagon::GELR); // G0 + Reserved.set(Hexagon::GSR); // G1 + Reserved.set(Hexagon::GOSP); // G2 + Reserved.set(Hexagon::G3); // G3 + // Control registers. 
Reserved.set(Hexagon::SA0); // C0 Reserved.set(Hexagon::LC0); // C1 @@ -171,6 +179,9 @@ BitVector HexagonRegisterInfo::getReservedRegs(const MachineFunction &MF) Reserved.set(Hexagon::C8); Reserved.set(Hexagon::USR_OVF); + if (MF.getSubtarget<HexagonSubtarget>().hasReservedR19()) + Reserved.set(Hexagon::R19); + for (int x = Reserved.find_first(); x >= 0; x = Reserved.find_next(x)) markSuperRegs(Reserved, x); @@ -233,6 +244,55 @@ void HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, } +bool HexagonRegisterInfo::shouldCoalesce(MachineInstr *MI, + const TargetRegisterClass *SrcRC, unsigned SubReg, + const TargetRegisterClass *DstRC, unsigned DstSubReg, + const TargetRegisterClass *NewRC, LiveIntervals &LIS) const { + // Coalescing will extend the live interval of the destination register. + // If the destination register is a vector pair, avoid introducing function + // calls into the interval, since it could result in a spilling of a pair + // instead of a single vector. + MachineFunction &MF = *MI->getParent()->getParent(); + const HexagonSubtarget &HST = MF.getSubtarget<HexagonSubtarget>(); + if (!HST.useHVXOps() || NewRC->getID() != Hexagon::HvxWRRegClass.getID()) + return true; + bool SmallSrc = SrcRC->getID() == Hexagon::HvxVRRegClass.getID(); + bool SmallDst = DstRC->getID() == Hexagon::HvxVRRegClass.getID(); + if (!SmallSrc && !SmallDst) + return true; + + unsigned DstReg = MI->getOperand(0).getReg(); + unsigned SrcReg = MI->getOperand(1).getReg(); + const SlotIndexes &Indexes = *LIS.getSlotIndexes(); + auto HasCall = [&Indexes] (const LiveInterval::Segment &S) { + for (SlotIndex I = S.start.getBaseIndex(), E = S.end.getBaseIndex(); + I != E; I = I.getNextIndex()) { + if (const MachineInstr *MI = Indexes.getInstructionFromIndex(I)) + if (MI->isCall()) + return true; + } + return false; + }; + + if (SmallSrc == SmallDst) { + // Both must be true, because the case for both being false was + // checked earlier. Both registers will be coalesced into a register + // of a wider class (HvxWR), and we don't want its live range to + // span over calls. + return !any_of(LIS.getInterval(DstReg), HasCall) && + !any_of(LIS.getInterval(SrcReg), HasCall); + } + + // If one register is large (HvxWR) and the other is small (HvxVR), then + // coalescing is ok if the large is already live across a function call, + // or if the small one is not. + unsigned SmallReg = SmallSrc ? SrcReg : DstReg; + unsigned LargeReg = SmallSrc ? 
DstReg : SrcReg; + return any_of(LIS.getInterval(LargeReg), HasCall) || + !any_of(LIS.getInterval(SmallReg), HasCall); +} + + unsigned HexagonRegisterInfo::getRARegister() const { return Hexagon::R31; } @@ -283,6 +343,11 @@ bool HexagonRegisterInfo::useFPForScavengingIndex(const MachineFunction &MF) return MF.getSubtarget<HexagonSubtarget>().getFrameLowering()->hasFP(MF); } +const TargetRegisterClass * +HexagonRegisterInfo::getPointerRegClass(const MachineFunction &MF, + unsigned Kind) const { + return &Hexagon::IntRegsRegClass; +} unsigned HexagonRegisterInfo::getFirstCallerSavedNonParamReg() const { return Hexagon::R6; diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.h b/lib/Target/Hexagon/HexagonRegisterInfo.h index 4ead57da8fa1..497dc45236b1 100644 --- a/lib/Target/Hexagon/HexagonRegisterInfo.h +++ b/lib/Target/Hexagon/HexagonRegisterInfo.h @@ -39,6 +39,8 @@ public: BitVector getReservedRegs(const MachineFunction &MF) const override; + bool enableMultipleCopyHints() const override { return true; } + void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, unsigned FIOperandNum, RegScavenger *RS = nullptr) const override; @@ -61,6 +63,10 @@ public: return true; } + bool shouldCoalesce(MachineInstr *MI, const TargetRegisterClass *SrcRC, + unsigned SubReg, const TargetRegisterClass *DstRC, unsigned DstSubReg, + const TargetRegisterClass *NewRC, LiveIntervals &LIS) const override; + // Debug information queries. unsigned getRARegister() const; unsigned getFrameRegister(const MachineFunction &MF) const override; @@ -75,6 +81,10 @@ public: unsigned getFirstCallerSavedNonParamReg() const; + const TargetRegisterClass * + getPointerRegClass(const MachineFunction &MF, + unsigned Kind = 0) const override; + bool isEHReturnCalleeSaveReg(unsigned Reg) const; }; diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.td b/lib/Target/Hexagon/HexagonRegisterInfo.td index 1d1e85e7ac7e..1fe1ef4ac572 100644 --- a/lib/Target/Hexagon/HexagonRegisterInfo.td +++ b/lib/Target/Hexagon/HexagonRegisterInfo.td @@ -67,6 +67,17 @@ let Namespace = "Hexagon" in { let HWEncoding{0} = num; } + // Rg - Guest/Hypervisor registers + class Rg<bits<5> num, string n, + list<string> alt = [], list<Register> alias = []> : + HexagonReg<num, n, alt, alias>; + + // Rgg - 64-bit Guest/Hypervisor registers + class Rgg<bits<5> num, string n, list<Register> subregs> : + HexagonDoubleReg<num, n, subregs> { + let SubRegs = subregs; + } + def isub_lo : SubRegIndex<32>; def isub_hi : SubRegIndex<32, 32>; def vsub_lo : SubRegIndex<512>; @@ -200,40 +211,87 @@ let Namespace = "Hexagon" in { def Q1 : Rq<1, "q1">, DwarfRegNum<[132]>; def Q2 : Rq<2, "q2">, DwarfRegNum<[133]>; def Q3 : Rq<3, "q3">, DwarfRegNum<[134]>; + + // Guest Registers + def GELR: Rg<0, "gelr", ["g0"]>, DwarfRegNum<[220]>; + def GSR: Rg<1, "gsr", ["g1"]>, DwarfRegNum<[221]>; + def GOSP: Rg<2, "gosp", ["g2"]>, DwarfRegNum<[222]>; + def G3: Rg<3, "gbadva", ["g3"]>, DwarfRegNum<[223]>; + def G4: Rg<4, "g4">, DwarfRegNum<[224]>; + def G5: Rg<5, "g5">, DwarfRegNum<[225]>; + def G6: Rg<6, "g6">, DwarfRegNum<[226]>; + def G7: Rg<7, "g7">, DwarfRegNum<[227]>; + def G8: Rg<8, "g8">, DwarfRegNum<[228]>; + def G9: Rg<9, "g9">, DwarfRegNum<[229]>; + def G10: Rg<10, "g10">, DwarfRegNum<[230]>; + def G11: Rg<11, "g11">, DwarfRegNum<[231]>; + def G12: Rg<12, "g12">, DwarfRegNum<[232]>; + def G13: Rg<13, "g13">, DwarfRegNum<[233]>; + def G14: Rg<14, "g14">, DwarfRegNum<[234]>; + def G15: Rg<15, "g15">, DwarfRegNum<[235]>; + def GPMUCNT4: Rg<16, "gpmucnt4", ["g16"]>, 
DwarfRegNum<[236]>; + def GPMUCNT5: Rg<17, "gpmucnt5", ["g17"]>, DwarfRegNum<[237]>; + def GPMUCNT6: Rg<18, "gpmucnt6", ["g18"]>, DwarfRegNum<[238]>; + def GPMUCNT7: Rg<19, "gpmucnt7", ["g19"]>, DwarfRegNum<[239]>; + def G20: Rg<20, "g20">, DwarfRegNum<[240]>; + def G21: Rg<21, "g21">, DwarfRegNum<[241]>; + def G22: Rg<22, "g22">, DwarfRegNum<[242]>; + def G23: Rg<23, "g23">, DwarfRegNum<[243]>; + def GPCYCLELO: Rg<24, "gpcyclelo", ["g24"]>, DwarfRegNum<[244]>; + def GPCYCLEHI: Rg<25, "gpcyclehi", ["g25"]>, DwarfRegNum<[245]>; + def GPMUCNT0: Rg<26, "gpmucnt0", ["g26"]>, DwarfRegNum<[246]>; + def GPMUCNT1: Rg<27, "gpmucnt1", ["g27"]>, DwarfRegNum<[247]>; + def GPMUCNT2: Rg<28, "gpmucnt2", ["g28"]>, DwarfRegNum<[248]>; + def GPMUCNT3: Rg<29, "gpmucnt3", ["g29"]>, DwarfRegNum<[249]>; + def G30: Rg<30, "g30">, DwarfRegNum<[250]>; + def G31: Rg<31, "g31">, DwarfRegNum<[251]>; + + // Guest Register Pairs + let SubRegIndices = [isub_lo, isub_hi], CoveredBySubRegs = 1 in { + def G1_0 : Rgg<0, "g1:0", [GELR, GSR]>, DwarfRegNum<[220]>; + def G3_2 : Rgg<2, "g3:2", [GOSP, G3]>, DwarfRegNum<[222]>; + def G5_4 : Rgg<4, "g5:4", [G4, G5]>, DwarfRegNum<[224]>; + def G7_6 : Rgg<6, "g7:6", [G6, G7]>, DwarfRegNum<[226]>; + def G9_8 : Rgg<8, "g9:8", [G8, G9]>, DwarfRegNum<[228]>; + def G11_10 : Rgg<10, "g11:10", [G10, G11]>, DwarfRegNum<[230]>; + def G13_12 : Rgg<12, "g13:12", [G12, G13]>, DwarfRegNum<[232]>; + def G15_14 : Rgg<14, "g15:14", [G14, G15]>, DwarfRegNum<[234]>; + def G17_16 : Rgg<16, "g17:16", [GPMUCNT4, GPMUCNT5]>, DwarfRegNum<[236]>; + def G19_18 : Rgg<18, "g19:18", [GPMUCNT6, GPMUCNT7]>, DwarfRegNum<[238]>; + def G21_20 : Rgg<20, "g21:20", [G20, G21]>, DwarfRegNum<[240]>; + def G23_22 : Rgg<22, "g23:22", [G22, G23]>, DwarfRegNum<[242]>; + def G25_24 : Rgg<24, "g25:24", [GPCYCLELO, GPCYCLEHI]>, DwarfRegNum<[244]>; + def G27_26 : Rgg<26, "g27:26", [GPMUCNT0, GPMUCNT1]>, DwarfRegNum<[246]>; + def G29_28 : Rgg<28, "g29:28", [GPMUCNT2, GPMUCNT3]>, DwarfRegNum<[248]>; + def G31_30 : Rgg<30, "g31:30", [G30, G31]>, DwarfRegNum<[250]>; + } + } // HVX types -def VecI1 - : ValueTypeByHwMode<[Hvx64, Hvx64old, Hvx128, Hvx128old, DefaultMode], - [v512i1, v512i1, v1024i1, v1024i1, v512i1]>; -def VecI8 - : ValueTypeByHwMode<[Hvx64, Hvx64old, Hvx128, Hvx128old, DefaultMode], - [v64i8, v64i8, v128i8, v128i8, v64i8]>; -def VecI16 - : ValueTypeByHwMode<[Hvx64, Hvx64old, Hvx128, Hvx128old, DefaultMode], - [v32i16, v32i16, v64i16, v64i16, v32i16]>; -def VecI32 - : ValueTypeByHwMode<[Hvx64, Hvx64old, Hvx128, Hvx128old, DefaultMode], - [v16i32, v16i32, v32i32, v32i32, v16i32]>; -def VecPI8 - : ValueTypeByHwMode<[Hvx64, Hvx64old, Hvx128, Hvx128old, DefaultMode], - [v128i8, v128i8, v256i8, v256i8, v128i8]>; -def VecPI16 - : ValueTypeByHwMode<[Hvx64, Hvx64old, Hvx128, Hvx128old, DefaultMode], - [v64i16, v64i16, v128i16, v128i16, v64i16]>; -def VecPI32 - : ValueTypeByHwMode<[Hvx64, Hvx64old, Hvx128, Hvx128old, DefaultMode], - [v32i32, v32i32, v64i32, v64i32, v32i32]>; -def VecQ8 - : ValueTypeByHwMode<[Hvx64, Hvx64old, Hvx128, Hvx128old, DefaultMode], - [v64i1, v64i1, v128i1, v128i1, v64i1]>; -def VecQ16 - : ValueTypeByHwMode<[Hvx64, Hvx64old, Hvx128, Hvx128old, DefaultMode], - [v32i1, v32i1, v64i1, v64i1, v32i1]>; -def VecQ32 - : ValueTypeByHwMode<[Hvx64, Hvx64old, Hvx128, Hvx128old, DefaultMode], - [v16i1, v16i1, v32i1, v32i1, v16i1]>; +def VecI1: ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode], + [v512i1, v1024i1, v512i1]>; +def VecI8: ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode], + [v64i8, v128i8, v64i8]>; +def 
VecI16: ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode], + [v32i16, v64i16, v32i16]>; +def VecI32: ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode], + [v16i32, v32i32, v16i32]>; + +def VecPI8: ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode], + [v128i8, v256i8, v128i8]>; +def VecPI16: ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode], + [v64i16, v128i16, v64i16]>; +def VecPI32: ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode], + [v32i32, v64i32, v32i32]>; + +def VecQ8: ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode], + [v64i1, v128i1, v64i1]>; +def VecQ16: ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode], + [v32i1, v64i1, v32i1]>; +def VecQ32: ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode], + [v16i1, v32i1, v16i1]>; // HVX register classes @@ -242,7 +300,7 @@ def VecQ32 // FIXME: the register order should be defined in terms of the preferred // allocation order... // -def IntRegs : RegisterClass<"Hexagon", [i32, f32, v32i1, v4i8, v2i16], 32, +def IntRegs : RegisterClass<"Hexagon", [i32, f32, v4i8, v2i16], 32, (add (sequence "R%u", 0, 9), (sequence "R%u", 12, 28), R10, R11, R29, R30, R31)>; @@ -254,8 +312,7 @@ def GeneralSubRegs : RegisterClass<"Hexagon", [i32], 32, def IntRegsLow8 : RegisterClass<"Hexagon", [i32], 32, (add R7, R6, R5, R4, R3, R2, R1, R0)> ; -def DoubleRegs : RegisterClass<"Hexagon", - [i64, f64, v64i1, v8i8, v4i16, v2i32], 64, +def DoubleRegs : RegisterClass<"Hexagon", [i64, f64, v8i8, v4i16, v2i32], 64, (add (sequence "D%u", 0, 4), (sequence "D%u", 6, 13), D5, D14, D15)>; def GeneralDoubleLow8Regs : RegisterClass<"Hexagon", [i64], 64, @@ -301,6 +358,25 @@ def CtrRegs64 : RegisterClass<"Hexagon", [i64], 64, (add C1_0, C3_2, C5_4, C7_6, C9_8, C11_10, CS, UPCYCLE, C17_16, PKTCOUNT, UTIMER)>; +let Size = 32, isAllocatable = 0 in +def GuestRegs : RegisterClass<"Hexagon", [i32], 32, + (add GELR, GSR, GOSP, + (sequence "G%u", 3, 15), + GPMUCNT4, GPMUCNT5, GPMUCNT6, GPMUCNT7, + G20, G21, G22, G23, + GPCYCLELO, GPCYCLEHI, GPMUCNT0, GPMUCNT1, + GPMUCNT2, GPMUCNT3, + G30, G31)>; + +let Size = 64, isAllocatable = 0 in +def GuestRegs64 : RegisterClass<"Hexagon", [i64], 64, + (add G1_0, G3_2, + G5_4, G7_6, G9_8, G11_10, G13_12, G15_14, + G17_16, G19_18, + G21_20, G23_22, + G25_24, G27_26, G29_28, + G31_30)>; + // These registers are new for v62 and onward. // The function RegisterMatchesArch() uses this list for validation. 
let isAllocatable = 0 in @@ -313,7 +389,6 @@ let Size = 32, isAllocatable = 0 in def V65Regs : RegisterClass<"Hexagon", [i32], 32, (add VTMP)>; - def HexagonCSR : CalleeSavedRegs<(add R16, R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27)>; diff --git a/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp b/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp index 3fe4cc73d2f3..c41f0d3c085c 100644 --- a/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp +++ b/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp @@ -60,14 +60,14 @@ INITIALIZE_PASS(HexagonSplitConst32AndConst64, "split-const-for-sdata", "Hexagon Split Const32s and Const64s", false, false) bool HexagonSplitConst32AndConst64::runOnMachineFunction(MachineFunction &Fn) { - const HexagonTargetObjectFile &TLOF = - *static_cast<const HexagonTargetObjectFile *>( - Fn.getTarget().getObjFileLowering()); - if (TLOF.isSmallDataEnabled()) - return true; + auto &HST = Fn.getSubtarget<HexagonSubtarget>(); + auto &HTM = static_cast<const HexagonTargetMachine&>(Fn.getTarget()); + auto &TLOF = *HTM.getObjFileLowering(); + if (HST.useSmallData() && TLOF.isSmallDataEnabled()) + return false; - const TargetInstrInfo *TII = Fn.getSubtarget().getInstrInfo(); - const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo(); + const TargetInstrInfo *TII = HST.getInstrInfo(); + const TargetRegisterInfo *TRI = HST.getRegisterInfo(); // Loop over all of the basic blocks for (MachineBasicBlock &B : Fn) { @@ -109,7 +109,6 @@ bool HexagonSplitConst32AndConst64::runOnMachineFunction(MachineFunction &Fn) { //===----------------------------------------------------------------------===// // Public Constructor Functions //===----------------------------------------------------------------------===// - FunctionPass *llvm::createHexagonSplitConst32AndConst64() { return new HexagonSplitConst32AndConst64(); } diff --git a/lib/Target/Hexagon/HexagonSplitDouble.cpp b/lib/Target/Hexagon/HexagonSplitDouble.cpp index c9f5400018e8..e018785f24d8 100644 --- a/lib/Target/Hexagon/HexagonSplitDouble.cpp +++ b/lib/Target/Hexagon/HexagonSplitDouble.cpp @@ -26,6 +26,7 @@ #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/Config/llvm-config.h" #include "llvm/IR/DebugLoc.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" @@ -55,6 +56,8 @@ static cl::opt<int> MaxHSDR("max-hsdr", cl::Hidden, cl::init(-1), cl::desc("Maximum number of split partitions")); static cl::opt<bool> MemRefsFixed("hsdr-no-mem", cl::Hidden, cl::init(true), cl::desc("Do not split loads or stores")); + static cl::opt<bool> SplitAll("hsdr-split-all", cl::Hidden, cl::init(false), + cl::desc("Split all partitions")); namespace { @@ -62,9 +65,7 @@ namespace { public: static char ID; - HexagonSplitDoubleRegs() : MachineFunctionPass(ID) { - initializeHexagonSplitDoubleRegsPass(*PassRegistry::getPassRegistry()); - } + HexagonSplitDoubleRegs() : MachineFunctionPass(ID) {} StringRef getPassName() const override { return "Hexagon Split Double Registers"; @@ -97,6 +98,7 @@ namespace { bool isFixedInstr(const MachineInstr *MI) const; void partitionRegisters(UUSetMap &P2Rs); int32_t profit(const MachineInstr *MI) const; + int32_t profit(unsigned Reg) const; bool isProfitable(const USet &Part, LoopRegMap &IRM) const; void collectIndRegsForLoop(const MachineLoop *L, USet &Rs); @@ -161,7 +163,7 @@ bool HexagonSplitDoubleRegs::isFixedInstr(const MachineInstr *MI) const { if (MI->mayLoad() || MI->mayStore()) 
if (MemRefsFixed || isVolatileInstr(MI)) return true; - if (MI->isDebugValue()) + if (MI->isDebugInstr()) return false; unsigned Opc = MI->getOpcode(); @@ -244,7 +246,7 @@ void HexagonSplitDoubleRegs::partitionRegisters(UUSetMap &P2Rs) { if (FixedRegs[x]) continue; unsigned R = TargetRegisterInfo::index2VirtReg(x); - DEBUG(dbgs() << printReg(R, TRI) << " ~~"); + LLVM_DEBUG(dbgs() << printReg(R, TRI) << " ~~"); USet &Asc = AssocMap[R]; for (auto U = MRI->use_nodbg_begin(R), Z = MRI->use_nodbg_end(); U != Z; ++U) { @@ -267,13 +269,13 @@ void HexagonSplitDoubleRegs::partitionRegisters(UUSetMap &P2Rs) { unsigned u = TargetRegisterInfo::virtReg2Index(T); if (FixedRegs[u]) continue; - DEBUG(dbgs() << ' ' << printReg(T, TRI)); + LLVM_DEBUG(dbgs() << ' ' << printReg(T, TRI)); Asc.insert(T); // Make it symmetric. AssocMap[T].insert(R); } } - DEBUG(dbgs() << '\n'); + LLVM_DEBUG(dbgs() << '\n'); } UUMap R2P; @@ -304,15 +306,10 @@ void HexagonSplitDoubleRegs::partitionRegisters(UUSetMap &P2Rs) { P2Rs[I.second].insert(I.first); } -static inline int32_t profitImm(unsigned Lo, unsigned Hi) { +static inline int32_t profitImm(unsigned Imm) { int32_t P = 0; - bool LoZ1 = false, HiZ1 = false; - if (Lo == 0 || Lo == 0xFFFFFFFF) - P += 10, LoZ1 = true; - if (Hi == 0 || Hi == 0xFFFFFFFF) - P += 10, HiZ1 = true; - if (!LoZ1 && !HiZ1 && Lo == Hi) - P += 3; + if (Imm == 0 || Imm == 0xFFFFFFFF) + P += 10; return P; } @@ -342,21 +339,28 @@ int32_t HexagonSplitDoubleRegs::profit(const MachineInstr *MI) const { uint64_t D = MI->getOperand(1).getImm(); unsigned Lo = D & 0xFFFFFFFFULL; unsigned Hi = D >> 32; - return profitImm(Lo, Hi); + return profitImm(Lo) + profitImm(Hi); } case Hexagon::A2_combineii: - case Hexagon::A4_combineii: - return profitImm(MI->getOperand(1).getImm(), - MI->getOperand(2).getImm()); + case Hexagon::A4_combineii: { + const MachineOperand &Op1 = MI->getOperand(1); + const MachineOperand &Op2 = MI->getOperand(2); + int32_t Prof1 = Op1.isImm() ? profitImm(Op1.getImm()) : 0; + int32_t Prof2 = Op2.isImm() ? profitImm(Op2.getImm()) : 0; + return Prof1 + Prof2; + } case Hexagon::A4_combineri: ImmX++; // Fall through into A4_combineir. LLVM_FALLTHROUGH; case Hexagon::A4_combineir: { ImmX++; - int64_t V = MI->getOperand(ImmX).getImm(); - if (V == 0 || V == -1) - return 10; + const MachineOperand &OpX = MI->getOperand(ImmX); + if (OpX.isImm()) { + int64_t V = OpX.getImm(); + if (V == 0 || V == -1) + return 10; + } // Fall through into A2_combinew. 
LLVM_FALLTHROUGH; } @@ -368,8 +372,11 @@ int32_t HexagonSplitDoubleRegs::profit(const MachineInstr *MI) const { case Hexagon::A2_andp: case Hexagon::A2_orp: - case Hexagon::A2_xorp: - return 1; + case Hexagon::A2_xorp: { + unsigned Rs = MI->getOperand(1).getReg(); + unsigned Rt = MI->getOperand(2).getReg(); + return profit(Rs) + profit(Rt); + } case Hexagon::S2_asl_i_p_or: { unsigned S = MI->getOperand(3).getImm(); @@ -393,6 +400,25 @@ int32_t HexagonSplitDoubleRegs::profit(const MachineInstr *MI) const { return 0; } +int32_t HexagonSplitDoubleRegs::profit(unsigned Reg) const { + assert(TargetRegisterInfo::isVirtualRegister(Reg)); + + const MachineInstr *DefI = MRI->getVRegDef(Reg); + switch (DefI->getOpcode()) { + case Hexagon::A2_tfrpi: + case Hexagon::CONST64: + case Hexagon::A2_combineii: + case Hexagon::A4_combineii: + case Hexagon::A4_combineri: + case Hexagon::A4_combineir: + case Hexagon::A2_combinew: + return profit(DefI); + default: + break; + } + return 0; +} + bool HexagonSplitDoubleRegs::isProfitable(const USet &Part, LoopRegMap &IRM) const { unsigned FixedNum = 0, LoopPhiNum = 0; @@ -442,7 +468,9 @@ bool HexagonSplitDoubleRegs::isProfitable(const USet &Part, LoopRegMap &IRM) if (FixedNum > 0 && LoopPhiNum > 0) TotalP -= 20*LoopPhiNum; - DEBUG(dbgs() << "Partition profit: " << TotalP << '\n'); + LLVM_DEBUG(dbgs() << "Partition profit: " << TotalP << '\n'); + if (SplitAll) + return true; return TotalP > 0; } @@ -535,7 +563,7 @@ void HexagonSplitDoubleRegs::collectIndRegsForLoop(const MachineLoop *L, Rs.insert(CmpR1); Rs.insert(CmpR2); - DEBUG({ + LLVM_DEBUG({ dbgs() << "For loop at " << printMBBReference(*HB) << " ind regs: "; dump_partition(dbgs(), Rs, *TRI); dbgs() << '\n'; @@ -710,23 +738,21 @@ void HexagonSplitDoubleRegs::splitCombine(MachineInstr *MI, assert(F != PairMap.end()); const UUPair &P = F->second; - if (Op1.isImm()) { + if (!Op1.isReg()) { BuildMI(B, MI, DL, TII->get(Hexagon::A2_tfrsi), P.second) - .addImm(Op1.getImm()); - } else if (Op1.isReg()) { + .add(Op1); + } else { BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), P.second) .addReg(Op1.getReg(), getRegState(Op1), Op1.getSubReg()); - } else - llvm_unreachable("Unexpected operand"); + } - if (Op2.isImm()) { + if (!Op2.isReg()) { BuildMI(B, MI, DL, TII->get(Hexagon::A2_tfrsi), P.first) - .addImm(Op2.getImm()); - } else if (Op2.isReg()) { + .add(Op2); + } else { BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), P.first) .addReg(Op2.getReg(), getRegState(Op2), Op2.getSubReg()); - } else - llvm_unreachable("Unexpected operand"); + } } void HexagonSplitDoubleRegs::splitExt(MachineInstr *MI, @@ -970,7 +996,7 @@ bool HexagonSplitDoubleRegs::splitInstr(MachineInstr *MI, const UUPairMap &PairMap) { using namespace Hexagon; - DEBUG(dbgs() << "Splitting: " << *MI); + LLVM_DEBUG(dbgs() << "Splitting: " << *MI); bool Split = false; unsigned Opc = MI->getOpcode(); @@ -1104,8 +1130,8 @@ bool HexagonSplitDoubleRegs::splitPartition(const USet &Part) { const TargetRegisterClass *IntRC = &Hexagon::IntRegsRegClass; bool Changed = false; - DEBUG(dbgs() << "Splitting partition: "; dump_partition(dbgs(), Part, *TRI); - dbgs() << '\n'); + LLVM_DEBUG(dbgs() << "Splitting partition: "; + dump_partition(dbgs(), Part, *TRI); dbgs() << '\n'); UUPairMap PairMap; @@ -1122,8 +1148,9 @@ bool HexagonSplitDoubleRegs::splitPartition(const USet &Part) { unsigned LoR = MRI->createVirtualRegister(IntRC); unsigned HiR = MRI->createVirtualRegister(IntRC); - DEBUG(dbgs() << "Created mapping: " << printReg(DR, TRI) << " -> " - << printReg(HiR, TRI) << 
':' << printReg(LoR, TRI) << '\n'); + LLVM_DEBUG(dbgs() << "Created mapping: " << printReg(DR, TRI) << " -> " + << printReg(HiR, TRI) << ':' << printReg(LoR, TRI) + << '\n'); PairMap.insert(std::make_pair(DR, UUPair(LoR, HiR))); } @@ -1160,12 +1187,12 @@ bool HexagonSplitDoubleRegs::splitPartition(const USet &Part) { } bool HexagonSplitDoubleRegs::runOnMachineFunction(MachineFunction &MF) { - DEBUG(dbgs() << "Splitting double registers in function: " - << MF.getName() << '\n'); - if (skipFunction(MF.getFunction())) return false; + LLVM_DEBUG(dbgs() << "Splitting double registers in function: " + << MF.getName() << '\n'); + auto &ST = MF.getSubtarget<HexagonSubtarget>(); TRI = ST.getRegisterInfo(); TII = ST.getInstrInfo(); @@ -1178,7 +1205,7 @@ bool HexagonSplitDoubleRegs::runOnMachineFunction(MachineFunction &MF) { collectIndRegs(IRM); partitionRegisters(P2Rs); - DEBUG({ + LLVM_DEBUG({ dbgs() << "Register partitioning: (partition #0 is fixed)\n"; for (UUSetMap::iterator I = P2Rs.begin(), E = P2Rs.end(); I != E; ++I) { dbgs() << '#' << I->first << " -> "; @@ -1196,7 +1223,8 @@ bool HexagonSplitDoubleRegs::runOnMachineFunction(MachineFunction &MF) { if (Limit >= 0 && Counter >= Limit) break; USet &Part = I->second; - DEBUG(dbgs() << "Calculating profit for partition #" << I->first << '\n'); + LLVM_DEBUG(dbgs() << "Calculating profit for partition #" << I->first + << '\n'); if (!isProfitable(Part, IRM)) continue; Counter++; diff --git a/lib/Target/Hexagon/HexagonStoreWidening.cpp b/lib/Target/Hexagon/HexagonStoreWidening.cpp index 300f6de33552..991af047387e 100644 --- a/lib/Target/Hexagon/HexagonStoreWidening.cpp +++ b/lib/Target/Hexagon/HexagonStoreWidening.cpp @@ -433,10 +433,11 @@ bool HexagonStoreWidening::createWideStores(InstrGroup &OG, InstrGroup &NG, const MCInstrDesc &StD = TII->get(WOpc); MachineOperand &MR = FirstSt->getOperand(0); int64_t Off = FirstSt->getOperand(1).getImm(); - MachineInstr *StI = BuildMI(*MF, DL, StD) - .addReg(MR.getReg(), getKillRegState(MR.isKill())) - .addImm(Off) - .addImm(Val); + MachineInstr *StI = + BuildMI(*MF, DL, StD) + .addReg(MR.getReg(), getKillRegState(MR.isKill()), MR.getSubReg()) + .addImm(Off) + .addImm(Val); StI->addMemOperand(*MF, NewM); NG.push_back(StI); } else { @@ -455,10 +456,11 @@ bool HexagonStoreWidening::createWideStores(InstrGroup &OG, InstrGroup &NG, const MCInstrDesc &StD = TII->get(WOpc); MachineOperand &MR = FirstSt->getOperand(0); int64_t Off = FirstSt->getOperand(1).getImm(); - MachineInstr *StI = BuildMI(*MF, DL, StD) - .addReg(MR.getReg(), getKillRegState(MR.isKill())) - .addImm(Off) - .addReg(VReg, RegState::Kill); + MachineInstr *StI = + BuildMI(*MF, DL, StD) + .addReg(MR.getReg(), getKillRegState(MR.isKill()), MR.getSubReg()) + .addImm(Off) + .addReg(VReg, RegState::Kill); StI->addMemOperand(*MF, NewM); NG.push_back(StI); } @@ -472,7 +474,7 @@ bool HexagonStoreWidening::createWideStores(InstrGroup &OG, InstrGroup &NG, // from OG was (in the order in which they appeared in the basic block). // (The ordering in OG does not have to match the order in the basic block.) 
bool HexagonStoreWidening::replaceStores(InstrGroup &OG, InstrGroup &NG) { - DEBUG({ + LLVM_DEBUG({ dbgs() << "Replacing:\n"; for (auto I : OG) dbgs() << " " << *I; @@ -576,7 +578,7 @@ bool HexagonStoreWidening::processBasicBlock(MachineBasicBlock &MBB) { }; for (auto &G : SGs) { assert(G.size() > 1 && "Store group with fewer than 2 elements"); - std::sort(G.begin(), G.end(), Less); + llvm::sort(G.begin(), G.end(), Less); Changed |= processStoreGroup(G); } diff --git a/lib/Target/Hexagon/HexagonSubtarget.cpp b/lib/Target/Hexagon/HexagonSubtarget.cpp index 6f1f6c46a107..0686d6eb6118 100644 --- a/lib/Target/Hexagon/HexagonSubtarget.cpp +++ b/lib/Target/Hexagon/HexagonSubtarget.cpp @@ -15,13 +15,14 @@ #include "HexagonInstrInfo.h" #include "HexagonRegisterInfo.h" #include "HexagonSubtarget.h" +#include "MCTargetDesc/HexagonMCTargetDesc.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" -#include "MCTargetDesc/HexagonMCTargetDesc.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachineScheduler.h" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/CodeGen/ScheduleDAGInstrs.h" #include "llvm/Support/CommandLine.h" @@ -38,17 +39,6 @@ using namespace llvm; #define GET_SUBTARGETINFO_TARGET_DESC #include "HexagonGenSubtargetInfo.inc" -static cl::opt<bool> EnableMemOps("enable-hexagon-memops", - cl::Hidden, cl::ZeroOrMore, cl::ValueDisallowed, cl::init(true), - cl::desc("Generate V4 MEMOP in code generation for Hexagon target")); - -static cl::opt<bool> DisableMemOps("disable-hexagon-memops", - cl::Hidden, cl::ZeroOrMore, cl::ValueDisallowed, cl::init(false), - cl::desc("Do not generate V4 MEMOP in code generation for Hexagon target")); - -static cl::opt<bool> EnableIEEERndNear("enable-hexagon-ieee-rnd-near", - cl::Hidden, cl::ZeroOrMore, cl::init(false), - cl::desc("Generate non-chopped conversion from fp to int.")); static cl::opt<bool> EnableBSBSched("enable-bsb-sched", cl::Hidden, cl::ZeroOrMore, cl::init(true)); @@ -60,9 +50,6 @@ static cl::opt<bool> EnableDotCurSched("enable-cur-sched", cl::Hidden, cl::ZeroOrMore, cl::init(true), cl::desc("Enable the scheduler to generate .cur")); -static cl::opt<bool> EnableVecFrwdSched("enable-evec-frwd-sched", - cl::Hidden, cl::ZeroOrMore, cl::init(true)); - static cl::opt<bool> DisableHexagonMISched("disable-hexagon-misched", cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::desc("Disable Hexagon MI Scheduling")); @@ -105,6 +92,7 @@ HexagonSubtarget::HexagonSubtarget(const Triple &TT, StringRef CPU, HexagonSubtarget & HexagonSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) { static std::map<StringRef, Hexagon::ArchEnum> CpuTable{ + {"generic", Hexagon::ArchEnum::V60}, {"hexagonv4", Hexagon::ArchEnum::V4}, {"hexagonv5", Hexagon::ArchEnum::V5}, {"hexagonv55", Hexagon::ArchEnum::V55}, @@ -123,9 +111,7 @@ HexagonSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) { UseHVX64BOps = false; UseLongCalls = false; - UseMemOps = DisableMemOps ? 
false : EnableMemOps; - ModeIEEERndNear = EnableIEEERndNear; - UseBSBScheduling = hasV60TOps() && EnableBSBSched; + UseBSBScheduling = hasV60Ops() && EnableBSBSched; ParseSubtargetFeatures(CPUString, FS); @@ -204,11 +190,14 @@ bool HexagonSubtarget::CallMutation::shouldTFRICallBind( Type == HexagonII::TypeALU64 || Type == HexagonII::TypeM; } -void HexagonSubtarget::CallMutation::apply(ScheduleDAGInstrs *DAG) { +void HexagonSubtarget::CallMutation::apply(ScheduleDAGInstrs *DAGInstrs) { + ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs); SUnit* LastSequentialCall = nullptr; - unsigned VRegHoldingRet = 0; - unsigned RetRegister; - SUnit* LastUseOfRet = nullptr; + // Map from virtual register to physical register from the copy. + DenseMap<unsigned, unsigned> VRegHoldingReg; + // Map from the physical register to the instruction that uses virtual + // register. This is used to create the barrier edge. + DenseMap<unsigned, SUnit *> LastVRegUse; auto &TRI = *DAG->MF.getSubtarget().getRegisterInfo(); auto &HII = *DAG->MF.getSubtarget<HexagonSubtarget>().getInstrInfo(); @@ -220,13 +209,15 @@ void HexagonSubtarget::CallMutation::apply(ScheduleDAGInstrs *DAG) { LastSequentialCall = &DAG->SUnits[su]; // Look for a compare that defines a predicate. else if (DAG->SUnits[su].getInstr()->isCompare() && LastSequentialCall) - DAG->SUnits[su].addPred(SDep(LastSequentialCall, SDep::Barrier)); + DAG->addEdge(&DAG->SUnits[su], SDep(LastSequentialCall, SDep::Barrier)); // Look for call and tfri* instructions. else if (SchedPredsCloser && LastSequentialCall && su > 1 && su < e-1 && shouldTFRICallBind(HII, DAG->SUnits[su], DAG->SUnits[su+1])) - DAG->SUnits[su].addPred(SDep(&DAG->SUnits[su-1], SDep::Barrier)); - // Prevent redundant register copies between two calls, which are caused by - // both the return value and the argument for the next call being in %r0. + DAG->addEdge(&DAG->SUnits[su], SDep(&DAG->SUnits[su-1], SDep::Barrier)); + // Prevent redundant register copies due to reads and writes of physical + // registers. The original motivation for this was the code generated + // between two calls, which are caused both the return value and the + // argument for the next call being in %r0. // Example: // 1: <call1> // 2: %vreg = COPY %r0 @@ -235,21 +226,37 @@ void HexagonSubtarget::CallMutation::apply(ScheduleDAGInstrs *DAG) { // 5: <call2> // The scheduler would often swap 3 and 4, so an additional register is // needed. This code inserts a Barrier dependence between 3 & 4 to prevent - // this. The same applies for %d0 and %v0/%w0, which are also handled. + // this. + // The code below checks for all the physical registers, not just R0/D0/V0. else if (SchedRetvalOptimization) { const MachineInstr *MI = DAG->SUnits[su].getInstr(); - if (MI->isCopy() && (MI->readsRegister(Hexagon::R0, &TRI) || - MI->readsRegister(Hexagon::V0, &TRI))) { - // %vreg = COPY %r0 - VRegHoldingRet = MI->getOperand(0).getReg(); - RetRegister = MI->getOperand(1).getReg(); - LastUseOfRet = nullptr; - } else if (VRegHoldingRet && MI->readsVirtualRegister(VRegHoldingRet)) - // <use of %X> - LastUseOfRet = &DAG->SUnits[su]; - else if (LastUseOfRet && MI->definesRegister(RetRegister, &TRI)) - // %r0 = ... 
- DAG->SUnits[su].addPred(SDep(LastUseOfRet, SDep::Barrier)); + if (MI->isCopy() && + TargetRegisterInfo::isPhysicalRegister(MI->getOperand(1).getReg())) { + // %vregX = COPY %r0 + VRegHoldingReg[MI->getOperand(0).getReg()] = MI->getOperand(1).getReg(); + LastVRegUse.erase(MI->getOperand(1).getReg()); + } else { + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) + continue; + if (MO.isUse() && !MI->isCopy() && + VRegHoldingReg.count(MO.getReg())) { + // <use of %vregX> + LastVRegUse[VRegHoldingReg[MO.getReg()]] = &DAG->SUnits[su]; + } else if (MO.isDef() && + TargetRegisterInfo::isPhysicalRegister(MO.getReg())) { + for (MCRegAliasIterator AI(MO.getReg(), &TRI, true); AI.isValid(); + ++AI) { + if (LastVRegUse.count(*AI) && + LastVRegUse[*AI] != &DAG->SUnits[su]) + // %r0 = ... + DAG->addEdge(&DAG->SUnits[su], SDep(LastVRegUse[*AI], SDep::Barrier)); + LastVRegUse.erase(*AI); + } + } + } + } } } } @@ -300,7 +307,7 @@ void HexagonSubtarget::BankConflictMutation::apply(ScheduleDAGInstrs *DAG) { } } -/// \brief Enable use of alias analysis during code generation (during MI +/// Enable use of alias analysis during code generation (during MI /// scheduling, DAGCombine, etc.). bool HexagonSubtarget::useAA() const { if (OptLevel != CodeGenOpt::None) @@ -308,7 +315,7 @@ bool HexagonSubtarget::useAA() const { return false; } -/// \brief Perform target specific adjustments to the latency of a schedule +/// Perform target specific adjustments to the latency of a schedule /// dependency. void HexagonSubtarget::adjustSchedDependency(SUnit *Src, SUnit *Dst, SDep &Dep) const { @@ -328,25 +335,30 @@ void HexagonSubtarget::adjustSchedDependency(SUnit *Src, SUnit *Dst, return; } - if (!hasV60TOps()) + if (!hasV60Ops()) return; - // If it's a REG_SEQUENCE, use its destination instruction to determine + // Set the latency for a copy to zero since we hope that is will get removed. + if (DstInst->isCopy()) + Dep.setLatency(0); + + // If it's a REG_SEQUENCE/COPY, use its destination instruction to determine // the correct latency. - if (DstInst->isRegSequence() && Dst->NumSuccs == 1) { - unsigned RSeqReg = DstInst->getOperand(0).getReg(); - MachineInstr *RSeqDst = Dst->Succs[0].getSUnit()->getInstr(); + if ((DstInst->isRegSequence() || DstInst->isCopy()) && Dst->NumSuccs == 1) { + unsigned DReg = DstInst->getOperand(0).getReg(); + MachineInstr *DDst = Dst->Succs[0].getSUnit()->getInstr(); unsigned UseIdx = -1; - for (unsigned OpNum = 0; OpNum < RSeqDst->getNumOperands(); OpNum++) { - const MachineOperand &MO = RSeqDst->getOperand(OpNum); - if (MO.isReg() && MO.getReg() && MO.isUse() && MO.getReg() == RSeqReg) { + for (unsigned OpNum = 0; OpNum < DDst->getNumOperands(); OpNum++) { + const MachineOperand &MO = DDst->getOperand(OpNum); + if (MO.isReg() && MO.getReg() && MO.isUse() && MO.getReg() == DReg) { UseIdx = OpNum; break; } } - unsigned RSeqLatency = (InstrInfo.getOperandLatency(&InstrItins, *SrcInst, - 0, *RSeqDst, UseIdx)); - Dep.setLatency(RSeqLatency); + int DLatency = (InstrInfo.getOperandLatency(&InstrItins, *SrcInst, + 0, *DDst, UseIdx)); + DLatency = std::max(DLatency, 0); + Dep.setLatency((unsigned)DLatency); } // Try to schedule uses near definitions to generate .cur. 
@@ -394,7 +406,7 @@ void HexagonSubtarget::updateLatency(MachineInstr &SrcInst, return; } - if (!hasV60TOps()) + if (!hasV60Ops()) return; auto &QII = static_cast<const HexagonInstrInfo&>(*getInstrInfo()); @@ -418,6 +430,7 @@ void HexagonSubtarget::restoreLatency(SUnit *Src, SUnit *Dst) const { } assert(DefIdx >= 0 && "Def Reg not found in Src MI"); MachineInstr *DstI = Dst->getInstr(); + SDep T = I; for (unsigned OpNum = 0; OpNum < DstI->getNumOperands(); OpNum++) { const MachineOperand &MO = DstI->getOperand(OpNum); if (MO.isReg() && MO.isUse() && MO.getReg() == DepR) { @@ -426,8 +439,7 @@ void HexagonSubtarget::restoreLatency(SUnit *Src, SUnit *Dst) const { // For some instructions (ex: COPY), we might end up with < 0 latency // as they don't have any Itinerary class associated with them. - if (Latency <= 0) - Latency = 1; + Latency = std::max(Latency, 0); I.setLatency(Latency); updateLatency(*SrcI, *DstI, I); @@ -435,11 +447,10 @@ void HexagonSubtarget::restoreLatency(SUnit *Src, SUnit *Dst) const { } // Update the latency of opposite edge too. - for (auto &J : Dst->Preds) { - if (J.getSUnit() != Src) - continue; - J.setLatency(I.getLatency()); - } + T.setSUnit(Src); + auto F = std::find(Dst->Preds.begin(), Dst->Preds.end(), T); + assert(F != Dst->Preds.end()); + F->setLatency(I.getLatency()); } } @@ -447,7 +458,7 @@ void HexagonSubtarget::restoreLatency(SUnit *Src, SUnit *Dst) const { void HexagonSubtarget::changeLatency(SUnit *Src, SUnit *Dst, unsigned Lat) const { for (auto &I : Src->Succs) { - if (I.getSUnit() != Dst) + if (!I.isAssignedRegDep() || I.getSUnit() != Dst) continue; SDep T = I; I.setLatency(Lat); @@ -456,7 +467,7 @@ void HexagonSubtarget::changeLatency(SUnit *Src, SUnit *Dst, unsigned Lat) T.setSUnit(Src); auto F = std::find(Dst->Preds.begin(), Dst->Preds.end(), T); assert(F != Dst->Preds.end()); - F->setLatency(I.getLatency()); + F->setLatency(Lat); } } @@ -519,13 +530,13 @@ bool HexagonSubtarget::isBestZeroLatency(SUnit *Src, SUnit *Dst, // Reassign the latency for the previous bests, which requires setting // the dependence edge in both directions. if (SrcBest != nullptr) { - if (!hasV60TOps()) + if (!hasV60Ops()) changeLatency(SrcBest, Dst, 1); else restoreLatency(SrcBest, Dst); } if (DstBest != nullptr) { - if (!hasV60TOps()) + if (!hasV60Ops()) changeLatency(Src, DstBest, 1); else restoreLatency(Src, DstBest); diff --git a/lib/Target/Hexagon/HexagonSubtarget.h b/lib/Target/Hexagon/HexagonSubtarget.h index af93f20d97fc..dc8d173a5057 100644 --- a/lib/Target/Hexagon/HexagonSubtarget.h +++ b/lib/Target/Hexagon/HexagonSubtarget.h @@ -32,9 +32,6 @@ #define GET_SUBTARGETINFO_HEADER #include "HexagonGenSubtargetInfo.inc" -#define Hexagon_SMALL_DATA_THRESHOLD 8 -#define Hexagon_SLOTS 4 - namespace llvm { class MachineInstr; @@ -46,12 +43,20 @@ class Triple; class HexagonSubtarget : public HexagonGenSubtargetInfo { virtual void anchor(); - bool UseMemOps, UseHVX64BOps, UseHVX128BOps; - bool UseLongCalls; - bool ModeIEEERndNear; + bool UseHVX64BOps = false; + bool UseHVX128BOps = false; + + bool UseLongCalls = false; + bool UseMemops = false; + bool UsePackets = false; + bool UseNewValueJumps = false; + bool UseNewValueStores = false; + bool UseSmallData = false; bool HasMemNoShuf = false; bool EnableDuplex = false; + bool ReservedR19 = false; + public: Hexagon::ArchEnum HexagonArchVersion; Hexagon::ArchEnum HexagonHVXVersion = Hexagon::ArchEnum::V4; @@ -115,44 +120,50 @@ public: /// subtarget options. Definition of function is auto generated by tblgen. 
void ParseSubtargetFeatures(StringRef CPU, StringRef FS); - bool useMemOps() const { return UseMemOps; } - bool hasV5TOps() const { + bool hasV5Ops() const { return getHexagonArchVersion() >= Hexagon::ArchEnum::V5; } - bool hasV5TOpsOnly() const { + bool hasV5OpsOnly() const { return getHexagonArchVersion() == Hexagon::ArchEnum::V5; } - bool hasV55TOps() const { + bool hasV55Ops() const { return getHexagonArchVersion() >= Hexagon::ArchEnum::V55; } - bool hasV55TOpsOnly() const { + bool hasV55OpsOnly() const { return getHexagonArchVersion() == Hexagon::ArchEnum::V55; } - bool hasV60TOps() const { + bool hasV60Ops() const { return getHexagonArchVersion() >= Hexagon::ArchEnum::V60; } - bool hasV60TOpsOnly() const { + bool hasV60OpsOnly() const { return getHexagonArchVersion() == Hexagon::ArchEnum::V60; } - bool hasV62TOps() const { + bool hasV62Ops() const { return getHexagonArchVersion() >= Hexagon::ArchEnum::V62; } - bool hasV62TOpsOnly() const { + bool hasV62OpsOnly() const { return getHexagonArchVersion() == Hexagon::ArchEnum::V62; } - bool hasV65TOps() const { + bool hasV65Ops() const { return getHexagonArchVersion() >= Hexagon::ArchEnum::V65; } - bool hasV65TOpsOnly() const { + bool hasV65OpsOnly() const { return getHexagonArchVersion() == Hexagon::ArchEnum::V65; } - bool modeIEEERndNear() const { return ModeIEEERndNear; } + bool useLongCalls() const { return UseLongCalls; } + bool useMemops() const { return UseMemops; } + bool usePackets() const { return UsePackets; } + bool useNewValueJumps() const { return UseNewValueJumps; } + bool useNewValueStores() const { return UseNewValueStores; } + bool useSmallData() const { return UseSmallData; } + bool useHVXOps() const { return HexagonHVXVersion > Hexagon::ArchEnum::V4; } bool useHVX128BOps() const { return useHVXOps() && UseHVX128BOps; } bool useHVX64BOps() const { return useHVXOps() && UseHVX64BOps; } + bool hasMemNoShuf() const { return HasMemNoShuf; } - bool useLongCalls() const { return UseLongCalls; } + bool hasReservedR19() const { return ReservedR19; } bool usePredicatedCalls() const; bool useBSBScheduling() const { return UseBSBScheduling; } @@ -170,11 +181,6 @@ public: const std::string &getCPUString () const { return CPUString; } - // Threshold for small data section - unsigned getSmallDataThreshold() const { - return Hexagon_SMALL_DATA_THRESHOLD; - } - const Hexagon::ArchEnum &getHexagonArchVersion() const { return HexagonArchVersion; } @@ -187,11 +193,11 @@ public: std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const override; - /// \brief Enable use of alias analysis during code generation (during MI + /// Enable use of alias analysis during code generation (during MI /// scheduling, DAGCombine, etc.). bool useAA() const override; - /// \brief Perform target specific adjustments to the latency of a schedule + /// Perform target specific adjustments to the latency of a schedule /// dependency. 
void adjustSchedDependency(SUnit *def, SUnit *use, SDep& dep) const override; @@ -238,6 +244,12 @@ public: return llvm::any_of(ElemTypes, [ElemTy] (MVT T) { return ElemTy == T; }); } + unsigned getTypeAlignment(MVT Ty) const { + if (isHVXVectorType(Ty, true)) + return getVectorLength(); + return Ty.getSizeInBits() / 8; + } + unsigned getL1CacheLineSize() const; unsigned getL1PrefetchDistance() const; diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp index 363b703fef28..2c75e9139ad7 100644 --- a/lib/Target/Hexagon/HexagonTargetMachine.cpp +++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp @@ -94,9 +94,8 @@ static cl::opt<bool> EnableVectorPrint("enable-hexagon-vector-print", cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::desc("Enable Hexagon Vector print instr pass")); -static cl::opt<bool> EnableTrapUnreachable("hexagon-trap-unreachable", - cl::Hidden, cl::ZeroOrMore, cl::init(false), - cl::desc("Enable generating trap for unreachable")); +static cl::opt<bool> EnableVExtractOpt("hexagon-opt-vextract", cl::Hidden, + cl::ZeroOrMore, cl::init(true), cl::desc("Enable vextract optimization")); /// HexagonTargetMachineModule - Note that this is used on hosts that /// cannot link in a library unless there are references into the @@ -122,7 +121,9 @@ SchedCustomRegistry("hexagon", "Run Hexagon's custom scheduler", namespace llvm { extern char &HexagonExpandCondsetsID; + void initializeHexagonBitSimplifyPass(PassRegistry&); void initializeHexagonConstExtendersPass(PassRegistry&); + void initializeHexagonConstPropagationPass(PassRegistry&); void initializeHexagonEarlyIfConversionPass(PassRegistry&); void initializeHexagonExpandCondsetsPass(PassRegistry&); void initializeHexagonGenMuxPass(PassRegistry&); @@ -133,6 +134,8 @@ namespace llvm { void initializeHexagonOptAddrModePass(PassRegistry&); void initializeHexagonPacketizerPass(PassRegistry&); void initializeHexagonRDFOptPass(PassRegistry&); + void initializeHexagonSplitDoubleRegsPass(PassRegistry&); + void initializeHexagonVExtractPass(PassRegistry&); Pass *createHexagonLoopIdiomPass(); Pass *createHexagonVectorLoopCarriedReusePass(); @@ -165,6 +168,7 @@ namespace llvm { FunctionPass *createHexagonSplitDoubleRegs(); FunctionPass *createHexagonStoreWidening(); FunctionPass *createHexagonVectorPrint(); + FunctionPass *createHexagonVExtract(); } // end namespace llvm; static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) { @@ -184,7 +188,9 @@ extern "C" void LLVMInitializeHexagonTarget() { RegisterTargetMachine<HexagonTargetMachine> X(getTheHexagonTarget()); PassRegistry &PR = *PassRegistry::getPassRegistry(); + initializeHexagonBitSimplifyPass(PR); initializeHexagonConstExtendersPass(PR); + initializeHexagonConstPropagationPass(PR); initializeHexagonEarlyIfConversionPass(PR); initializeHexagonGenMuxPass(PR); initializeHexagonHardwareLoopsPass(PR); @@ -194,6 +200,8 @@ extern "C" void LLVMInitializeHexagonTarget() { initializeHexagonOptAddrModePass(PR); initializeHexagonPacketizerPass(PR); initializeHexagonRDFOptPass(PR); + initializeHexagonSplitDoubleRegsPass(PR); + initializeHexagonVExtractPass(PR); } HexagonTargetMachine::HexagonTargetMachine(const Target &T, const Triple &TT, @@ -213,8 +221,6 @@ HexagonTargetMachine::HexagonTargetMachine(const Target &T, const Triple &TT, TT, CPU, FS, Options, getEffectiveRelocModel(RM), getEffectiveCodeModel(CM), (HexagonNoOpt ? 
CodeGenOpt::None : OL)), TLOF(make_unique<HexagonTargetObjectFile>()) { - if (EnableTrapUnreachable) - this->Options.TrapUnreachable = true; initializeHexagonExpandCondsetsPass(*PassRegistry::getPassRegistry()); initAsmInfo(); } @@ -299,6 +305,11 @@ void HexagonPassConfig::addIRPasses() { TargetPassConfig::addIRPasses(); bool NoOpt = (getOptLevel() == CodeGenOpt::None); + if (!NoOpt) { + addPass(createConstantPropagationPass()); + addPass(createDeadCodeEliminationPass()); + } + addPass(createAtomicExpandPass()); if (!NoOpt) { if (EnableLoopPrefetch) @@ -321,6 +332,8 @@ bool HexagonPassConfig::addInstSelector() { addPass(createHexagonISelDag(TM, getOptLevel())); if (!NoOpt) { + if (EnableVExtractOpt) + addPass(createHexagonVExtract()); // Create logical operations on predicate registers. if (EnableGenPred) addPass(createHexagonGenPredicate()); diff --git a/lib/Target/Hexagon/HexagonTargetObjectFile.cpp b/lib/Target/Hexagon/HexagonTargetObjectFile.cpp index ea86c9c42f47..e771f383dffa 100644 --- a/lib/Target/Hexagon/HexagonTargetObjectFile.cpp +++ b/lib/Target/Hexagon/HexagonTargetObjectFile.cpp @@ -74,7 +74,7 @@ static cl::opt<bool> if (TraceGVPlacement) { \ TRACE_TO(errs(), X); \ } else { \ - DEBUG(TRACE_TO(dbgs(), X)); \ + LLVM_DEBUG(TRACE_TO(dbgs(), X)); \ } \ } while (false) #endif @@ -200,11 +200,11 @@ MCSection *HexagonTargetObjectFile::getExplicitSectionGlobal( bool HexagonTargetObjectFile::isGlobalInSmallSection(const GlobalObject *GO, const TargetMachine &TM) const { // Only global variables, not functions. - DEBUG(dbgs() << "Checking if value is in small-data, -G" - << SmallDataThreshold << ": \"" << GO->getName() << "\": "); + LLVM_DEBUG(dbgs() << "Checking if value is in small-data, -G" + << SmallDataThreshold << ": \"" << GO->getName() << "\": "); const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GO); if (!GVar) { - DEBUG(dbgs() << "no, not a global variable\n"); + LLVM_DEBUG(dbgs() << "no, not a global variable\n"); return false; } @@ -213,19 +213,19 @@ bool HexagonTargetObjectFile::isGlobalInSmallSection(const GlobalObject *GO, // small data or not. This is how we can support mixing -G0/-G8 in LTO. if (GVar->hasSection()) { bool IsSmall = isSmallDataSection(GVar->getSection()); - DEBUG(dbgs() << (IsSmall ? "yes" : "no") << ", has section: " - << GVar->getSection() << '\n'); + LLVM_DEBUG(dbgs() << (IsSmall ? "yes" : "no") + << ", has section: " << GVar->getSection() << '\n'); return IsSmall; } if (GVar->isConstant()) { - DEBUG(dbgs() << "no, is a constant\n"); + LLVM_DEBUG(dbgs() << "no, is a constant\n"); return false; } bool IsLocal = GVar->hasLocalLinkage(); if (!StaticsInSData && IsLocal) { - DEBUG(dbgs() << "no, is static\n"); + LLVM_DEBUG(dbgs() << "no, is static\n"); return false; } @@ -234,7 +234,7 @@ bool HexagonTargetObjectFile::isGlobalInSmallSection(const GlobalObject *GO, GType = PT->getElementType(); if (isa<ArrayType>(GType)) { - DEBUG(dbgs() << "no, is an array\n"); + LLVM_DEBUG(dbgs() << "no, is an array\n"); return false; } @@ -244,22 +244,22 @@ bool HexagonTargetObjectFile::isGlobalInSmallSection(const GlobalObject *GO, // these objects end up in the sdata, the references will still be valid. 
if (StructType *ST = dyn_cast<StructType>(GType)) { if (ST->isOpaque()) { - DEBUG(dbgs() << "no, has opaque type\n"); + LLVM_DEBUG(dbgs() << "no, has opaque type\n"); return false; } } unsigned Size = GVar->getParent()->getDataLayout().getTypeAllocSize(GType); if (Size == 0) { - DEBUG(dbgs() << "no, has size 0\n"); + LLVM_DEBUG(dbgs() << "no, has size 0\n"); return false; } if (Size > SmallDataThreshold) { - DEBUG(dbgs() << "no, size exceeds sdata threshold: " << Size << '\n'); + LLVM_DEBUG(dbgs() << "no, size exceeds sdata threshold: " << Size << '\n'); return false; } - DEBUG(dbgs() << "yes\n"); + LLVM_DEBUG(dbgs() << "yes\n"); return true; } diff --git a/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp b/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp index d638503990ad..a496a17788d5 100644 --- a/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp +++ b/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp @@ -16,23 +16,59 @@ #include "HexagonTargetTransformInfo.h" #include "HexagonSubtarget.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/User.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Transforms/Utils/UnrollLoop.h" using namespace llvm; #define DEBUG_TYPE "hexagontti" +static cl::opt<bool> HexagonAutoHVX("hexagon-autohvx", cl::init(false), + cl::Hidden, cl::desc("Enable loop vectorizer for HVX")); + static cl::opt<bool> EmitLookupTables("hexagon-emit-lookup-tables", cl::init(true), cl::Hidden, cl::desc("Control lookup table emission on Hexagon target")); +// Constant "cost factor" to make floating point operations more expensive +// in terms of vectorization cost. This isn't the best way, but it should +// do. Ultimately, the cost should use cycles. +static const unsigned FloatFactor = 4; + +bool HexagonTTIImpl::useHVX() const { + return ST.useHVXOps() && HexagonAutoHVX; +} + +bool HexagonTTIImpl::isTypeForHVX(Type *VecTy) const { + assert(VecTy->isVectorTy()); + // Avoid types like <2 x i32*>. + if (!cast<VectorType>(VecTy)->getElementType()->isIntegerTy()) + return false; + EVT VecVT = EVT::getEVT(VecTy); + if (!VecVT.isSimple() || VecVT.getSizeInBits() <= 64) + return false; + if (ST.isHVXVectorType(VecVT.getSimpleVT())) + return true; + auto Action = TLI.getPreferredVectorAction(VecVT); + return Action == TargetLoweringBase::TypeWidenVector; +} + +unsigned HexagonTTIImpl::getTypeNumElements(Type *Ty) const { + if (Ty->isVectorTy()) + return Ty->getVectorNumElements(); + assert((Ty->isIntegerTy() || Ty->isFloatingPointTy()) && + "Expecting scalar type"); + return 1; +} + TargetTransformInfo::PopcntSupportKind HexagonTTIImpl::getPopcntSupport(unsigned IntTyWidthInBit) const { - // Return Fast Hardware support as every input < 64 bits will be promoted + // Return fast hardware support as every input < 64 bits will be promoted // to 64 bits. return TargetTransformInfo::PSK_FastHardware; } @@ -41,37 +77,223 @@ HexagonTTIImpl::getPopcntSupport(unsigned IntTyWidthInBit) const { void HexagonTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP) { UP.Runtime = UP.Partial = true; + // Only try to peel innermost loops with small runtime trip counts. 
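[Illustrative aside, not part of the patch: the guard just below fires for an innermost loop whose exact trip count SCEV cannot compute but whose maximum trip count is provably small, e.g. a hypothetical

    for (unsigned i = 0; i < (n < 5u ? n : 5u); ++i)  // exact count unknown,
      a[i] += 1;                                      // max count provably 5

and such a loop gets UP.PeelCount = 2.]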
+ if (L && L->empty() && canPeel(L) && + SE.getSmallConstantTripCount(L) == 0 && + SE.getSmallConstantMaxTripCount(L) > 0 && + SE.getSmallConstantMaxTripCount(L) <= 5) { + UP.PeelCount = 2; + } +} + +bool HexagonTTIImpl::shouldFavorPostInc() const { + return true; +} + +/// --- Vector TTI begin --- + +unsigned HexagonTTIImpl::getNumberOfRegisters(bool Vector) const { + if (Vector) + return useHVX() ? 32 : 0; + return 32; +} + +unsigned HexagonTTIImpl::getMaxInterleaveFactor(unsigned VF) { + return useHVX() ? 2 : 0; +} + +unsigned HexagonTTIImpl::getRegisterBitWidth(bool Vector) const { + return Vector ? getMinVectorRegisterBitWidth() : 32; +} + +unsigned HexagonTTIImpl::getMinVectorRegisterBitWidth() const { + return useHVX() ? ST.getVectorLength()*8 : 0; +} + +unsigned HexagonTTIImpl::getMinimumVF(unsigned ElemWidth) const { + return (8 * ST.getVectorLength()) / ElemWidth; +} + +unsigned HexagonTTIImpl::getScalarizationOverhead(Type *Ty, bool Insert, + bool Extract) { + return BaseT::getScalarizationOverhead(Ty, Insert, Extract); +} + +unsigned HexagonTTIImpl::getOperandsScalarizationOverhead( + ArrayRef<const Value*> Args, unsigned VF) { + return BaseT::getOperandsScalarizationOverhead(Args, VF); +} + +unsigned HexagonTTIImpl::getCallInstrCost(Function *F, Type *RetTy, + ArrayRef<Type*> Tys) { + return BaseT::getCallInstrCost(F, RetTy, Tys); +} + +unsigned HexagonTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, + ArrayRef<Value*> Args, FastMathFlags FMF, unsigned VF) { + return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF); +} + +unsigned HexagonTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, + ArrayRef<Type*> Tys, FastMathFlags FMF, + unsigned ScalarizationCostPassed) { + if (ID == Intrinsic::bswap) { + std::pair<int, MVT> LT = TLI.getTypeLegalizationCost(DL, RetTy); + return LT.first + 2; + } + return BaseT::getIntrinsicInstrCost(ID, RetTy, Tys, FMF, + ScalarizationCostPassed); +} + +unsigned HexagonTTIImpl::getAddressComputationCost(Type *Tp, + ScalarEvolution *SE, const SCEV *S) { + return 0; +} + +unsigned HexagonTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, + unsigned Alignment, unsigned AddressSpace, const Instruction *I) { + assert(Opcode == Instruction::Load || Opcode == Instruction::Store); + if (Opcode == Instruction::Store) + return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I); + + if (Src->isVectorTy()) { + VectorType *VecTy = cast<VectorType>(Src); + unsigned VecWidth = VecTy->getBitWidth(); + if (useHVX() && isTypeForHVX(VecTy)) { + unsigned RegWidth = getRegisterBitWidth(true); + Alignment = std::min(Alignment, RegWidth/8); + // Cost of HVX loads. + if (VecWidth % RegWidth == 0) + return VecWidth / RegWidth; + // Cost of constructing HVX vector from scalar loads. + unsigned AlignWidth = 8 * std::max(1u, Alignment); + unsigned NumLoads = alignTo(VecWidth, AlignWidth) / AlignWidth; + return 3*NumLoads; + } + + // Non-HVX vectors. + // Add extra cost for floating point types. + unsigned Cost = VecTy->getElementType()->isFloatingPointTy() ? FloatFactor + : 1; + Alignment = std::min(Alignment, 8u); + unsigned AlignWidth = 8 * std::max(1u, Alignment); + unsigned NumLoads = alignTo(VecWidth, AlignWidth) / AlignWidth; + if (Alignment == 4 || Alignment == 8) + return Cost * NumLoads; + // Loads of less than 32 bits will need extra inserts to compose a vector. 
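[Illustrative aside, not part of the patch: plugging assumed numbers into the non-HVX branch above for a v4i16 load (VecWidth = 64, integer elements, so Cost = 1):

    Alignment = 4:  AlignWidth = 32, NumLoads = 2  ->  cost = 1 * 2 = 2
    Alignment = 2:  AlignWidth = 16, NumLoads = 4  ->  cost = (3 - log2(2)) * 1 * 4 = 8

the second case falling through to the sub-32-bit computation just below.]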
+ unsigned LogA = Log2_32(Alignment); + return (3 - LogA) * Cost * NumLoads; + } + + return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I); +} + +unsigned HexagonTTIImpl::getMaskedMemoryOpCost(unsigned Opcode, + Type *Src, unsigned Alignment, unsigned AddressSpace) { + return BaseT::getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace); +} + +unsigned HexagonTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, + int Index, Type *SubTp) { + return 1; +} + +unsigned HexagonTTIImpl::getGatherScatterOpCost(unsigned Opcode, Type *DataTy, + Value *Ptr, bool VariableMask, unsigned Alignment) { + return BaseT::getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask, + Alignment); +} + +unsigned HexagonTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, + Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices, + unsigned Alignment, unsigned AddressSpace) { + return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, + Alignment, AddressSpace); +} + +unsigned HexagonTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, + Type *CondTy, const Instruction *I) { + if (ValTy->isVectorTy()) { + std::pair<int, MVT> LT = TLI.getTypeLegalizationCost(DL, ValTy); + if (Opcode == Instruction::FCmp) + return LT.first + FloatFactor * getTypeNumElements(ValTy); + } + return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I); } -unsigned HexagonTTIImpl::getNumberOfRegisters(bool vector) const { - return vector ? 0 : 32; +unsigned HexagonTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty, + TTI::OperandValueKind Opd1Info, TTI::OperandValueKind Opd2Info, + TTI::OperandValueProperties Opd1PropInfo, + TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value*> Args) { + if (Ty->isVectorTy()) { + std::pair<int, MVT> LT = TLI.getTypeLegalizationCost(DL, Ty); + if (LT.second.isFloatingPoint()) + return LT.first + FloatFactor * getTypeNumElements(Ty); + } + return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info, + Opd1PropInfo, Opd2PropInfo, Args); } +unsigned HexagonTTIImpl::getCastInstrCost(unsigned Opcode, Type *DstTy, + Type *SrcTy, const Instruction *I) { + if (SrcTy->isFPOrFPVectorTy() || DstTy->isFPOrFPVectorTy()) { + unsigned SrcN = SrcTy->isFPOrFPVectorTy() ? getTypeNumElements(SrcTy) : 0; + unsigned DstN = DstTy->isFPOrFPVectorTy() ? getTypeNumElements(DstTy) : 0; + + std::pair<int, MVT> SrcLT = TLI.getTypeLegalizationCost(DL, SrcTy); + std::pair<int, MVT> DstLT = TLI.getTypeLegalizationCost(DL, DstTy); + return std::max(SrcLT.first, DstLT.first) + FloatFactor * (SrcN + DstN); + } + return 1; +} + +unsigned HexagonTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, + unsigned Index) { + Type *ElemTy = Val->isVectorTy() ? cast<VectorType>(Val)->getElementType() + : Val; + if (Opcode == Instruction::InsertElement) { + // Need two rotations for non-zero index. + unsigned Cost = (Index != 0) ? 2 : 0; + if (ElemTy->isIntegerTy(32)) + return Cost; + // If it's not a 32-bit value, there will need to be an extract. 
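[Illustrative aside, not part of the patch: with the constants in this function, inserting an i32 at index 0 is free, inserting an i32 at a non-zero index costs 2 (the rotations), and inserting an i16 at a non-zero index costs 2 + 2 = 4, the extra 2 being the ExtractElement cost returned just below.]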
+ return Cost + getVectorInstrCost(Instruction::ExtractElement, Val, Index); + } + + if (Opcode == Instruction::ExtractElement) + return 2; + + return 1; +} + +/// --- Vector TTI end --- + unsigned HexagonTTIImpl::getPrefetchDistance() const { - return getST()->getL1PrefetchDistance(); + return ST.getL1PrefetchDistance(); } unsigned HexagonTTIImpl::getCacheLineSize() const { - return getST()->getL1CacheLineSize(); + return ST.getL1CacheLineSize(); } int HexagonTTIImpl::getUserCost(const User *U, ArrayRef<const Value *> Operands) { - auto isCastFoldedIntoLoad = [](const CastInst *CI) -> bool { + auto isCastFoldedIntoLoad = [this](const CastInst *CI) -> bool { if (!CI->isIntegerCast()) return false; + // Only extensions from an integer type shorter than 32-bit to i32 + // can be folded into the load. + const DataLayout &DL = getDataLayout(); + unsigned SBW = DL.getTypeSizeInBits(CI->getSrcTy()); + unsigned DBW = DL.getTypeSizeInBits(CI->getDestTy()); + if (DBW != 32 || SBW >= DBW) + return false; + const LoadInst *LI = dyn_cast<const LoadInst>(CI->getOperand(0)); // Technically, this code could allow multiple uses of the load, and // check if all the uses are the same extension operation, but this // should be sufficient for most cases. - if (!LI || !LI->hasOneUse()) - return false; - - // Only extensions from an integer type shorter than 32-bit to i32 - // can be folded into the load. - unsigned SBW = CI->getSrcTy()->getIntegerBitWidth(); - unsigned DBW = CI->getDestTy()->getIntegerBitWidth(); - return DBW == 32 && (SBW < DBW); + return LI && LI->hasOneUse(); }; if (const CastInst *CI = dyn_cast<const CastInst>(U)) @@ -81,5 +303,5 @@ int HexagonTTIImpl::getUserCost(const User *U, } bool HexagonTTIImpl::shouldBuildLookupTables() const { - return EmitLookupTables; + return EmitLookupTables; } diff --git a/lib/Target/Hexagon/HexagonTargetTransformInfo.h b/lib/Target/Hexagon/HexagonTargetTransformInfo.h index d2cd05012afa..a232f99fc407 100644 --- a/lib/Target/Hexagon/HexagonTargetTransformInfo.h +++ b/lib/Target/Hexagon/HexagonTargetTransformInfo.h @@ -37,16 +37,24 @@ class HexagonTTIImpl : public BasicTTIImplBase<HexagonTTIImpl> { friend BaseT; - const HexagonSubtarget *ST; - const HexagonTargetLowering *TLI; + const HexagonSubtarget &ST; + const HexagonTargetLowering &TLI; - const HexagonSubtarget *getST() const { return ST; } - const HexagonTargetLowering *getTLI() const { return TLI; } + const HexagonSubtarget *getST() const { return &ST; } + const HexagonTargetLowering *getTLI() const { return &TLI; } + + bool useHVX() const; + bool isTypeForHVX(Type *VecTy) const; + + // Returns the number of vector elements of Ty, if Ty is a vector type, + // or 1 if Ty is a scalar type. It is incorrect to call this function + // with any other type. + unsigned getTypeNumElements(Type *Ty) const; public: explicit HexagonTTIImpl(const HexagonTargetMachine *TM, const Function &F) - : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)), - TLI(ST->getTargetLowering()) {} + : BaseT(TM, F.getParent()->getDataLayout()), + ST(*TM->getSubtargetImpl(F)), TLI(*ST.getTargetLowering()) {} /// \name Scalar TTI Implementations /// @{ @@ -57,6 +65,9 @@ public: void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP); + /// Bias LSR towards creating post-increment opportunities. + bool shouldFavorPostInc() const; + // L1 cache prefetch. 
unsigned getPrefetchDistance() const; unsigned getCacheLineSize() const; @@ -67,6 +78,64 @@ public: /// @{ unsigned getNumberOfRegisters(bool vector) const; + unsigned getMaxInterleaveFactor(unsigned VF); + unsigned getRegisterBitWidth(bool Vector) const; + unsigned getMinVectorRegisterBitWidth() const; + unsigned getMinimumVF(unsigned ElemWidth) const; + + bool shouldMaximizeVectorBandwidth(bool OptSize) const { + return true; + } + bool supportsEfficientVectorElementLoadStore() { + return false; + } + bool hasBranchDivergence() { + return false; + } + bool enableAggressiveInterleaving(bool LoopHasReductions) { + return false; + } + bool prefersVectorizedAddressing() { + return false; + } + + unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract); + unsigned getOperandsScalarizationOverhead(ArrayRef<const Value*> Args, + unsigned VF); + unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type*> Tys); + unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, + ArrayRef<Value*> Args, FastMathFlags FMF, unsigned VF); + unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, + ArrayRef<Type*> Tys, FastMathFlags FMF, + unsigned ScalarizationCostPassed = UINT_MAX); + unsigned getAddressComputationCost(Type *Tp, ScalarEvolution *SE, + const SCEV *S); + unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, + unsigned AddressSpace, const Instruction *I = nullptr); + unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, + unsigned AddressSpace); + unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, + Type *SubTp); + unsigned getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr, + bool VariableMask, unsigned Alignment); + unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, + unsigned Factor, ArrayRef<unsigned> Indices, unsigned Alignment, + unsigned AddressSpace); + unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, + const Instruction *I); + unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, + TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue, + TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue, + TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None, + TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None, + ArrayRef<const Value *> Args = ArrayRef<const Value *>()); + unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, + const Instruction *I = nullptr); + unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index); + + unsigned getCFInstrCost(unsigned Opcode) { + return 1; + } /// @} @@ -77,5 +146,4 @@ public: }; } // end namespace llvm - #endif // LLVM_LIB_TARGET_HEXAGON_HEXAGONTARGETTRANSFORMINFO_H diff --git a/lib/Target/Hexagon/HexagonVExtract.cpp b/lib/Target/Hexagon/HexagonVExtract.cpp new file mode 100644 index 000000000000..929ac2bd0d93 --- /dev/null +++ b/lib/Target/Hexagon/HexagonVExtract.cpp @@ -0,0 +1,166 @@ +//===- HexagonVExtract.cpp ------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// This pass will replace multiple occurrences of V6_extractw from the same +// vector register with a combination of a vector store and scalar loads. 
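[Illustrative aside, not part of the patch: schematically, when one vector register feeds more than the threshold number of extracts, the rewrite is

    w0 = V6_extractw(v, i0)           vmem(slot) = v                // one store
    w1 = V6_extractw(v, i1)   ==>     w0 = memw(slot + (i0 & ~3))   // one scalar
    w2 = V6_extractw(v, i2)           w1 = memw(slot + (i1 & ~3))   // word load
                                      w2 = memw(slot + (i2 & ~3))   // per extract

trading repeated vector-to-scalar transfers for a single spill plus cheap scalar loads.]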
+//===----------------------------------------------------------------------===// + +#include "Hexagon.h" +#include "HexagonInstrInfo.h" +#include "HexagonRegisterInfo.h" +#include "HexagonSubtarget.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/PassSupport.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/CommandLine.h" + +#include <map> + +using namespace llvm; + +static cl::opt<unsigned> VExtractThreshold("hexagon-vextract-threshold", + cl::Hidden, cl::ZeroOrMore, cl::init(1), + cl::desc("Threshold for triggering vextract replacement")); + +namespace llvm { + void initializeHexagonVExtractPass(PassRegistry& Registry); + FunctionPass *createHexagonVExtract(); +} + +namespace { + class HexagonVExtract : public MachineFunctionPass { + public: + static char ID; + HexagonVExtract() : MachineFunctionPass(ID) {} + + StringRef getPassName() const override { + return "Hexagon optimize vextract"; + } + void getAnalysisUsage(AnalysisUsage &AU) const override { + MachineFunctionPass::getAnalysisUsage(AU); + } + bool runOnMachineFunction(MachineFunction &MF) override; + + private: + const HexagonSubtarget *HST = nullptr; + const HexagonInstrInfo *HII = nullptr; + + unsigned genElemLoad(MachineInstr *ExtI, unsigned BaseR, + MachineRegisterInfo &MRI); + }; + + char HexagonVExtract::ID = 0; +} + +INITIALIZE_PASS(HexagonVExtract, "hexagon-vextract", + "Hexagon optimize vextract", false, false) + +unsigned HexagonVExtract::genElemLoad(MachineInstr *ExtI, unsigned BaseR, + MachineRegisterInfo &MRI) { + MachineBasicBlock &ExtB = *ExtI->getParent(); + DebugLoc DL = ExtI->getDebugLoc(); + unsigned ElemR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); + + unsigned ExtIdxR = ExtI->getOperand(2).getReg(); + unsigned ExtIdxS = ExtI->getOperand(2).getSubReg(); + + // Simplified check for a compile-time constant value of ExtIdxR. 
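[Illustrative aside, not part of the patch: the index operand is a byte offset into the vector, masked to a word-aligned offset inside it. Assuming 128-byte HVX vectors, a constant index of 261 is masked by (128 - 1) & -4 = 124 to give 261 & 124 = 4, so the branch below emits the immediate-form load r = memw(base + 4); non-constant indices take the A2_andir/L4_loadri_rr path that follows instead.]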
+ if (ExtIdxS == 0) { + MachineInstr *DI = MRI.getVRegDef(ExtIdxR); + if (DI->getOpcode() == Hexagon::A2_tfrsi) { + unsigned V = DI->getOperand(1).getImm(); + V &= (HST->getVectorLength()-1) & -4u; + + BuildMI(ExtB, ExtI, DL, HII->get(Hexagon::L2_loadri_io), ElemR) + .addReg(BaseR) + .addImm(V); + return ElemR; + } + } + + unsigned IdxR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); + BuildMI(ExtB, ExtI, DL, HII->get(Hexagon::A2_andir), IdxR) + .add(ExtI->getOperand(2)) + .addImm(-4); + BuildMI(ExtB, ExtI, DL, HII->get(Hexagon::L4_loadri_rr), ElemR) + .addReg(BaseR) + .addReg(IdxR) + .addImm(0); + return ElemR; +} + +bool HexagonVExtract::runOnMachineFunction(MachineFunction &MF) { + HST = &MF.getSubtarget<HexagonSubtarget>(); + HII = HST->getInstrInfo(); + const auto &HRI = *HST->getRegisterInfo(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + MachineFrameInfo &MFI = MF.getFrameInfo(); + std::map<unsigned, SmallVector<MachineInstr*,4>> VExtractMap; + bool Changed = false; + + for (MachineBasicBlock &MBB : MF) { + for (MachineInstr &MI : MBB) { + unsigned Opc = MI.getOpcode(); + if (Opc != Hexagon::V6_extractw) + continue; + unsigned VecR = MI.getOperand(1).getReg(); + VExtractMap[VecR].push_back(&MI); + } + } + + for (auto &P : VExtractMap) { + unsigned VecR = P.first; + if (P.second.size() <= VExtractThreshold) + continue; + + const auto &VecRC = *MRI.getRegClass(VecR); + int FI = MFI.CreateSpillStackObject(HRI.getSpillSize(VecRC), + HRI.getSpillAlignment(VecRC)); + MachineInstr *DefI = MRI.getVRegDef(VecR); + MachineBasicBlock::iterator At = std::next(DefI->getIterator()); + MachineBasicBlock &DefB = *DefI->getParent(); + unsigned StoreOpc = VecRC.getID() == Hexagon::HvxVRRegClassID + ? Hexagon::V6_vS32b_ai + : Hexagon::PS_vstorerw_ai; + BuildMI(DefB, At, DefI->getDebugLoc(), HII->get(StoreOpc)) + .addFrameIndex(FI) + .addImm(0) + .addReg(VecR); + + unsigned VecSize = HRI.getRegSizeInBits(VecRC) / 8; + + for (MachineInstr *ExtI : P.second) { + assert(ExtI->getOpcode() == Hexagon::V6_extractw); + unsigned SR = ExtI->getOperand(1).getSubReg(); + assert(ExtI->getOperand(1).getReg() == VecR); + + MachineBasicBlock &ExtB = *ExtI->getParent(); + DebugLoc DL = ExtI->getDebugLoc(); + unsigned BaseR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); + BuildMI(ExtB, ExtI, DL, HII->get(Hexagon::PS_fi), BaseR) + .addFrameIndex(FI) + .addImm(SR == 0 ? 
0 : VecSize/2); + + unsigned ElemR = genElemLoad(ExtI, BaseR, MRI); + unsigned ExtR = ExtI->getOperand(0).getReg(); + MRI.replaceRegWith(ExtR, ElemR); + ExtB.erase(ExtI); + Changed = true; + } + } + + return Changed; +} + +FunctionPass *llvm::createHexagonVExtract() { + return new HexagonVExtract(); +} diff --git a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp index c2404235091c..56ab69db9bd1 100644 --- a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp +++ b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp @@ -199,11 +199,12 @@ static MachineBasicBlock::iterator moveInstrOut(MachineInstr &MI, } bool HexagonPacketizer::runOnMachineFunction(MachineFunction &MF) { - if (DisablePacketizer || skipFunction(MF.getFunction())) + auto &HST = MF.getSubtarget<HexagonSubtarget>(); + if (DisablePacketizer || !HST.usePackets() || skipFunction(MF.getFunction())) return false; - HII = MF.getSubtarget<HexagonSubtarget>().getInstrInfo(); - HRI = MF.getSubtarget<HexagonSubtarget>().getRegisterInfo(); + HII = HST.getInstrInfo(); + HRI = HST.getRegisterInfo(); auto &MLI = getAnalysis<MachineLoopInfo>(); auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); auto *MBPI = &getAnalysis<MachineBranchProbabilityInfo>(); @@ -374,7 +375,7 @@ bool HexagonPacketizerList::promoteToDotCur(MachineInstr &MI, void HexagonPacketizerList::cleanUpDotCur() { MachineInstr *MI = nullptr; for (auto BI : CurrentPacketMIs) { - DEBUG(dbgs() << "Cleanup packet has "; BI->dump();); + LLVM_DEBUG(dbgs() << "Cleanup packet has "; BI->dump();); if (HII->isDotCurInst(*BI)) { MI = BI; continue; } @@ -389,7 +390,7 @@ void HexagonPacketizerList::cleanUpDotCur() { return; // We did not find a use of the CUR, so de-cur it. MI->setDesc(HII->get(HII->getNonDotCurOp(*MI))); - DEBUG(dbgs() << "Demoted CUR "; MI->dump();); + LLVM_DEBUG(dbgs() << "Demoted CUR "; MI->dump();); } // Check to see if an instruction can be dot cur. @@ -413,11 +414,10 @@ bool HexagonPacketizerList::canPromoteToDotCur(const MachineInstr &MI, return false; // Make sure candidate instruction uses cur. - DEBUG(dbgs() << "Can we DOT Cur Vector MI\n"; - MI.dump(); - dbgs() << "in packet\n";); + LLVM_DEBUG(dbgs() << "Can we DOT Cur Vector MI\n"; MI.dump(); + dbgs() << "in packet\n";); MachineInstr &MJ = *MII; - DEBUG({ + LLVM_DEBUG({ dbgs() << "Checking CUR against "; MJ.dump(); }); @@ -432,12 +432,12 @@ bool HexagonPacketizerList::canPromoteToDotCur(const MachineInstr &MI, // Check for existing uses of a vector register within the packet which // would be affected by converting a vector load into .cur format. for (auto BI : CurrentPacketMIs) { - DEBUG(dbgs() << "packet has "; BI->dump();); + LLVM_DEBUG(dbgs() << "packet has "; BI->dump();); if (BI->readsRegister(DepReg, MF.getSubtarget().getRegisterInfo())) return false; } - DEBUG(dbgs() << "Can Dot CUR MI\n"; MI.dump();); + LLVM_DEBUG(dbgs() << "Can Dot CUR MI\n"; MI.dump();); // We can convert the opcode into a .cur. return true; } @@ -529,6 +529,9 @@ bool HexagonPacketizerList::updateOffset(SUnit *SUI, SUnit *SUJ) { return false; int64_t Offset = MI.getOperand(OPI).getImm(); + if (!HII->isValidOffset(MI.getOpcode(), Offset+Incr, HRI)) + return false; + MI.getOperand(OPI).setImm(Offset + Incr); ChangedOffset = Offset; return true; @@ -1033,7 +1036,7 @@ void HexagonPacketizerList::initPacketizerState() { // Ignore bundling of pseudo instructions.
bool HexagonPacketizerList::ignorePseudoInstruction(const MachineInstr &MI, const MachineBasicBlock *) { - if (MI.isDebugValue()) + if (MI.isDebugInstr()) return true; if (MI.isCFIInstruction()) @@ -1095,7 +1098,7 @@ bool HexagonPacketizerList::isSoloInstruction(const MachineInstr &MI) { static bool cannotCoexistAsymm(const MachineInstr &MI, const MachineInstr &MJ, const HexagonInstrInfo &HII) { const MachineFunction *MF = MI.getParent()->getParent(); - if (MF->getSubtarget<HexagonSubtarget>().hasV60TOpsOnly() && + if (MF->getSubtarget<HexagonSubtarget>().hasV60OpsOnly() && HII.isHVXMemWithAIndirect(MI, MJ)) return true; @@ -1112,6 +1115,10 @@ static bool cannotCoexistAsymm(const MachineInstr &MI, const MachineInstr &MJ, case Hexagon::S4_stored_locked: case Hexagon::L2_loadw_locked: case Hexagon::L4_loadd_locked: + case Hexagon::Y2_dccleana: + case Hexagon::Y2_dccleaninva: + case Hexagon::Y2_dcinva: + case Hexagon::Y2_dczeroa: case Hexagon::Y4_l2fetch: case Hexagon::Y5_l2fetch: { // These instructions can only be grouped with ALU32 or non-floating-point @@ -1513,7 +1520,7 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) { bool IsVecJ = HII->isHVXVec(J); bool IsVecI = HII->isHVXVec(I); - if (Slot1Store && MF.getSubtarget<HexagonSubtarget>().hasV65TOps() && + if (Slot1Store && MF.getSubtarget<HexagonSubtarget>().hasV65Ops() && ((LoadJ && StoreI && !NVStoreI) || (StoreJ && LoadI && !NVStoreJ)) && (J.getOpcode() != Hexagon::S2_allocframe && @@ -1683,8 +1690,12 @@ HexagonPacketizerList::addToPacket(MachineInstr &MI) { PacketStalls = false; PacketStalls |= producesStall(MI); - if (MI.isImplicitDef()) + if (MI.isImplicitDef()) { + // Add to the packet to allow subsequent instructions to be checked + // properly. + CurrentPacketMIs.push_back(&MI); return MII; + } assert(ResourceTracker->canReserveResources(MI)); bool ExtMI = HII->isExtended(MI) || HII->isConstExtended(MI); @@ -1754,7 +1765,7 @@ void HexagonPacketizerList::endPacket(MachineBasicBlock *MBB, bool memShufDisabled = getmemShufDisabled(); if (memShufDisabled && !foundLSInPacket()) { setmemShufDisabled(false); - DEBUG(dbgs() << " Not added to NoShufPacket\n"); + LLVM_DEBUG(dbgs() << " Not added to NoShufPacket\n"); } memShufDisabled = getmemShufDisabled(); @@ -1773,7 +1784,7 @@ void HexagonPacketizerList::endPacket(MachineBasicBlock *MBB, CurrentPacketMIs.clear(); ResourceTracker->clearResources(); - DEBUG(dbgs() << "End packet\n"); + LLVM_DEBUG(dbgs() << "End packet\n"); } bool HexagonPacketizerList::shouldAddToPacket(const MachineInstr &MI) { @@ -1803,17 +1814,18 @@ bool HexagonPacketizerList::producesStall(const MachineInstr &I) { SUnit *SUI = MIToSUnit[const_cast<MachineInstr *>(&I)]; - // Check if the latency is 0 between this instruction and any instruction - // in the current packet. If so, we disregard any potential stalls due to - // the instructions in the previous packet. Most of the instruction pairs - // that can go together in the same packet have 0 latency between them. - // Only exceptions are newValueJumps as they're generated much later and - // the latencies can't be changed at that point. Another is .cur - // instructions if its consumer has a 0 latency successor (such as .new). - // In this case, the latency between .cur and the consumer stays non-zero - // even though we can have both .cur and .new in the same packet. Changing - // the latency to 0 is not an option as it causes software pipeliner to - // not pipeline in some cases. 
+ // If the latency is 0 and there is a data dependence between this + // instruction and any instruction in the current packet, we disregard any + // potential stalls due to the instructions in the previous packet. Most of + // the instruction pairs that can go together in the same packet have 0 + // latency between them. The exceptions are + // 1. NewValueJumps as they're generated much later and the latencies can't + // be changed at that point. + // 2. .cur instructions, if their consumer has a 0 latency successor (such as + // .new). In this case, the latency between .cur and the consumer stays + // non-zero even though we can have both .cur and .new in the same packet. + // Changing the latency to 0 is not an option as it causes the software + // pipeliner to not pipeline in some cases. // For Example: // { @@ -1826,19 +1838,10 @@ bool HexagonPacketizerList::producesStall(const MachineInstr &I) { for (auto J : CurrentPacketMIs) { SUnit *SUJ = MIToSUnit[J]; for (auto &Pred : SUI->Preds) - if (Pred.getSUnit() == SUJ && - (Pred.getLatency() == 0 || HII->isNewValueJump(I) || - HII->isToBeScheduledASAP(*J, I)) - return false; - } - - // Check if the latency is greater than one between this instruction and any - // instruction in the previous packet. - for (auto J : OldPacketMIs) { - SUnit *SUJ = MIToSUnit[J]; - for (auto &Pred : SUI->Preds) - if (Pred.getSUnit() == SUJ && Pred.getLatency() > 1) - return true; + if (Pred.getSUnit() == SUJ) + if ((Pred.getLatency() == 0 && Pred.isAssignedRegDep()) || + HII->isNewValueJump(I) || HII->isToBeScheduledASAP(*J, I)) + return false; } // Check if the latency is greater than one between this instruction and any diff --git a/lib/Target/Hexagon/HexagonVLIWPacketizer.h b/lib/Target/Hexagon/HexagonVLIWPacketizer.h index 764d9ae9059a..40dcee3441a2 100644 --- a/lib/Target/Hexagon/HexagonVLIWPacketizer.h +++ b/lib/Target/Hexagon/HexagonVLIWPacketizer.h @@ -59,7 +59,7 @@ class HexagonPacketizerList : public VLIWPacketizerList { bool PacketStalls = false; protected: - /// \brief A handle to the branch probability pass. + /// A handle to the branch probability pass. const MachineBranchProbabilityInfo *MBPI; const MachineLoopInfo *MLI; diff --git a/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp b/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp index 39395dbd3aec..9d1073346c72 100644 --- a/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp +++ b/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp @@ -138,6 +138,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils.h" #include <algorithm> #include <cassert> #include <cstddef> @@ -363,17 +364,18 @@ bool HexagonVectorLoopCarriedReuse::canReplace(Instruction *I) { if (II && (II->getIntrinsicID() == Intrinsic::hexagon_V6_hi || II->getIntrinsicID() == Intrinsic::hexagon_V6_lo)) { - DEBUG(dbgs() << "Not considering for reuse: " << *II << "\n"); + LLVM_DEBUG(dbgs() << "Not considering for reuse: " << *II << "\n"); return false; } return true; } void HexagonVectorLoopCarriedReuse::findValueToReuse() { for (auto *D : Dependences) { - DEBUG(dbgs() << "Processing dependence " << *(D->front()) << "\n"); + LLVM_DEBUG(dbgs() << "Processing dependence " << *(D->front()) << "\n"); if (D->iterations() > HexagonVLCRIterationLim) { - DEBUG(dbgs() << - ".. Skipping because number of iterations > than the limit\n"); + LLVM_DEBUG( + dbgs() + << ".. 
Skipping because number of iterations > the limit\n"); continue; } @@ -381,7 +383,8 @@ void HexagonVectorLoopCarriedReuse::findValueToReuse() { Instruction *BEInst = D->back(); int Iters = D->iterations(); BasicBlock *BB = PN->getParent(); - DEBUG(dbgs() << "Checking if any uses of " << *PN << " can be reused\n"); + LLVM_DEBUG(dbgs() << "Checking if any uses of " << *PN + << " can be reused\n"); SmallVector<Instruction *, 4> PNUsers; for (auto UI = PN->use_begin(), E = PN->use_end(); UI != E; ++UI) { @@ -391,7 +394,8 @@ void HexagonVectorLoopCarriedReuse::findValueToReuse() { if (User->getParent() != BB) continue; if (ReplacedInsts.count(User)) { - DEBUG(dbgs() << *User << " has already been replaced. Skipping...\n"); + LLVM_DEBUG(dbgs() << *User + << " has already been replaced. Skipping...\n"); continue; } if (isa<PHINode>(User)) @@ -403,7 +407,7 @@ void HexagonVectorLoopCarriedReuse::findValueToReuse() { PNUsers.push_back(User); } - DEBUG(dbgs() << PNUsers.size() << " use(s) of the PHI in the block\n"); + LLVM_DEBUG(dbgs() << PNUsers.size() << " use(s) of the PHI in the block\n"); // For each interesting use I of PN, find an Instruction BEUser that // performs the same operation as I on BEInst and whose other operands, @@ -439,7 +443,7 @@ void HexagonVectorLoopCarriedReuse::findValueToReuse() { } } if (BEUser) { - DEBUG(dbgs() << "Found Value for reuse.\n"); + LLVM_DEBUG(dbgs() << "Found Value for reuse.\n"); ReuseCandidate.Inst2Replace = I; ReuseCandidate.BackedgeInst = BEUser; return; @@ -460,7 +464,7 @@ Value *HexagonVectorLoopCarriedReuse::findValueInBlock(Value *Op, } void HexagonVectorLoopCarriedReuse::reuseValue() { - DEBUG(dbgs() << ReuseCandidate); + LLVM_DEBUG(dbgs() << ReuseCandidate); Instruction *Inst2Replace = ReuseCandidate.Inst2Replace; Instruction *BEInst = ReuseCandidate.BackedgeInst; int NumOperands = Inst2Replace->getNumOperands(); @@ -485,7 +489,7 @@ void HexagonVectorLoopCarriedReuse::reuseValue() { } } - DEBUG(dbgs() << "reuseValue is making the following changes\n"); + LLVM_DEBUG(dbgs() << "reuseValue is making the following changes\n"); SmallVector<Instruction *, 4> InstsInPreheader; for (int i = 0; i < Iterations; ++i) { @@ -506,8 +510,8 @@ void HexagonVectorLoopCarriedReuse::reuseValue() { InstsInPreheader.push_back(InstInPreheader); InstInPreheader->setName(Inst2Replace->getName() + ".hexagon.vlcr"); InstInPreheader->insertBefore(LoopPH->getTerminator()); - DEBUG(dbgs() << "Added " << *InstInPreheader << " to " << LoopPH->getName() - << "\n"); + LLVM_DEBUG(dbgs() << "Added " << *InstInPreheader << " to " + << LoopPH->getName() << "\n"); } BasicBlock *BB = BEInst->getParent(); IRBuilder<> IRB(BB); @@ -519,7 +523,8 @@ void HexagonVectorLoopCarriedReuse::reuseValue() { NewPhi = IRB.CreatePHI(InstInPreheader->getType(), 2); NewPhi->addIncoming(InstInPreheader, LoopPH); NewPhi->addIncoming(BEVal, BB); - DEBUG(dbgs() << "Adding " << *NewPhi << " to " << BB->getName() << "\n"); + LLVM_DEBUG(dbgs() << "Adding " << *NewPhi << " to " << BB->getName() + << "\n"); BEVal = NewPhi; } // We are in LCSSA form. So, a value defined inside the Loop is used only @@ -538,7 +543,7 @@ bool HexagonVectorLoopCarriedReuse::doVLCR() { bool Changed = false; bool Continue; - DEBUG(dbgs() << "Working on Loop: " << *CurLoop->getHeader() << "\n"); + LLVM_DEBUG(dbgs() << "Working on Loop: " << *CurLoop->getHeader() << "\n"); do { // Reset datastructures.
Dependences.clear(); @@ -625,10 +630,9 @@ void HexagonVectorLoopCarriedReuse::findLoopCarriedDeps() { else delete D; } - DEBUG(dbgs() << "Found " << Dependences.size() << " dependences\n"); - DEBUG(for (size_t i = 0; i < Dependences.size(); ++i) { - dbgs() << *Dependences[i] << "\n"; - }); + LLVM_DEBUG(dbgs() << "Found " << Dependences.size() << " dependences\n"); + LLVM_DEBUG(for (size_t i = 0; i < Dependences.size(); + ++i) { dbgs() << *Dependences[i] << "\n"; }); } Pass *llvm::createHexagonVectorLoopCarriedReusePass() { diff --git a/lib/Target/Hexagon/HexagonVectorPrint.cpp b/lib/Target/Hexagon/HexagonVectorPrint.cpp index ddd668b2cb1e..18d2f2f4acde 100644 --- a/lib/Target/Hexagon/HexagonVectorPrint.cpp +++ b/lib/Target/Hexagon/HexagonVectorPrint.cpp @@ -144,14 +144,15 @@ bool HexagonVectorPrint::runOnMachineFunction(MachineFunction &Fn) { unsigned Reg = 0; if (getInstrVecReg(*MII, Reg)) { VecPrintList.push_back((&*MII)); - DEBUG(dbgs() << "Found vector reg inside bundle \n"; MII->dump()); + LLVM_DEBUG(dbgs() << "Found vector reg inside bundle \n"; + MII->dump()); } } } else { unsigned Reg = 0; if (getInstrVecReg(MI, Reg)) { VecPrintList.push_back(&MI); - DEBUG(dbgs() << "Found vector reg \n"; MI.dump()); + LLVM_DEBUG(dbgs() << "Found vector reg \n"; MI.dump()); } } } @@ -163,33 +164,33 @@ bool HexagonVectorPrint::runOnMachineFunction(MachineFunction &Fn) { for (auto *I : VecPrintList) { DebugLoc DL = I->getDebugLoc(); MachineBasicBlock *MBB = I->getParent(); - DEBUG(dbgs() << "Evaluating V MI\n"; I->dump()); + LLVM_DEBUG(dbgs() << "Evaluating V MI\n"; I->dump()); unsigned Reg = 0; if (!getInstrVecReg(*I, Reg)) llvm_unreachable("Need a vector reg"); MachineBasicBlock::instr_iterator MII = I->getIterator(); if (I->isInsideBundle()) { - DEBUG(dbgs() << "add to end of bundle\n"; I->dump()); + LLVM_DEBUG(dbgs() << "add to end of bundle\n"; I->dump()); while (MBB->instr_end() != MII && MII->isInsideBundle()) MII++; } else { - DEBUG(dbgs() << "add after instruction\n"; I->dump()); + LLVM_DEBUG(dbgs() << "add after instruction\n"; I->dump()); MII++; } if (MBB->instr_end() == MII) continue; if (Reg >= Hexagon::V0 && Reg <= Hexagon::V31) { - DEBUG(dbgs() << "adding dump for V" << Reg-Hexagon::V0 << '\n'); + LLVM_DEBUG(dbgs() << "adding dump for V" << Reg - Hexagon::V0 << '\n'); addAsmInstr(MBB, Reg, MII, DL, QII, Fn); } else if (Reg >= Hexagon::W0 && Reg <= Hexagon::W15) { - DEBUG(dbgs() << "adding dump for W" << Reg-Hexagon::W0 << '\n'); + LLVM_DEBUG(dbgs() << "adding dump for W" << Reg - Hexagon::W0 << '\n'); addAsmInstr(MBB, Hexagon::V0 + (Reg - Hexagon::W0) * 2 + 1, MII, DL, QII, Fn); addAsmInstr(MBB, Hexagon::V0 + (Reg - Hexagon::W0) * 2, MII, DL, QII, Fn); } else if (Reg >= Hexagon::Q0 && Reg <= Hexagon::Q3) { - DEBUG(dbgs() << "adding dump for Q" << Reg-Hexagon::Q0 << '\n'); + LLVM_DEBUG(dbgs() << "adding dump for Q" << Reg - Hexagon::Q0 << '\n'); addAsmInstr(MBB, Reg, MII, DL, QII, Fn); } else llvm_unreachable("Bad Vector reg"); diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp index b3ab6763281c..af1e5429d0c2 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp @@ -51,7 +51,7 @@ class HexagonAsmBackend : public MCAsmBackend { SmallVector<MCFixup, 4> Fixups; SmallString<256> Code; raw_svector_ostream VecOS(Code); - E.encodeInstruction(HMB, VecOS, Fixups, RF.getSubtargetInfo()); + E.encodeInstruction(HMB, VecOS, Fixups, *RF.getSubtargetInfo()); 
// Update the fragment. RF.setInst(HMB); @@ -61,13 +61,14 @@ class HexagonAsmBackend : public MCAsmBackend { public: HexagonAsmBackend(const Target &T, const Triple &TT, uint8_t OSABI, - StringRef CPU) : - OSABI(OSABI), CPU(CPU), MCII(T.createMCInstrInfo()), - RelaxTarget(new MCInst *), Extender(nullptr) {} - - std::unique_ptr<MCObjectWriter> - createObjectWriter(raw_pwrite_stream &OS) const override { - return createHexagonELFObjectWriter(OS, OSABI, CPU); + StringRef CPU) + : MCAsmBackend(support::little), OSABI(OSABI), CPU(CPU), + MCII(T.createMCInstrInfo()), RelaxTarget(new MCInst *), + Extender(nullptr) {} + + std::unique_ptr<MCObjectTargetWriter> + createObjectTargetWriter() const override { + return createHexagonELFObjectWriter(OSABI, CPU); } void setExtender(MCContext &Context) const { @@ -413,7 +414,8 @@ public: /// fixup kind as appropriate. void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, const MCValue &Target, MutableArrayRef<char> Data, - uint64_t FixupValue, bool IsResolved) const override { + uint64_t FixupValue, bool IsResolved, + const MCSubtargetInfo *STI) const override { // When FixupValue is 0 the relocation is external and there // is nothing for us to do. @@ -510,17 +512,15 @@ public: break; } - DEBUG(dbgs() << "Name=" << getFixupKindInfo(Kind).Name << "(" << - (unsigned)Kind << ")\n"); - DEBUG(uint32_t OldData = 0; - for (unsigned i = 0; i < NumBytes; i++) - OldData |= (InstAddr[i] << (i * 8)) & (0xff << (i * 8)); - dbgs() << "\tBValue=0x"; dbgs().write_hex(Value) << - ": AValue=0x"; dbgs().write_hex(FixupValue) << - ": Offset=" << Offset << - ": Size=" << Data.size() << - ": OInst=0x"; dbgs().write_hex(OldData) << - ": Reloc=0x"; dbgs().write_hex(Reloc);); + LLVM_DEBUG(dbgs() << "Name=" << getFixupKindInfo(Kind).Name << "(" + << (unsigned)Kind << ")\n"); + LLVM_DEBUG( + uint32_t OldData = 0; for (unsigned i = 0; i < NumBytes; i++) OldData |= + (InstAddr[i] << (i * 8)) & (0xff << (i * 8)); + dbgs() << "\tBValue=0x"; dbgs().write_hex(Value) << ": AValue=0x"; + dbgs().write_hex(FixupValue) + << ": Offset=" << Offset << ": Size=" << Data.size() << ": OInst=0x"; + dbgs().write_hex(OldData) << ": Reloc=0x"; dbgs().write_hex(Reloc);); // For each byte of the fragment that the fixup touches, mask in the // bits from the fixup value. The Value has been "split up" into the @@ -530,10 +530,10 @@ public: InstAddr[i] |= uint8_t(Reloc >> (i * 8)) & 0xff; // Apply new reloc } - DEBUG(uint32_t NewData = 0; - for (unsigned i = 0; i < NumBytes; i++) - NewData |= (InstAddr[i] << (i * 8)) & (0xff << (i * 8)); - dbgs() << ": NInst=0x"; dbgs().write_hex(NewData) << "\n";); + LLVM_DEBUG(uint32_t NewData = 0; + for (unsigned i = 0; i < NumBytes; i++) NewData |= + (InstAddr[i] << (i * 8)) & (0xff << (i * 8)); + dbgs() << ": NInst=0x"; dbgs().write_hex(NewData) << "\n";); } bool isInstRelaxable(MCInst const &HMI) const { @@ -562,7 +562,8 @@ public: /// relaxation. /// /// \param Inst - The instruction to test. 
- bool mayNeedRelaxation(MCInst const &Inst) const override { + bool mayNeedRelaxation(MCInst const &Inst, + const MCSubtargetInfo &STI) const override { return true; } @@ -571,7 +572,8 @@ public: bool fixupNeedsRelaxationAdvanced(const MCFixup &Fixup, bool Resolved, uint64_t Value, const MCRelaxableFragment *DF, - const MCAsmLayout &Layout) const override { + const MCAsmLayout &Layout, + const bool WasForced) const override { MCInst const &MCB = DF->getInst(); assert(HexagonMCInstrInfo::isBundle(MCB)); @@ -682,17 +684,17 @@ public: assert(Update && "Didn't find relaxation target"); } - bool writeNopData(uint64_t Count, - MCObjectWriter * OW) const override { + bool writeNopData(raw_ostream &OS, uint64_t Count) const override { static const uint32_t Nopcode = 0x7f000000, // Hard-coded NOP. ParseIn = 0x00004000, // In packet parse-bits. ParseEnd = 0x0000c000; // End of packet parse-bits. while(Count % HEXAGON_INSTR_SIZE) { - DEBUG(dbgs() << "Alignment not a multiple of the instruction size:" << - Count % HEXAGON_INSTR_SIZE << "/" << HEXAGON_INSTR_SIZE << "\n"); + LLVM_DEBUG(dbgs() << "Alignment not a multiple of the instruction size:" + << Count % HEXAGON_INSTR_SIZE << "/" + << HEXAGON_INSTR_SIZE << "\n"); --Count; - OW->write8(0); + OS << '\0'; } while(Count) { @@ -700,7 +702,7 @@ public: // Close the packet whenever a multiple of the maximum packet size remains uint32_t ParseBits = (Count % (HEXAGON_PACKET_SIZE * HEXAGON_INSTR_SIZE))? ParseIn: ParseEnd; - OW->write32(Nopcode | ParseBits); + support::endian::write<uint32_t>(OS, Nopcode | ParseBits, Endian); } return true; } @@ -736,7 +738,7 @@ public: Inst.addOperand(MCOperand::createInst(Nop)); Size -= 4; if (!HexagonMCChecker( - Context, *MCII, RF.getSubtargetInfo(), Inst, + Context, *MCII, *RF.getSubtargetInfo(), Inst, *Context.getRegisterInfo(), false) .check()) { Inst.erase(Inst.end() - 1); @@ -744,7 +746,7 @@ public: } } bool Error = HexagonMCShuffle(Context, true, *MCII, - RF.getSubtargetInfo(), Inst); + *RF.getSubtargetInfo(), Inst); //assert(!Error); (void)Error; ReplaceInstruction(Asm.getEmitter(), RF, Inst); @@ -765,11 +767,12 @@ public: // MCAsmBackend MCAsmBackend *llvm::createHexagonAsmBackend(Target const &T, - MCRegisterInfo const & /*MRI*/, - const Triple &TT, StringRef CPU, - const MCTargetOptions &Options) { + const MCSubtargetInfo &STI, + MCRegisterInfo const & /*MRI*/, + const MCTargetOptions &Options) { + const Triple &TT = STI.getTargetTriple(); uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TT.getOS()); - StringRef CPUString = Hexagon_MC::selectHexagonCPU(CPU); + StringRef CPUString = Hexagon_MC::selectHexagonCPU(STI.getCPU()); return new HexagonAsmBackend(T, TT, OSABI, CPUString); } diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h b/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h index f5a376033757..cb504b5c3d5d 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h @@ -25,7 +25,7 @@ namespace llvm { /// HexagonII - This namespace holds all of the target specific flags that /// instruction info tracks. 
namespace HexagonII { - unsigned const TypeCVI_FIRST = TypeCVI_HIST; + unsigned const TypeCVI_FIRST = TypeCVI_4SLOT_MPY; unsigned const TypeCVI_LAST = TypeCVI_VX_LATE; enum SubTarget { diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp index 12aa1bd9b2a0..e82e6b559f62 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp @@ -298,9 +298,7 @@ unsigned HexagonELFObjectWriter::getRelocType(MCContext &Ctx, } } -std::unique_ptr<MCObjectWriter> -llvm::createHexagonELFObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI, - StringRef CPU) { - auto MOTW = llvm::make_unique<HexagonELFObjectWriter>(OSABI, CPU); - return createELFObjectWriter(std::move(MOTW), OS, /*IsLittleEndian*/ true); +std::unique_ptr<MCObjectTargetWriter> +llvm::createHexagonELFObjectWriter(uint8_t OSABI, StringRef CPU) { + return llvm::make_unique<HexagonELFObjectWriter>(OSABI, CPU); } diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp index 1929152129fa..3b3a15b990f1 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp @@ -69,19 +69,12 @@ void HexagonInstPrinter::printInst(const MCInst *MI, raw_ostream &OS, OS << "\n"; } - auto Separator = ""; - if (HexagonMCInstrInfo::isInnerLoop(*MI)) { - OS << Separator; - Separator = " "; - MCInst ME; - ME.setOpcode(Hexagon::ENDLOOP0); - printInstruction(&ME, OS); - } - if (HexagonMCInstrInfo::isOuterLoop(*MI)) { - OS << Separator; - MCInst ME; - ME.setOpcode(Hexagon::ENDLOOP1); - printInstruction(&ME, OS); + bool IsLoop0 = HexagonMCInstrInfo::isInnerLoop(*MI); + bool IsLoop1 = HexagonMCInstrInfo::isOuterLoop(*MI); + if (IsLoop0) { + OS << (IsLoop1 ? 
" :endloop01" : " :endloop0"); + } else if (IsLoop1) { + OS << " :endloop1"; } } diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp index 631c38c2734f..3382684803aa 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp @@ -33,7 +33,9 @@ #include <cassert> #include <cstddef> #include <cstdint> +#include <map> #include <string> +#include <vector> #define DEBUG_TYPE "mccodeemitter" @@ -42,62 +44,350 @@ using namespace Hexagon; STATISTIC(MCNumEmitted, "Number of MC instructions emitted"); -HexagonMCCodeEmitter::HexagonMCCodeEmitter(MCInstrInfo const &aMII, - MCContext &aMCT) - : MCT(aMCT), MCII(aMII), Addend(new unsigned(0)), - Extended(new bool(false)), CurrentBundle(new MCInst const *), - CurrentIndex(new size_t(0)) {} +static const unsigned fixup_Invalid = ~0u; + +#define _ fixup_Invalid +#define P(x) Hexagon::fixup_Hexagon##x +static const std::map<unsigned, std::vector<unsigned>> ExtFixups = { + { MCSymbolRefExpr::VK_DTPREL, + { _, _, _, _, + _, _, P(_DTPREL_16_X), P(_DTPREL_11_X), + P(_DTPREL_11_X), P(_9_X), _, P(_DTPREL_11_X), + P(_DTPREL_16_X), _, _, _, + P(_DTPREL_16_X), _, _, _, + _, _, _, _, + _, _, _, _, + _, _, _, _, + P(_DTPREL_32_6_X) }}, + { MCSymbolRefExpr::VK_GOT, + { _, _, _, _, + _, _, P(_GOT_11_X), _ /* [1] */, + _ /* [1] */, P(_9_X), _, P(_GOT_11_X), + P(_GOT_16_X), _, _, _, + P(_GOT_16_X), _, _, _, + _, _, _, _, + _, _, _, _, + _, _, _, _, + P(_GOT_32_6_X) }}, + { MCSymbolRefExpr::VK_GOTREL, + { _, _, _, _, + _, _, P(_GOTREL_11_X), P(_GOTREL_11_X), + P(_GOTREL_11_X), P(_9_X), _, P(_GOTREL_11_X), + P(_GOTREL_16_X), _, _, _, + P(_GOTREL_16_X), _, _, _, + _, _, _, _, + _, _, _, _, + _, _, _, _, + P(_GOTREL_32_6_X) }}, + { MCSymbolRefExpr::VK_TPREL, + { _, _, _, _, + _, _, P(_TPREL_16_X), P(_TPREL_11_X), + P(_TPREL_11_X), P(_9_X), _, P(_TPREL_11_X), + P(_TPREL_16_X), _, _, _, + P(_TPREL_16_X), _, _, _, + _, _, _, _, + _, _, _, _, + _, _, _, _, + P(_TPREL_32_6_X) }}, + { MCSymbolRefExpr::VK_Hexagon_GD_GOT, + { _, _, _, _, + _, _, P(_GD_GOT_16_X), P(_GD_GOT_11_X), + P(_GD_GOT_11_X), P(_9_X), _, P(_GD_GOT_11_X), + P(_GD_GOT_16_X), _, _, _, + P(_GD_GOT_16_X), _, _, _, + _, _, _, _, + _, _, _, _, + _, _, _, _, + P(_GD_GOT_32_6_X) }}, + { MCSymbolRefExpr::VK_Hexagon_GD_PLT, + { _, _, _, _, + _, _, _, _, + _, P(_9_X), _, P(_GD_PLT_B22_PCREL_X), + _, _, _, _, + _, _, _, _, + _, _, P(_GD_PLT_B22_PCREL_X), _, + _, _, _, _, + _, _, _, _, + _ }}, + { MCSymbolRefExpr::VK_Hexagon_IE, + { _, _, _, _, + _, _, P(_IE_16_X), _, + _, P(_9_X), _, _, + P(_IE_16_X), _, _, _, + P(_IE_16_X), _, _, _, + _, _, _, _, + _, _, _, _, + _, _, _, _, + P(_IE_32_6_X) }}, + { MCSymbolRefExpr::VK_Hexagon_IE_GOT, + { _, _, _, _, + _, _, P(_IE_GOT_11_X), P(_IE_GOT_11_X), + P(_IE_GOT_11_X), P(_9_X), _, P(_IE_GOT_11_X), + P(_IE_GOT_16_X), _, _, _, + P(_IE_GOT_16_X), _, _, _, + _, _, _, _, + _, _, _, _, + _, _, _, _, + P(_IE_GOT_32_6_X) }}, + { MCSymbolRefExpr::VK_Hexagon_LD_GOT, + { _, _, _, _, + _, _, P(_LD_GOT_11_X), P(_LD_GOT_11_X), + P(_LD_GOT_11_X), P(_9_X), _, P(_LD_GOT_11_X), + P(_LD_GOT_16_X), _, _, _, + P(_LD_GOT_16_X), _, _, _, + _, _, _, _, + _, _, _, _, + _, _, _, _, + P(_LD_GOT_32_6_X) }}, + { MCSymbolRefExpr::VK_Hexagon_LD_PLT, + { _, _, _, _, + _, _, _, _, + _, P(_9_X), _, P(_LD_PLT_B22_PCREL_X), + _, _, _, _, + _, _, _, _, + _, _, P(_LD_PLT_B22_PCREL_X), _, + _, _, _, _, + _, _, _, _, + _ }}, + { MCSymbolRefExpr::VK_Hexagon_PCREL, + { _, _, _, 
_, + _, _, P(_6_PCREL_X), _, + _, P(_9_X), _, _, + _, _, _, _, + _, _, _, _, + _, _, _, _, + _, _, _, _, + _, _, _, _, + P(_32_PCREL) }}, + { MCSymbolRefExpr::VK_None, + { _, _, _, _, + _, _, P(_6_X), P(_8_X), + P(_8_X), P(_9_X), P(_10_X), P(_11_X), + P(_12_X), P(_B13_PCREL), _, P(_B15_PCREL_X), + P(_16_X), _, _, _, + _, _, P(_B22_PCREL_X), _, + _, _, _, _, + _, _, _, _, + P(_32_6_X) }}, +}; +// [1] The fixup is GOT_16_X for signed values and GOT_11_X for unsigned. + +static const std::map<unsigned, std::vector<unsigned>> StdFixups = { + { MCSymbolRefExpr::VK_DTPREL, + { _, _, _, _, + _, _, _, _, + _, _, _, _, + _, _, _, _, + P(_DTPREL_16), _, _, _, + _, _, _, _, + _, _, _, _, + _, _, _, _, + P(_DTPREL_32) }}, + { MCSymbolRefExpr::VK_GOT, + { _, _, _, _, + _, _, _, _, + _, _, _, _, + _, _, _, _, + _, _, _, _, + _, _, _, _, + _, _, _, _, + _, _, _, _, + P(_GOT_32) }}, + { MCSymbolRefExpr::VK_GOTREL, + { _, _, _, _, + _, _, _, _, + _, _, _, _, + _, _, _, _, + _ /* [2] */, _, _, _, + _, _, _, _, + _, _, _, _, + _, _, _, _, + P(_GOTREL_32) }}, + { MCSymbolRefExpr::VK_PLT, + { _, _, _, _, + _, _, _, _, + _, _, _, _, + _, _, _, _, + _, _, _, _, + _, _, P(_PLT_B22_PCREL), _, + _, _, _, _, + _, _, _, _, + _ }}, + { MCSymbolRefExpr::VK_TPREL, + { _, _, _, _, + _, _, _, _, + _, _, _, P(_TPREL_11_X), + _, _, _, _, + P(_TPREL_16), _, _, _, + _, _, _, _, + _, _, _, _, + _, _, _, _, + P(_TPREL_32) }}, + { MCSymbolRefExpr::VK_Hexagon_GD_GOT, + { _, _, _, _, + _, _, _, _, + _, _, _, _, + _, _, _, _, + P(_GD_GOT_16), _, _, _, + _, _, _, _, + _, _, _, _, + _, _, _, _, + P(_GD_GOT_32) }}, + { MCSymbolRefExpr::VK_Hexagon_GD_PLT, + { _, _, _, _, + _, _, _, _, + _, _, _, _, + _, _, _, _, + _, _, _, _, + _, _, P(_GD_PLT_B22_PCREL), _, + _, _, _, _, + _, _, _, _, + _ }}, + { MCSymbolRefExpr::VK_Hexagon_GPREL, + { _, _, _, _, + _, _, _, _, + _, _, _, _, + _, _, _, _, + P(_GPREL16_0), _, _, _, + _, _, _, _, + _, _, _, _, + _, _, _, _, + _ }}, + { MCSymbolRefExpr::VK_Hexagon_HI16, + { _, _, _, _, + _, _, _, _, + _, _, _, _, + _, _, _, _, + P(_HI16), _, _, _, + _, _, _, _, + _, _, _, _, + _, _, _, _, + _ }}, + { MCSymbolRefExpr::VK_Hexagon_IE, + { _, _, _, _, + _, _, _, _, + _, _, _, _, + _, _, _, _, + _, _, _, _, + _, _, _, _, + _, _, _, _, + _, _, _, _, + P(_IE_32) }}, + { MCSymbolRefExpr::VK_Hexagon_IE_GOT, + { _, _, _, _, + _, _, _, _, + _, _, _, _, + _, _, _, _, + P(_IE_GOT_16), _, _, _, + _, _, _, _, + _, _, _, _, + _, _, _, _, + P(_IE_GOT_32) }}, + { MCSymbolRefExpr::VK_Hexagon_LD_GOT, + { _, _, _, _, + _, _, _, _, + _, _, _, _, + _, _, _, _, + P(_LD_GOT_16), _, _, _, + _, _, _, _, + _, _, _, _, + _, _, _, _, + P(_LD_GOT_32) }}, + { MCSymbolRefExpr::VK_Hexagon_LD_PLT, + { _, _, _, _, + _, _, _, _, + _, _, _, _, + _, _, _, _, + _, _, _, _, + _, _, P(_LD_PLT_B22_PCREL), _, + _, _, _, _, + _, _, _, _, + _ }}, + { MCSymbolRefExpr::VK_Hexagon_LO16, + { _, _, _, _, + _, _, _, _, + _, _, _, _, + _, _, _, _, + P(_LO16), _, _, _, + _, _, _, _, + _, _, _, _, + _, _, _, _, + _ }}, + { MCSymbolRefExpr::VK_Hexagon_PCREL, + { _, _, _, _, + _, _, _, _, + _, _, _, _, + _, _, _, _, + _, _, _, _, + _, _, _, _, + _, _, _, _, + _, _, _, _, + P(_32_PCREL) }}, + { MCSymbolRefExpr::VK_None, + { _, _, _, _, + _, _, _, _, + _, _, _, _, + _, P(_B13_PCREL), _, P(_B15_PCREL), + _, _, _, _, + _, _, P(_B22_PCREL), _, + _, _, _, _, + _, _, _, _, + P(_32) }}, +}; +// +// [2] The actual fixup is LO16 or HI16, depending on the instruction. 
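[Illustrative aside, not part of the patch: each row is indexed by the operand's bit count (0 through 32), so a lookup in the emitter presumably has the shape

    const std::vector<unsigned> &Row = ExtFixups.at(MCSymbolRefExpr::VK_GOT);
    unsigned FixupKind = Row[16];   // fixup_Hexagon_GOT_16_X for a 16-bit field

with fixup_Invalid marking width/kind pairs that have no relocation.]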
+#undef P +#undef _ -uint32_t HexagonMCCodeEmitter::parseBits(size_t Last, - MCInst const &MCB, +uint32_t HexagonMCCodeEmitter::parseBits(size_t Last, MCInst const &MCB, MCInst const &MCI) const { bool Duplex = HexagonMCInstrInfo::isDuplex(MCII, MCI); - if (*CurrentIndex == 0) { + if (State.Index == 0) { if (HexagonMCInstrInfo::isInnerLoop(MCB)) { assert(!Duplex); - assert(*CurrentIndex != Last); + assert(State.Index != Last); return HexagonII::INST_PARSE_LOOP_END; } } - if (*CurrentIndex == 1) { + if (State.Index == 1) { if (HexagonMCInstrInfo::isOuterLoop(MCB)) { assert(!Duplex); - assert(*CurrentIndex != Last); + assert(State.Index != Last); return HexagonII::INST_PARSE_LOOP_END; } } if (Duplex) { - assert(*CurrentIndex == Last); + assert(State.Index == Last); return HexagonII::INST_PARSE_DUPLEX; } - if(*CurrentIndex == Last) + if (State.Index == Last) return HexagonII::INST_PARSE_PACKET_END; return HexagonII::INST_PARSE_NOT_END; } -/// EncodeInstruction - Emit the bundle +/// Emit the bundle. void HexagonMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const { MCInst &HMB = const_cast<MCInst &>(MI); assert(HexagonMCInstrInfo::isBundle(HMB)); - DEBUG(dbgs() << "Encoding bundle\n";); - *Addend = 0; - *Extended = false; - *CurrentBundle = &MI; - *CurrentIndex = 0; + LLVM_DEBUG(dbgs() << "Encoding bundle\n";); + State.Addend = 0; + State.Extended = false; + State.Bundle = &MI; + State.Index = 0; size_t Last = HexagonMCInstrInfo::bundleSize(HMB) - 1; + uint64_t Features = computeAvailableFeatures(STI.getFeatureBits()); + for (auto &I : HexagonMCInstrInfo::bundleInstructions(HMB)) { MCInst &HMI = const_cast<MCInst &>(*I.getInst()); - verifyInstructionPredicates(HMI, - computeAvailableFeatures(STI.getFeatureBits())); - - EncodeSingleInstruction(HMI, OS, Fixups, STI, - parseBits(Last, HMB, HMI)); - *Extended = HexagonMCInstrInfo::isImmext(HMI); - *Addend += HEXAGON_INSTR_SIZE; - ++*CurrentIndex; + verifyInstructionPredicates(HMI, Features); + + EncodeSingleInstruction(HMI, OS, Fixups, STI, parseBits(Last, HMB, HMI)); + State.Extended = HexagonMCInstrInfo::isImmext(HMI); + State.Addend += HEXAGON_INSTR_SIZE; + ++State.Index; } } @@ -115,9 +405,9 @@ static bool RegisterMatches(unsigned Consumer, unsigned Producer, } /// EncodeSingleInstruction - Emit a single -void HexagonMCCodeEmitter::EncodeSingleInstruction( - const MCInst &MI, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI, uint32_t Parse) const { +void HexagonMCCodeEmitter::EncodeSingleInstruction(const MCInst &MI, + raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI, uint32_t Parse) const { assert(!HexagonMCInstrInfo::isBundle(MI)); uint64_t Binary; @@ -125,198 +415,150 @@ void HexagonMCCodeEmitter::EncodeSingleInstruction( // in the first place! assert(!HexagonMCInstrInfo::getDesc(MCII, MI).isPseudo() && "pseudo-instruction found"); - DEBUG(dbgs() << "Encoding insn" - " `" << HexagonMCInstrInfo::getName(MCII, MI) << "'" - "\n"); + LLVM_DEBUG(dbgs() << "Encoding insn `" + << HexagonMCInstrInfo::getName(MCII, MI) << "'\n"); Binary = getBinaryCodeForInstr(MI, Fixups, STI); + unsigned Opc = MI.getOpcode(); + // Check for unimplemented instructions. Immediate extenders // are encoded as zero, so they need to be accounted for. 
- if (!Binary && - MI.getOpcode() != DuplexIClass0 && - MI.getOpcode() != A4_ext) { - DEBUG(dbgs() << "Unimplemented inst: " - " `" << HexagonMCInstrInfo::getName(MCII, MI) << "'" - "\n"); + if (!Binary && Opc != DuplexIClass0 && Opc != A4_ext) { + LLVM_DEBUG(dbgs() << "Unimplemented inst `" + << HexagonMCInstrInfo::getName(MCII, MI) << "'\n"); llvm_unreachable("Unimplemented Instruction"); } Binary |= Parse; // if we need to emit a duplexed instruction - if (MI.getOpcode() >= Hexagon::DuplexIClass0 && - MI.getOpcode() <= Hexagon::DuplexIClassF) { + if (Opc >= Hexagon::DuplexIClass0 && Opc <= Hexagon::DuplexIClassF) { assert(Parse == HexagonII::INST_PARSE_DUPLEX && "Emitting duplex without duplex parse bits"); - unsigned dupIClass = MI.getOpcode() - Hexagon::DuplexIClass0; + unsigned DupIClass = MI.getOpcode() - Hexagon::DuplexIClass0; // 29 is the bit position. // 0b1110 =0xE bits are masked off and down shifted by 1 bit. // Last bit is moved to bit position 13 - Binary = ((dupIClass & 0xE) << (29 - 1)) | ((dupIClass & 0x1) << 13); + Binary = ((DupIClass & 0xE) << (29 - 1)) | ((DupIClass & 0x1) << 13); - const MCInst *subInst0 = MI.getOperand(0).getInst(); - const MCInst *subInst1 = MI.getOperand(1).getInst(); + const MCInst *Sub0 = MI.getOperand(0).getInst(); + const MCInst *Sub1 = MI.getOperand(1).getInst(); - // get subinstruction slot 0 - unsigned subInstSlot0Bits = getBinaryCodeForInstr(*subInst0, Fixups, STI); - // get subinstruction slot 1 - unsigned subInstSlot1Bits = getBinaryCodeForInstr(*subInst1, Fixups, STI); + // Get subinstruction slot 0. + unsigned SubBits0 = getBinaryCodeForInstr(*Sub0, Fixups, STI); + // Get subinstruction slot 1. + State.SubInst1 = true; + unsigned SubBits1 = getBinaryCodeForInstr(*Sub1, Fixups, STI); + State.SubInst1 = false; - Binary |= subInstSlot0Bits | (subInstSlot1Bits << 16); + Binary |= SubBits0 | (SubBits1 << 16); } - support::endian::Writer<support::little>(OS).write<uint32_t>(Binary); + support::endian::write<uint32_t>(OS, Binary, support::little); ++MCNumEmitted; } LLVM_ATTRIBUTE_NORETURN -static void raise_relocation_error(unsigned bits, unsigned kind) { +static void raise_relocation_error(unsigned Width, unsigned Kind) { std::string Text; - { - raw_string_ostream Stream(Text); - Stream << "Unrecognized relocation combination bits: " << bits - << " kind: " << kind; - } - report_fatal_error(Text); + raw_string_ostream Stream(Text); + Stream << "Unrecognized relocation combination: width=" << Width + << " kind=" << Kind; + report_fatal_error(Stream.str()); } -/// getFixupNoBits - Some insns are not extended and thus have no -/// bits. These cases require a more brute force method for determining -/// the correct relocation. +/// Some insns are not extended and thus have no bits. These cases require +/// a more brute force method for determining the correct relocation. 
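/// (Editorial note, not part of the patch: a fixup width of zero arises when
/// getExtentBits() minus getExtentAlignment() is zero, i.e. the instruction
/// has no encodable immediate field. The opcode and variant kind then select
/// the relocation instead: extenders map to the *_32_6_X and B32_PCREL_X
/// kinds, plain branches to B13_PCREL, and the HI/LO transfer opcodes to the
/// *_HI16 and *_LO16 kinds.)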
Hexagon::Fixups HexagonMCCodeEmitter::getFixupNoBits( - MCInstrInfo const &MCII, const MCInst &MI, const MCOperand &MO, - const MCSymbolRefExpr::VariantKind kind) const { + MCInstrInfo const &MCII, const MCInst &MI, const MCOperand &MO, + const MCSymbolRefExpr::VariantKind VarKind) const { const MCInstrDesc &MCID = HexagonMCInstrInfo::getDesc(MCII, MI); - unsigned insnType = HexagonMCInstrInfo::getType(MCII, MI); - - if (insnType == HexagonII::TypeEXTENDER) { - switch (kind) { - case MCSymbolRefExpr::VK_GOTREL: - return Hexagon::fixup_Hexagon_GOTREL_32_6_X; - case MCSymbolRefExpr::VK_GOT: - return Hexagon::fixup_Hexagon_GOT_32_6_X; - case MCSymbolRefExpr::VK_TPREL: - return Hexagon::fixup_Hexagon_TPREL_32_6_X; - case MCSymbolRefExpr::VK_DTPREL: - return Hexagon::fixup_Hexagon_DTPREL_32_6_X; - case MCSymbolRefExpr::VK_Hexagon_GD_GOT: - return Hexagon::fixup_Hexagon_GD_GOT_32_6_X; - case MCSymbolRefExpr::VK_Hexagon_LD_GOT: - return Hexagon::fixup_Hexagon_LD_GOT_32_6_X; - case MCSymbolRefExpr::VK_Hexagon_IE: - return Hexagon::fixup_Hexagon_IE_32_6_X; - case MCSymbolRefExpr::VK_Hexagon_IE_GOT: - return Hexagon::fixup_Hexagon_IE_GOT_32_6_X; - case MCSymbolRefExpr::VK_Hexagon_PCREL: - return Hexagon::fixup_Hexagon_B32_PCREL_X; - case MCSymbolRefExpr::VK_Hexagon_GD_PLT: - return Hexagon::fixup_Hexagon_GD_PLT_B32_PCREL_X; - case MCSymbolRefExpr::VK_Hexagon_LD_PLT: - return Hexagon::fixup_Hexagon_LD_PLT_B32_PCREL_X; - - case MCSymbolRefExpr::VK_None: { - auto Insts = HexagonMCInstrInfo::bundleInstructions(**CurrentBundle); - for (auto I = Insts.begin(), N = Insts.end(); I != N; ++I) { - if (I->getInst() == &MI) { - const MCInst &NextI = *(I+1)->getInst(); - const MCInstrDesc &D = HexagonMCInstrInfo::getDesc(MCII, NextI); - if (D.isBranch() || D.isCall() || - HexagonMCInstrInfo::getType(MCII, NextI) == HexagonII::TypeCR) - return Hexagon::fixup_Hexagon_B32_PCREL_X; - return Hexagon::fixup_Hexagon_32_6_X; - } + unsigned InsnType = HexagonMCInstrInfo::getType(MCII, MI); + using namespace Hexagon; + + if (InsnType == HexagonII::TypeEXTENDER) { + if (VarKind == MCSymbolRefExpr::VK_None) { + auto Instrs = HexagonMCInstrInfo::bundleInstructions(*State.Bundle); + for (auto I = Instrs.begin(), N = Instrs.end(); I != N; ++I) { + if (I->getInst() != &MI) + continue; + assert(I+1 != N && "Extender cannot be last in packet"); + const MCInst &NextI = *(I+1)->getInst(); + const MCInstrDesc &NextD = HexagonMCInstrInfo::getDesc(MCII, NextI); + if (NextD.isBranch() || NextD.isCall() || + HexagonMCInstrInfo::getType(MCII, NextI) == HexagonII::TypeCR) + return fixup_Hexagon_B32_PCREL_X; + return fixup_Hexagon_32_6_X; } - raise_relocation_error(0, kind); - } - default: - raise_relocation_error(0, kind); } - } else if (MCID.isBranch()) - return Hexagon::fixup_Hexagon_B13_PCREL; - switch (MCID.getOpcode()) { - case Hexagon::HI: - case Hexagon::A2_tfrih: - switch (kind) { - case MCSymbolRefExpr::VK_GOT: - return Hexagon::fixup_Hexagon_GOT_HI16; - case MCSymbolRefExpr::VK_GOTREL: - return Hexagon::fixup_Hexagon_GOTREL_HI16; - case MCSymbolRefExpr::VK_Hexagon_GD_GOT: - return Hexagon::fixup_Hexagon_GD_GOT_HI16; - case MCSymbolRefExpr::VK_Hexagon_LD_GOT: - return Hexagon::fixup_Hexagon_LD_GOT_HI16; - case MCSymbolRefExpr::VK_Hexagon_IE: - return Hexagon::fixup_Hexagon_IE_HI16; - case MCSymbolRefExpr::VK_Hexagon_IE_GOT: - return Hexagon::fixup_Hexagon_IE_GOT_HI16; - case MCSymbolRefExpr::VK_TPREL: - return Hexagon::fixup_Hexagon_TPREL_HI16; - case MCSymbolRefExpr::VK_DTPREL: - return Hexagon::fixup_Hexagon_DTPREL_HI16; - case 
MCSymbolRefExpr::VK_None: - return Hexagon::fixup_Hexagon_HI16; - default: - raise_relocation_error(0, kind); - } + static const std::map<unsigned,unsigned> Relocs = { + { MCSymbolRefExpr::VK_GOTREL, fixup_Hexagon_GOTREL_32_6_X }, + { MCSymbolRefExpr::VK_GOT, fixup_Hexagon_GOT_32_6_X }, + { MCSymbolRefExpr::VK_TPREL, fixup_Hexagon_TPREL_32_6_X }, + { MCSymbolRefExpr::VK_DTPREL, fixup_Hexagon_DTPREL_32_6_X }, + { MCSymbolRefExpr::VK_Hexagon_GD_GOT, fixup_Hexagon_GD_GOT_32_6_X }, + { MCSymbolRefExpr::VK_Hexagon_LD_GOT, fixup_Hexagon_LD_GOT_32_6_X }, + { MCSymbolRefExpr::VK_Hexagon_IE, fixup_Hexagon_IE_32_6_X }, + { MCSymbolRefExpr::VK_Hexagon_IE_GOT, fixup_Hexagon_IE_GOT_32_6_X }, + { MCSymbolRefExpr::VK_Hexagon_PCREL, fixup_Hexagon_B32_PCREL_X }, + { MCSymbolRefExpr::VK_Hexagon_GD_PLT, fixup_Hexagon_GD_PLT_B32_PCREL_X }, + { MCSymbolRefExpr::VK_Hexagon_LD_PLT, fixup_Hexagon_LD_PLT_B32_PCREL_X }, + }; + + auto F = Relocs.find(VarKind); + if (F != Relocs.end()) + return Hexagon::Fixups(F->second); + raise_relocation_error(0, VarKind); + } - case Hexagon::LO: - case Hexagon::A2_tfril: - switch (kind) { - case MCSymbolRefExpr::VK_GOT: - return Hexagon::fixup_Hexagon_GOT_LO16; - case MCSymbolRefExpr::VK_GOTREL: - return Hexagon::fixup_Hexagon_GOTREL_LO16; - case MCSymbolRefExpr::VK_Hexagon_GD_GOT: - return Hexagon::fixup_Hexagon_GD_GOT_LO16; - case MCSymbolRefExpr::VK_Hexagon_LD_GOT: - return Hexagon::fixup_Hexagon_LD_GOT_LO16; - case MCSymbolRefExpr::VK_Hexagon_IE: - return Hexagon::fixup_Hexagon_IE_LO16; - case MCSymbolRefExpr::VK_Hexagon_IE_GOT: - return Hexagon::fixup_Hexagon_IE_GOT_LO16; - case MCSymbolRefExpr::VK_TPREL: - return Hexagon::fixup_Hexagon_TPREL_LO16; - case MCSymbolRefExpr::VK_DTPREL: - return Hexagon::fixup_Hexagon_DTPREL_LO16; - case MCSymbolRefExpr::VK_None: - return Hexagon::fixup_Hexagon_LO16; - default: - raise_relocation_error(0, kind); - } + if (MCID.isBranch()) + return fixup_Hexagon_B13_PCREL; + + static const std::map<unsigned,unsigned> RelocsLo = { + { MCSymbolRefExpr::VK_GOT, fixup_Hexagon_GOT_LO16 }, + { MCSymbolRefExpr::VK_GOTREL, fixup_Hexagon_GOTREL_LO16 }, + { MCSymbolRefExpr::VK_Hexagon_GD_GOT, fixup_Hexagon_GD_GOT_LO16 }, + { MCSymbolRefExpr::VK_Hexagon_LD_GOT, fixup_Hexagon_LD_GOT_LO16 }, + { MCSymbolRefExpr::VK_Hexagon_IE, fixup_Hexagon_IE_LO16 }, + { MCSymbolRefExpr::VK_Hexagon_IE_GOT, fixup_Hexagon_IE_GOT_LO16 }, + { MCSymbolRefExpr::VK_TPREL, fixup_Hexagon_TPREL_LO16 }, + { MCSymbolRefExpr::VK_DTPREL, fixup_Hexagon_DTPREL_LO16 }, + { MCSymbolRefExpr::VK_None, fixup_Hexagon_LO16 }, + }; + + static const std::map<unsigned,unsigned> RelocsHi = { + { MCSymbolRefExpr::VK_GOT, fixup_Hexagon_GOT_HI16 }, + { MCSymbolRefExpr::VK_GOTREL, fixup_Hexagon_GOTREL_HI16 }, + { MCSymbolRefExpr::VK_Hexagon_GD_GOT, fixup_Hexagon_GD_GOT_HI16 }, + { MCSymbolRefExpr::VK_Hexagon_LD_GOT, fixup_Hexagon_LD_GOT_HI16 }, + { MCSymbolRefExpr::VK_Hexagon_IE, fixup_Hexagon_IE_HI16 }, + { MCSymbolRefExpr::VK_Hexagon_IE_GOT, fixup_Hexagon_IE_GOT_HI16 }, + { MCSymbolRefExpr::VK_TPREL, fixup_Hexagon_TPREL_HI16 }, + { MCSymbolRefExpr::VK_DTPREL, fixup_Hexagon_DTPREL_HI16 }, + { MCSymbolRefExpr::VK_None, fixup_Hexagon_HI16 }, + }; - // The only relocs left should be GP relative: - default: - if (MCID.mayStore() || MCID.mayLoad()) { - for (const MCPhysReg *ImpUses = MCID.getImplicitUses(); *ImpUses; - ++ImpUses) { - if (*ImpUses != Hexagon::GP) - continue; - switch (HexagonMCInstrInfo::getMemAccessSize(MCII, MI)) { - case 1: - return fixup_Hexagon_GPREL16_0; - case 2: - return 
fixup_Hexagon_GPREL16_1; - case 4: - return fixup_Hexagon_GPREL16_2; - case 8: - return fixup_Hexagon_GPREL16_3; - default: - raise_relocation_error(0, kind); - } - } + switch (MCID.getOpcode()) { + case Hexagon::LO: + case Hexagon::A2_tfril: { + auto F = RelocsLo.find(VarKind); + if (F != RelocsLo.end()) + return Hexagon::Fixups(F->second); + break; + } + case Hexagon::HI: + case Hexagon::A2_tfrih: { + auto F = RelocsHi.find(VarKind); + if (F != RelocsHi.end()) + return Hexagon::Fixups(F->second); + break; } - raise_relocation_error(0, kind); } - llvm_unreachable("Relocation exit not taken"); -} - -namespace llvm { - -extern const MCInstrDesc HexagonInsts[]; -} // end namespace llvm + raise_relocation_error(0, VarKind); +} -static bool isPCRel (unsigned Kind) { - switch(Kind){ +static bool isPCRel(unsigned Kind) { + switch (Kind){ case fixup_Hexagon_B22_PCREL: case fixup_Hexagon_B15_PCREL: case fixup_Hexagon_B7_PCREL: @@ -342,16 +584,34 @@ static bool isPCRel (unsigned Kind) { } unsigned HexagonMCCodeEmitter::getExprOpValue(const MCInst &MI, - const MCOperand &MO, - const MCExpr *ME, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const -{ + const MCOperand &MO, const MCExpr *ME, SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { if (isa<HexagonMCExpr>(ME)) ME = &HexagonMCInstrInfo::getExpr(*ME); int64_t Value; - if (ME->evaluateAsAbsolute(Value)) + if (ME->evaluateAsAbsolute(Value)) { + bool InstExtendable = HexagonMCInstrInfo::isExtendable(MCII, MI) || + HexagonMCInstrInfo::isExtended(MCII, MI); + // Only sub-instruction #1 can be extended in a duplex. If MI is a + // sub-instruction #0, it is not extended even if Extended is true + // (it can be true for the duplex as a whole). + bool IsSub0 = HexagonMCInstrInfo::isSubInstruction(MI) && !State.SubInst1; + if (State.Extended && InstExtendable && !IsSub0) { + unsigned OpIdx = ~0u; + for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) { + if (&MO != &MI.getOperand(I)) + continue; + OpIdx = I; + break; + } + assert(OpIdx != ~0u); + if (OpIdx == HexagonMCInstrInfo::getExtendableOp(MCII, MI)) { + unsigned Shift = HexagonMCInstrInfo::getExtentAlignment(MCII, MI); + Value = (Value & 0x3f) << Shift; + } + } return Value; + } assert(ME->getKind() == MCExpr::SymbolRef || ME->getKind() == MCExpr::Binary); if (ME->getKind() == MCExpr::Binary) { @@ -360,366 +620,99 @@ unsigned HexagonMCCodeEmitter::getExprOpValue(const MCInst &MI, getExprOpValue(MI, MO, Binary->getRHS(), Fixups, STI); return 0; } - Hexagon::Fixups FixupKind = - Hexagon::Fixups(Hexagon::fixup_Hexagon_TPREL_LO16); + + unsigned FixupKind = fixup_Invalid; const MCSymbolRefExpr *MCSRE = static_cast<const MCSymbolRefExpr *>(ME); const MCInstrDesc &MCID = HexagonMCInstrInfo::getDesc(MCII, MI); - unsigned bits = HexagonMCInstrInfo::getExtentBits(MCII, MI) - - HexagonMCInstrInfo::getExtentAlignment(MCII, MI); - const MCSymbolRefExpr::VariantKind kind = MCSRE->getKind(); - - DEBUG(dbgs() << "----------------------------------------\n"); - DEBUG(dbgs() << "Opcode Name: " << HexagonMCInstrInfo::getName(MCII, MI) - << "\n"); - DEBUG(dbgs() << "Opcode: " << MCID.getOpcode() << "\n"); - DEBUG(dbgs() << "Relocation bits: " << bits << "\n"); - DEBUG(dbgs() << "Addend: " << *Addend << "\n"); - DEBUG(dbgs() << "----------------------------------------\n"); - - switch (bits) { - default: - raise_relocation_error(bits, kind); - case 32: - switch (kind) { - case MCSymbolRefExpr::VK_DTPREL: - FixupKind = *Extended ? 
Hexagon::fixup_Hexagon_DTPREL_32_6_X - : Hexagon::fixup_Hexagon_DTPREL_32; - break; - case MCSymbolRefExpr::VK_GOT: - FixupKind = *Extended ? Hexagon::fixup_Hexagon_GOT_32_6_X - : Hexagon::fixup_Hexagon_GOT_32; - break; - case MCSymbolRefExpr::VK_GOTREL: - FixupKind = *Extended ? Hexagon::fixup_Hexagon_GOTREL_32_6_X - : Hexagon::fixup_Hexagon_GOTREL_32; - break; - case MCSymbolRefExpr::VK_Hexagon_GD_GOT: - FixupKind = *Extended ? Hexagon::fixup_Hexagon_GD_GOT_32_6_X - : Hexagon::fixup_Hexagon_GD_GOT_32; - break; - case MCSymbolRefExpr::VK_Hexagon_IE: - FixupKind = *Extended ? Hexagon::fixup_Hexagon_IE_32_6_X - : Hexagon::fixup_Hexagon_IE_32; - break; - case MCSymbolRefExpr::VK_Hexagon_IE_GOT: - FixupKind = *Extended ? Hexagon::fixup_Hexagon_IE_GOT_32_6_X - : Hexagon::fixup_Hexagon_IE_GOT_32; - break; - case MCSymbolRefExpr::VK_Hexagon_LD_GOT: - FixupKind = *Extended ? Hexagon::fixup_Hexagon_LD_GOT_32_6_X - : Hexagon::fixup_Hexagon_LD_GOT_32; - break; - case MCSymbolRefExpr::VK_Hexagon_PCREL: - FixupKind = Hexagon::fixup_Hexagon_32_PCREL; - break; - case MCSymbolRefExpr::VK_None: - FixupKind = - *Extended ? Hexagon::fixup_Hexagon_32_6_X : Hexagon::fixup_Hexagon_32; - break; - case MCSymbolRefExpr::VK_TPREL: - FixupKind = *Extended ? Hexagon::fixup_Hexagon_TPREL_32_6_X - : Hexagon::fixup_Hexagon_TPREL_32; - break; - default: - raise_relocation_error(bits, kind); - } - break; - - case 22: - switch (kind) { - case MCSymbolRefExpr::VK_Hexagon_GD_PLT: - FixupKind = *Extended ? Hexagon::fixup_Hexagon_GD_PLT_B22_PCREL_X - : Hexagon::fixup_Hexagon_GD_PLT_B22_PCREL; - break; - case MCSymbolRefExpr::VK_Hexagon_LD_PLT: - FixupKind = *Extended ? Hexagon::fixup_Hexagon_LD_PLT_B22_PCREL_X - : Hexagon::fixup_Hexagon_LD_PLT_B22_PCREL; - break; - case MCSymbolRefExpr::VK_None: - FixupKind = *Extended ? 
Hexagon::fixup_Hexagon_B22_PCREL_X - : Hexagon::fixup_Hexagon_B22_PCREL; - break; - case MCSymbolRefExpr::VK_PLT: - FixupKind = Hexagon::fixup_Hexagon_PLT_B22_PCREL; - break; - default: - raise_relocation_error(bits, kind); - } - break; - - case 16: - if (*Extended) { - switch (kind) { - case MCSymbolRefExpr::VK_DTPREL: - FixupKind = Hexagon::fixup_Hexagon_DTPREL_16_X; - break; - case MCSymbolRefExpr::VK_GOT: - FixupKind = Hexagon::fixup_Hexagon_GOT_16_X; - break; - case MCSymbolRefExpr::VK_GOTREL: - FixupKind = Hexagon::fixup_Hexagon_GOTREL_16_X; - break; - case MCSymbolRefExpr::VK_Hexagon_GD_GOT: - FixupKind = Hexagon::fixup_Hexagon_GD_GOT_16_X; - break; - case MCSymbolRefExpr::VK_Hexagon_IE: - FixupKind = Hexagon::fixup_Hexagon_IE_16_X; - break; - case MCSymbolRefExpr::VK_Hexagon_IE_GOT: - FixupKind = Hexagon::fixup_Hexagon_IE_GOT_16_X; - break; - case MCSymbolRefExpr::VK_Hexagon_LD_GOT: - FixupKind = Hexagon::fixup_Hexagon_LD_GOT_16_X; - break; - case MCSymbolRefExpr::VK_None: - FixupKind = Hexagon::fixup_Hexagon_16_X; - break; - case MCSymbolRefExpr::VK_TPREL: - FixupKind = Hexagon::fixup_Hexagon_TPREL_16_X; - break; - default: - raise_relocation_error(bits, kind); - } - } else - switch (kind) { - case MCSymbolRefExpr::VK_None: - if (HexagonMCInstrInfo::s27_2_reloc(*MO.getExpr())) - FixupKind = Hexagon::fixup_Hexagon_27_REG; - else - if (MCID.mayStore() || MCID.mayLoad()) { - for (const MCPhysReg *ImpUses = MCID.getImplicitUses(); *ImpUses; - ++ImpUses) { - if (*ImpUses != Hexagon::GP) - continue; - switch (HexagonMCInstrInfo::getMemAccessSize(MCII, MI)) { - case 1: - FixupKind = fixup_Hexagon_GPREL16_0; - break; - case 2: - FixupKind = fixup_Hexagon_GPREL16_1; - break; - case 4: - FixupKind = fixup_Hexagon_GPREL16_2; - break; - case 8: - FixupKind = fixup_Hexagon_GPREL16_3; - break; - default: - raise_relocation_error(bits, kind); - } - } - } else - raise_relocation_error(bits, kind); - break; - case MCSymbolRefExpr::VK_DTPREL: - FixupKind = Hexagon::fixup_Hexagon_DTPREL_16; - break; - case MCSymbolRefExpr::VK_GOTREL: - if (MCID.getOpcode() == Hexagon::HI) - FixupKind = Hexagon::fixup_Hexagon_GOTREL_HI16; - else - FixupKind = Hexagon::fixup_Hexagon_GOTREL_LO16; - break; - case MCSymbolRefExpr::VK_Hexagon_GD_GOT: - FixupKind = Hexagon::fixup_Hexagon_GD_GOT_16; - break; - case MCSymbolRefExpr::VK_Hexagon_GPREL: - FixupKind = Hexagon::fixup_Hexagon_GPREL16_0; - break; - case MCSymbolRefExpr::VK_Hexagon_HI16: - FixupKind = Hexagon::fixup_Hexagon_HI16; - break; - case MCSymbolRefExpr::VK_Hexagon_IE_GOT: - FixupKind = Hexagon::fixup_Hexagon_IE_GOT_16; - break; - case MCSymbolRefExpr::VK_Hexagon_LD_GOT: - FixupKind = Hexagon::fixup_Hexagon_LD_GOT_16; - break; - case MCSymbolRefExpr::VK_Hexagon_LO16: - FixupKind = Hexagon::fixup_Hexagon_LO16; - break; - case MCSymbolRefExpr::VK_TPREL: - FixupKind = Hexagon::fixup_Hexagon_TPREL_16; - break; - default: - raise_relocation_error(bits, kind); + unsigned FixupWidth = HexagonMCInstrInfo::getExtentBits(MCII, MI) - + HexagonMCInstrInfo::getExtentAlignment(MCII, MI); + MCSymbolRefExpr::VariantKind VarKind = MCSRE->getKind(); + unsigned Opc = MCID.getOpcode(); + unsigned IType = HexagonMCInstrInfo::getType(MCII, MI); + + LLVM_DEBUG(dbgs() << "----------------------------------------\n" + << "Opcode Name: " << HexagonMCInstrInfo::getName(MCII, MI) + << "\nOpcode: " << Opc << "\nRelocation bits: " + << FixupWidth << "\nAddend: " << State.Addend + << "\nVariant: " << unsigned(VarKind) + << "\n----------------------------------------\n"); + + // Pick the 
applicable fixup kind for the symbol. + // Handle special cases first, the rest will be looked up in the tables. + + if (FixupWidth == 16 && !State.Extended) { + if (VarKind == MCSymbolRefExpr::VK_None) { + if (HexagonMCInstrInfo::s27_2_reloc(*MO.getExpr())) { + // A2_iconst. + FixupKind = Hexagon::fixup_Hexagon_27_REG; + } else { + // Look for GP-relative fixups. + unsigned Shift = HexagonMCInstrInfo::getExtentAlignment(MCII, MI); + static const Hexagon::Fixups GPRelFixups[] = { + Hexagon::fixup_Hexagon_GPREL16_0, Hexagon::fixup_Hexagon_GPREL16_1, + Hexagon::fixup_Hexagon_GPREL16_2, Hexagon::fixup_Hexagon_GPREL16_3 + }; + assert(Shift < array_lengthof(GPRelFixups)); + auto UsesGP = [] (const MCInstrDesc &D) { + for (const MCPhysReg *U = D.getImplicitUses(); U && *U; ++U) + if (*U == Hexagon::GP) + return true; + return false; + }; + if (UsesGP(MCID)) + FixupKind = GPRelFixups[Shift]; } - break; - - case 15: - switch (kind) { - case MCSymbolRefExpr::VK_None: - FixupKind = *Extended ? Hexagon::fixup_Hexagon_B15_PCREL_X - : Hexagon::fixup_Hexagon_B15_PCREL; - break; - default: - raise_relocation_error(bits, kind); + } else if (VarKind == MCSymbolRefExpr::VK_GOTREL) { + // Select between LO/HI. + if (Opc == Hexagon::LO) + FixupKind = Hexagon::fixup_Hexagon_GOTREL_LO16; + else if (Opc == Hexagon::HI) + FixupKind = Hexagon::fixup_Hexagon_GOTREL_HI16; } - break; - - case 13: - switch (kind) { - case MCSymbolRefExpr::VK_None: - FixupKind = Hexagon::fixup_Hexagon_B13_PCREL; - break; - default: - raise_relocation_error(bits, kind); - } - break; - - case 12: - if (*Extended) - switch (kind) { - // There isn't a GOT_12_X, both 11_X and 16_X resolve to 6/26 - case MCSymbolRefExpr::VK_GOT: - FixupKind = Hexagon::fixup_Hexagon_GOT_16_X; - break; - case MCSymbolRefExpr::VK_GOTREL: - FixupKind = Hexagon::fixup_Hexagon_GOTREL_16_X; - break; - case MCSymbolRefExpr::VK_None: - FixupKind = Hexagon::fixup_Hexagon_12_X; - break; - default: - raise_relocation_error(bits, kind); - } - else - raise_relocation_error(bits, kind); - break; - - case 11: - if (*Extended) - switch (kind) { - case MCSymbolRefExpr::VK_DTPREL: - FixupKind = Hexagon::fixup_Hexagon_DTPREL_11_X; - break; - case MCSymbolRefExpr::VK_GOT: - FixupKind = Hexagon::fixup_Hexagon_GOT_11_X; - break; - case MCSymbolRefExpr::VK_GOTREL: - FixupKind = Hexagon::fixup_Hexagon_GOTREL_11_X; - break; - case MCSymbolRefExpr::VK_Hexagon_GD_GOT: - FixupKind = Hexagon::fixup_Hexagon_GD_GOT_11_X; - break; - case MCSymbolRefExpr::VK_Hexagon_IE_GOT: - FixupKind = Hexagon::fixup_Hexagon_IE_GOT_11_X; + } else { + bool BranchOrCR = MCID.isBranch() || IType == HexagonII::TypeCR; + switch (FixupWidth) { + case 9: + if (BranchOrCR) + FixupKind = State.Extended ? Hexagon::fixup_Hexagon_B9_PCREL_X + : Hexagon::fixup_Hexagon_B9_PCREL; + break; + case 8: + case 7: + if (State.Extended && VarKind == MCSymbolRefExpr::VK_GOT) + FixupKind = HexagonMCInstrInfo::isExtentSigned(MCII, MI) + ? Hexagon::fixup_Hexagon_GOT_16_X + : Hexagon::fixup_Hexagon_GOT_11_X; + else if (FixupWidth == 7 && BranchOrCR) + FixupKind = State.Extended ? 
Hexagon::fixup_Hexagon_B7_PCREL_X + : Hexagon::fixup_Hexagon_B7_PCREL; + break; + case 0: + FixupKind = getFixupNoBits(MCII, MI, MO, VarKind); break; - case MCSymbolRefExpr::VK_Hexagon_LD_GOT: - FixupKind = Hexagon::fixup_Hexagon_LD_GOT_11_X; - break; - case MCSymbolRefExpr::VK_Hexagon_GD_PLT: - FixupKind = Hexagon::fixup_Hexagon_GD_PLT_B22_PCREL_X; - break; - case MCSymbolRefExpr::VK_Hexagon_LD_PLT: - FixupKind = Hexagon::fixup_Hexagon_LD_PLT_B22_PCREL_X; - break; - case MCSymbolRefExpr::VK_None: - FixupKind = Hexagon::fixup_Hexagon_11_X; - break; - case MCSymbolRefExpr::VK_TPREL: - FixupKind = Hexagon::fixup_Hexagon_TPREL_11_X; - break; - default: - raise_relocation_error(bits, kind); - } - else { - switch (kind) { - case MCSymbolRefExpr::VK_TPREL: - FixupKind = Hexagon::fixup_Hexagon_TPREL_11_X; - break; - default: - raise_relocation_error(bits, kind); - } } - break; + } - case 10: - if (*Extended) { - switch (kind) { - case MCSymbolRefExpr::VK_None: - FixupKind = Hexagon::fixup_Hexagon_10_X; - break; - default: - raise_relocation_error(bits, kind); - } - } else - raise_relocation_error(bits, kind); - break; - - case 9: - if (MCID.isBranch() || - (HexagonMCInstrInfo::getType(MCII, MI) == HexagonII::TypeCR)) - FixupKind = *Extended ? Hexagon::fixup_Hexagon_B9_PCREL_X - : Hexagon::fixup_Hexagon_B9_PCREL; - else if (*Extended) - FixupKind = Hexagon::fixup_Hexagon_9_X; - else - raise_relocation_error(bits, kind); - break; - - case 8: - if (*Extended) - FixupKind = Hexagon::fixup_Hexagon_8_X; - else - raise_relocation_error(bits, kind); - break; - - case 7: - if (MCID.isBranch() || - (HexagonMCInstrInfo::getType(MCII, MI) == HexagonII::TypeCR)) - FixupKind = *Extended ? Hexagon::fixup_Hexagon_B7_PCREL_X - : Hexagon::fixup_Hexagon_B7_PCREL; - else if (*Extended) - FixupKind = Hexagon::fixup_Hexagon_7_X; - else - raise_relocation_error(bits, kind); - break; - - case 6: - if (*Extended) { - switch (kind) { - case MCSymbolRefExpr::VK_DTPREL: - FixupKind = Hexagon::fixup_Hexagon_DTPREL_16_X; - break; - // This is part of an extender, GOT_11 is a - // Word32_U6 unsigned/truncated reloc. - case MCSymbolRefExpr::VK_GOT: - FixupKind = Hexagon::fixup_Hexagon_GOT_11_X; - break; - case MCSymbolRefExpr::VK_GOTREL: - FixupKind = Hexagon::fixup_Hexagon_GOTREL_11_X; - break; - case MCSymbolRefExpr::VK_Hexagon_PCREL: - FixupKind = Hexagon::fixup_Hexagon_6_PCREL_X; - break; - case MCSymbolRefExpr::VK_TPREL: - FixupKind = Hexagon::fixup_Hexagon_TPREL_16_X; - break; - case MCSymbolRefExpr::VK_None: - FixupKind = Hexagon::fixup_Hexagon_6_X; - break; - default: - raise_relocation_error(bits, kind); - } - } else - raise_relocation_error(bits, kind); - break; + if (FixupKind == fixup_Invalid) { + const auto &FixupTable = State.Extended ? ExtFixups : StdFixups; - case 0: - FixupKind = getFixupNoBits(MCII, MI, MO, kind); - break; + auto FindVK = FixupTable.find(VarKind); + if (FindVK != FixupTable.end()) + FixupKind = FindVK->second[FixupWidth]; } - MCExpr const *FixupExpression = - (*Addend > 0 && isPCRel(FixupKind)) - ? 
MCBinaryExpr::createAdd(MO.getExpr(), - MCConstantExpr::create(*Addend, MCT), MCT) - : MO.getExpr(); + if (FixupKind == fixup_Invalid) + raise_relocation_error(FixupWidth, VarKind); - MCFixup fixup = MCFixup::create(*Addend, FixupExpression, + const MCExpr *FixupExpr = MO.getExpr(); + if (State.Addend != 0 && isPCRel(FixupKind)) { + const MCExpr *C = MCConstantExpr::create(State.Addend, MCT); + FixupExpr = MCBinaryExpr::createAdd(FixupExpr, C, MCT); + } + + MCFixup Fixup = MCFixup::create(State.Addend, FixupExpr, MCFixupKind(FixupKind), MI.getLoc()); - Fixups.push_back(fixup); + Fixups.push_back(Fixup); // All of the information is in the fixup. return 0; } @@ -739,55 +732,55 @@ HexagonMCCodeEmitter::getMachineOpValue(MCInst const &MI, MCOperand const &MO, #endif if (HexagonMCInstrInfo::isNewValue(MCII, MI) && - &MO == &MI.getOperand(HexagonMCInstrInfo::getNewValueOp(MCII, MI))) { + &MO == &HexagonMCInstrInfo::getNewValueOperand(MCII, MI)) { // Calculate the new value distance to the associated producer - MCOperand const &MCO = - MI.getOperand(HexagonMCInstrInfo::getNewValueOp(MCII, MI)); unsigned SOffset = 0; unsigned VOffset = 0; - unsigned Register = MCO.getReg(); - unsigned Register1; - unsigned Register2; - auto Instructions = HexagonMCInstrInfo::bundleInstructions(**CurrentBundle); - auto i = Instructions.begin() + *CurrentIndex - 1; - for (;; --i) { - assert(i != Instructions.begin() - 1 && "Couldn't find producer"); - MCInst const &Inst = *i->getInst(); + unsigned UseReg = MO.getReg(); + unsigned DefReg1, DefReg2; + + auto Instrs = HexagonMCInstrInfo::bundleInstructions(*State.Bundle); + const MCOperand *I = Instrs.begin() + State.Index - 1; + + for (;; --I) { + assert(I != Instrs.begin() - 1 && "Couldn't find producer"); + MCInst const &Inst = *I->getInst(); if (HexagonMCInstrInfo::isImmext(Inst)) continue; + + DefReg1 = DefReg2 = 0; ++SOffset; - if (HexagonMCInstrInfo::isVector(MCII, Inst)) - // Vector instructions don't count scalars + if (HexagonMCInstrInfo::isVector(MCII, Inst)) { + // Vector instructions don't count scalars. ++VOffset; - Register1 = - HexagonMCInstrInfo::hasNewValue(MCII, Inst) - ? HexagonMCInstrInfo::getNewValueOperand(MCII, Inst).getReg() - : static_cast<unsigned>(Hexagon::NoRegister); - Register2 = - HexagonMCInstrInfo::hasNewValue2(MCII, Inst) - ? HexagonMCInstrInfo::getNewValueOperand2(MCII, Inst).getReg() - : static_cast<unsigned>(Hexagon::NoRegister); - if (!RegisterMatches(Register, Register1, Register2)) + } + if (HexagonMCInstrInfo::hasNewValue(MCII, Inst)) + DefReg1 = HexagonMCInstrInfo::getNewValueOperand(MCII, Inst).getReg(); + if (HexagonMCInstrInfo::hasNewValue2(MCII, Inst)) + DefReg2 = HexagonMCInstrInfo::getNewValueOperand2(MCII, Inst).getReg(); + if (!RegisterMatches(UseReg, DefReg1, DefReg2)) { // This isn't the register we're looking for continue; - if (!HexagonMCInstrInfo::isPredicated(MCII, Inst)) + } + if (!HexagonMCInstrInfo::isPredicated(MCII, Inst)) { // Producer is unpredicated break; + } assert(HexagonMCInstrInfo::isPredicated(MCII, MI) && - "Unpredicated consumer depending on predicated producer"); + "Unpredicated consumer depending on predicated producer"); if (HexagonMCInstrInfo::isPredicatedTrue(MCII, Inst) == - HexagonMCInstrInfo::isPredicatedTrue(MCII, MI)) - // Producer predicate sense matched ours + HexagonMCInstrInfo::isPredicatedTrue(MCII, MI)) + // Producer predicate sense matched ours. break; } // Hexagon PRM 10.11 Construct Nt from distance - unsigned Offset = - HexagonMCInstrInfo::isVector(MCII, MI) ? 
VOffset : SOffset; + unsigned Offset = HexagonMCInstrInfo::isVector(MCII, MI) ? VOffset + : SOffset; Offset <<= 1; - Offset |= - HexagonMCInstrInfo::SubregisterBit(Register, Register1, Register2); + Offset |= HexagonMCInstrInfo::SubregisterBit(UseReg, DefReg1, DefReg2); return Offset; } + assert(!MO.isImm()); if (MO.isReg()) { unsigned Reg = MO.getReg(); diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h index 14cabf1534a5..fcea63db23a3 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// /// /// \file -/// \brief Definition for classes that emit Hexagon machine code from MCInsts +/// Definition for classes that emit Hexagon machine code from MCInsts /// //===----------------------------------------------------------------------===// @@ -35,25 +35,20 @@ class raw_ostream; class HexagonMCCodeEmitter : public MCCodeEmitter { MCContext &MCT; MCInstrInfo const &MCII; - std::unique_ptr<unsigned> Addend; - std::unique_ptr<bool> Extended; - std::unique_ptr<MCInst const *> CurrentBundle; - std::unique_ptr<size_t> CurrentIndex; - // helper routine for getMachineOpValue() - unsigned getExprOpValue(const MCInst &MI, const MCOperand &MO, - const MCExpr *ME, SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; - - Hexagon::Fixups getFixupNoBits(MCInstrInfo const &MCII, const MCInst &MI, - const MCOperand &MO, - const MCSymbolRefExpr::VariantKind kind) const; + // A mutable state of the emitter when encoding bundles and duplexes. + struct EmitterState { + unsigned Addend = 0; + bool Extended = false; + bool SubInst1 = false; + const MCInst *Bundle = nullptr; + size_t Index = 0; + }; + mutable EmitterState State; public: - HexagonMCCodeEmitter(MCInstrInfo const &aMII, MCContext &aMCT); - - // Return parse bits for instruction `MCI' inside bundle `MCB' - uint32_t parseBits(size_t Last, MCInst const &MCB, MCInst const &MCI) const; + HexagonMCCodeEmitter(MCInstrInfo const &MII, MCContext &MCT) + : MCT(MCT), MCII(MII) {} void encodeInstruction(MCInst const &MI, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups, @@ -64,18 +59,30 @@ public: const MCSubtargetInfo &STI, uint32_t Parse) const; - // \brief TableGen'erated function for getting the + // TableGen'erated function for getting the // binary encoding for an instruction. uint64_t getBinaryCodeForInstr(MCInst const &MI, SmallVectorImpl<MCFixup> &Fixups, MCSubtargetInfo const &STI) const; - /// \brief Return binary encoding of operand. + /// Return binary encoding of operand. 
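  /// (Editorial note, not part of the patch: for a new-value operand this
  /// returns the producer distance encoded per Hexagon PRM 10.11, i.e. the
  /// number of instructions back to the in-packet producer (extenders
  /// excluded, and only vector producers counted for vector consumers),
  /// shifted left by one and OR'ed with the subregister bit; e.g. a producer
  /// one slot back with a subregister bit of 0 encodes as (1 << 1) | 0 = 2.)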
unsigned getMachineOpValue(MCInst const &MI, MCOperand const &MO, SmallVectorImpl<MCFixup> &Fixups, MCSubtargetInfo const &STI) const; private: + // helper routine for getMachineOpValue() + unsigned getExprOpValue(const MCInst &MI, const MCOperand &MO, + const MCExpr *ME, SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; + + Hexagon::Fixups getFixupNoBits(MCInstrInfo const &MCII, const MCInst &MI, + const MCOperand &MO, + const MCSymbolRefExpr::VariantKind Kind) const; + + // Return parse bits for instruction `MCI' inside bundle `MCB' + uint32_t parseBits(size_t Last, MCInst const &MCB, MCInst const &MCI) const; + uint64_t computeAvailableFeatures(const FeatureBitset &FB) const; void verifyInstructionPredicates(const MCInst &MI, uint64_t AvailableFeatures) const; diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp index 127c97e342dc..3eaef9ac7410 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp @@ -205,7 +205,7 @@ static MCInst *getCompoundInsn(MCContext &Context, MCInst const &L, switch (L.getOpcode()) { default: - DEBUG(dbgs() << "Possible compound ignored\n"); + LLVM_DEBUG(dbgs() << "Possible compound ignored\n"); return CompoundInsn; case Hexagon::A2_tfrsi: @@ -233,7 +233,7 @@ static MCInst *getCompoundInsn(MCContext &Context, MCInst const &L, break; case Hexagon::C2_cmpeq: - DEBUG(dbgs() << "CX: C2_cmpeq\n"); + LLVM_DEBUG(dbgs() << "CX: C2_cmpeq\n"); Rs = L.getOperand(1); Rt = L.getOperand(2); @@ -246,7 +246,7 @@ static MCInst *getCompoundInsn(MCContext &Context, MCInst const &L, break; case Hexagon::C2_cmpgt: - DEBUG(dbgs() << "CX: C2_cmpgt\n"); + LLVM_DEBUG(dbgs() << "CX: C2_cmpgt\n"); Rs = L.getOperand(1); Rt = L.getOperand(2); @@ -259,7 +259,7 @@ static MCInst *getCompoundInsn(MCContext &Context, MCInst const &L, break; case Hexagon::C2_cmpgtu: - DEBUG(dbgs() << "CX: C2_cmpgtu\n"); + LLVM_DEBUG(dbgs() << "CX: C2_cmpgtu\n"); Rs = L.getOperand(1); Rt = L.getOperand(2); @@ -272,7 +272,7 @@ static MCInst *getCompoundInsn(MCContext &Context, MCInst const &L, break; case Hexagon::C2_cmpeqi: - DEBUG(dbgs() << "CX: C2_cmpeqi\n"); + LLVM_DEBUG(dbgs() << "CX: C2_cmpeqi\n"); Success = L.getOperand(2).getExpr()->evaluateAsAbsolute(Value); (void)Success; assert(Success); @@ -290,7 +290,7 @@ static MCInst *getCompoundInsn(MCContext &Context, MCInst const &L, break; case Hexagon::C2_cmpgti: - DEBUG(dbgs() << "CX: C2_cmpgti\n"); + LLVM_DEBUG(dbgs() << "CX: C2_cmpgti\n"); Success = L.getOperand(2).getExpr()->evaluateAsAbsolute(Value); (void)Success; assert(Success); @@ -308,7 +308,7 @@ static MCInst *getCompoundInsn(MCContext &Context, MCInst const &L, break; case Hexagon::C2_cmpgtui: - DEBUG(dbgs() << "CX: C2_cmpgtui\n"); + LLVM_DEBUG(dbgs() << "CX: C2_cmpgtui\n"); Rs = L.getOperand(1); compoundOpcode = cmpgtuiBitOpcode[getCompoundOp(R)]; CompoundInsn = new (Context) MCInst; @@ -319,7 +319,7 @@ static MCInst *getCompoundInsn(MCContext &Context, MCInst const &L, break; case Hexagon::S2_tstbit_i: - DEBUG(dbgs() << "CX: S2_tstbit_i\n"); + LLVM_DEBUG(dbgs() << "CX: S2_tstbit_i\n"); Rs = L.getOperand(1); compoundOpcode = tstBitOpcode[getCompoundOp(R)]; CompoundInsn = new (Context) MCInst; @@ -372,14 +372,14 @@ static bool lookForCompound(MCInstrInfo const &MCII, MCContext &Context, BExtended = true; continue; } - DEBUG(dbgs() << "J,B: " << JumpInst->getOpcode() << "," - << Inst->getOpcode() << "\n"); + LLVM_DEBUG(dbgs() << "J,B: " << 
JumpInst->getOpcode() << "," + << Inst->getOpcode() << "\n"); if (isOrderedCompoundPair(*Inst, BExtended, *JumpInst, JExtended)) { MCInst *CompoundInsn = getCompoundInsn(Context, *Inst, *JumpInst); if (CompoundInsn) { - DEBUG(dbgs() << "B: " << Inst->getOpcode() << "," - << JumpInst->getOpcode() << " Compounds to " - << CompoundInsn->getOpcode() << "\n"); + LLVM_DEBUG(dbgs() << "B: " << Inst->getOpcode() << "," + << JumpInst->getOpcode() << " Compounds to " + << CompoundInsn->getOpcode() << "\n"); J->setInst(CompoundInsn); MCI.erase(B); return true; @@ -422,7 +422,7 @@ void HexagonMCInstrInfo::tryCompound(MCInstrInfo const &MCII, MCSubtargetInfo co if (StartedValid && !llvm::HexagonMCShuffle(Context, false, MCII, STI, MCI)) { - DEBUG(dbgs() << "Found ERROR\n"); + LLVM_DEBUG(dbgs() << "Found ERROR\n"); MCI = OriginalBundle; } } diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp index 4c18af60efd1..b208a3668124 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp @@ -263,12 +263,10 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) { break; case Hexagon::L4_return: - case Hexagon::L2_deallocframe: - return HexagonII::HSIG_L2; - case Hexagon::EH_RETURN_JMPR: + case Hexagon::EH_RETURN_JMPR: case Hexagon::J2_jumpr: case Hexagon::PS_jmpret: // jumpr r31 @@ -789,12 +787,12 @@ MCInst HexagonMCInstrInfo::deriveSubInst(MCInst const &Inst) { addOps(Result, Inst, 2); break; // 1,3 SUBInst $Rdd = combine(#2, #$u2) } + break; case Hexagon::A4_combineir: Result.setOpcode(Hexagon::SA1_combinezr); addOps(Result, Inst, 0); addOps(Result, Inst, 2); break; // 1,3 SUBInst $Rdd = combine(#0, $Rs) - case Hexagon::A4_combineri: Result.setOpcode(Hexagon::SA1_combinerz); addOps(Result, Inst, 0); @@ -901,6 +899,7 @@ MCInst HexagonMCInstrInfo::deriveSubInst(MCInst const &Inst) { addOps(Result, Inst, 1); break; // 2 1,2 SUBInst memb($Rs + #$u4_0)=#1 } + break; case Hexagon::S2_storerb_io: Result.setOpcode(Hexagon::SS1_storeb_io); addOps(Result, Inst, 0); @@ -937,6 +936,7 @@ MCInst HexagonMCInstrInfo::deriveSubInst(MCInst const &Inst) { addOps(Result, Inst, 2); break; // 1 2,3 SUBInst memw(r29 + #$u5_2) = $Rt } + break; case Hexagon::S2_storeri_io: if (Inst.getOperand(0).getReg() == Hexagon::R29) { Result.setOpcode(Hexagon::SS2_storew_sp); @@ -1045,8 +1045,8 @@ HexagonMCInstrInfo::getDuplexPossibilties(MCInstrInfo const &MCII, bool bisReversable = true; if (isStoreInst(MCB.getOperand(j).getInst()->getOpcode()) && isStoreInst(MCB.getOperand(k).getInst()->getOpcode())) { - DEBUG(dbgs() << "skip out of order write pair: " << k << "," << j - << "\n"); + LLVM_DEBUG(dbgs() << "skip out of order write pair: " << k << "," << j + << "\n"); bisReversable = false; } if (HexagonMCInstrInfo::isMemReorderDisabled(MCB)) // }:mem_noshuf @@ -1066,14 +1066,14 @@ HexagonMCInstrInfo::getDuplexPossibilties(MCInstrInfo const &MCII, // Save off pairs for duplex checking. 
duplexToTry.push_back(DuplexCandidate(j, k, iClass)); - DEBUG(dbgs() << "adding pair: " << j << "," << k << ":" - << MCB.getOperand(j).getInst()->getOpcode() << "," - << MCB.getOperand(k).getInst()->getOpcode() << "\n"); + LLVM_DEBUG(dbgs() << "adding pair: " << j << "," << k << ":" + << MCB.getOperand(j).getInst()->getOpcode() << "," + << MCB.getOperand(k).getInst()->getOpcode() << "\n"); continue; } else { - DEBUG(dbgs() << "skipping pair: " << j << "," << k << ":" - << MCB.getOperand(j).getInst()->getOpcode() << "," - << MCB.getOperand(k).getInst()->getOpcode() << "\n"); + LLVM_DEBUG(dbgs() << "skipping pair: " << j << "," << k << ":" + << MCB.getOperand(j).getInst()->getOpcode() << "," + << MCB.getOperand(k).getInst()->getOpcode() << "\n"); } // Try reverse. @@ -1091,13 +1091,15 @@ HexagonMCInstrInfo::getDuplexPossibilties(MCInstrInfo const &MCII, // Save off pairs for duplex checking. duplexToTry.push_back(DuplexCandidate(k, j, iClass)); - DEBUG(dbgs() << "adding pair:" << k << "," << j << ":" - << MCB.getOperand(j).getInst()->getOpcode() << "," - << MCB.getOperand(k).getInst()->getOpcode() << "\n"); + LLVM_DEBUG(dbgs() + << "adding pair:" << k << "," << j << ":" + << MCB.getOperand(j).getInst()->getOpcode() << "," + << MCB.getOperand(k).getInst()->getOpcode() << "\n"); } else { - DEBUG(dbgs() << "skipping pair: " << k << "," << j << ":" - << MCB.getOperand(j).getInst()->getOpcode() << "," - << MCB.getOperand(k).getInst()->getOpcode() << "\n"); + LLVM_DEBUG(dbgs() + << "skipping pair: " << k << "," << j << ":" + << MCB.getOperand(j).getInst()->getOpcode() << "," + << MCB.getOperand(k).getInst()->getOpcode() << "\n"); } } } diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp index 691e269cb91f..f304bc50530f 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp @@ -25,6 +25,7 @@ #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCObjectStreamer.h" +#include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCSection.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCStreamer.h" @@ -47,15 +48,15 @@ static cl::opt<unsigned> GPSize HexagonMCELFStreamer::HexagonMCELFStreamer( MCContext &Context, std::unique_ptr<MCAsmBackend> TAB, - raw_pwrite_stream &OS, std::unique_ptr<MCCodeEmitter> Emitter) - : MCELFStreamer(Context, std::move(TAB), OS, std::move(Emitter)), + std::unique_ptr<MCObjectWriter> OW, std::unique_ptr<MCCodeEmitter> Emitter) + : MCELFStreamer(Context, std::move(TAB), std::move(OW), std::move(Emitter)), MCII(createHexagonMCInstrInfo()) {} HexagonMCELFStreamer::HexagonMCELFStreamer( MCContext &Context, std::unique_ptr<MCAsmBackend> TAB, - raw_pwrite_stream &OS, std::unique_ptr<MCCodeEmitter> Emitter, + std::unique_ptr<MCObjectWriter> OW, std::unique_ptr<MCCodeEmitter> Emitter, MCAssembler *Assembler) - : MCELFStreamer(Context, std::move(TAB), OS, std::move(Emitter)), + : MCELFStreamer(Context, std::move(TAB), std::move(OW), std::move(Emitter)), MCII(createHexagonMCInstrInfo()) {} void HexagonMCELFStreamer::EmitInstruction(const MCInst &MCB, @@ -63,21 +64,6 @@ void HexagonMCELFStreamer::EmitInstruction(const MCInst &MCB, assert(MCB.getOpcode() == Hexagon::BUNDLE); assert(HexagonMCInstrInfo::bundleSize(MCB) <= HEXAGON_PACKET_SIZE); assert(HexagonMCInstrInfo::bundleSize(MCB) > 0); - bool Extended = false; - for (auto &I : HexagonMCInstrInfo::bundleInstructions(MCB)) { - MCInst *MCI = const_cast<MCInst 
*>(I.getInst()); - if (Extended) { - if (HexagonMCInstrInfo::isDuplex(*MCII, *MCI)) { - MCInst *SubInst = const_cast<MCInst *>(MCI->getOperand(1).getInst()); - HexagonMCInstrInfo::clampExtended(*MCII, getContext(), *SubInst); - } else { - HexagonMCInstrInfo::clampExtended(*MCII, getContext(), *MCI); - } - Extended = false; - } else { - Extended = HexagonMCInstrInfo::isImmext(*MCI); - } - } // At this point, MCB is a bundle // Iterate through the bundle and assign addends for the instructions @@ -124,7 +110,7 @@ void HexagonMCELFStreamer::HexagonMCEmitCommonSymbol(MCSymbol *Symbol, MCSectionSubPair P = getCurrentSection(); SwitchSection(&Section); - if (ELFSymbol->isUndefined(false)) { + if (ELFSymbol->isUndefined()) { EmitValueToAlignment(ByteAlignment, 0, 1, 0); EmitLabel(Symbol); EmitZeros(Size); @@ -166,9 +152,10 @@ void HexagonMCELFStreamer::HexagonMCEmitLocalCommonSymbol(MCSymbol *Symbol, namespace llvm { MCStreamer *createHexagonELFStreamer(Triple const &TT, MCContext &Context, std::unique_ptr<MCAsmBackend> MAB, - raw_pwrite_stream &OS, + std::unique_ptr<MCObjectWriter> OW, std::unique_ptr<MCCodeEmitter> CE) { - return new HexagonMCELFStreamer(Context, std::move(MAB), OS, std::move(CE)); + return new HexagonMCELFStreamer(Context, std::move(MAB), std::move(OW), + std::move(CE)); } } // end namespace llvm diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.h index c6fa0021d86b..c02bef8f06f7 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.h +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.h @@ -23,11 +23,11 @@ class HexagonMCELFStreamer : public MCELFStreamer { public: HexagonMCELFStreamer(MCContext &Context, std::unique_ptr<MCAsmBackend> TAB, - raw_pwrite_stream &OS, + std::unique_ptr<MCObjectWriter> OW, std::unique_ptr<MCCodeEmitter> Emitter); HexagonMCELFStreamer(MCContext &Context, std::unique_ptr<MCAsmBackend> TAB, - raw_pwrite_stream &OS, + std::unique_ptr<MCObjectWriter> OW, std::unique_ptr<MCCodeEmitter> Emitter, MCAssembler *Assembler); @@ -43,7 +43,7 @@ public: MCStreamer *createHexagonELFStreamer(Triple const &TT, MCContext &Context, std::unique_ptr<MCAsmBackend> MAB, - raw_pwrite_stream &OS, + std::unique_ptr<MCObjectWriter> OW, std::unique_ptr<MCCodeEmitter> CE); } // end namespace llvm diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp index 19308cd425e8..a11aa92ccbe1 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp @@ -158,23 +158,6 @@ bool HexagonMCInstrInfo::canonicalizePacket(MCInstrInfo const &MCII, return true; } -void HexagonMCInstrInfo::clampExtended(MCInstrInfo const &MCII, - MCContext &Context, MCInst &MCI) { - assert(HexagonMCInstrInfo::isExtendable(MCII, MCI) || - HexagonMCInstrInfo::isExtended(MCII, MCI)); - MCOperand &exOp = - MCI.getOperand(HexagonMCInstrInfo::getExtendableOp(MCII, MCI)); - // If the extended value is a constant, then use it for the extended and - // for the extender instructions, masking off the lower 6 bits and - // including the assumed bits. 
- int64_t Value; - if (exOp.getExpr()->evaluateAsAbsolute(Value)) { - unsigned Shift = HexagonMCInstrInfo::getExtentAlignment(MCII, MCI); - exOp.setExpr(HexagonMCExpr::create( - MCConstantExpr::create((Value & 0x3f) << Shift, Context), Context)); - } -} - MCInst HexagonMCInstrInfo::deriveExtender(MCInstrInfo const &MCII, MCInst const &Inst, MCOperand const &MO) { @@ -330,16 +313,19 @@ unsigned HexagonMCInstrInfo::getExtentBits(MCInstrInfo const &MCII, return ((F >> HexagonII::ExtentBitsPos) & HexagonII::ExtentBitsMask); } +bool HexagonMCInstrInfo::isExtentSigned(MCInstrInfo const &MCII, + MCInst const &MCI) { + const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + return (F >> HexagonII::ExtentSignedPos) & HexagonII::ExtentSignedMask; +} + /// Return the maximum value of an extendable operand. int HexagonMCInstrInfo::getMaxValue(MCInstrInfo const &MCII, MCInst const &MCI) { - const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; - bool S = (F >> HexagonII::ExtentSignedPos) & HexagonII::ExtentSignedMask; - assert(HexagonMCInstrInfo::isExtendable(MCII, MCI) || HexagonMCInstrInfo::isExtended(MCII, MCI)); - if (S) // if value is signed + if (HexagonMCInstrInfo::isExtentSigned(MCII, MCI)) // if value is signed return (1 << (HexagonMCInstrInfo::getExtentBits(MCII, MCI) - 1)) - 1; return (1 << HexagonMCInstrInfo::getExtentBits(MCII, MCI)) - 1; } @@ -347,13 +333,10 @@ int HexagonMCInstrInfo::getMaxValue(MCInstrInfo const &MCII, /// Return the minimum value of an extendable operand. int HexagonMCInstrInfo::getMinValue(MCInstrInfo const &MCII, MCInst const &MCI) { - const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; - bool S = (F >> HexagonII::ExtentSignedPos) & HexagonII::ExtentSignedMask; - assert(HexagonMCInstrInfo::isExtendable(MCII, MCI) || HexagonMCInstrInfo::isExtended(MCII, MCI)); - if (S) // if value is signed + if (HexagonMCInstrInfo::isExtentSigned(MCII, MCI)) // if value is signed return -(1 << (HexagonMCInstrInfo::getExtentBits(MCII, MCI) - 1)); return 0; } diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h index 28d89429266b..d040bea23b6d 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h @@ -103,9 +103,6 @@ MCInst deriveExtender(MCInstrInfo const &MCII, MCInst const &Inst, // Convert this instruction in to a duplex subinst MCInst deriveSubInst(MCInst const &Inst); -// Clamp off upper 26 bits of extendable operand for emission -void clampExtended(MCInstrInfo const &MCII, MCContext &Context, MCInst &MCI); - // Return the extender for instruction at Index or nullptr if none MCInst const *extenderForIndex(MCInst const &MCB, size_t Index); void extendIfNeeded(MCContext &Context, MCInstrInfo const &MCII, MCInst &MCB, @@ -143,6 +140,9 @@ unsigned getExtentAlignment(MCInstrInfo const &MCII, MCInst const &MCI); // Return the number of logical bits of the extendable operand unsigned getExtentBits(MCInstrInfo const &MCII, MCInst const &MCI); +// Check if the extendable operand is signed. +bool isExtentSigned(MCInstrInfo const &MCII, MCInst const &MCI); + // Return the max value that a constant extendable operand can have // without being extended. 
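// (Editorial note, not part of the patch: per the implementations in
// HexagonMCInstrInfo.cpp above, an N-bit extendable field has maximum
// (1 << (N - 1)) - 1 when signed and (1 << N) - 1 when unsigned, e.g. 127
// and 255 for N = 8; getMinValue correspondingly returns -(1 << (N - 1))
// or 0.)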
int getMaxValue(MCInstrInfo const &MCII, MCInst const &MCI); diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp index 7bd54fdfa3d5..4281144acaee 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.cpp @@ -38,7 +38,8 @@ void HexagonMCShuffler::init(MCInst &MCB) { // Copy the bundle for the shuffling. for (const auto &I : HexagonMCInstrInfo::bundleInstructions(MCB)) { MCInst &MI = *const_cast<MCInst *>(I.getInst()); - DEBUG(dbgs() << "Shuffling: " << MCII.getName(MI.getOpcode()) << '\n'); + LLVM_DEBUG(dbgs() << "Shuffling: " << MCII.getName(MI.getOpcode()) + << '\n'); assert(!HexagonMCInstrInfo::getDesc(MCII, MI).isPseudo()); if (!HexagonMCInstrInfo::isImmext(MI)) { @@ -98,7 +99,7 @@ bool HexagonMCShuffler::reshuffleTo(MCInst &MCB) { copyTo(MCB); return true; } - DEBUG(MCB.dump()); + LLVM_DEBUG(MCB.dump()); return false; } @@ -119,10 +120,10 @@ bool llvm::HexagonMCShuffle(MCContext &Context, bool Fatal, // * %d7 = IMPLICIT_DEF; flags: // After the IMPLICIT_DEFs were removed by the asm printer, the bundle // became empty. - DEBUG(dbgs() << "Skipping empty bundle"); + LLVM_DEBUG(dbgs() << "Skipping empty bundle"); return false; } else if (!HexagonMCInstrInfo::isBundle(MCB)) { - DEBUG(dbgs() << "Skipping stand-alone insn"); + LLVM_DEBUG(dbgs() << "Skipping stand-alone insn"); return false; } @@ -144,10 +145,10 @@ llvm::HexagonMCShuffle(MCContext &Context, MCInstrInfo const &MCII, // * %d7 = IMPLICIT_DEF; flags: // After the IMPLICIT_DEFs were removed by the asm printer, the bundle // became empty. - DEBUG(dbgs() << "Skipping empty bundle"); + LLVM_DEBUG(dbgs() << "Skipping empty bundle"); return false; } else if (!HexagonMCInstrInfo::isBundle(MCB)) { - DEBUG(dbgs() << "Skipping stand-alone insn"); + LLVM_DEBUG(dbgs() << "Skipping stand-alone insn"); return false; } diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp index 3fbe2197f937..b211a81524fb 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp @@ -29,6 +29,7 @@ #include "llvm/MC/MCELFStreamer.h" #include "llvm/MC/MCInstrAnalysis.h" #include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" @@ -147,7 +148,7 @@ public: auto PacketBundle = Contents.rsplit('\n'); auto HeadTail = PacketBundle.first.split('\n'); StringRef Separator = "\n"; - StringRef Indent = "\t\t"; + StringRef Indent = "\t"; OS << "\t{\n"; while (!HeadTail.first.empty()) { StringRef InstTxt; @@ -164,7 +165,7 @@ public: } if (HexagonMCInstrInfo::isMemReorderDisabled(Inst)) - OS << "\n\t}:mem_noshuf" << PacketBundle.second; + OS << "\n\t} :mem_noshuf" << PacketBundle.second; else OS << "\t}" << PacketBundle.second; } @@ -248,10 +249,10 @@ createMCAsmTargetStreamer(MCStreamer &S, formatted_raw_ostream &OS, static MCStreamer *createMCStreamer(Triple const &T, MCContext &Context, std::unique_ptr<MCAsmBackend> &&MAB, - raw_pwrite_stream &OS, + std::unique_ptr<MCObjectWriter> &&OW, std::unique_ptr<MCCodeEmitter> &&Emitter, bool RelaxAll) { - return createHexagonELFStreamer(T, Context, std::move(MAB), OS, + return createHexagonELFStreamer(T, Context, std::move(MAB), std::move(OW), std::move(Emitter)); } @@ -308,6 +309,7 @@ static bool isCPUValid(std::string CPU) { 
std::vector<std::string> table { + "generic", "hexagonv4", "hexagonv5", "hexagonv55", @@ -342,8 +344,7 @@ FeatureBitset Hexagon_MC::completeHVXFeatures(const FeatureBitset &S) { break; } bool UseHvx = false; - for (unsigned F : {ExtensionHVX, ExtensionHVX64B, ExtensionHVX128B, - ExtensionHVXDbl}) { + for (unsigned F : {ExtensionHVX, ExtensionHVX64B, ExtensionHVX128B}) { if (!FB.test(F)) continue; UseHvx = true; diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h index 05d17c368dcc..6cd1b3a4691f 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h @@ -27,7 +27,7 @@ class MCAsmBackend; class MCCodeEmitter; class MCContext; class MCInstrInfo; -class MCObjectWriter; +class MCObjectTargetWriter; class MCRegisterInfo; class MCSubtargetInfo; class MCTargetOptions; @@ -61,13 +61,12 @@ MCCodeEmitter *createHexagonMCCodeEmitter(const MCInstrInfo &MCII, MCContext &MCT); MCAsmBackend *createHexagonAsmBackend(const Target &T, + const MCSubtargetInfo &STI, const MCRegisterInfo &MRI, - const Triple &TT, StringRef CPU, const MCTargetOptions &Options); -std::unique_ptr<MCObjectWriter> -createHexagonELFObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI, - StringRef CPU); +std::unique_ptr<MCObjectTargetWriter> +createHexagonELFObjectWriter(uint8_t OSABI, StringRef CPU); unsigned HexagonGetLastSlot(); diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp index 7709a0f61624..59f3caa6af94 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp @@ -641,14 +641,14 @@ bool HexagonShuffler::shuffle() { } for (iterator ISJ = begin(); ISJ != end(); ++ISJ) - DEBUG(dbgs().write_hex(ISJ->Core.getUnits()); if (ISJ->CVI.isValid()) { + LLVM_DEBUG(dbgs().write_hex(ISJ->Core.getUnits()); if (ISJ->CVI.isValid()) { dbgs() << '/'; dbgs().write_hex(ISJ->CVI.getUnits()) << '|'; dbgs() << ISJ->CVI.getLanes(); } dbgs() << ':' << HexagonMCInstrInfo::getDesc(MCII, ISJ->getDesc()).getOpcode(); - dbgs() << '\n'); - DEBUG(dbgs() << '\n'); + dbgs() << '\n'); + LLVM_DEBUG(dbgs() << '\n'); return Ok; } diff --git a/lib/Target/Hexagon/RDFCopy.cpp b/lib/Target/Hexagon/RDFCopy.cpp index f8c766ac972c..4339fa2089d9 100644 --- a/lib/Target/Hexagon/RDFCopy.cpp +++ b/lib/Target/Hexagon/RDFCopy.cpp @@ -18,6 +18,7 @@ #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/MC/MCRegisterInfo.h" @@ -103,7 +104,7 @@ bool CopyPropagation::run() { if (trace()) { dbgs() << "Copies:\n"; - for (auto I : Copies) { + for (NodeId I : Copies) { dbgs() << "Instr: " << *DFG.addr<StmtNode*>(I).Addr->getCode(); dbgs() << " eq: {"; for (auto J : CopyMap[I]) @@ -130,7 +131,7 @@ bool CopyPropagation::run() { return 0; }; - for (auto C : Copies) { + for (NodeId C : Copies) { #ifndef NDEBUG if (HasLimit && CpCount >= CpLimit) break; diff --git a/lib/Target/Hexagon/RDFDeadCode.cpp b/lib/Target/Hexagon/RDFDeadCode.cpp index 240d7c355bc7..da339bfd3ff4 100644 --- a/lib/Target/Hexagon/RDFDeadCode.cpp +++ b/lib/Target/Hexagon/RDFDeadCode.cpp @@ -214,7 +214,7 @@ bool DeadCodeElimination::erase(const SetVector<NodeId> &Nodes) { return false; return A.Id < B.Id; }; - std::sort(DRNs.begin(), 
DRNs.end(), UsesFirst); + llvm::sort(DRNs.begin(), DRNs.end(), UsesFirst); if (trace()) dbgs() << "Removing dead ref nodes:\n"; diff --git a/lib/Target/Hexagon/RDFGraph.cpp b/lib/Target/Hexagon/RDFGraph.cpp index d1f6e5a4c8ef..3d1ec31dada7 100644 --- a/lib/Target/Hexagon/RDFGraph.cpp +++ b/lib/Target/Hexagon/RDFGraph.cpp @@ -893,7 +893,7 @@ void DataFlowGraph::build(unsigned Options) { NodeAddr<BlockNode*> BA = newBlock(Func, &B); BlockNodes.insert(std::make_pair(&B, BA)); for (MachineInstr &I : B) { - if (I.isDebugValue()) + if (I.isDebugInstr()) continue; buildStmt(BA, I); } @@ -1471,7 +1471,7 @@ void DataFlowGraph::buildPhis(BlockRefsMap &PhiM, RegisterSet &AllRefs, // and add a def for each S in the closure. // Sort the refs so that the phis will be created in a deterministic order. - std::sort(MaxRefs.begin(), MaxRefs.end()); + llvm::sort(MaxRefs.begin(), MaxRefs.end()); // Remove duplicates. auto NewEnd = std::unique(MaxRefs.begin(), MaxRefs.end()); MaxRefs.erase(NewEnd, MaxRefs.end()); diff --git a/lib/Target/Hexagon/RDFLiveness.cpp b/lib/Target/Hexagon/RDFLiveness.cpp index 13d9a1741978..c257d754ddf9 100644 --- a/lib/Target/Hexagon/RDFLiveness.cpp +++ b/lib/Target/Hexagon/RDFLiveness.cpp @@ -207,7 +207,7 @@ NodeList Liveness::getAllReachingDefs(RegisterRef RefRR, }; std::vector<NodeId> Tmp(Owners.begin(), Owners.end()); - std::sort(Tmp.begin(), Tmp.end(), Less); + llvm::sort(Tmp.begin(), Tmp.end(), Less); // The vector is a list of instructions, so that defs coming from // the same instruction don't need to be artificially ordered. @@ -628,7 +628,7 @@ void Liveness::computePhiInfo() { // Collect the set PropUp of uses that are reached by the current // phi PA, and are not covered by any intervening def between the - // currently visited use UA and the the upward phi P. + // currently visited use UA and the upward phi P. 
if (MidDefs.hasCoverOf(UR)) continue; @@ -813,7 +813,7 @@ void Liveness::computeLiveIns() { std::vector<RegisterRef> LV; for (auto I = B.livein_begin(), E = B.livein_end(); I != E; ++I) LV.push_back(RegisterRef(I->PhysReg, I->LaneMask)); - std::sort(LV.begin(), LV.end()); + llvm::sort(LV.begin(), LV.end()); dbgs() << printMBBReference(B) << "\t rec = {"; for (auto I : LV) dbgs() << ' ' << Print<RegisterRef>(I, DFG); @@ -824,7 +824,7 @@ void Liveness::computeLiveIns() { const RegisterAggr &LG = LiveMap[&B]; for (auto I = LG.rr_begin(), E = LG.rr_end(); I != E; ++I) LV.push_back(*I); - std::sort(LV.begin(), LV.end()); + llvm::sort(LV.begin(), LV.end()); dbgs() << "\tcomp = {"; for (auto I : LV) dbgs() << ' ' << Print<RegisterRef>(I, DFG); @@ -880,7 +880,7 @@ void Liveness::resetKills(MachineBasicBlock *B) { for (auto I = B->rbegin(), E = B->rend(); I != E; ++I) { MachineInstr *MI = &*I; - if (MI->isDebugValue()) + if (MI->isDebugInstr()) continue; MI->clearKillInfo(); diff --git a/lib/Target/Hexagon/RDFLiveness.h b/lib/Target/Hexagon/RDFLiveness.h index 8cfb6a1e9554..eaeb4ea115b3 100644 --- a/lib/Target/Hexagon/RDFLiveness.h +++ b/lib/Target/Hexagon/RDFLiveness.h @@ -53,8 +53,8 @@ namespace rdf { using RefMap = std::map<RegisterId, NodeRefSet>; Liveness(MachineRegisterInfo &mri, const DataFlowGraph &g) - : DFG(g), TRI(g.getTRI()), PRI(g.getPRI()), MDT(g.getDT()), - MDF(g.getDF()), LiveMap(g.getPRI()), NoRegs(g.getPRI()) {} + : DFG(g), TRI(g.getTRI()), PRI(g.getPRI()), MDT(g.getDT()), + MDF(g.getDF()), LiveMap(g.getPRI()), Empty(), NoRegs(g.getPRI()) {} NodeList getAllReachingDefs(RegisterRef RefRR, NodeAddr<RefNode*> RefA, bool TopShadows, bool FullChain, const RegisterAggr &DefRRs); diff --git a/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp b/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp index a330f27ed300..78e2f2b2ddb3 100644 --- a/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp +++ b/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp @@ -18,6 +18,6 @@ Target &llvm::getTheHexagonTarget() { } extern "C" void LLVMInitializeHexagonTargetInfo() { - RegisterTarget<Triple::hexagon, /*HasJIT=*/false> X( + RegisterTarget<Triple::hexagon, /*HasJIT=*/true> X( getTheHexagonTarget(), "hexagon", "Hexagon", "Hexagon"); }