author | Dimitry Andric <dim@FreeBSD.org> | 2017-01-04 22:11:11 +0000
committer | Dimitry Andric <dim@FreeBSD.org> | 2017-01-04 22:11:11 +0000
commit | c82ad72f63369bc462e59458f09960d66daa58a9 (patch)
tree | 58bc455a8d052220f9ae11e65d6f06d671a7a4c4 /lib/Target
parent | b915e9e0fc85ba6f398b3fab0db6a81a8913af94 (diff)
download | src-c82ad72f63369bc462e59458f09960d66daa58a9.tar.gz, src-c82ad72f63369bc462e59458f09960d66daa58a9.zip
Diffstat (limited to 'lib/Target')
36 files changed, 749 insertions, 896 deletions
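Most of what follows is mechanical cleanup (include hygiene and blank-line churn, largely in the Hexagon backend), but two functional changes stand out: the per-target EmitXRayTable() implementations are deleted from the AArch64 and ARM asm printers in favor of the shared AsmPrinter::emitXRayTable(), and AMDGPU gains a SENDMSGHALT selection node wired through to s_sendmsghalt. For orientation, here is a sketch, not code from the tree, of the record the deleted AArch64 emitter wrote into the xray_instr_map section for each sled; the struct and field names are illustrative, reconstructed from the EmitSymbolValue/EmitBytes/EmitZeros calls visible in the removed code below.

```cpp
// Illustrative layout of one xray_instr_map entry, as serialized by the
// deleted AArch64 EmitXRayTable() below. Names are hypothetical, not LLVM's.
#include <cstdint>

struct XRaySledEntry64 {
  uint64_t SledAddress;      // EmitSymbolValue(Sled.Sled, 8)
  uint64_t FunctionAddress;  // EmitSymbolValue(CurrentFnSym, 8)
  uint8_t  Kind;             // static_cast<uint8_t>(Sled.Kind), e.g. TAIL_CALL
  uint8_t  AlwaysInstrument; // Sled.AlwaysInstrument, one byte
  uint8_t  Padding[14];      // EmitZeros(14): pads each record to 32 bytes
};

static_assert(sizeof(XRaySledEntry64) == 32, "one 32-byte record per sled");
```

The ARM emitter removed further down wrote the same logical record with 4-byte addresses and 6 bytes of padding (16 bytes per entry); hoisting both into the shared emitXRayTable() resolves the duplication that the deleted TODO comment complains about.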
diff --git a/lib/Target/AArch64/AArch64.td b/lib/Target/AArch64/AArch64.td index c40391d5ad9d..740766b151bb 100644 --- a/lib/Target/AArch64/AArch64.td +++ b/lib/Target/AArch64/AArch64.td @@ -264,9 +264,13 @@ def ProcFalkor : SubtargetFeature<"falkor", "ARMProcFamily", "Falkor", "Qualcomm Falkor processors", [ FeatureCRC, FeatureCrypto, + FeatureCustomCheapAsMoveHandling, FeatureFPARMv8, FeatureNEON, - FeaturePerfMon + FeaturePerfMon, + FeaturePostRAScheduler, + FeaturePredictableSelectIsExpensive, + FeatureZCZeroing ]>; def ProcVulcan : SubtargetFeature<"vulcan", "ARMProcFamily", "Vulcan", diff --git a/lib/Target/AArch64/AArch64AsmPrinter.cpp b/lib/Target/AArch64/AArch64AsmPrinter.cpp index b2d96a32fd3a..efc221893782 100644 --- a/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ b/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -76,7 +76,6 @@ public: void LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr &MI); void LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI); - void EmitXRayTable(); void EmitSled(const MachineInstr &MI, SledKind Kind); /// \brief tblgen'erated driver function for lowering simple MI->MC @@ -95,7 +94,7 @@ public: AArch64FI = F.getInfo<AArch64FunctionInfo>(); STI = static_cast<const AArch64Subtarget*>(&F.getSubtarget()); bool Result = AsmPrinter::runOnMachineFunction(F); - EmitXRayTable(); + emitXRayTable(); return Result; } @@ -150,59 +149,6 @@ void AArch64AsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI) EmitSled(MI, SledKind::TAIL_CALL); } -void AArch64AsmPrinter::EmitXRayTable() -{ - //TODO: merge the logic for ELF XRay sleds at a higher level, so to avoid - // code duplication as it is now for x86_64, ARM32 and AArch64. - if (Sleds.empty()) - return; - - auto PrevSection = OutStreamer->getCurrentSectionOnly(); - auto Fn = MF->getFunction(); - MCSection *Section; - - if (STI->isTargetELF()) { - if (Fn->hasComdat()) - Section = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS, - ELF::SHF_ALLOC | ELF::SHF_GROUP, 0, - Fn->getComdat()->getName()); - else - Section = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS, - ELF::SHF_ALLOC); - } else if (STI->isTargetMachO()) { - Section = OutContext.getMachOSection("__DATA", "xray_instr_map", 0, - SectionKind::getReadOnlyWithRel()); - } else { - llvm_unreachable("Unsupported target"); - } - - // Before we switch over, we force a reference to a label inside the - // xray_instr_map section. Since EmitXRayTable() is always called just - // before the function's end, we assume that this is happening after the - // last return instruction. - // - // We then align the reference to 16 byte boundaries, which we determined - // experimentally to be beneficial to avoid causing decoder stalls. 
- MCSymbol *Tmp = OutContext.createTempSymbol("xray_synthetic_", true); - OutStreamer->EmitCodeAlignment(16); - OutStreamer->EmitSymbolValue(Tmp, 8, false); - OutStreamer->SwitchSection(Section); - OutStreamer->EmitLabel(Tmp); - for (const auto &Sled : Sleds) { - OutStreamer->EmitSymbolValue(Sled.Sled, 8); - OutStreamer->EmitSymbolValue(CurrentFnSym, 8); - auto Kind = static_cast<uint8_t>(Sled.Kind); - OutStreamer->EmitBytes( - StringRef(reinterpret_cast<const char *>(&Kind), 1)); - OutStreamer->EmitBytes( - StringRef(reinterpret_cast<const char *>(&Sled.AlwaysInstrument), 1)); - OutStreamer->EmitZeros(14); - } - OutStreamer->SwitchSection(PrevSection); - - Sleds.clear(); -} - void AArch64AsmPrinter::EmitSled(const MachineInstr &MI, SledKind Kind) { static const int8_t NoopsInSledCount = 7; diff --git a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp index dcb05601e5f4..8a76c42b5898 100644 --- a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp +++ b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp @@ -1470,6 +1470,9 @@ bool AArch64LoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) { bool IsUnscaled = TII->isUnscaledLdSt(MI); int Offset = getLdStOffsetOp(MI).getImm(); int OffsetStride = IsUnscaled ? getMemScale(MI) : 1; + // Allow one more for offset. + if (Offset > 0) + Offset -= OffsetStride; if (!inBoundsForPair(IsUnscaled, Offset, OffsetStride)) return false; diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index a87204d46eae..0b0a0e7d083e 100644 --- a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -3048,6 +3048,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(KILL) case AMDGPUISD::FIRST_MEM_OPCODE_NUMBER: break; NODE_NAME_CASE(SENDMSG) + NODE_NAME_CASE(SENDMSGHALT) NODE_NAME_CASE(INTERP_MOV) NODE_NAME_CASE(INTERP_P1) NODE_NAME_CASE(INTERP_P2) diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.h b/lib/Target/AMDGPU/AMDGPUISelLowering.h index 5cc5efb331e3..745c9923de2e 100644 --- a/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ b/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -313,6 +313,7 @@ enum NodeType : unsigned { /// Pointer to the start of the shader's constant data. 
CONST_DATA_PTR, SENDMSG, + SENDMSGHALT, INTERP_MOV, INTERP_P1, INTERP_P2, diff --git a/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/lib/Target/AMDGPU/AMDGPUInstrInfo.td index e7b40016e272..f079c8d0c70c 100644 --- a/lib/Target/AMDGPU/AMDGPUInstrInfo.td +++ b/lib/Target/AMDGPU/AMDGPUInstrInfo.td @@ -266,6 +266,10 @@ def AMDGPUsendmsg : SDNode<"AMDGPUISD::SENDMSG", SDTypeProfile<0, 1, [SDTCisInt<0>]>, [SDNPHasChain, SDNPInGlue]>; +def AMDGPUsendmsghalt : SDNode<"AMDGPUISD::SENDMSGHALT", + SDTypeProfile<0, 1, [SDTCisInt<0>]>, + [SDNPHasChain, SDNPInGlue]>; + def AMDGPUinterp_mov : SDNode<"AMDGPUISD::INTERP_MOV", SDTypeProfile<1, 3, [SDTCisFP<0>]>, [SDNPInGlue]>; diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp index fa53831cbe16..c78e97dfd46f 100644 --- a/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/lib/Target/AMDGPU/SIISelLowering.cpp @@ -2706,12 +2706,19 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op, unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); switch (IntrinsicID) { - case AMDGPUIntrinsic::SI_sendmsg: { + case AMDGPUIntrinsic::SI_sendmsg: + case Intrinsic::amdgcn_s_sendmsg: { Chain = copyToM0(DAG, Chain, DL, Op.getOperand(3)); SDValue Glue = Chain.getValue(1); return DAG.getNode(AMDGPUISD::SENDMSG, DL, MVT::Other, Chain, Op.getOperand(2), Glue); } + case Intrinsic::amdgcn_s_sendmsghalt: { + Chain = copyToM0(DAG, Chain, DL, Op.getOperand(3)); + SDValue Glue = Chain.getValue(1); + return DAG.getNode(AMDGPUISD::SENDMSGHALT, DL, MVT::Other, Chain, + Op.getOperand(2), Glue); + } case AMDGPUIntrinsic::SI_tbuffer_store: { SDValue Ops[] = { Chain, diff --git a/lib/Target/AMDGPU/SIInsertWaits.cpp b/lib/Target/AMDGPU/SIInsertWaits.cpp index 202a1e9ed8ac..fceabd7a8fdd 100644 --- a/lib/Target/AMDGPU/SIInsertWaits.cpp +++ b/lib/Target/AMDGPU/SIInsertWaits.cpp @@ -504,7 +504,7 @@ void SIInsertWaits::handleSendMsg(MachineBasicBlock &MBB, return; // There must be "S_NOP 0" between an instruction writing M0 and S_SENDMSG. 
- if (LastInstWritesM0 && I->getOpcode() == AMDGPU::S_SENDMSG) { + if (LastInstWritesM0 && (I->getOpcode() == AMDGPU::S_SENDMSG || I->getOpcode() == AMDGPU::S_SENDMSGHALT)) { BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_NOP)).addImm(0); LastInstWritesM0 = false; return; @@ -619,7 +619,8 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) { // signalling other hardware blocks if ((I->getOpcode() == AMDGPU::S_BARRIER && ST->needWaitcntBeforeBarrier()) || - I->getOpcode() == AMDGPU::S_SENDMSG) + I->getOpcode() == AMDGPU::S_SENDMSG || + I->getOpcode() == AMDGPU::S_SENDMSGHALT) Required = LastIssued; else Required = handleOperands(*I); diff --git a/lib/Target/AMDGPU/SOPInstructions.td b/lib/Target/AMDGPU/SOPInstructions.td index 0aeb1297d3a7..73cd5774128e 100644 --- a/lib/Target/AMDGPU/SOPInstructions.td +++ b/lib/Target/AMDGPU/SOPInstructions.td @@ -828,9 +828,12 @@ let Uses = [EXEC, M0] in { def S_SENDMSG : SOPP <0x00000010, (ins SendMsgImm:$simm16), "s_sendmsg $simm16", [(AMDGPUsendmsg (i32 imm:$simm16))] >; + +def S_SENDMSGHALT : SOPP <0x00000011, (ins SendMsgImm:$simm16), "s_sendmsghalt $simm16", + [(AMDGPUsendmsghalt (i32 imm:$simm16))] +>; } // End Uses = [EXEC, M0] -def S_SENDMSGHALT : SOPP <0x00000011, (ins SendMsgImm:$simm16), "s_sendmsghalt $simm16">; def S_TRAP : SOPP <0x00000012, (ins i16imm:$simm16), "s_trap $simm16">; def S_ICACHE_INV : SOPP <0x00000013, (ins), "s_icache_inv"> { let simm16 = 0; diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index f20768ab77a5..8ec9cb02813c 100644 --- a/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -164,9 +164,6 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) { // Emit the rest of the function body. EmitFunctionBody(); - // Emit the XRay table for this function. - EmitXRayTable(); - // If we need V4T thumb mode Register Indirect Jump pads, emit them. // These are created per function, rather than per TU, since it's // relatively easy to exceed the thumb branch range within a TU. diff --git a/lib/Target/ARM/ARMAsmPrinter.h b/lib/Target/ARM/ARMAsmPrinter.h index ce0b04d56d9e..93fed10eb2d0 100644 --- a/lib/Target/ARM/ARMAsmPrinter.h +++ b/lib/Target/ARM/ARMAsmPrinter.h @@ -113,9 +113,6 @@ public: void LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI); void LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr &MI); void LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI); - // Helper function that emits the XRay sleds we've collected for a particular - // function. 
- void EmitXRayTable(); private: void EmitSled(const MachineInstr &MI, SledKind Kind); diff --git a/lib/Target/ARM/ARMMCInstLower.cpp b/lib/Target/ARM/ARMMCInstLower.cpp index 293a527b09e8..07044b9697b6 100644 --- a/lib/Target/ARM/ARMMCInstLower.cpp +++ b/lib/Target/ARM/ARMMCInstLower.cpp @@ -22,9 +22,6 @@ #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCContext.h" -#include "llvm/MC/MCSymbolELF.h" -#include "llvm/MC/MCSectionELF.h" -#include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCInstBuilder.h" #include "llvm/MC/MCStreamer.h" using namespace llvm; @@ -226,38 +223,3 @@ void ARMAsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI) { EmitSled(MI, SledKind::TAIL_CALL); } - -void ARMAsmPrinter::EmitXRayTable() -{ - if (Sleds.empty()) - return; - - MCSection *Section = nullptr; - if (Subtarget->isTargetELF()) { - Section = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS, - ELF::SHF_ALLOC | ELF::SHF_GROUP | - ELF::SHF_MERGE, - 0, CurrentFnSym->getName()); - } else if (Subtarget->isTargetMachO()) { - Section = OutContext.getMachOSection("__DATA", "xray_instr_map", 0, - SectionKind::getReadOnlyWithRel()); - } else { - llvm_unreachable("Unsupported target"); - } - - auto PrevSection = OutStreamer->getCurrentSectionOnly(); - OutStreamer->SwitchSection(Section); - for (const auto &Sled : Sleds) { - OutStreamer->EmitSymbolValue(Sled.Sled, 4); - OutStreamer->EmitSymbolValue(CurrentFnSym, 4); - auto Kind = static_cast<uint8_t>(Sled.Kind); - OutStreamer->EmitBytes( - StringRef(reinterpret_cast<const char *>(&Kind), 1)); - OutStreamer->EmitBytes( - StringRef(reinterpret_cast<const char *>(&Sled.AlwaysInstrument), 1)); - OutStreamer->EmitZeros(6); - } - OutStreamer->SwitchSection(PrevSection); - - Sleds.clear(); -} diff --git a/lib/Target/Hexagon/BitTracker.cpp b/lib/Target/Hexagon/BitTracker.cpp index c0591c332dea..963fb99ce09b 100644 --- a/lib/Target/Hexagon/BitTracker.cpp +++ b/lib/Target/Hexagon/BitTracker.cpp @@ -53,28 +53,36 @@ // // The code below is intended to be fully target-independent. +#include "BitTracker.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/BitVector.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/IR/Constants.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetRegisterInfo.h" - -#include "BitTracker.h" +#include <iterator> +#include <cassert> +#include <cstdint> using namespace llvm; typedef BitTracker BT; namespace { + // Local trickery to pretty print a register (without the whole "%vreg" // business). 
struct printv { printv(unsigned r) : R(r) {} + unsigned R; }; + raw_ostream &operator<< (raw_ostream &OS, const printv &PV) { if (PV.R) OS << 'v' << TargetRegisterInfo::virtReg2Index(PV.R); @@ -82,9 +90,11 @@ namespace { OS << 's'; return OS; } -} + +} // end anonymous namespace namespace llvm { + raw_ostream &operator<<(raw_ostream &OS, const BT::BitValue &BV) { switch (BV.Type) { case BT::BitValue::Top: @@ -167,14 +177,14 @@ namespace llvm { return OS; } -} + +} // end namespace llvm void BitTracker::print_cells(raw_ostream &OS) const { for (CellMapType::iterator I = Map.begin(), E = Map.end(); I != E; ++I) dbgs() << PrintReg(I->first, &ME.TRI) << " -> " << I->second << "\n"; } - BitTracker::BitTracker(const MachineEvaluator &E, MachineFunction &F) : Trace(false), ME(E), MF(F), MRI(F.getRegInfo()), Map(*new CellMapType) {} @@ -182,7 +192,6 @@ BitTracker::~BitTracker() { delete ⤅ } - // If we were allowed to update a cell for a part of a register, the meet // operation would need to be parametrized by the register number and the // exact part of the register, so that the computer BitRefs correspond to @@ -201,7 +210,6 @@ bool BT::RegisterCell::meet(const RegisterCell &RC, unsigned SelfR) { return Changed; } - // Insert the entire cell RC into the current cell at position given by M. BT::RegisterCell &BT::RegisterCell::insert(const BT::RegisterCell &RC, const BitMask &M) { @@ -224,7 +232,6 @@ BT::RegisterCell &BT::RegisterCell::insert(const BT::RegisterCell &RC, return *this; } - BT::RegisterCell BT::RegisterCell::extract(const BitMask &M) const { uint16_t B = M.first(), E = M.last(), W = width(); assert(B < W && E < W); @@ -243,7 +250,6 @@ BT::RegisterCell BT::RegisterCell::extract(const BitMask &M) const { return RC; } - BT::RegisterCell &BT::RegisterCell::rol(uint16_t Sh) { // Rotate left (i.e. towards increasing bit indices). // Swap the two parts: [0..W-Sh-1] [W-Sh..W-1] @@ -265,7 +271,6 @@ BT::RegisterCell &BT::RegisterCell::rol(uint16_t Sh) { return *this; } - BT::RegisterCell &BT::RegisterCell::fill(uint16_t B, uint16_t E, const BitValue &V) { assert(B <= E); @@ -274,7 +279,6 @@ BT::RegisterCell &BT::RegisterCell::fill(uint16_t B, uint16_t E, return *this; } - BT::RegisterCell &BT::RegisterCell::cat(const RegisterCell &RC) { // Append the cell given as the argument to the "this" cell. // Bit 0 of RC becomes bit W of the result, where W is this->width(). @@ -285,7 +289,6 @@ BT::RegisterCell &BT::RegisterCell::cat(const RegisterCell &RC) { return *this; } - uint16_t BT::RegisterCell::ct(bool B) const { uint16_t W = width(); uint16_t C = 0; @@ -295,7 +298,6 @@ uint16_t BT::RegisterCell::ct(bool B) const { return C; } - uint16_t BT::RegisterCell::cl(bool B) const { uint16_t W = width(); uint16_t C = 0; @@ -305,7 +307,6 @@ uint16_t BT::RegisterCell::cl(bool B) const { return C; } - bool BT::RegisterCell::operator== (const RegisterCell &RC) const { uint16_t W = Bits.size(); if (RC.Bits.size() != W) @@ -316,7 +317,6 @@ bool BT::RegisterCell::operator== (const RegisterCell &RC) const { return true; } - uint16_t BT::MachineEvaluator::getRegBitWidth(const RegisterRef &RR) const { // The general problem is with finding a register class that corresponds // to a given reference reg:sub. 
There can be several such classes, and @@ -342,7 +342,6 @@ uint16_t BT::MachineEvaluator::getRegBitWidth(const RegisterRef &RR) const { return BW; } - BT::RegisterCell BT::MachineEvaluator::getCell(const RegisterRef &RR, const CellMapType &M) const { uint16_t BW = getRegBitWidth(RR); @@ -370,7 +369,6 @@ BT::RegisterCell BT::MachineEvaluator::getCell(const RegisterRef &RR, return RegisterCell::top(BW); } - void BT::MachineEvaluator::putCell(const RegisterRef &RR, RegisterCell RC, CellMapType &M) const { // While updating the cell map can be done in a meaningful way for @@ -388,7 +386,6 @@ void BT::MachineEvaluator::putCell(const RegisterRef &RR, RegisterCell RC, M[RR.Reg] = RC; } - // Check if the cell represents a compile-time integer value. bool BT::MachineEvaluator::isInt(const RegisterCell &A) const { uint16_t W = A.width(); @@ -398,7 +395,6 @@ bool BT::MachineEvaluator::isInt(const RegisterCell &A) const { return true; } - // Convert a cell to the integer value. The result must fit in uint64_t. uint64_t BT::MachineEvaluator::toInt(const RegisterCell &A) const { assert(isInt(A)); @@ -411,7 +407,6 @@ uint64_t BT::MachineEvaluator::toInt(const RegisterCell &A) const { return Val; } - // Evaluator helper functions. These implement some common operation on // register cells that can be used to implement target-specific instructions // in a target-specific evaluator. @@ -426,7 +421,6 @@ BT::RegisterCell BT::MachineEvaluator::eIMM(int64_t V, uint16_t W) const { return Res; } - BT::RegisterCell BT::MachineEvaluator::eIMM(const ConstantInt *CI) const { const APInt &A = CI->getValue(); uint16_t BW = A.getBitWidth(); @@ -437,7 +431,6 @@ BT::RegisterCell BT::MachineEvaluator::eIMM(const ConstantInt *CI) const { return Res; } - BT::RegisterCell BT::MachineEvaluator::eADD(const RegisterCell &A1, const RegisterCell &A2) const { uint16_t W = A1.width(); @@ -471,7 +464,6 @@ BT::RegisterCell BT::MachineEvaluator::eADD(const RegisterCell &A1, return Res; } - BT::RegisterCell BT::MachineEvaluator::eSUB(const RegisterCell &A1, const RegisterCell &A2) const { uint16_t W = A1.width(); @@ -505,29 +497,26 @@ BT::RegisterCell BT::MachineEvaluator::eSUB(const RegisterCell &A1, return Res; } - BT::RegisterCell BT::MachineEvaluator::eMLS(const RegisterCell &A1, const RegisterCell &A2) const { uint16_t W = A1.width() + A2.width(); - uint16_t Z = A1.ct(0) + A2.ct(0); + uint16_t Z = A1.ct(false) + A2.ct(false); RegisterCell Res(W); Res.fill(0, Z, BitValue::Zero); Res.fill(Z, W, BitValue::self()); return Res; } - BT::RegisterCell BT::MachineEvaluator::eMLU(const RegisterCell &A1, const RegisterCell &A2) const { uint16_t W = A1.width() + A2.width(); - uint16_t Z = A1.ct(0) + A2.ct(0); + uint16_t Z = A1.ct(false) + A2.ct(false); RegisterCell Res(W); Res.fill(0, Z, BitValue::Zero); Res.fill(Z, W, BitValue::self()); return Res; } - BT::RegisterCell BT::MachineEvaluator::eASL(const RegisterCell &A1, uint16_t Sh) const { assert(Sh <= A1.width()); @@ -537,7 +526,6 @@ BT::RegisterCell BT::MachineEvaluator::eASL(const RegisterCell &A1, return Res; } - BT::RegisterCell BT::MachineEvaluator::eLSR(const RegisterCell &A1, uint16_t Sh) const { uint16_t W = A1.width(); @@ -548,7 +536,6 @@ BT::RegisterCell BT::MachineEvaluator::eLSR(const RegisterCell &A1, return Res; } - BT::RegisterCell BT::MachineEvaluator::eASR(const RegisterCell &A1, uint16_t Sh) const { uint16_t W = A1.width(); @@ -560,7 +547,6 @@ BT::RegisterCell BT::MachineEvaluator::eASR(const RegisterCell &A1, return Res; } - BT::RegisterCell 
BT::MachineEvaluator::eAND(const RegisterCell &A1, const RegisterCell &A2) const { uint16_t W = A1.width(); @@ -583,7 +569,6 @@ BT::RegisterCell BT::MachineEvaluator::eAND(const RegisterCell &A1, return Res; } - BT::RegisterCell BT::MachineEvaluator::eORL(const RegisterCell &A1, const RegisterCell &A2) const { uint16_t W = A1.width(); @@ -606,7 +591,6 @@ BT::RegisterCell BT::MachineEvaluator::eORL(const RegisterCell &A1, return Res; } - BT::RegisterCell BT::MachineEvaluator::eXOR(const RegisterCell &A1, const RegisterCell &A2) const { uint16_t W = A1.width(); @@ -627,7 +611,6 @@ BT::RegisterCell BT::MachineEvaluator::eXOR(const RegisterCell &A1, return Res; } - BT::RegisterCell BT::MachineEvaluator::eNOT(const RegisterCell &A1) const { uint16_t W = A1.width(); RegisterCell Res(W); @@ -643,7 +626,6 @@ BT::RegisterCell BT::MachineEvaluator::eNOT(const RegisterCell &A1) const { return Res; } - BT::RegisterCell BT::MachineEvaluator::eSET(const RegisterCell &A1, uint16_t BitN) const { assert(BitN < A1.width()); @@ -652,7 +634,6 @@ BT::RegisterCell BT::MachineEvaluator::eSET(const RegisterCell &A1, return Res; } - BT::RegisterCell BT::MachineEvaluator::eCLR(const RegisterCell &A1, uint16_t BitN) const { assert(BitN < A1.width()); @@ -661,7 +642,6 @@ BT::RegisterCell BT::MachineEvaluator::eCLR(const RegisterCell &A1, return Res; } - BT::RegisterCell BT::MachineEvaluator::eCLB(const RegisterCell &A1, bool B, uint16_t W) const { uint16_t C = A1.cl(B), AW = A1.width(); @@ -672,7 +652,6 @@ BT::RegisterCell BT::MachineEvaluator::eCLB(const RegisterCell &A1, bool B, return RegisterCell::self(0, W); } - BT::RegisterCell BT::MachineEvaluator::eCTB(const RegisterCell &A1, bool B, uint16_t W) const { uint16_t C = A1.ct(B), AW = A1.width(); @@ -683,7 +662,6 @@ BT::RegisterCell BT::MachineEvaluator::eCTB(const RegisterCell &A1, bool B, return RegisterCell::self(0, W); } - BT::RegisterCell BT::MachineEvaluator::eSXT(const RegisterCell &A1, uint16_t FromN) const { uint16_t W = A1.width(); @@ -695,7 +673,6 @@ BT::RegisterCell BT::MachineEvaluator::eSXT(const RegisterCell &A1, return Res; } - BT::RegisterCell BT::MachineEvaluator::eZXT(const RegisterCell &A1, uint16_t FromN) const { uint16_t W = A1.width(); @@ -705,7 +682,6 @@ BT::RegisterCell BT::MachineEvaluator::eZXT(const RegisterCell &A1, return Res; } - BT::RegisterCell BT::MachineEvaluator::eXTR(const RegisterCell &A1, uint16_t B, uint16_t E) const { uint16_t W = A1.width(); @@ -718,7 +694,6 @@ BT::RegisterCell BT::MachineEvaluator::eXTR(const RegisterCell &A1, return Res; } - BT::RegisterCell BT::MachineEvaluator::eINS(const RegisterCell &A1, const RegisterCell &A2, uint16_t AtN) const { uint16_t W1 = A1.width(), W2 = A2.width(); @@ -731,7 +706,6 @@ BT::RegisterCell BT::MachineEvaluator::eINS(const RegisterCell &A1, return Res; } - BT::BitMask BT::MachineEvaluator::mask(unsigned Reg, unsigned Sub) const { assert(Sub == 0 && "Generic BitTracker::mask called for Sub != 0"); uint16_t W = getRegBitWidth(Reg); @@ -785,7 +759,6 @@ bool BT::MachineEvaluator::evaluate(const MachineInstr &MI, return true; } - // Main W-Z implementation. 
void BT::visitPHI(const MachineInstr &PI) { @@ -977,7 +950,6 @@ void BT::visitBranchesFrom(const MachineInstr &BI) { } } - void BT::visitUsesOf(unsigned Reg) { if (Trace) dbgs() << "visiting uses of " << PrintReg(Reg, &ME.TRI) << "\n"; @@ -997,17 +969,14 @@ void BT::visitUsesOf(unsigned Reg) { } } - BT::RegisterCell BT::get(RegisterRef RR) const { return ME.getCell(RR, Map); } - void BT::put(RegisterRef RR, const RegisterCell &RC) { ME.putCell(RR, RC, Map); } - // Replace all references to bits from OldRR with the corresponding bits // in NewRR. void BT::subst(RegisterRef OldRR, RegisterRef NewRR) { @@ -1033,7 +1002,6 @@ void BT::subst(RegisterRef OldRR, RegisterRef NewRR) { } } - // Check if the block has been "executed" during propagation. (If not, the // block is dead, but it may still appear to be reachable.) bool BT::reached(const MachineBasicBlock *B) const { @@ -1047,7 +1015,6 @@ bool BT::reached(const MachineBasicBlock *B) const { return false; } - // Visit an individual instruction. This could be a newly added instruction, // or one that has been modified by an optimization. void BT::visit(const MachineInstr &MI) { @@ -1061,14 +1028,12 @@ void BT::visit(const MachineInstr &MI) { FlowQ.pop(); } - void BT::reset() { EdgeExec.clear(); InstrExec.clear(); Map.clear(); } - void BT::run() { reset(); assert(FlowQ.empty()); @@ -1141,4 +1106,3 @@ void BT::run() { if (Trace) print_cells(dbgs() << "Cells after propagation:\n"); } - diff --git a/lib/Target/Hexagon/BitTracker.h b/lib/Target/Hexagon/BitTracker.h index 74cafcd00b60..48c5f2266acf 100644 --- a/lib/Target/Hexagon/BitTracker.h +++ b/lib/Target/Hexagon/BitTracker.h @@ -1,4 +1,4 @@ -//===--- BitTracker.h -----------------------------------------------------===// +//===--- BitTracker.h -------------------------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -7,24 +7,27 @@ // //===----------------------------------------------------------------------===// -#ifndef BITTRACKER_H -#define BITTRACKER_H +#ifndef LLVM_LIB_TARGET_HEXAGON_BITTRACKER_H +#define LLVM_LIB_TARGET_HEXAGON_BITTRACKER_H #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineFunction.h" - +#include "llvm/CodeGen/MachineOperand.h" +#include <cassert> +#include <cstdint> #include <map> #include <queue> #include <set> +#include <utility> namespace llvm { - class ConstantInt; - class MachineRegisterInfo; - class MachineBasicBlock; - class MachineInstr; - class MachineOperand; - class raw_ostream; + +class ConstantInt; +class MachineRegisterInfo; +class MachineBasicBlock; +class MachineInstr; +class raw_ostream; struct BitTracker { struct BitRef; @@ -76,19 +79,19 @@ private: CellMapType ⤅ }; - // Abstraction of a reference to bit at position Pos from a register Reg. struct BitTracker::BitRef { BitRef(unsigned R = 0, uint16_t P = 0) : Reg(R), Pos(P) {} + bool operator== (const BitRef &BR) const { // If Reg is 0, disregard Pos. return Reg == BR.Reg && (Reg == 0 || Pos == BR.Pos); } + unsigned Reg; uint16_t Pos; }; - // Abstraction of a register reference in MachineOperand. It contains the // register number and the subregister index. struct BitTracker::RegisterRef { @@ -96,10 +99,10 @@ struct BitTracker::RegisterRef { : Reg(R), Sub(S) {} RegisterRef(const MachineOperand &MO) : Reg(MO.getReg()), Sub(MO.getSubReg()) {} + unsigned Reg, Sub; }; - // Value that a single bit can take. This is outside of the context of // any register, it is more of an abstraction of the two-element set of // possible bit values. 
One extension here is the "Ref" type, which @@ -158,6 +161,7 @@ struct BitTracker::BitValue { bool operator!= (const BitValue &V) const { return !operator==(V); } + bool is(unsigned T) const { assert(T == 0 || T == 1); return T == 0 ? Type == Zero @@ -209,6 +213,7 @@ struct BitTracker::BitValue { bool num() const { return Type == Zero || Type == One; } + operator bool() const { assert(Type == Zero || Type == One); return Type == One; @@ -217,7 +222,6 @@ struct BitTracker::BitValue { friend raw_ostream &operator<<(raw_ostream &OS, const BitValue &BV); }; - // This operation must be idempotent, i.e. ref(ref(V)) == ref(V). inline BitTracker::BitValue BitTracker::BitValue::ref(const BitValue &V) { @@ -228,26 +232,26 @@ BitTracker::BitValue::ref(const BitValue &V) { return self(); } - inline BitTracker::BitValue BitTracker::BitValue::self(const BitRef &Self) { return BitValue(Self.Reg, Self.Pos); } - // A sequence of bits starting from index B up to and including index E. // If E < B, the mask represents two sections: [0..E] and [B..W) where // W is the width of the register. struct BitTracker::BitMask { - BitMask() : B(0), E(0) {} + BitMask() = default; BitMask(uint16_t b, uint16_t e) : B(b), E(e) {} + uint16_t first() const { return B; } uint16_t last() const { return E; } + private: - uint16_t B, E; + uint16_t B = 0; + uint16_t E = 0; }; - // Representation of a register: a list of BitValues. struct BitTracker::RegisterCell { RegisterCell(uint16_t Width = DefaultBitN) : Bits(Width) {} @@ -255,6 +259,7 @@ struct BitTracker::RegisterCell { uint16_t width() const { return Bits.size(); } + const BitValue &operator[](uint16_t BitN) const { assert(BitN < Bits.size()); return Bits[BitN]; @@ -297,12 +302,10 @@ private: friend raw_ostream &operator<<(raw_ostream &OS, const RegisterCell &RC); }; - inline bool BitTracker::has(unsigned Reg) const { return Map.find(Reg) != Map.end(); } - inline const BitTracker::RegisterCell& BitTracker::lookup(unsigned Reg) const { CellMapType::const_iterator F = Map.find(Reg); @@ -310,7 +313,6 @@ BitTracker::lookup(unsigned Reg) const { return F->second; } - inline BitTracker::RegisterCell BitTracker::RegisterCell::self(unsigned Reg, uint16_t Width) { RegisterCell RC(Width); @@ -319,7 +321,6 @@ BitTracker::RegisterCell::self(unsigned Reg, uint16_t Width) { return RC; } - inline BitTracker::RegisterCell BitTracker::RegisterCell::top(uint16_t Width) { RegisterCell RC(Width); @@ -328,7 +329,6 @@ BitTracker::RegisterCell::top(uint16_t Width) { return RC; } - inline BitTracker::RegisterCell BitTracker::RegisterCell::ref(const RegisterCell &C) { uint16_t W = C.width(); @@ -345,12 +345,13 @@ BitTracker::RegisterCell::ref(const RegisterCell &C) { struct BitTracker::MachineEvaluator { MachineEvaluator(const TargetRegisterInfo &T, MachineRegisterInfo &M) : TRI(T), MRI(M) {} - virtual ~MachineEvaluator() {} + virtual ~MachineEvaluator() = default; uint16_t getRegBitWidth(const RegisterRef &RR) const; RegisterCell getCell(const RegisterRef &RR, const CellMapType &M) const; void putCell(const RegisterRef &RR, RegisterCell RC, CellMapType &M) const; + // A result of any operation should use refs to the source cells, not // the cells directly. This function is a convenience wrapper to quickly // generate a ref for a cell corresponding to a register reference. 
@@ -435,4 +436,4 @@ struct BitTracker::MachineEvaluator { } // end namespace llvm -#endif +#endif // LLVM_LIB_TARGET_HEXAGON_BITTRACKER_H diff --git a/lib/Target/Hexagon/HexagonBitTracker.cpp b/lib/Target/Hexagon/HexagonBitTracker.cpp index b78c4126e0b1..436f88dcd450 100644 --- a/lib/Target/Hexagon/HexagonBitTracker.cpp +++ b/lib/Target/Hexagon/HexagonBitTracker.cpp @@ -7,16 +7,30 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/IR/Module.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" - #include "Hexagon.h" +#include "HexagonBitTracker.h" #include "HexagonInstrInfo.h" #include "HexagonRegisterInfo.h" #include "HexagonTargetMachine.h" -#include "HexagonBitTracker.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/IR/Argument.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Type.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include <cassert> +#include <cstddef> +#include <cstdint> +#include <cstdlib> +#include <utility> +#include <vector> using namespace llvm; @@ -76,11 +90,11 @@ HexagonEvaluator::HexagonEvaluator(const HexagonRegisterInfo &tri, } } - BT::BitMask HexagonEvaluator::mask(unsigned Reg, unsigned Sub) const { + using namespace Hexagon; + if (Sub == 0) return MachineEvaluator::mask(Reg, 0); - using namespace Hexagon; const TargetRegisterClass *RC = MRI.getRegClass(Reg); unsigned ID = RC->getID(); uint16_t RW = getRegBitWidth(RegisterRef(Reg, Sub)); @@ -102,6 +116,7 @@ BT::BitMask HexagonEvaluator::mask(unsigned Reg, unsigned Sub) const { } namespace { + class RegisterRefs { std::vector<BT::RegisterRef> Vector; @@ -117,17 +132,21 @@ public: } size_t size() const { return Vector.size(); } + const BT::RegisterRef &operator[](unsigned n) const { // The main purpose of this operator is to assert with bad argument. assert(n < Vector.size()); return Vector[n]; } }; -} + +} // end anonymous namespace bool HexagonEvaluator::evaluate(const MachineInstr &MI, const CellMapType &Inputs, CellMapType &Outputs) const { + using namespace Hexagon; + unsigned NumDefs = 0; // Sanity verification: there should not be any defs with subregisters. @@ -142,7 +161,6 @@ bool HexagonEvaluator::evaluate(const MachineInstr &MI, if (NumDefs == 0) return false; - using namespace Hexagon; unsigned Opc = MI.getOpcode(); if (MI.mayLoad()) { @@ -779,10 +797,10 @@ bool HexagonEvaluator::evaluate(const MachineInstr &MI, case S2_cl0: case S2_cl0p: // Always produce a 32-bit result. 
- return rr0(eCLB(rc(1), 0/*bit*/, 32), Outputs); + return rr0(eCLB(rc(1), false/*bit*/, 32), Outputs); case S2_cl1: case S2_cl1p: - return rr0(eCLB(rc(1), 1/*bit*/, 32), Outputs); + return rr0(eCLB(rc(1), true/*bit*/, 32), Outputs); case S2_clb: case S2_clbp: { uint16_t W1 = getRegBitWidth(Reg[1]); @@ -794,10 +812,10 @@ bool HexagonEvaluator::evaluate(const MachineInstr &MI, } case S2_ct0: case S2_ct0p: - return rr0(eCTB(rc(1), 0/*bit*/, 32), Outputs); + return rr0(eCTB(rc(1), false/*bit*/, 32), Outputs); case S2_ct1: case S2_ct1p: - return rr0(eCTB(rc(1), 1/*bit*/, 32), Outputs); + return rr0(eCTB(rc(1), true/*bit*/, 32), Outputs); case S5_popcountp: // TODO break; @@ -953,6 +971,8 @@ bool HexagonEvaluator::evaluate(const MachineInstr &BI, bool HexagonEvaluator::evaluateLoad(const MachineInstr &MI, const CellMapType &Inputs, CellMapType &Outputs) const { + using namespace Hexagon; + if (TII.isPredicated(MI)) return false; assert(MI.mayLoad() && "A load that mayn't?"); @@ -960,7 +980,6 @@ bool HexagonEvaluator::evaluateLoad(const MachineInstr &MI, uint16_t BitNum; bool SignEx; - using namespace Hexagon; switch (Opc) { default: @@ -1141,9 +1160,9 @@ bool HexagonEvaluator::evaluateFormalCopy(const MachineInstr &MI, return true; } - unsigned HexagonEvaluator::getNextPhysReg(unsigned PReg, unsigned Width) const { using namespace Hexagon; + bool Is64 = DoubleRegsRegClass.contains(PReg); assert(PReg == 0 || Is64 || IntRegsRegClass.contains(PReg)); @@ -1180,7 +1199,6 @@ unsigned HexagonEvaluator::getNextPhysReg(unsigned PReg, unsigned Width) const { return (Idx64+1 < Num64) ? Phys64[Idx64+1] : 0; } - unsigned HexagonEvaluator::getVirtRegFor(unsigned PReg) const { typedef MachineRegisterInfo::livein_iterator iterator; for (iterator I = MRI.livein_begin(), E = MRI.livein_end(); I != E; ++I) { diff --git a/lib/Target/Hexagon/HexagonBitTracker.h b/lib/Target/Hexagon/HexagonBitTracker.h index 9e7b1dbe298f..2cbf65e66ca6 100644 --- a/lib/Target/Hexagon/HexagonBitTracker.h +++ b/lib/Target/Hexagon/HexagonBitTracker.h @@ -1,4 +1,4 @@ -//===--- HexagonBitTracker.h ----------------------------------------------===// +//===--- HexagonBitTracker.h ------------------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -7,15 +7,17 @@ // //===----------------------------------------------------------------------===// -#ifndef HEXAGONBITTRACKER_H -#define HEXAGONBITTRACKER_H +#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONBITTRACKER_H +#define LLVM_LIB_TARGET_HEXAGON_HEXAGONBITTRACKER_H #include "BitTracker.h" #include "llvm/ADT/DenseMap.h" +#include <cstdint> namespace llvm { - class HexagonInstrInfo; - class HexagonRegisterInfo; + +class HexagonInstrInfo; +class HexagonRegisterInfo; struct HexagonEvaluator : public BitTracker::MachineEvaluator { typedef BitTracker::CellMapType CellMapType; @@ -49,10 +51,12 @@ private: // Type of formal parameter extension. struct ExtType { enum { SExt, ZExt }; - char Type; - uint16_t Width; - ExtType() : Type(0), Width(0) {} + + ExtType() = default; ExtType(char t, uint16_t w) : Type(t), Width(w) {} + + char Type = 0; + uint16_t Width = 0; }; // Map VR -> extension type. 
typedef DenseMap<unsigned, ExtType> RegExtMap; @@ -61,4 +65,4 @@ private: } // end namespace llvm -#endif +#endif // LLVM_LIB_TARGET_HEXAGON_HEXAGONBITTRACKER_H diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp index 34ce3e652995..0a7dc6b49d00 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -11,26 +11,45 @@ // //===----------------------------------------------------------------------===// +#include "Hexagon.h" #include "HexagonHazardRecognizer.h" #include "HexagonInstrInfo.h" #include "HexagonRegisterInfo.h" #include "HexagonSubtarget.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/DFAPacketizer.h" #include "llvm/CodeGen/LivePhysRegs.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineInstrBundle.h" +#include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCInstrItineraries.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Support/BranchProbability.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include <cassert> #include <cctype> +#include <cstdint> +#include <cstring> +#include <iterator> using namespace llvm; @@ -108,19 +127,16 @@ HexagonInstrInfo::HexagonInstrInfo(HexagonSubtarget &ST) : HexagonGenInstrInfo(Hexagon::ADJCALLSTACKDOWN, Hexagon::ADJCALLSTACKUP), RI() {} - static bool isIntRegForSubInst(unsigned Reg) { return (Reg >= Hexagon::R0 && Reg <= Hexagon::R7) || (Reg >= Hexagon::R16 && Reg <= Hexagon::R23); } - static bool isDblRegForSubInst(unsigned Reg, const HexagonRegisterInfo &HRI) { return isIntRegForSubInst(HRI.getSubReg(Reg, Hexagon::isub_lo)) && isIntRegForSubInst(HRI.getSubReg(Reg, Hexagon::isub_hi)); } - /// Calculate number of instructions excluding the debug instructions. static unsigned nonDbgMICount(MachineBasicBlock::const_instr_iterator MIB, MachineBasicBlock::const_instr_iterator MIE) { @@ -132,7 +148,6 @@ static unsigned nonDbgMICount(MachineBasicBlock::const_instr_iterator MIB, return Count; } - /// Find the hardware loop instruction used to set-up the specified loop. /// On Hexagon, we have two instructions used to set-up the hardware loop /// (LOOP0, LOOP1) with corresponding endloop (ENDLOOP0, ENDLOOP1) instructions @@ -164,17 +179,16 @@ static MachineInstr *findLoopInstr(MachineBasicBlock *BB, int EndLoopOp, return &*I; // We've reached a different loop, which means the loop0 has been removed. if (Opc == EndLoopOp) - return 0; + return nullptr; } // Check the predecessors for the LOOP instruction. MachineInstr *loop = findLoopInstr(*PB, EndLoopOp, Visited); if (loop) return loop; } - return 0; + return nullptr; } - /// Gather register def/uses from MI. 
/// This treats possible (predicated) defs as actually happening ones /// (conservatively). @@ -201,7 +215,6 @@ static inline void parseOperands(const MachineInstr &MI, } } - // Position dependent, so check twice for swap. static bool isDuplexPairMatch(unsigned Ga, unsigned Gb) { switch (Ga) { @@ -228,8 +241,6 @@ static bool isDuplexPairMatch(unsigned Ga, unsigned Gb) { return false; } - - /// isLoadFromStackSlot - If the specified machine instruction is a direct /// load from a stack slot, return the virtual or physical register number of /// the destination along with the FrameIndex of the loaded stack slot. If @@ -280,7 +291,6 @@ unsigned HexagonInstrInfo::isLoadFromStackSlot(const MachineInstr &MI, return 0; } - /// isStoreToStackSlot - If the specified machine instruction is a direct /// store to a stack slot, return the virtual or physical register number of /// the source reg along with the FrameIndex of the loaded stack slot. If @@ -337,7 +347,6 @@ unsigned HexagonInstrInfo::isStoreToStackSlot(const MachineInstr &MI, return 0; } - /// This function can analyze one/two way branching only and should (mostly) be /// called by target independent side. /// First entry is always the opcode of the branching instruction, except when @@ -401,7 +410,7 @@ bool HexagonInstrInfo::analyzeBranch(MachineBasicBlock &MBB, // Delete the J2_jump if it's equivalent to a fall-through. if (AllowModify && JumpToBlock && MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) { - DEBUG(dbgs()<< "\nErasing the jump to successor block\n";); + DEBUG(dbgs() << "\nErasing the jump to successor block\n";); I->eraseFromParent(); I = MBB.instr_end(); if (I == MBB.instr_begin()) @@ -415,7 +424,7 @@ bool HexagonInstrInfo::analyzeBranch(MachineBasicBlock &MBB, MachineInstr *LastInst = &*I; MachineInstr *SecondLastInst = nullptr; // Find one more terminator if present. 
- for (;;) { + while (true) { if (&*I != LastInst && !I->isBundle() && isUnpredicatedTerminator(*I)) { if (!SecondLastInst) SecondLastInst = &*I; @@ -524,7 +533,6 @@ bool HexagonInstrInfo::analyzeBranch(MachineBasicBlock &MBB, return true; } - unsigned HexagonInstrInfo::removeBranch(MachineBasicBlock &MBB, int *BytesRemoved) const { assert(!BytesRemoved && "code size not handled"); @@ -730,7 +738,6 @@ bool HexagonInstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB, return nonDbgBBSize(&MBB) <= 3; } - bool HexagonInstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB, unsigned NumTCycles, unsigned ExtraTCycles, MachineBasicBlock &FMBB, unsigned NumFCycles, unsigned ExtraFCycles, BranchProbability Probability) @@ -738,7 +745,6 @@ bool HexagonInstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB, return nonDbgBBSize(&TMBB) <= 3 && nonDbgBBSize(&FMBB) <= 3; } - bool HexagonInstrInfo::isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumInstrs, BranchProbability Probability) const { return NumInstrs <= 4; @@ -853,7 +859,6 @@ void HexagonInstrInfo::copyPhysReg(MachineBasicBlock &MBB, llvm_unreachable("Unimplemented"); } - void HexagonInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned SrcReg, bool isKill, int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { @@ -976,7 +981,6 @@ void HexagonInstrInfo::loadRegFromStackSlot( } } - static void getLiveRegsAt(LivePhysRegs &Regs, const MachineInstr &MI) { const MachineBasicBlock &B = *MI.getParent(); Regs.addLiveOuts(B); @@ -1307,7 +1311,6 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { return false; } - // We indicate that we want to reverse the branch by // inserting the reversed branching opcode. bool HexagonInstrInfo::reverseBranchCondition( @@ -1325,19 +1328,16 @@ bool HexagonInstrInfo::reverseBranchCondition( return false; } - void HexagonInstrInfo::insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const { DebugLoc DL; BuildMI(MBB, MI, DL, get(Hexagon::A2_nop)); } - bool HexagonInstrInfo::isPostIncrement(const MachineInstr &MI) const { return getAddrMode(MI) == HexagonII::PostInc; } - // Returns true if an instruction is predicated irrespective of the predicate // sense. For example, all of the following will return true. // if (p0) R1 = add(R2, R3) @@ -1351,7 +1351,6 @@ bool HexagonInstrInfo::isPredicated(const MachineInstr &MI) const { return (F >> HexagonII::PredicatedPos) & HexagonII::PredicatedMask; } - bool HexagonInstrInfo::PredicateInstruction( MachineInstr &MI, ArrayRef<MachineOperand> Cond) const { if (Cond.empty() || isNewValueJump(Cond[0].getImm()) || @@ -1403,14 +1402,12 @@ bool HexagonInstrInfo::PredicateInstruction( return true; } - bool HexagonInstrInfo::SubsumesPredicate(ArrayRef<MachineOperand> Pred1, ArrayRef<MachineOperand> Pred2) const { // TODO: Fix this return false; } - bool HexagonInstrInfo::DefinesPredicate( MachineInstr &MI, std::vector<MachineOperand> &Pred) const { auto &HRI = getRegisterInfo(); @@ -1427,7 +1424,6 @@ bool HexagonInstrInfo::DefinesPredicate( return false; } - bool HexagonInstrInfo::isPredicable(MachineInstr &MI) const { return MI.getDesc().isPredicable(); } @@ -1466,7 +1462,6 @@ bool HexagonInstrInfo::isSchedulingBoundary(const MachineInstr &MI, return false; } - /// Measure the specified inline asm to determine an approximation of its /// length. 
/// Comments (which run till the next SeparatorString or newline) do not @@ -1502,7 +1497,6 @@ unsigned HexagonInstrInfo::getInlineAsmLength(const char *Str, return Length; } - ScheduleHazardRecognizer* HexagonInstrInfo::CreateTargetPostRAHazardRecognizer( const InstrItineraryData *II, const ScheduleDAG *DAG) const { @@ -1513,7 +1507,6 @@ HexagonInstrInfo::CreateTargetPostRAHazardRecognizer( return TargetInstrInfo::CreateTargetPostRAHazardRecognizer(II, DAG); } - /// \brief For a comparison instruction, return the source registers in /// \p SrcReg and \p SrcReg2 if having two register operands, and the value it /// compares against in CmpValue. Return true if the comparison instruction @@ -1609,14 +1602,12 @@ unsigned HexagonInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, return getInstrTimingClassLatency(ItinData, MI); } - DFAPacketizer *HexagonInstrInfo::CreateTargetScheduleState( const TargetSubtargetInfo &STI) const { const InstrItineraryData *II = STI.getInstrItineraryData(); return static_cast<const HexagonSubtarget&>(STI).createDFAPacketizer(II); } - // Inspired by this pair: // %R13<def> = L2_loadri_io %R29, 136; mem:LD4[FixedStack0] // S2_storeri_io %R29, 132, %R1<kill>; flags: mem:ST4[FixedStack1] @@ -1661,7 +1652,6 @@ bool HexagonInstrInfo::areMemAccessesTriviallyDisjoint( return false; } - /// If the instruction is an increment of a constant value, return the amount. bool HexagonInstrInfo::getIncrementValue(const MachineInstr &MI, int &Value) const { @@ -1677,7 +1667,6 @@ bool HexagonInstrInfo::getIncrementValue(const MachineInstr &MI, return false; } - unsigned HexagonInstrInfo::createVR(MachineFunction *MF, MVT VT) const { MachineRegisterInfo &MRI = MF->getRegInfo(); const TargetRegisterClass *TRC; @@ -1695,18 +1684,15 @@ unsigned HexagonInstrInfo::createVR(MachineFunction *MF, MVT VT) const { return NewReg; } - bool HexagonInstrInfo::isAbsoluteSet(const MachineInstr &MI) const { return (getAddrMode(MI) == HexagonII::AbsoluteSet); } - bool HexagonInstrInfo::isAccumulator(const MachineInstr &MI) const { const uint64_t F = MI.getDesc().TSFlags; return((F >> HexagonII::AccumulatorPos) & HexagonII::AccumulatorMask); } - bool HexagonInstrInfo::isComplex(const MachineInstr &MI) const { const MachineFunction *MF = MI.getParent()->getParent(); const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); @@ -1727,13 +1713,11 @@ bool HexagonInstrInfo::isComplex(const MachineInstr &MI) const { return false; } - // Return true if the instruction is a compund branch instruction. bool HexagonInstrInfo::isCompoundBranchInstr(const MachineInstr &MI) const { return (getType(MI) == HexagonII::TypeCOMPOUND && MI.isBranch()); } - bool HexagonInstrInfo::isCondInst(const MachineInstr &MI) const { return (MI.isBranch() && isPredicated(MI)) || isConditionalTransfer(MI) || @@ -1744,7 +1728,6 @@ bool HexagonInstrInfo::isCondInst(const MachineInstr &MI) const { !isPredicatedNew(MI)); } - bool HexagonInstrInfo::isConditionalALU32(const MachineInstr &MI) const { switch (MI.getOpcode()) { case Hexagon::A2_paddf: @@ -1802,7 +1785,6 @@ bool HexagonInstrInfo::isConditionalALU32(const MachineInstr &MI) const { return false; } - // FIXME - Function name and it's functionality don't match. // It should be renamed to hasPredNewOpcode() bool HexagonInstrInfo::isConditionalLoad(const MachineInstr &MI) const { @@ -1814,7 +1796,6 @@ bool HexagonInstrInfo::isConditionalLoad(const MachineInstr &MI) const { return PNewOpcode >= 0; } - // Returns true if an instruction is a conditional store. 
// // Note: It doesn't include conditional new-value stores as they can't be @@ -1872,7 +1853,6 @@ bool HexagonInstrInfo::isConditionalStore(const MachineInstr &MI) const { } } - bool HexagonInstrInfo::isConditionalTransfer(const MachineInstr &MI) const { switch (MI.getOpcode()) { case Hexagon::A2_tfrt: @@ -1893,7 +1873,6 @@ bool HexagonInstrInfo::isConditionalTransfer(const MachineInstr &MI) const { return false; } - // TODO: In order to have isExtendable for fpimm/f32Ext, we need to handle // isFPImm and later getFPImm as well. bool HexagonInstrInfo::isConstExtended(const MachineInstr &MI) const { @@ -1942,7 +1921,6 @@ bool HexagonInstrInfo::isConstExtended(const MachineInstr &MI) const { return (ImmValue < MinValue || ImmValue > MaxValue); } - bool HexagonInstrInfo::isDeallocRet(const MachineInstr &MI) const { switch (MI.getOpcode()) { case Hexagon::L4_return : @@ -1957,7 +1935,6 @@ bool HexagonInstrInfo::isDeallocRet(const MachineInstr &MI) const { return false; } - // Return true when ConsMI uses a register defined by ProdMI. bool HexagonInstrInfo::isDependent(const MachineInstr &ProdMI, const MachineInstr &ConsMI) const { @@ -1994,7 +1971,6 @@ bool HexagonInstrInfo::isDependent(const MachineInstr &ProdMI, return false; } - // Returns true if the instruction is alread a .cur. bool HexagonInstrInfo::isDotCurInst(const MachineInstr &MI) const { switch (MI.getOpcode()) { @@ -2007,7 +1983,6 @@ bool HexagonInstrInfo::isDotCurInst(const MachineInstr &MI) const { return false; } - // Returns true, if any one of the operands is a dot new // insn, whether it is predicated dot new or register dot new. bool HexagonInstrInfo::isDotNewInst(const MachineInstr &MI) const { @@ -2017,7 +1992,6 @@ bool HexagonInstrInfo::isDotNewInst(const MachineInstr &MI) const { return false; } - /// Symmetrical. See if these two instructions are fit for duplex pair. bool HexagonInstrInfo::isDuplexPair(const MachineInstr &MIa, const MachineInstr &MIb) const { @@ -2026,7 +2000,6 @@ bool HexagonInstrInfo::isDuplexPair(const MachineInstr &MIa, return (isDuplexPairMatch(MIaG, MIbG) || isDuplexPairMatch(MIbG, MIaG)); } - bool HexagonInstrInfo::isEarlySourceInstr(const MachineInstr &MI) const { if (MI.mayLoad() || MI.mayStore() || MI.isCompare()) return true; @@ -2038,13 +2011,11 @@ bool HexagonInstrInfo::isEarlySourceInstr(const MachineInstr &MI) const { return false; } - bool HexagonInstrInfo::isEndLoopN(unsigned Opcode) const { return (Opcode == Hexagon::ENDLOOP0 || Opcode == Hexagon::ENDLOOP1); } - bool HexagonInstrInfo::isExpr(unsigned OpType) const { switch(OpType) { case MachineOperand::MO_MachineBasicBlock: @@ -2059,7 +2030,6 @@ bool HexagonInstrInfo::isExpr(unsigned OpType) const { } } - bool HexagonInstrInfo::isExtendable(const MachineInstr &MI) const { const MCInstrDesc &MID = MI.getDesc(); const uint64_t F = MID.TSFlags; @@ -2079,7 +2049,6 @@ bool HexagonInstrInfo::isExtendable(const MachineInstr &MI) const { return false; } - // This returns true in two cases: // - The OP code itself indicates that this is an extended instruction. // - One of MOs has been marked with HMOTF_ConstExtended flag. @@ -2098,14 +2067,12 @@ bool HexagonInstrInfo::isExtended(const MachineInstr &MI) const { return false; } - bool HexagonInstrInfo::isFloat(const MachineInstr &MI) const { unsigned Opcode = MI.getOpcode(); const uint64_t F = get(Opcode).TSFlags; return (F >> HexagonII::FPPos) & HexagonII::FPMask; } - // No V60 HVX VMEM with A_INDIRECT. 
bool HexagonInstrInfo::isHVXMemWithAIndirect(const MachineInstr &I, const MachineInstr &J) const { @@ -2116,7 +2083,6 @@ bool HexagonInstrInfo::isHVXMemWithAIndirect(const MachineInstr &I, return J.isIndirectBranch() || isIndirectCall(J) || isIndirectL4Return(J); } - bool HexagonInstrInfo::isIndirectCall(const MachineInstr &MI) const { switch (MI.getOpcode()) { case Hexagon::J2_callr : @@ -2128,7 +2094,6 @@ bool HexagonInstrInfo::isIndirectCall(const MachineInstr &MI) const { return false; } - bool HexagonInstrInfo::isIndirectL4Return(const MachineInstr &MI) const { switch (MI.getOpcode()) { case Hexagon::L4_return : @@ -2143,7 +2108,6 @@ bool HexagonInstrInfo::isIndirectL4Return(const MachineInstr &MI) const { return false; } - bool HexagonInstrInfo::isJumpR(const MachineInstr &MI) const { switch (MI.getOpcode()) { case Hexagon::J2_jumpr : @@ -2158,7 +2122,6 @@ bool HexagonInstrInfo::isJumpR(const MachineInstr &MI) const { return false; } - // Return true if a given MI can accommodate given offset. // Use abs estimate as oppose to the exact number. // TODO: This will need to be changed to use MC level @@ -2203,7 +2166,6 @@ bool HexagonInstrInfo::isJumpWithinBranchRange(const MachineInstr &MI, } } - bool HexagonInstrInfo::isLateInstrFeedsEarlyInstr(const MachineInstr &LRMI, const MachineInstr &ESMI) const { bool isLate = isLateResultInstr(LRMI); @@ -2222,7 +2184,6 @@ bool HexagonInstrInfo::isLateInstrFeedsEarlyInstr(const MachineInstr &LRMI, return false; } - bool HexagonInstrInfo::isLateResultInstr(const MachineInstr &MI) const { switch (MI.getOpcode()) { case TargetOpcode::EXTRACT_SUBREG: @@ -2259,14 +2220,12 @@ bool HexagonInstrInfo::isLateResultInstr(const MachineInstr &MI) const { return true; } - bool HexagonInstrInfo::isLateSourceInstr(const MachineInstr &MI) const { // Instructions with iclass A_CVI_VX and attribute A_CVI_LATE uses a multiply // resource, but all operands can be received late like an ALU instruction. 
return MI.getDesc().getSchedClass() == Hexagon::Sched::CVI_VX_LATE; } - bool HexagonInstrInfo::isLoopN(const MachineInstr &MI) const { unsigned Opcode = MI.getOpcode(); return Opcode == Hexagon::J2_loop0i || @@ -2279,7 +2238,6 @@ bool HexagonInstrInfo::isLoopN(const MachineInstr &MI) const { Opcode == Hexagon::J2_loop1rext; } - bool HexagonInstrInfo::isMemOp(const MachineInstr &MI) const { switch (MI.getOpcode()) { default: return false; @@ -2312,46 +2270,38 @@ bool HexagonInstrInfo::isMemOp(const MachineInstr &MI) const { return false; } - bool HexagonInstrInfo::isNewValue(const MachineInstr &MI) const { const uint64_t F = MI.getDesc().TSFlags; return (F >> HexagonII::NewValuePos) & HexagonII::NewValueMask; } - bool HexagonInstrInfo::isNewValue(unsigned Opcode) const { const uint64_t F = get(Opcode).TSFlags; return (F >> HexagonII::NewValuePos) & HexagonII::NewValueMask; } - bool HexagonInstrInfo::isNewValueInst(const MachineInstr &MI) const { return isNewValueJump(MI) || isNewValueStore(MI); } - bool HexagonInstrInfo::isNewValueJump(const MachineInstr &MI) const { return isNewValue(MI) && MI.isBranch(); } - bool HexagonInstrInfo::isNewValueJump(unsigned Opcode) const { return isNewValue(Opcode) && get(Opcode).isBranch() && isPredicated(Opcode); } - bool HexagonInstrInfo::isNewValueStore(const MachineInstr &MI) const { const uint64_t F = MI.getDesc().TSFlags; return (F >> HexagonII::NVStorePos) & HexagonII::NVStoreMask; } - bool HexagonInstrInfo::isNewValueStore(unsigned Opcode) const { const uint64_t F = get(Opcode).TSFlags; return (F >> HexagonII::NVStorePos) & HexagonII::NVStoreMask; } - // Returns true if a particular operand is extendable for an instruction. bool HexagonInstrInfo::isOperandExtended(const MachineInstr &MI, unsigned OperandNum) const { @@ -2360,28 +2310,24 @@ bool HexagonInstrInfo::isOperandExtended(const MachineInstr &MI, == OperandNum; } - bool HexagonInstrInfo::isPredicatedNew(const MachineInstr &MI) const { const uint64_t F = MI.getDesc().TSFlags; assert(isPredicated(MI)); return (F >> HexagonII::PredicatedNewPos) & HexagonII::PredicatedNewMask; } - bool HexagonInstrInfo::isPredicatedNew(unsigned Opcode) const { const uint64_t F = get(Opcode).TSFlags; assert(isPredicated(Opcode)); return (F >> HexagonII::PredicatedNewPos) & HexagonII::PredicatedNewMask; } - bool HexagonInstrInfo::isPredicatedTrue(const MachineInstr &MI) const { const uint64_t F = MI.getDesc().TSFlags; return !((F >> HexagonII::PredicatedFalsePos) & HexagonII::PredicatedFalseMask); } - bool HexagonInstrInfo::isPredicatedTrue(unsigned Opcode) const { const uint64_t F = get(Opcode).TSFlags; // Make sure that the instruction is predicated. 
@@ -2390,19 +2336,16 @@ bool HexagonInstrInfo::isPredicatedTrue(unsigned Opcode) const { HexagonII::PredicatedFalseMask); } - bool HexagonInstrInfo::isPredicated(unsigned Opcode) const { const uint64_t F = get(Opcode).TSFlags; return (F >> HexagonII::PredicatedPos) & HexagonII::PredicatedMask; } - bool HexagonInstrInfo::isPredicateLate(unsigned Opcode) const { const uint64_t F = get(Opcode).TSFlags; return ~(F >> HexagonII::PredicateLatePos) & HexagonII::PredicateLateMask; } - bool HexagonInstrInfo::isPredictedTaken(unsigned Opcode) const { const uint64_t F = get(Opcode).TSFlags; assert(get(Opcode).isBranch() && @@ -2410,7 +2353,6 @@ bool HexagonInstrInfo::isPredictedTaken(unsigned Opcode) const { return (F >> HexagonII::TakenPos) & HexagonII::TakenMask; } - bool HexagonInstrInfo::isSaveCalleeSavedRegsCall(const MachineInstr &MI) const { return MI.getOpcode() == Hexagon::SAVE_REGISTERS_CALL_V4 || MI.getOpcode() == Hexagon::SAVE_REGISTERS_CALL_V4_EXT || @@ -2496,13 +2438,11 @@ bool HexagonInstrInfo::isSignExtendingLoad(const MachineInstr &MI) const { } } - bool HexagonInstrInfo::isSolo(const MachineInstr &MI) const { const uint64_t F = MI.getDesc().TSFlags; return (F >> HexagonII::SoloPos) & HexagonII::SoloMask; } - bool HexagonInstrInfo::isSpillPredRegOp(const MachineInstr &MI) const { switch (MI.getOpcode()) { case Hexagon::STriw_pred : @@ -2513,7 +2453,6 @@ bool HexagonInstrInfo::isSpillPredRegOp(const MachineInstr &MI) const { } } - bool HexagonInstrInfo::isTailCall(const MachineInstr &MI) const { if (!MI.isBranch()) return false; @@ -2524,7 +2463,6 @@ bool HexagonInstrInfo::isTailCall(const MachineInstr &MI) const { return false; } - // Returns true when SU has a timing class TC1. bool HexagonInstrInfo::isTC1(const MachineInstr &MI) const { unsigned SchedClass = MI.getDesc().getSchedClass(); @@ -2544,7 +2482,6 @@ bool HexagonInstrInfo::isTC1(const MachineInstr &MI) const { } } - bool HexagonInstrInfo::isTC2(const MachineInstr &MI) const { unsigned SchedClass = MI.getDesc().getSchedClass(); switch (SchedClass) { @@ -2561,7 +2498,6 @@ bool HexagonInstrInfo::isTC2(const MachineInstr &MI) const { } } - bool HexagonInstrInfo::isTC2Early(const MachineInstr &MI) const { unsigned SchedClass = MI.getDesc().getSchedClass(); switch (SchedClass) { @@ -2582,13 +2518,11 @@ bool HexagonInstrInfo::isTC2Early(const MachineInstr &MI) const { } } - bool HexagonInstrInfo::isTC4x(const MachineInstr &MI) const { unsigned SchedClass = MI.getDesc().getSchedClass(); return SchedClass == Hexagon::Sched::M_tc_3or4x_SLOT23; } - // Schedule this ASAP. bool HexagonInstrInfo::isToBeScheduledASAP(const MachineInstr &MI1, const MachineInstr &MI2) const { @@ -2608,13 +2542,11 @@ bool HexagonInstrInfo::isToBeScheduledASAP(const MachineInstr &MI1, return false; } - bool HexagonInstrInfo::isV60VectorInstruction(const MachineInstr &MI) const { const uint64_t V = getType(MI); return HexagonII::TypeCVI_FIRST <= V && V <= HexagonII::TypeCVI_LAST; } - // Check if the Offset is a valid auto-inc imm by Load/Store Type. 
// bool HexagonInstrInfo::isValidAutoIncImm(const EVT VT, const int Offset) const { @@ -2653,7 +2585,6 @@ bool HexagonInstrInfo::isValidAutoIncImm(const EVT VT, const int Offset) const { llvm_unreachable("Not an auto-inc opc!"); } - bool HexagonInstrInfo::isValidOffset(unsigned Opcode, int Offset, bool Extend) const { // This function is to check whether the "Offset" is in the correct range of @@ -2808,12 +2739,10 @@ bool HexagonInstrInfo::isValidOffset(unsigned Opcode, int Offset, "Please define it in the above switch statement!"); } - bool HexagonInstrInfo::isVecAcc(const MachineInstr &MI) const { return isV60VectorInstruction(MI) && isAccumulator(MI); } - bool HexagonInstrInfo::isVecALU(const MachineInstr &MI) const { const uint64_t F = get(MI.getOpcode()).TSFlags; const uint64_t V = ((F >> HexagonII::TypePos) & HexagonII::TypeMask); @@ -2822,7 +2751,6 @@ bool HexagonInstrInfo::isVecALU(const MachineInstr &MI) const { V == HexagonII::TypeCVI_VA_DV; } - bool HexagonInstrInfo::isVecUsableNextPacket(const MachineInstr &ProdMI, const MachineInstr &ConsMI) const { if (EnableACCForwarding && isVecAcc(ProdMI) && isVecAcc(ConsMI)) @@ -2915,7 +2843,6 @@ bool HexagonInstrInfo::isZeroExtendingLoad(const MachineInstr &MI) const { } } - // Add latency to instruction. bool HexagonInstrInfo::addLatencyToSchedule(const MachineInstr &MI1, const MachineInstr &MI2) const { @@ -2925,7 +2852,6 @@ bool HexagonInstrInfo::addLatencyToSchedule(const MachineInstr &MI1, return false; } - /// \brief Get the base register and byte offset of a load/store instr. bool HexagonInstrInfo::getMemOpBaseRegImmOfs(MachineInstr &LdSt, unsigned &BaseReg, int64_t &Offset, const TargetRegisterInfo *TRI) @@ -2937,7 +2863,6 @@ bool HexagonInstrInfo::getMemOpBaseRegImmOfs(MachineInstr &LdSt, return BaseReg != 0; } - /// \brief Can these instructions execute at the same time in a bundle. bool HexagonInstrInfo::canExecuteInBundle(const MachineInstr &First, const MachineInstr &Second) const { @@ -2959,13 +2884,11 @@ bool HexagonInstrInfo::canExecuteInBundle(const MachineInstr &First, return false; } - bool HexagonInstrInfo::doesNotReturn(const MachineInstr &CallMI) const { unsigned Opc = CallMI.getOpcode(); return Opc == Hexagon::PS_call_nr || Opc == Hexagon::PS_callr_nr; } - bool HexagonInstrInfo::hasEHLabel(const MachineBasicBlock *B) const { for (auto &I : *B) if (I.isEHLabel()) @@ -2973,7 +2896,6 @@ bool HexagonInstrInfo::hasEHLabel(const MachineBasicBlock *B) const { return false; } - // Returns true if an instruction can be converted into a non-extended // equivalent instruction. bool HexagonInstrInfo::hasNonExtEquivalent(const MachineInstr &MI) const { @@ -3011,13 +2933,11 @@ bool HexagonInstrInfo::hasNonExtEquivalent(const MachineInstr &MI) const { return false; } - bool HexagonInstrInfo::hasPseudoInstrPair(const MachineInstr &MI) const { return Hexagon::getRealHWInstr(MI.getOpcode(), Hexagon::InstrType_Pseudo) >= 0; } - bool HexagonInstrInfo::hasUncondBranch(const MachineBasicBlock *B) const { MachineBasicBlock::const_iterator I = B->getFirstTerminator(), E = B->end(); @@ -3029,7 +2949,6 @@ bool HexagonInstrInfo::hasUncondBranch(const MachineBasicBlock *B) return false; } - // Returns true, if a LD insn can be promoted to a cur load. 
bool HexagonInstrInfo::mayBeCurLoad(const MachineInstr &MI) const { auto &HST = MI.getParent()->getParent()->getSubtarget<HexagonSubtarget>(); @@ -3038,14 +2957,12 @@ bool HexagonInstrInfo::mayBeCurLoad(const MachineInstr &MI) const { HST.hasV60TOps(); } - // Returns true, if a ST insn can be promoted to a new-value store. bool HexagonInstrInfo::mayBeNewStore(const MachineInstr &MI) const { const uint64_t F = MI.getDesc().TSFlags; return (F >> HexagonII::mayNVStorePos) & HexagonII::mayNVStoreMask; } - bool HexagonInstrInfo::producesStall(const MachineInstr &ProdMI, const MachineInstr &ConsMI) const { // There is no stall when ProdMI is not a V60 vector. @@ -3064,7 +2981,6 @@ bool HexagonInstrInfo::producesStall(const MachineInstr &ProdMI, return true; } - bool HexagonInstrInfo::producesStall(const MachineInstr &MI, MachineBasicBlock::const_instr_iterator BII) const { // There is no stall when I is not a V60 vector. @@ -3091,7 +3007,6 @@ bool HexagonInstrInfo::producesStall(const MachineInstr &MI, return false; } - bool HexagonInstrInfo::predCanBeUsedAsDotNew(const MachineInstr &MI, unsigned PredReg) const { for (unsigned opNum = 0; opNum < MI.getNumOperands(); opNum++) { @@ -3106,7 +3021,6 @@ bool HexagonInstrInfo::predCanBeUsedAsDotNew(const MachineInstr &MI, return MI.getOpcode() != Hexagon::A4_tlbmatch; } - bool HexagonInstrInfo::PredOpcodeHasJMP_c(unsigned Opcode) const { return (Opcode == Hexagon::J2_jumpt) || (Opcode == Hexagon::J2_jumpf) || @@ -3116,25 +3030,21 @@ bool HexagonInstrInfo::PredOpcodeHasJMP_c(unsigned Opcode) const { (Opcode == Hexagon::J2_jumpfnewpt); } - bool HexagonInstrInfo::predOpcodeHasNot(ArrayRef<MachineOperand> Cond) const { if (Cond.empty() || !isPredicated(Cond[0].getImm())) return false; return !isPredicatedTrue(Cond[0].getImm()); } - short HexagonInstrInfo::getAbsoluteForm(const MachineInstr &MI) const { return Hexagon::getAbsoluteForm(MI.getOpcode()); } - unsigned HexagonInstrInfo::getAddrMode(const MachineInstr &MI) const { const uint64_t F = MI.getDesc().TSFlags; return (F >> HexagonII::AddrModePos) & HexagonII::AddrModeMask; } - // Returns the base register in a memory access (load/store). The offset is // returned in Offset and the access size is returned in AccessSize. unsigned HexagonInstrInfo::getBaseAndOffset(const MachineInstr &MI, @@ -3171,7 +3081,6 @@ unsigned HexagonInstrInfo::getBaseAndOffset(const MachineInstr &MI, return MI.getOperand(basePos).getReg(); } - /// Return the position of the base and offset operands for this instruction. bool HexagonInstrInfo::getBaseAndOffsetPosition(const MachineInstr &MI, unsigned &BasePos, unsigned &OffsetPos) const { @@ -3203,7 +3112,6 @@ bool HexagonInstrInfo::getBaseAndOffsetPosition(const MachineInstr &MI, return true; } - // Inserts branching instructions in reverse order of their occurrence. // e.g. jump_t t1 (i1) // jump t2 (i2) @@ -3265,24 +3173,20 @@ SmallVector<MachineInstr*, 2> HexagonInstrInfo::getBranchingInstrs( return Jumpers; } - short HexagonInstrInfo::getBaseWithLongOffset(short Opcode) const { if (Opcode < 0) return -1; return Hexagon::getBaseWithLongOffset(Opcode); } - short HexagonInstrInfo::getBaseWithLongOffset(const MachineInstr &MI) const { return Hexagon::getBaseWithLongOffset(MI.getOpcode()); } - short HexagonInstrInfo::getBaseWithRegOffset(const MachineInstr &MI) const { return Hexagon::getBaseWithRegOffset(MI.getOpcode()); } - // Returns Operand Index for the constant extended instruction. 
unsigned HexagonInstrInfo::getCExtOpNum(const MachineInstr &MI) const { const uint64_t F = MI.getDesc().TSFlags; @@ -3379,7 +3283,6 @@ HexagonII::CompoundGroup HexagonInstrInfo::getCompoundCandidateGroup( return HexagonII::HCG_None; } - // Returns -1 when there is no opcode found. unsigned HexagonInstrInfo::getCompoundOpcode(const MachineInstr &GA, const MachineInstr &GB) const { @@ -3398,7 +3301,6 @@ unsigned HexagonInstrInfo::getCompoundOpcode(const MachineInstr &GA, return -1; } - int HexagonInstrInfo::getCondOpcode(int Opc, bool invertPredicate) const { enum Hexagon::PredSense inPredSense; inPredSense = invertPredicate ? Hexagon::PredSense_false : @@ -3410,7 +3312,6 @@ int HexagonInstrInfo::getCondOpcode(int Opc, bool invertPredicate) const { llvm_unreachable("Unexpected predicable instruction"); } - // Return the cur value instruction for a given store. int HexagonInstrInfo::getDotCurOp(const MachineInstr &MI) const { switch (MI.getOpcode()) { @@ -3428,8 +3329,6 @@ int HexagonInstrInfo::getDotCurOp(const MachineInstr &MI) const { return 0; } - - // The diagram below shows the steps involved in the conversion of a predicated // store instruction to its .new predicated new-value form. // @@ -3509,7 +3408,6 @@ int HexagonInstrInfo::getDotCurOp(const MachineInstr &MI) const { // promoted. Therefore, in case of dependence check failure (due to R5) during // next iteration, it should be converted back to its most basic form. - // Return the new value instruction for a given store. int HexagonInstrInfo::getDotNewOp(const MachineInstr &MI) const { int NVOpcode = Hexagon::getNewValueOpcode(MI.getOpcode()); @@ -3552,7 +3450,6 @@ int HexagonInstrInfo::getDotNewOp(const MachineInstr &MI) const { return 0; } - // Returns the opcode to use when converting MI, which is a conditional jump, // into a conditional instruction which uses the .new value of the predicate. // We also use branch probabilities to add a hint to the jump. @@ -3579,7 +3476,6 @@ int HexagonInstrInfo::getDotNewPredJumpOp(const MachineInstr &MI, } } - // Return .new predicate version for an instruction. int HexagonInstrInfo::getDotNewPredOp(const MachineInstr &MI, const MachineBranchProbabilityInfo *MBPI) const { @@ -3599,7 +3495,6 @@ int HexagonInstrInfo::getDotNewPredOp(const MachineInstr &MI, return 0; } - int HexagonInstrInfo::getDotOldOp(const int opc) const { int NewOp = opc; if (isPredicated(NewOp) && isPredicatedNew(NewOp)) { // Get predicate old form @@ -3615,7 +3510,6 @@ int HexagonInstrInfo::getDotOldOp(const int opc) const { return NewOp; } - // See if instruction could potentially be a duplex candidate. // If so, return its group. Zero otherwise. HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup( @@ -3960,12 +3854,10 @@ HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup( return HexagonII::HSIG_None; } - short HexagonInstrInfo::getEquivalentHWInstr(const MachineInstr &MI) const { return Hexagon::getRealHWInstr(MI.getOpcode(), Hexagon::InstrType_Real); } - // Return first non-debug instruction in the basic block. MachineInstr *HexagonInstrInfo::getFirstNonDbgInst(MachineBasicBlock *BB) const { @@ -3978,7 +3870,6 @@ MachineInstr *HexagonInstrInfo::getFirstNonDbgInst(MachineBasicBlock *BB) return nullptr; } - unsigned HexagonInstrInfo::getInstrTimingClassLatency( const InstrItineraryData *ItinData, const MachineInstr &MI) const { // Default to one cycle for no itinerary. 
However, an "empty" itinerary may @@ -4000,7 +3891,6 @@ unsigned HexagonInstrInfo::getInstrTimingClassLatency( return Latency; } - // inverts the predication logic. // p -> NotP // NotP -> P @@ -4013,7 +3903,6 @@ bool HexagonInstrInfo::getInvertedPredSense( return true; } - unsigned HexagonInstrInfo::getInvertedPredicatedOpcode(const int Opc) const { int InvPredOpcode; InvPredOpcode = isPredicatedTrue(Opc) ? Hexagon::getFalsePredOpcode(Opc) @@ -4024,7 +3913,6 @@ unsigned HexagonInstrInfo::getInvertedPredicatedOpcode(const int Opc) const { llvm_unreachable("Unexpected predicated instruction"); } - // Returns the max value that doesn't need to be extended. int HexagonInstrInfo::getMaxValue(const MachineInstr &MI) const { const uint64_t F = MI.getDesc().TSFlags; @@ -4039,13 +3927,11 @@ int HexagonInstrInfo::getMaxValue(const MachineInstr &MI) const { return ~(-1U << bits); } - unsigned HexagonInstrInfo::getMemAccessSize(const MachineInstr &MI) const { const uint64_t F = MI.getDesc().TSFlags; return (F >> HexagonII::MemAccessSizePos) & HexagonII::MemAccesSizeMask; } - // Returns the min value that doesn't need to be extended. int HexagonInstrInfo::getMinValue(const MachineInstr &MI) const { const uint64_t F = MI.getDesc().TSFlags; @@ -4060,7 +3946,6 @@ int HexagonInstrInfo::getMinValue(const MachineInstr &MI) const { return 0; } - // Returns opcode of the non-extended equivalent instruction. short HexagonInstrInfo::getNonExtOpcode(const MachineInstr &MI) const { // Check if the instruction has a register form that uses register in place @@ -4086,7 +3971,6 @@ short HexagonInstrInfo::getNonExtOpcode(const MachineInstr &MI) const { return -1; } - bool HexagonInstrInfo::getPredReg(ArrayRef<MachineOperand> Cond, unsigned &PredReg, unsigned &PredRegPos, unsigned &PredRegFlags) const { if (Cond.empty()) @@ -4107,17 +3991,14 @@ bool HexagonInstrInfo::getPredReg(ArrayRef<MachineOperand> Cond, return true; } - short HexagonInstrInfo::getPseudoInstrPair(const MachineInstr &MI) const { return Hexagon::getRealHWInstr(MI.getOpcode(), Hexagon::InstrType_Pseudo); } - short HexagonInstrInfo::getRegForm(const MachineInstr &MI) const { return Hexagon::getRegForm(MI.getOpcode()); } - // Return the number of bytes required to encode the instruction. // Hexagon instructions are fixed length, 4 bytes, unless they // use a constant extender, which requires another 4 bytes. @@ -4156,13 +4037,11 @@ unsigned HexagonInstrInfo::getSize(const MachineInstr &MI) const { return Size; } - uint64_t HexagonInstrInfo::getType(const MachineInstr &MI) const { const uint64_t F = MI.getDesc().TSFlags; return (F >> HexagonII::TypePos) & HexagonII::TypeMask; } - unsigned HexagonInstrInfo::getUnits(const MachineInstr &MI) const { const TargetSubtargetInfo &ST = MI.getParent()->getParent()->getSubtarget(); const InstrItineraryData &II = *ST.getInstrItineraryData(); @@ -4171,19 +4050,16 @@ unsigned HexagonInstrInfo::getUnits(const MachineInstr &MI) const { return IS.getUnits(); } - unsigned HexagonInstrInfo::getValidSubTargets(const unsigned Opcode) const { const uint64_t F = get(Opcode).TSFlags; return (F >> HexagonII::validSubTargetPos) & HexagonII::validSubTargetMask; } - // Calculate size of the basic block without debug instructions. 
unsigned HexagonInstrInfo::nonDbgBBSize(const MachineBasicBlock *BB) const { return nonDbgMICount(BB->instr_begin(), BB->instr_end()); } - unsigned HexagonInstrInfo::nonDbgBundleSize( MachineBasicBlock::const_iterator BundleHead) const { assert(BundleHead->isBundle() && "Not a bundle header"); @@ -4192,7 +4068,6 @@ unsigned HexagonInstrInfo::nonDbgBundleSize( return nonDbgMICount(++MII, getBundleEnd(BundleHead.getInstrIterator())); } - /// immediateExtend - Changes the instruction in place to one using an immediate /// extender. void HexagonInstrInfo::immediateExtend(MachineInstr &MI) const { @@ -4208,7 +4083,6 @@ void HexagonInstrInfo::immediateExtend(MachineInstr &MI) const { MO.addTargetFlag(HexagonII::HMOTF_ConstExtended); } - bool HexagonInstrInfo::invertAndChangeJumpTarget( MachineInstr &MI, MachineBasicBlock *NewTarget) const { DEBUG(dbgs() << "\n[invertAndChangeJumpTarget] to BB#" @@ -4229,7 +4103,6 @@ bool HexagonInstrInfo::invertAndChangeJumpTarget( return true; } - void HexagonInstrInfo::genAllInsnTimingClasses(MachineFunction &MF) const { /* +++ The code below is used to generate complete set of Hexagon Insn +++ */ MachineFunction::iterator A = MF.begin(); @@ -4248,7 +4121,6 @@ void HexagonInstrInfo::genAllInsnTimingClasses(MachineFunction &MF) const { /* --- The code above is used to generate complete set of Hexagon Insn --- */ } - // inverts the predication logic. // p -> NotP // NotP -> P @@ -4258,7 +4130,6 @@ bool HexagonInstrInfo::reversePredSense(MachineInstr &MI) const { return true; } - // Reverse the branch prediction. unsigned HexagonInstrInfo::reversePrediction(unsigned Opcode) const { int PredRevOpcode = -1; @@ -4270,14 +4141,12 @@ unsigned HexagonInstrInfo::reversePrediction(unsigned Opcode) const { return PredRevOpcode; } - // TODO: Add more rigorous validation. bool HexagonInstrInfo::validateBranchCond(const ArrayRef<MachineOperand> &Cond) const { return Cond.empty() || (Cond[0].isImm() && (Cond.size() != 1)); } - short HexagonInstrInfo::xformRegToImmOffset(const MachineInstr &MI) const { return Hexagon::xformRegToImmOffset(MI.getOpcode()); } diff --git a/lib/Target/Hexagon/HexagonInstrInfo.h b/lib/Target/Hexagon/HexagonInstrInfo.h index 2d184d1484e9..2358d4b7e4c0 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.h +++ b/lib/Target/Hexagon/HexagonInstrInfo.h @@ -16,9 +16,14 @@ #include "HexagonRegisterInfo.h" #include "MCTargetDesc/HexagonBaseInfo.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" -#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/CodeGen/MachineValueType.h" #include "llvm/Target/TargetInstrInfo.h" +#include <cstdint> +#include <vector> #define GET_INSTRINFO_HEADER #include "HexagonGenInstrInfo.inc" @@ -29,9 +34,10 @@ struct EVT; class HexagonSubtarget; class HexagonInstrInfo : public HexagonGenInstrInfo { - virtual void anchor(); const HexagonRegisterInfo RI; + virtual void anchor(); + public: explicit HexagonInstrInfo(HexagonSubtarget &ST); @@ -260,7 +266,7 @@ public: /// PredCost. unsigned getInstrLatency(const InstrItineraryData *ItinData, const MachineInstr &MI, - unsigned *PredCost = 0) const override; + unsigned *PredCost = nullptr) const override; /// Create machine specific model for scheduling. 
DFAPacketizer * @@ -378,7 +384,6 @@ public: bool PredOpcodeHasJMP_c(unsigned Opcode) const; bool predOpcodeHasNot(ArrayRef<MachineOperand> Cond) const; - short getAbsoluteForm(const MachineInstr &MI) const; unsigned getAddrMode(const MachineInstr &MI) const; unsigned getBaseAndOffset(const MachineInstr &MI, int &Offset, @@ -421,13 +426,11 @@ public: unsigned getUnits(const MachineInstr &MI) const; unsigned getValidSubTargets(const unsigned Opcode) const; - /// getInstrTimingClassLatency - Compute the instruction latency of a given /// instruction using Timing Class information, if available. unsigned nonDbgBBSize(const MachineBasicBlock *BB) const; unsigned nonDbgBundleSize(MachineBasicBlock::const_iterator BundleHead) const; - void immediateExtend(MachineInstr &MI) const; bool invertAndChangeJumpTarget(MachineInstr &MI, MachineBasicBlock* NewTarget) const; @@ -438,6 +441,6 @@ public: short xformRegToImmOffset(const MachineInstr &MI) const; }; -} +} // end namespace llvm -#endif +#endif // LLVM_LIB_TARGET_HEXAGON_HEXAGONINSTRINFO_H diff --git a/lib/Target/Hexagon/HexagonMachineFunctionInfo.h b/lib/Target/Hexagon/HexagonMachineFunctionInfo.h index 371b52108b9b..d83bcbc41553 100644 --- a/lib/Target/Hexagon/HexagonMachineFunctionInfo.h +++ b/lib/Target/Hexagon/HexagonMachineFunctionInfo.h @@ -15,33 +15,31 @@ namespace llvm { - namespace Hexagon { +namespace Hexagon { + const unsigned int StartPacket = 0x1; const unsigned int EndPacket = 0x2; - } +} // end namespace Hexagon /// Hexagon target-specific information for each MachineFunction. class HexagonMachineFunctionInfo : public MachineFunctionInfo { // SRetReturnReg - Some subtargets require that sret lowering includes // returning the value of the returned struct in a register. This field // holds the virtual register into which the sret argument is passed. - unsigned SRetReturnReg; - unsigned StackAlignBaseVReg; // Aligned-stack base register (virtual) - unsigned StackAlignBasePhysReg; // (physical) + unsigned SRetReturnReg = 0; + unsigned StackAlignBaseVReg = 0; // Aligned-stack base register (virtual) + unsigned StackAlignBasePhysReg = 0; // (physical) int VarArgsFrameIndex; - bool HasClobberLR; - bool HasEHReturn; + bool HasClobberLR = false; + bool HasEHReturn = false; std::map<const MachineInstr*, unsigned> PacketInfo; virtual void anchor(); public: - HexagonMachineFunctionInfo() : SRetReturnReg(0), StackAlignBaseVReg(0), - StackAlignBasePhysReg(0), HasClobberLR(0), HasEHReturn(false) {} + HexagonMachineFunctionInfo() = default; - HexagonMachineFunctionInfo(MachineFunction &MF) : SRetReturnReg(0), - StackAlignBaseVReg(0), StackAlignBasePhysReg(0), HasClobberLR(0), - HasEHReturn(false) {} + HexagonMachineFunctionInfo(MachineFunction &MF) {} unsigned getSRetReturnReg() const { return SRetReturnReg; } void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; } @@ -75,6 +73,7 @@ public: void setStackAlignBasePhysReg(unsigned R) { StackAlignBasePhysReg = R; } unsigned getStackAlignBasePhysReg() const { return StackAlignBasePhysReg; } }; -} // End llvm namespace -#endif +} // end namespace llvm + +#endif // LLVM_LIB_TARGET_HEXAGON_HEXAGONMACHINEFUNCTIONINFO_H diff --git a/lib/Target/Hexagon/HexagonTargetObjectFile.cpp b/lib/Target/Hexagon/HexagonTargetObjectFile.cpp index e902f600e881..c9c4f95dbaaa 100644 --- a/lib/Target/Hexagon/HexagonTargetObjectFile.cpp +++ b/lib/Target/Hexagon/HexagonTargetObjectFile.cpp @@ -10,17 +10,27 @@ // This file contains the declarations of the HexagonTargetAsmInfo properties. 
// //===----------------------------------------------------------------------===// + #define DEBUG_TYPE "hexagon-sdata" -#include "HexagonTargetMachine.h" #include "HexagonTargetObjectFile.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalObject.h" +#include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Type.h" #include "llvm/MC/MCContext.h" +#include "llvm/MC/SectionKind.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/ELF.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" using namespace llvm; @@ -44,13 +54,21 @@ static cl::opt<bool> TraceGVPlacement("trace-gv-placement", // (e.g. -debug and -debug-only=globallayout) #define TRACE_TO(s, X) s << X #ifdef NDEBUG -#define TRACE(X) do { if (TraceGVPlacement) { TRACE_TO(errs(), X); } } while (0) +#define TRACE(X) \ + do { \ + if (TraceGVPlacement) { \ + TRACE_TO(errs(), X); \ + } \ + } while (false) #else -#define TRACE(X) \ - do { \ - if (TraceGVPlacement) { TRACE_TO(errs(), X); } \ - else { DEBUG( TRACE_TO(dbgs(), X) ); } \ - } while (0) +#define TRACE(X) \ + do { \ + if (TraceGVPlacement) { \ + TRACE_TO(errs(), X); \ + } else { \ + DEBUG(TRACE_TO(dbgs(), X)); \ + } \ + } while (false) #endif // Returns true if the section name is such that the symbol will be put @@ -69,7 +87,6 @@ static bool isSmallDataSection(StringRef Sec) { Sec.find(".scommon.") != StringRef::npos; } - static const char *getSectionSuffixForSize(unsigned Size) { switch (Size) { default: @@ -163,7 +180,6 @@ MCSection *HexagonTargetObjectFile::getExplicitSectionGlobal( return TargetLoweringObjectFileELF::getExplicitSectionGlobal(GO, Kind, TM); } - /// Return true if this global value should be placed into small data/bss /// section. bool HexagonTargetObjectFile::isGlobalInSmallSection(const GlobalObject *GO, @@ -232,17 +248,14 @@ bool HexagonTargetObjectFile::isGlobalInSmallSection(const GlobalObject *GO, return true; } - bool HexagonTargetObjectFile::isSmallDataEnabled() const { return SmallDataThreshold > 0; } - unsigned HexagonTargetObjectFile::getSmallDataSize() const { return SmallDataThreshold; } - /// Descends any type down to "elementary" components, /// discovering the smallest addressable one. /// If zero is returned, declaration will not be modified. 
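
The small-data placement logic above boils down to two ingredients: a size-versus-threshold test (a threshold of zero disables small data entirely, per isSmallDataEnabled()) and a per-size section suffix in the spirit of getSectionSuffixForSize(). A rough standalone sketch; the suffix strings are assumptions for illustration, not taken from the source:

    #include <cstdint>

    // An object is a small-data candidate only when small data is enabled
    // (Threshold > 0) and the object fits under the threshold.
    bool inSmallData(uint64_t Size, uint32_t Threshold) {
      return Threshold > 0 && Size > 0 && Size <= Threshold;
    }

    // Hypothetical per-size suffix, appended to ".sdata"/".sbss" so the
    // linker can group objects of equal size; unknown sizes get no suffix.
    const char *sectionSuffixForSize(unsigned Size) {
      switch (Size) {
      case 1: return ".1";
      case 2: return ".2";
      case 4: return ".4";
      case 8: return ".8";
      default: return "";
      }
    }

Placing frequently used scalars in small-data sections lets the backend reach them with cheap GP-relative accesses, which is why the threshold is exposed as a tunable option.
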
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp index 5feaffe6efb9..9a09a17767a6 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp @@ -1,5 +1,4 @@ - -//=== HexagonMCCompound.cpp - Hexagon Compound checker -------===// +//=== HexagonMCCompound.cpp - Hexagon Compound checker -------------------===// // // The LLVM Compiler Infrastructure // @@ -11,18 +10,17 @@ // This file looks at a packet and tries to form compound insns. // //===----------------------------------------------------------------------===// + #include "Hexagon.h" #include "MCTargetDesc/HexagonBaseInfo.h" -#include "MCTargetDesc/HexagonMCShuffler.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/MC/MCAssembler.h" +#include "MCTargetDesc/HexagonMCInstrInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCInst.h" -#include "llvm/MC/MCSectionELF.h" -#include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCSymbol.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include <cassert> +#include <cstdint> using namespace llvm; using namespace Hexagon; @@ -79,8 +77,7 @@ static const unsigned cmpgtn1BitOpcode[8] = { }; // enum HexagonII::CompoundGroup -namespace { -unsigned getCompoundCandidateGroup(MCInst const &MI, bool IsExtended) { +static unsigned getCompoundCandidateGroup(MCInst const &MI, bool IsExtended) { unsigned DstReg, SrcReg, Src1Reg, Src2Reg; switch (MI.getOpcode()) { @@ -173,11 +170,9 @@ unsigned getCompoundCandidateGroup(MCInst const &MI, bool IsExtended) { return HexagonII::HCG_None; } -} /// getCompoundOp - Return the index from 0-7 into the above opcode lists. -namespace { -unsigned getCompoundOp(MCInst const &HMCI) { +static unsigned getCompoundOp(MCInst const &HMCI) { const MCOperand &Predicate = HMCI.getOperand(0); unsigned PredReg = Predicate.getReg(); @@ -198,11 +193,10 @@ unsigned getCompoundOp(MCInst const &HMCI) { return (PredReg == Hexagon::P0) ? tp0_jump_t : tp1_jump_t; } } -} -namespace { -MCInst *getCompoundInsn(MCContext &Context, MCInst const &L, MCInst const &R) { - MCInst *CompoundInsn = 0; +static MCInst *getCompoundInsn(MCContext &Context, MCInst const &L, + MCInst const &R) { + MCInst *CompoundInsn = nullptr; unsigned compoundOpcode; MCOperand Rs, Rt; int64_t Value; @@ -336,12 +330,10 @@ MCInst *getCompoundInsn(MCContext &Context, MCInst const &L, MCInst const &R) { return CompoundInsn; } -} /// Non-Symmetrical. See if these two instructions are fit for a compound pair.
-namespace { -bool isOrderedCompoundPair(MCInst const &MIa, bool IsExtendedA, - MCInst const &MIb, bool IsExtendedB) { +static bool isOrderedCompoundPair(MCInst const &MIa, bool IsExtendedA, + MCInst const &MIb, bool IsExtendedB) { unsigned MIaG = getCompoundCandidateGroup(MIa, IsExtendedA); unsigned MIbG = getCompoundCandidateGroup(MIb, IsExtendedB); // We have two candidates - check that this is the same register @@ -353,10 +345,9 @@ bool isOrderedCompoundPair(MCInst const &MIa, bool IsExtendedA, return ((MIaG == HexagonII::HCG_A && MIbG == HexagonII::HCG_B) && (MIa.getOperand(0).getReg() == MIb.getOperand(0).getReg())); } -} -namespace { -bool lookForCompound(MCInstrInfo const &MCII, MCContext &Context, MCInst &MCI) { +static bool lookForCompound(MCInstrInfo const &MCII, MCContext &Context, + MCInst &MCI) { assert(HexagonMCInstrInfo::isBundle(MCI)); bool JExtended = false; for (MCInst::iterator J = @@ -367,8 +358,7 @@ bool lookForCompound(MCInstrInfo const &MCII, MCContext &Context, MCInst &MCI) { JExtended = true; continue; } - if (llvm::HexagonMCInstrInfo::getType(MCII, *JumpInst) == - HexagonII::TypeJ) { + if (HexagonMCInstrInfo::getType(MCII, *JumpInst) == HexagonII::TypeJ) { // Try to pair with another insn (B)undled with jump. bool BExtended = false; for (MCInst::iterator B = @@ -401,7 +391,6 @@ bool lookForCompound(MCInstrInfo const &MCII, MCContext &Context, MCInst &MCI) { } return false; } -} /// tryCompound - Given a bundle, check for compound insns; when one /// is found, update the contents of the bundle with the compound insn. @@ -420,6 +409,4 @@ void HexagonMCInstrInfo::tryCompound(MCInstrInfo const &MCII, // a compound is found. while (lookForCompound(MCII, Context, MCI)) ; - - return; } diff --git a/lib/Target/Hexagon/RDFCopy.h b/lib/Target/Hexagon/RDFCopy.h index 517f17cc9c64..5ece11bd5ce4 100644 --- a/lib/Target/Hexagon/RDFCopy.h +++ b/lib/Target/Hexagon/RDFCopy.h @@ -1,4 +1,4 @@ -//===--- RDFCopy.h --------------------------------------------------------===// +//===--- RDFCopy.h ----------------------------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -7,23 +7,26 @@ // //===----------------------------------------------------------------------===// -#ifndef RDF_COPY_H -#define RDF_COPY_H +#ifndef LLVM_LIB_TARGET_HEXAGON_RDFCOPY_H +#define LLVM_LIB_TARGET_HEXAGON_RDFCOPY_H #include "RDFGraph.h" #include <map> #include <vector> namespace llvm { + class MachineBasicBlock; class MachineDominatorTree; class MachineInstr; namespace rdf { + struct CopyPropagation { CopyPropagation(DataFlowGraph &dfg) : MDT(dfg.getDT()), DFG(dfg), Trace(false) {} - virtual ~CopyPropagation() {} + + virtual ~CopyPropagation() = default; bool run(); void trace(bool On) { Trace = On; } @@ -49,7 +52,9 @@ namespace rdf { void updateMap(NodeAddr<InstrNode*> IA); bool scanBlock(MachineBasicBlock *B); }; -} // namespace rdf -} // namespace llvm -#endif +} // end namespace rdf + +} // end namespace llvm + +#endif // LLVM_LIB_TARGET_HEXAGON_RDFCOPY_H diff --git a/lib/Target/Hexagon/RDFGraph.cpp b/lib/Target/Hexagon/RDFGraph.cpp index 33c3f03790f3..fa272ea1a76a 100644 --- a/lib/Target/Hexagon/RDFGraph.cpp +++ b/lib/Target/Hexagon/RDFGraph.cpp @@ -10,16 +10,31 @@ // Target-independent, SSA-based data flow graph for register data flow (RDF).
// #include "RDFGraph.h" - #include "llvm/ADT/SetVector.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineDominanceFrontier.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/IR/Function.h" +#include "llvm/MC/LaneBitmask.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetRegisterInfo.h" +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <cstring> +#include <iterator> +#include <utility> +#include <vector> using namespace llvm; using namespace rdf; @@ -88,14 +103,12 @@ raw_ostream &operator<< (raw_ostream &OS, const Print<NodeId> &P) { return OS; } -namespace { - void printRefHeader(raw_ostream &OS, const NodeAddr<RefNode*> RA, - const DataFlowGraph &G) { - OS << Print<NodeId>(RA.Id, G) << '<' - << Print<RegisterRef>(RA.Addr->getRegRef(G), G) << '>'; - if (RA.Addr->getFlags() & NodeAttrs::Fixed) - OS << '!'; - } +static void printRefHeader(raw_ostream &OS, const NodeAddr<RefNode*> RA, + const DataFlowGraph &G) { + OS << Print<NodeId>(RA.Id, G) << '<' + << Print<RegisterRef>(RA.Addr->getRegRef(G), G) << '>'; + if (RA.Addr->getFlags() & NodeAttrs::Fixed) + OS << '!'; } template<> @@ -183,9 +196,11 @@ raw_ostream &operator<< (raw_ostream &OS, const Print<NodeSet> &P) { } namespace { + template <typename T> struct PrintListV { PrintListV(const NodeList &L, const DataFlowGraph &G) : List(L), G(G) {} + typedef T Type; const NodeList &List; const DataFlowGraph &G; @@ -201,7 +216,8 @@ namespace { } return OS; } -} + +} // end anonymous namespace template<> raw_ostream &operator<< (raw_ostream &OS, const Print<NodeAddr<PhiNode*>> &P) { @@ -219,10 +235,10 @@ raw_ostream &operator<< (raw_ostream &OS, // Print the target for calls and branches (for readability). if (MI.isCall() || MI.isBranch()) { MachineInstr::const_mop_iterator T = - find_if(MI.operands(), - [] (const MachineOperand &Op) -> bool { - return Op.isMBB() || Op.isGlobal() || Op.isSymbol(); - }); + llvm::find_if(MI.operands(), + [] (const MachineOperand &Op) -> bool { + return Op.isMBB() || Op.isGlobal() || Op.isSymbol(); + }); if (T != MI.operands_end()) { OS << ' '; if (T->isMBB()) @@ -327,8 +343,8 @@ raw_ostream &operator<< (raw_ostream &OS, return OS; } -} // namespace rdf -} // namespace llvm +} // end namespace rdf +} // end namespace llvm // Node allocation functions. // @@ -390,7 +406,6 @@ void NodeAllocator::clear() { ActiveEnd = nullptr; } - // Insert node NA after "this" in the circular chain. void NodeBase::append(NodeAddr<NodeBase*> NA) { NodeId Nx = Next; @@ -401,7 +416,6 @@ void NodeBase::append(NodeAddr<NodeBase*> NA) { } } - // Fundamental node manipulator functions. // Obtain the register reference from a reference node. @@ -590,7 +604,6 @@ NodeAddr<BlockNode*> FuncNode::getEntryBlock(const DataFlowGraph &G) { return findBlock(EntryB, G); } - // Target operand information. 
// @@ -641,7 +654,6 @@ bool TargetOperandInfo::isFixedReg(const MachineInstr &In, unsigned OpNum) return false; } - RegisterRef RegisterAggr::normalize(RegisterRef RR) const { RegisterId SuperReg = RR.Reg; while (true) { @@ -745,7 +757,6 @@ void RegisterAggr::print(raw_ostream &OS) const { OS << " }"; } - // // The data flow graph construction. // @@ -753,10 +764,9 @@ void RegisterAggr::print(raw_ostream &OS) const { DataFlowGraph::DataFlowGraph(MachineFunction &mf, const TargetInstrInfo &tii, const TargetRegisterInfo &tri, const MachineDominatorTree &mdt, const MachineDominanceFrontier &mdf, const TargetOperandInfo &toi) - : LMI(), MF(mf), TII(tii), TRI(tri), MDT(mdt), MDF(mdf), TOI(toi) { + : MF(mf), TII(tii), TRI(tri), MDT(mdt), MDF(mdf), TOI(toi) { } - // The implementation of the definition stack. // Each register reference has its own definition stack. In particular, // for a register references "Reg" and "Reg:subreg" will each have their @@ -845,7 +855,6 @@ unsigned DataFlowGraph::DefStack::nextDown(unsigned P) const { return P; } - // Register information. // Get the list of references aliased to RR. Lane masks are ignored. @@ -915,7 +924,6 @@ NodeAddr<NodeBase*> DataFlowGraph::cloneNode(const NodeAddr<NodeBase*> B) { return NA; } - // Allocation routines for specific node types/kinds. NodeAddr<UseNode*> DataFlowGraph::newUse(NodeAddr<InstrNode*> Owner, @@ -1248,7 +1256,6 @@ bool DataFlowGraph::alias(RegisterRef RA, RegisterRef RB) const { return false; } - // Clear all information in the graph. void DataFlowGraph::reset() { Memory.clear(); @@ -1256,7 +1263,6 @@ void DataFlowGraph::reset() { Func = NodeAddr<FuncNode*>(); } - // Return the next reference node in the instruction node IA that is related // to RA. Conceptually, two reference nodes are related if they refer to the // same instance of a register access, but differ in flags or other minor diff --git a/lib/Target/Hexagon/RDFGraph.h b/lib/Target/Hexagon/RDFGraph.h index 871062ff2b05..49d78a8b22b5 100644 --- a/lib/Target/Hexagon/RDFGraph.h +++ b/lib/Target/Hexagon/RDFGraph.h @@ -1,4 +1,4 @@ -//===--- RDFGraph.h -------------------------------------------------------===// +//===--- RDFGraph.h ---------------------------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -221,20 +221,25 @@ // The statement s5 has two use nodes for t0: u7" and u9". The quotation // mark " indicates that the node is a shadow. // -#ifndef RDF_GRAPH_H -#define RDF_GRAPH_H + +#ifndef LLVM_LIB_TARGET_HEXAGON_RDFGRAPH_H +#define LLVM_LIB_TARGET_HEXAGON_RDFGRAPH_H #include "llvm/ADT/BitVector.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/MC/LaneBitmask.h" #include "llvm/Support/Allocator.h" -#include "llvm/Support/Debug.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Support/Timer.h" #include "llvm/Target/TargetRegisterInfo.h" - +#include <cassert> +#include <cstdint> +#include <cstring> #include <functional> #include <map> #include <set> #include <unordered_map> +#include <utility> #include <vector> // RDF uses uint32_t to refer to registers. 
This is to ensure that the type @@ -243,6 +248,7 @@ static_assert(sizeof(uint32_t) == sizeof(unsigned), "Those should be equal"); namespace llvm { + class MachineBasicBlock; class MachineFunction; class MachineInstr; @@ -252,6 +258,7 @@ namespace llvm { class TargetInstrInfo; namespace rdf { + typedef uint32_t NodeId; typedef uint32_t RegisterId; @@ -293,9 +300,11 @@ namespace rdf { static uint16_t set_type(uint16_t A, uint16_t T) { return (A & ~TypeMask) | T; } + static uint16_t set_kind(uint16_t A, uint16_t K) { return (A & ~KindMask) | K; } + static uint16_t set_flags(uint16_t A, uint16_t F) { return (A & ~FlagMask) | F; } @@ -326,9 +335,14 @@ namespace rdf { }; template <typename T> struct NodeAddr { - NodeAddr() : Addr(nullptr), Id(0) {} + NodeAddr() : Addr(nullptr) {} NodeAddr(T A, NodeId I) : Addr(A), Id(I) {} + // Type cast (casting constructor). The reason for having this class + // instead of std::pair. + template <typename S> NodeAddr(const NodeAddr<S> &NA) + : Addr(static_cast<T>(NA.Addr)), Id(NA.Id) {} + bool operator== (const NodeAddr<T> &NA) const { assert((Addr == NA.Addr) == (Id == NA.Id)); return Addr == NA.Addr; @@ -336,13 +350,9 @@ namespace rdf { bool operator!= (const NodeAddr<T> &NA) const { return !operator==(NA); } - // Type cast (casting constructor). The reason for having this class - // instead of std::pair. - template <typename S> NodeAddr(const NodeAddr<S> &NA) - : Addr(static_cast<T>(NA.Addr)), Id(NA.Id) {} T Addr; - NodeId Id; + NodeId Id = 0; }; struct NodeBase; @@ -366,17 +376,20 @@ namespace rdf { struct NodeAllocator { // Amount of storage for a single node. enum { NodeMemSize = 32 }; + NodeAllocator(uint32_t NPB = 4096) : NodesPerBlock(NPB), BitsPerIndex(Log2_32(NPB)), - IndexMask((1 << BitsPerIndex)-1), ActiveEnd(nullptr) { + IndexMask((1 << BitsPerIndex)-1) { assert(isPowerOf2_32(NPB)); } + NodeBase *ptr(NodeId N) const { uint32_t N1 = N-1; uint32_t BlockN = N1 >> BitsPerIndex; uint32_t Offset = (N1 & IndexMask) * NodeMemSize; return reinterpret_cast<NodeBase*>(Blocks[BlockN]+Offset); } + NodeId id(const NodeBase *P) const; NodeAddr<NodeBase*> New(); void clear(); @@ -384,6 +397,7 @@ namespace rdf { private: void startNewBlock(); bool needNewBlock(); + uint32_t makeId(uint32_t Block, uint32_t Index) const { // Add 1 to the id, to avoid the id of 0, which is treated as "null". return ((Block << BitsPerIndex) | Index) + 1; @@ -392,7 +406,7 @@ namespace rdf { const uint32_t NodesPerBlock; const uint32_t BitsPerIndex; const uint32_t IndexMask; - char *ActiveEnd; + char *ActiveEnd = nullptr; std::vector<char*> Blocks; typedef BumpPtrAllocatorImpl<MallocAllocator, 65536> AllocatorTy; AllocatorTy MemPool; @@ -405,6 +419,7 @@ namespace rdf { RegisterRef() : RegisterRef(0) {} explicit RegisterRef(RegisterId R, LaneBitmask M = LaneBitmask::getAll()) : Reg(R), Mask(R != 0 ? M : LaneBitmask::getNone()) {} + operator bool() const { return Reg != 0 && Mask.any(); } bool operator== (const RegisterRef &RR) const { return Reg == RR.Reg && Mask == RR.Mask; @@ -420,7 +435,8 @@ namespace rdf { struct TargetOperandInfo { TargetOperandInfo(const TargetInstrInfo &tii) : TII(tii) {} - virtual ~TargetOperandInfo() {} + virtual ~TargetOperandInfo() = default; + virtual bool isPreserving(const MachineInstr &In, unsigned OpNum) const; virtual bool isClobbering(const MachineInstr &In, unsigned OpNum) const; virtual bool isFixedReg(const MachineInstr &In, unsigned OpNum) const; @@ -428,7 +444,6 @@ namespace rdf { const TargetInstrInfo &TII; }; - // Packed register reference. 
Only used for storage. struct PackedRegisterRef { RegisterId Reg; @@ -442,11 +457,13 @@ namespace rdf { template <typename T, unsigned N = 32> struct IndexedSet { IndexedSet() : Map() { Map.reserve(N); } + T get(uint32_t Idx) const { // Index Idx corresponds to Map[Idx-1]. assert(Idx != 0 && !Map.empty() && Idx-1 < Map.size()); return Map[Idx-1]; } + uint32_t insert(T Val) { // Linear search. auto F = llvm::find(Map, Val); @@ -455,11 +472,13 @@ namespace rdf { Map.push_back(Val); return Map.size(); // Return actual_index + 1. } + uint32_t find(T Val) const { auto F = llvm::find(Map, Val); assert(F != Map.end()); return F - Map.begin(); } + private: std::vector<T> Map; }; @@ -478,12 +497,14 @@ namespace rdf { assert(LM.any()); return LM.all() ? 0 : find(LM); } + PackedRegisterRef pack(RegisterRef RR) { return { RR.Reg, getIndexForLaneMask(RR.Mask) }; } PackedRegisterRef pack(RegisterRef RR) const { return { RR.Reg, getIndexForLaneMask(RR.Mask) }; } + RegisterRef unpack(PackedRegisterRef PR) const { return RegisterRef(PR.Reg, getLaneMaskForIndex(PR.MaskId)); } @@ -491,11 +512,8 @@ namespace rdf { struct RegisterAggr { RegisterAggr(const TargetRegisterInfo &tri) - : Masks(), ExpAliasUnits(tri.getNumRegUnits()), CheckUnits(false), - TRI(tri) {} - RegisterAggr(const RegisterAggr &RG) - : Masks(RG.Masks), ExpAliasUnits(RG.ExpAliasUnits), - CheckUnits(RG.CheckUnits), TRI(RG.TRI) {} + : ExpAliasUnits(tri.getNumRegUnits()), CheckUnits(false), TRI(tri) {} + RegisterAggr(const RegisterAggr &RG) = default; bool empty() const { return Masks.empty(); } bool hasAliasOf(RegisterRef RR) const; @@ -530,11 +548,11 @@ namespace rdf { const TargetRegisterInfo &TRI; }; - struct NodeBase { public: // Make sure this is a POD. NodeBase() = default; + uint16_t getType() const { return NodeAttrs::type(Attrs); } uint16_t getKind() const { return NodeAttrs::kind(Attrs); } uint16_t getFlags() const { return NodeAttrs::flags(Attrs); } @@ -596,29 +614,36 @@ namespace rdf { struct RefNode : public NodeBase { RefNode() = default; + RegisterRef getRegRef(const DataFlowGraph &G) const; + MachineOperand &getOp() { assert(!(getFlags() & NodeAttrs::PhiRef)); return *Ref.Op; } + void setRegRef(RegisterRef RR, DataFlowGraph &G); void setRegRef(MachineOperand *Op, DataFlowGraph &G); + NodeId getReachingDef() const { return Ref.RD; } void setReachingDef(NodeId RD) { Ref.RD = RD; } + NodeId getSibling() const { return Ref.Sib; } void setSibling(NodeId Sib) { Ref.Sib = Sib; } + bool isUse() const { assert(getType() == NodeAttrs::Ref); return getKind() == NodeAttrs::Use; } + bool isDef() const { assert(getType() == NodeAttrs::Ref); return getKind() == NodeAttrs::Def; @@ -702,6 +727,7 @@ namespace rdf { MachineBasicBlock *getCode() const { return CodeNode::getCode<MachineBasicBlock*>(); } + void addPhi(NodeAddr<PhiNode*> PA, const DataFlowGraph &G); }; @@ -709,6 +735,7 @@ namespace rdf { MachineFunction *getCode() const { return CodeNode::getCode<MachineFunction*>(); } + NodeAddr<BlockNode*> findBlock(const MachineBasicBlock *BB, const DataFlowGraph &G) const; NodeAddr<BlockNode*> getEntryBlock(const DataFlowGraph &G); @@ -723,6 +750,7 @@ namespace rdf { template <typename T> T ptr(NodeId N) const { return static_cast<T>(ptr(N)); } + NodeId id(const NodeBase *P) const; template <typename T> NodeAddr<T> addr(NodeId N) const { @@ -738,13 +766,17 @@ namespace rdf { struct DefStack { DefStack() = default; + bool empty() const { return Stack.empty() || top() == bottom(); } + private: typedef NodeAddr<DefNode*> value_type; struct Iterator { 
typedef DefStack::value_type value_type; + Iterator &up() { Pos = DS.nextUp(Pos); return *this; } Iterator &down() { Pos = DS.nextDown(Pos); return *this; } + value_type operator*() const { assert(Pos >= 1); return DS.Stack[Pos-1]; @@ -755,14 +787,17 @@ namespace rdf { } bool operator==(const Iterator &It) const { return Pos == It.Pos; } bool operator!=(const Iterator &It) const { return Pos != It.Pos; } + private: Iterator(const DefStack &S, bool Top); + // Pos-1 is the index in the StorageType object that corresponds to // the top of the DefStack. const DefStack &DS; unsigned Pos; friend struct DefStack; }; + public: typedef Iterator iterator; iterator top() const { return Iterator(*this, true); } @@ -773,14 +808,18 @@ namespace rdf { void pop(); void start_block(NodeId N); void clear_block(NodeId N); + private: friend struct Iterator; typedef std::vector<value_type> StorageType; + bool isDelimiter(const StorageType::value_type &P, NodeId N = 0) const { return (P.Addr == nullptr) && (N == 0 || P.Id == N); } + unsigned nextUp(unsigned P) const; unsigned nextDown(unsigned P) const; + StorageType Stack; }; @@ -819,6 +858,7 @@ namespace rdf { if (RemoveFromOwner) removeFromOwner(UA); } + void unlinkDef(NodeAddr<DefNode*> DA, bool RemoveFromOwner) { unlinkDefDF(DA); if (RemoveFromOwner) @@ -831,23 +871,28 @@ namespace rdf { return BA.Addr->getType() == NodeAttrs::Ref && BA.Addr->getKind() == Kind; } + template <uint16_t Kind> static bool IsCode(const NodeAddr<NodeBase*> BA) { return BA.Addr->getType() == NodeAttrs::Code && BA.Addr->getKind() == Kind; } + static bool IsDef(const NodeAddr<NodeBase*> BA) { return BA.Addr->getType() == NodeAttrs::Ref && BA.Addr->getKind() == NodeAttrs::Def; } + static bool IsUse(const NodeAddr<NodeBase*> BA) { return BA.Addr->getType() == NodeAttrs::Ref && BA.Addr->getKind() == NodeAttrs::Use; } + static bool IsPhi(const NodeAddr<NodeBase*> BA) { return BA.Addr->getType() == NodeAttrs::Code && BA.Addr->getKind() == NodeAttrs::Phi; } + static bool IsPreservingDef(const NodeAddr<DefNode*> DA) { uint16_t Flags = DA.Addr->getFlags(); return (Flags & NodeAttrs::Preserving) && !(Flags & NodeAttrs::Undef); @@ -902,6 +947,7 @@ namespace rdf { void unlinkUseDF(NodeAddr<UseNode*> UA); void unlinkDefDF(NodeAddr<DefNode*> DA); + void removeFromOwner(NodeAddr<RefNode*> RA) { NodeAddr<InstrNode*> IA = RA.Addr->getOwner(*this); IA.Addr->removeMember(RA, *this); @@ -967,7 +1013,6 @@ namespace rdf { return MM; } - // Optionally print the lane mask, if it is not ~0. 
struct PrintLaneMaskOpt { PrintLaneMaskOpt(LaneBitmask M) : Mask(M) {} @@ -991,7 +1036,9 @@ namespace rdf { PrintNode(const NodeAddr<T> &x, const DataFlowGraph &g) : Print<NodeAddr<T>>(x, g) {} }; -} // namespace rdf -} // namespace llvm -#endif // RDF_GRAPH_H +} // end namespace rdf + +} // end namespace llvm + +#endif // LLVM_LIB_TARGET_HEXAGON_RDFGRAPH_H diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp index 6f0fdddd7d55..92d3c001df94 100644 --- a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp @@ -28,6 +28,7 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/Type.h" +#include "llvm/IR/Dominators.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -43,6 +44,11 @@ bool MipsSEDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) { return MipsDAGToDAGISel::runOnMachineFunction(MF); } +void MipsSEDAGToDAGISel::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<DominatorTreeWrapperPass>(); + SelectionDAGISel::getAnalysisUsage(AU); +} + void MipsSEDAGToDAGISel::addDSPCtrlRegOperands(bool IsDef, MachineInstr &MI, MachineFunction &MF) { MachineInstrBuilder MIB(MF, &MI); diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.h b/lib/Target/Mips/MipsSEISelDAGToDAG.h index 2a8e5877e848..f89a350cab04 100644 --- a/lib/Target/Mips/MipsSEISelDAGToDAG.h +++ b/lib/Target/Mips/MipsSEISelDAGToDAG.h @@ -28,6 +28,8 @@ private: bool runOnMachineFunction(MachineFunction &MF) override; + void getAnalysisUsage(AnalysisUsage &AU) const override; + void addDSPCtrlRegOperands(bool IsDef, MachineInstr &MI, MachineFunction &MF); diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index aa3ffde24b99..2b9195b095e1 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -3981,40 +3981,46 @@ static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall, static bool isFunctionGlobalAddress(SDValue Callee); static bool -resideInSameModule(SDValue Callee, Reloc::Model RelMod) { +resideInSameSection(const Function *Caller, SDValue Callee, + const TargetMachine &TM) { // If !G, Callee can be an external symbol. GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee); - if (!G) return false; + if (!G) + return false; const GlobalValue *GV = G->getGlobal(); - - if (GV->isDeclaration()) return false; - - switch(GV->getLinkage()) { - default: llvm_unreachable("unknow linkage type"); - case GlobalValue::AvailableExternallyLinkage: - case GlobalValue::ExternalWeakLinkage: + if (!GV->isStrongDefinitionForLinker()) return false; - // Callee with weak linkage is allowed if it has hidden or protected - // visibility - case GlobalValue::LinkOnceAnyLinkage: - case GlobalValue::LinkOnceODRLinkage: // e.g. c++ inline functions - case GlobalValue::WeakAnyLinkage: - case GlobalValue::WeakODRLinkage: // e.g. c++ template instantiation - if (GV->hasDefaultVisibility()) + // Any explicitly-specified sections and section prefixes must also match. + // Also, if we're using -ffunction-sections, then each function is always in + // a different section (the same is true for COMDAT functions). 
+ if (TM.getFunctionSections() || GV->hasComdat() || Caller->hasComdat() || + GV->getSection() != Caller->getSection()) + return false; + if (const auto *F = dyn_cast<Function>(GV)) { + if (F->getSectionPrefix() != Caller->getSectionPrefix()) return false; - - case GlobalValue::ExternalLinkage: - case GlobalValue::InternalLinkage: - case GlobalValue::PrivateLinkage: - break; } - // With '-fPIC', calling default visiblity function need insert 'nop' after - // function call, no matter that function resides in same module or not, so - // we treat it as in different module. - if (RelMod == Reloc::PIC_ && GV->hasDefaultVisibility()) + // If the callee might be interposed, then we can't assume the ultimate call + // target will be in the same section. Even in cases where we can assume that + // interposition won't happen, in any case where the linker might insert a + // stub to allow for interposition, we must generate code as though + // interposition might occur. To understand why this matters, consider a + // situation where: a -> b -> c where the arrows indicate calls. b and c are + // in the same section, but a is in a different module (i.e. has a different + // TOC base pointer). If the linker allows for interposition between b and c, + // then it will generate a stub for the call edge between b and c which will + // save the TOC pointer into the designated stack slot allocated by b. If we + // return true here, and therefore allow a tail call between b and c, that + // stack slot won't exist and the b -> c stub will end up saving b's TOC base + // pointer into the stack slot allocated by a (where the a -> b stub saved + // a's TOC base pointer). If we're not considering a tail call, but rather, + // whether a nop is needed after the call instruction in b, because the linker + // will insert a stub, it might complain about a missing nop if we omit it + // (although many don't complain in this case). + if (!TM.shouldAssumeDSOLocal(*Caller->getParent(), GV)) return false; return true; @@ -4130,11 +4136,11 @@ PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4( !isa<ExternalSymbolSDNode>(Callee)) return false; - // Check if Callee resides in the same module, because for now, PPC64 SVR4 ABI - // (ELFv1/ELFv2) doesn't allow tail calls to a symbol resides in another - // module. + // Check if Callee resides in the same section, because for now, PPC64 SVR4 + // ABI (ELFv1/ELFv2) doesn't allow tail calls to a symbol that resides in another + // section. // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977 - if (!resideInSameModule(Callee, getTargetMachine().getRelocationModel())) + if (!resideInSameSection(MF.getFunction(), Callee, getTargetMachine())) return false; // TCO allows altering callee ABI, so we don't have to check further. @@ -4592,14 +4598,6 @@ PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain, return CallOpc; } -static -bool isLocalCall(const SDValue &Callee) -{ - if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) - return G->getGlobal()->isStrongDefinitionForLinker(); - return false; -} - SDValue PPCTargetLowering::LowerCallResult( SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, @@ -4701,6 +4699,7 @@ SDValue PPCTargetLowering::FinishCall( // stack frame. If caller and callee belong to the same module (and have the // same TOC), the NOP will remain unchanged.
+ MachineFunction &MF = DAG.getMachineFunction(); if (!isTailCall && Subtarget.isSVR4ABI()&& Subtarget.isPPC64() && !isPatchPoint) { if (CallOpc == PPCISD::BCTRL) { @@ -4724,11 +4723,11 @@ SDValue PPCTargetLowering::FinishCall( // The address needs to go after the chain input but before the flag (or // any other variadic arguments). Ops.insert(std::next(Ops.begin()), AddTOC); - } else if ((CallOpc == PPCISD::CALL) && - (!isLocalCall(Callee) || - DAG.getTarget().getRelocationModel() == Reloc::PIC_)) + } else if (CallOpc == PPCISD::CALL && + !resideInSameSection(MF.getFunction(), Callee, DAG.getTarget())) { // Otherwise insert NOP for non-local calls. CallOpc = PPCISD::CALL_NOP; + } } Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops); diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp index d42e1187ce64..e1825ca1eda1 100644 --- a/lib/Target/X86/X86AsmPrinter.cpp +++ b/lib/Target/X86/X86AsmPrinter.cpp @@ -70,7 +70,7 @@ bool X86AsmPrinter::runOnMachineFunction(MachineFunction &MF) { EmitFunctionBody(); // Emit the XRay table for this function. - EmitXRayTable(); + emitXRayTable(); // We didn't modify anything. return false; diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index 1deefe1231ca..cd690442bb9f 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -373,6 +373,10 @@ int X86FrameLowering::mergeSPUpdates(MachineBasicBlock &MBB, MachineBasicBlock::iterator PI = doMergeWithPrevious ? std::prev(MBBI) : MBBI; MachineBasicBlock::iterator NI = doMergeWithPrevious ? nullptr : std::next(MBBI); + PI = skipDebugInstructionsBackward(PI, MBB.begin()); + if (NI != nullptr) + NI = skipDebugInstructionsForward(NI, MBB.end()); + unsigned Opc = PI->getOpcode(); int Offset = 0; @@ -2586,6 +2590,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, uint64_t Amount = !reserveCallFrame ? I->getOperand(0).getImm() : 0; uint64_t InternalAmt = (isDestroy || Amount) ? I->getOperand(1).getImm() : 0; I = MBB.erase(I); + auto InsertPos = skipDebugInstructionsForward(I, MBB.end()); if (!reserveCallFrame) { // If the stack pointer can be changed after prologue, turn the @@ -2615,7 +2620,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, if (HasDwarfEHHandlers && !isDestroy && MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences()) - BuildCFI(MBB, I, DL, + BuildCFI(MBB, InsertPos, DL, MCCFIInstruction::createGnuArgsSize(nullptr, Amount)); if (Amount == 0) @@ -2629,7 +2634,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, // If this is a callee-pop calling convention, emit a CFA adjust for // the amount the callee popped. if (isDestroy && InternalAmt && DwarfCFI && !hasFP(MF)) - BuildCFI(MBB, I, DL, + BuildCFI(MBB, InsertPos, DL, MCCFIInstruction::createAdjustCfaOffset(nullptr, -InternalAmt)); // Add Amount to SP to destroy a frame, or subtract to setup. @@ -2640,13 +2645,13 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, // Merge with any previous or following adjustment instruction. Note: the // instructions merged with here do not have CFI, so their stack // adjustments do not feed into CfaAdjustment. 
- StackAdjustment += mergeSPUpdates(MBB, I, true); - StackAdjustment += mergeSPUpdates(MBB, I, false); + StackAdjustment += mergeSPUpdates(MBB, InsertPos, true); + StackAdjustment += mergeSPUpdates(MBB, InsertPos, false); if (StackAdjustment) { if (!(Fn->optForMinSize() && - adjustStackWithPops(MBB, I, DL, StackAdjustment))) - BuildStackAdjustment(MBB, I, DL, StackAdjustment, + adjustStackWithPops(MBB, InsertPos, DL, StackAdjustment))) + BuildStackAdjustment(MBB, InsertPos, DL, StackAdjustment, /*InEpilogue=*/false); } } @@ -2662,8 +2667,9 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, // TODO: When not using precise CFA, we also need to adjust for the // InternalAmt here. if (CfaAdjustment) { - BuildCFI(MBB, I, DL, MCCFIInstruction::createAdjustCfaOffset( - nullptr, CfaAdjustment)); + BuildCFI(MBB, InsertPos, DL, + MCCFIInstruction::createAdjustCfaOffset(nullptr, + CfaAdjustment)); } } diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index b293dfa98f82..fd2189397279 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -11474,6 +11474,10 @@ static SDValue lowerV2X128VectorShuffle(const SDLoc &DL, MVT VT, SDValue V1, const SmallBitVector &Zeroable, const X86Subtarget &Subtarget, SelectionDAG &DAG) { + SmallVector<int, 4> WidenedMask; + if (!canWidenShuffleElements(Mask, WidenedMask)) + return SDValue(); + // TODO: If minimizing size and one of the inputs is a zero vector and // the zero vector has only one use, we could use a VPERM2X128 to save the // instruction bytes needed to explicitly generate the zero vector. @@ -11521,15 +11525,10 @@ static SDValue lowerV2X128VectorShuffle(const SDLoc &DL, MVT VT, SDValue V1, // [6] - ignore // [7] - zero high half of destination - int MaskLO = Mask[0]; - if (MaskLO == SM_SentinelUndef) - MaskLO = Mask[1] == SM_SentinelUndef ? 0 : Mask[1]; - - int MaskHI = Mask[2]; - if (MaskHI == SM_SentinelUndef) - MaskHI = Mask[3] == SM_SentinelUndef ? 0 : Mask[3]; + int MaskLO = WidenedMask[0] < 0 ? 0 : WidenedMask[0]; + int MaskHI = WidenedMask[1] < 0 ? 0 : WidenedMask[1]; - unsigned PermMask = MaskLO / 2 | (MaskHI / 2) << 4; + unsigned PermMask = MaskLO | (MaskHI << 4); // If either input is a zero vector, replace it with an undef input. // Shuffle mask values < 4 are selecting elements of V1. @@ -11538,16 +11537,16 @@ static SDValue lowerV2X128VectorShuffle(const SDLoc &DL, MVT VT, SDValue V1, // selecting the zero vector and setting the zero mask bit. if (IsV1Zero) { V1 = DAG.getUNDEF(VT); - if (MaskLO < 4) + if (MaskLO < 2) PermMask = (PermMask & 0xf0) | 0x08; - if (MaskHI < 4) + if (MaskHI < 2) PermMask = (PermMask & 0x0f) | 0x80; } if (IsV2Zero) { V2 = DAG.getUNDEF(VT); - if (MaskLO >= 4) + if (MaskLO >= 2) PermMask = (PermMask & 0xf0) | 0x08; - if (MaskHI >= 4) + if (MaskHI >= 2) PermMask = (PermMask & 0x0f) | 0x80; } @@ -12012,11 +12011,9 @@ static SDValue lowerV4F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask, assert(V2.getSimpleValueType() == MVT::v4f64 && "Bad operand type!"); assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!"); - SmallVector<int, 4> WidenedMask; - if (canWidenShuffleElements(Mask, WidenedMask)) - if (SDValue V = lowerV2X128VectorShuffle(DL, MVT::v4f64, V1, V2, Mask, - Zeroable, Subtarget, DAG)) - return V; + if (SDValue V = lowerV2X128VectorShuffle(DL, MVT::v4f64, V1, V2, Mask, + Zeroable, Subtarget, DAG)) + return V; if (V2.isUndef()) { // Check for being able to broadcast a single element.
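
The rewritten lowerV2X128VectorShuffle computes the VPERM2X128 control byte directly from the two widened lane selectors: bits [1:0] choose the source 128-bit lane for the destination's low half, bits [5:4] for the high half, and bits [3]/[7] zero the corresponding half, matching the bit table in the context above. A standalone sketch of that encoding (a helper of my own, not LLVM's API), with lanes 0-1 naming V1's halves and 2-3 naming V2's:

    #include <cassert>
    #include <cstdint>

    // Encode a VPERM2X128-style immediate from two 128-bit lane selectors
    // taken across the concatenated <V1, V2> pair (indices 0..3).
    uint8_t perm2x128Imm(unsigned LoLane, unsigned HiLane,
                         bool ZeroLo, bool ZeroHi) {
      uint8_t Imm = (LoLane & 0x3) | ((HiLane & 0x3) << 4);
      if (ZeroLo)
        Imm |= 0x08; // bit 3: zero the destination's low 128 bits
      if (ZeroHi)
        Imm |= 0x80; // bit 7: zero the destination's high 128 bits
      return Imm;
    }

    int main() {
      // Swap the halves of one input: <V1.hi, V1.lo> encodes as 0x01.
      assert(perm2x128Imm(1, 0, false, false) == 0x01);
      // Cross-input pick: <V1.hi, V2.lo> encodes as the classic 0x21.
      assert(perm2x128Imm(1, 2, false, false) == 0x21);
      return 0;
    }

Running canWidenShuffleElements() up front means the mask is already in this 0..3 lane domain, which is why the old "/ 2" scaling of raw element indices could be dropped from the PermMask computation.
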
@@ -12107,11 +12104,9 @@ static SDValue lowerV4I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask, assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!"); assert(Subtarget.hasAVX2() && "We can only lower v4i64 with AVX2!"); - SmallVector<int, 4> WidenedMask; - if (canWidenShuffleElements(Mask, WidenedMask)) - if (SDValue V = lowerV2X128VectorShuffle(DL, MVT::v4i64, V1, V2, Mask, - Zeroable, Subtarget, DAG)) - return V; + if (SDValue V = lowerV2X128VectorShuffle(DL, MVT::v4i64, V1, V2, Mask, + Zeroable, Subtarget, DAG)) + return V; if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v4i64, V1, V2, Mask, Zeroable, Subtarget, DAG)) @@ -12605,33 +12600,72 @@ static SDValue lowerV4X128VectorShuffle(const SDLoc &DL, MVT VT, if (!canWidenShuffleElements(Mask, WidenedMask)) return SDValue(); + // Check for patterns which can be matched with a single insert of a 256-bit + // subvector. + bool OnlyUsesV1 = isShuffleEquivalent(V1, V2, Mask, + {0, 1, 2, 3, 0, 1, 2, 3}); + if (OnlyUsesV1 || isShuffleEquivalent(V1, V2, Mask, + {0, 1, 2, 3, 8, 9, 10, 11})) { + MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(), 4); + SDValue LoV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V1, + DAG.getIntPtrConstant(0, DL)); + SDValue HiV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, + OnlyUsesV1 ? V1 : V2, + DAG.getIntPtrConstant(0, DL)); + return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LoV, HiV); + } + + assert(WidenedMask.size() == 4); + + // See if this is an insertion of the lower 128-bits of V2 into V1. + bool IsInsert = true; + int V2Index = -1; + for (int i = 0; i < 4; ++i) { + assert(WidenedMask[i] >= -1); + if (WidenedMask[i] < 0) + continue; + + // Make sure all V1 subvectors are in place. + if (WidenedMask[i] < 4) { + if (WidenedMask[i] != i) { + IsInsert = false; + break; + } + } else { + // Make sure we only have a single V2 index and it's the lowest 128-bits. + if (V2Index >= 0 || WidenedMask[i] != 4) { + IsInsert = false; + break; + } + V2Index = i; + } + } + if (IsInsert && V2Index >= 0) { + MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(), 2); + SDValue Subvec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V2, + DAG.getIntPtrConstant(0, DL)); + return insert128BitVector(V1, Subvec, V2Index * 2, DAG, DL); + } + + // Try to lower to vshuf64x2/vshuf32x4. SDValue Ops[2] = {DAG.getUNDEF(VT), DAG.getUNDEF(VT)}; + unsigned PermMask = 0; // Ensure elements came from the same Op. - int MaxOp1Index = VT.getVectorNumElements()/2 - 1; - for (int i = 0, Size = WidenedMask.size(); i < Size; ++i) { - if (WidenedMask[i] == SM_SentinelZero) - return SDValue(); - if (WidenedMask[i] == SM_SentinelUndef) + for (int i = 0; i < 4; ++i) { + assert(WidenedMask[i] >= -1); + if (WidenedMask[i] < 0) continue; - SDValue Op = WidenedMask[i] > MaxOp1Index ? V2 : V1; - unsigned OpIndex = (i < Size/2) ? 0 : 1; + SDValue Op = WidenedMask[i] >= 4 ? V2 : V1; + unsigned OpIndex = i / 2; if (Ops[OpIndex].isUndef()) Ops[OpIndex] = Op; else if (Ops[OpIndex] != Op) return SDValue(); - } - // Form a 128-bit permutation. - // Convert the 64-bit shuffle mask selection values into 128-bit selection - // bits defined by a vshuf64x2 instruction's immediate control byte. - unsigned PermMask = 0, Imm = 0; - unsigned ControlBitsNum = WidenedMask.size() / 2; - - for (int i = 0, Size = WidenedMask.size(); i < Size; ++i) { - // Use first element in place of undef mask. - Imm = (WidenedMask[i] == SM_SentinelUndef) ?
0 : WidenedMask[i]; - PermMask |= (Imm % WidenedMask.size()) << (i * ControlBitsNum); + // Convert the 128-bit shuffle mask selection values into 128-bit selection + // bits defined by a vshuf64x2 instruction's immediate control byte. + PermMask |= (WidenedMask[i] % 4) << (i * 2); } return DAG.getNode(X86ISD::SHUF128, DL, VT, Ops[0], Ops[1], @@ -13051,10 +13085,10 @@ static SDValue lower1BitVectorShuffle(const SDLoc &DL, ArrayRef<int> Mask, static bool canonicalizeShuffleMaskWithCommute(ArrayRef<int> Mask) { int NumElements = Mask.size(); - int NumV1Elements = 0, NumV2Elements = 0, NumSentinelElements = 0; + int NumV1Elements = 0, NumV2Elements = 0; for (int M : Mask) if (M < 0) - ++NumSentinelElements; + continue; else if (M < NumElements) ++NumV1Elements; else @@ -18660,8 +18694,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget Mask, PassThru, Subtarget, DAG); } case INTR_TYPE_3OP_IMM8_MASK: - case INTR_TYPE_3OP_MASK: - case INSERT_SUBVEC: { + case INTR_TYPE_3OP_MASK: { SDValue Src1 = Op.getOperand(1); SDValue Src2 = Op.getOperand(2); SDValue Src3 = Op.getOperand(3); @@ -18670,13 +18703,6 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget if (IntrData->Type == INTR_TYPE_3OP_IMM8_MASK) Src3 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Src3); - else if (IntrData->Type == INSERT_SUBVEC) { - // imm should be adapted to ISD::INSERT_SUBVECTOR behavior - assert(isa<ConstantSDNode>(Src3) && "Expected a ConstantSDNode here!"); - unsigned Imm = cast<ConstantSDNode>(Src3)->getZExtValue(); - Imm *= Src2.getSimpleValueType().getVectorNumElements(); - Src3 = DAG.getTargetConstant(Imm, dl, MVT::i32); - } // We specify 2 possible opcodes for intrinsics with rounding modes. // First, we check if the intrinsic may have non-default rounding mode, @@ -28693,6 +28719,29 @@ static bool combineBitcastForMaskedOp(SDValue OrigOp, SelectionDAG &DAG, return BitcastAndCombineShuffle(Opcode, Op.getOperand(0), Op.getOperand(1), Op.getOperand(2)); } + case ISD::INSERT_SUBVECTOR: { + unsigned EltSize = EltVT.getSizeInBits(); + if (EltSize != 32 && EltSize != 64) + return false; + MVT OpEltVT = Op.getSimpleValueType().getVectorElementType(); + // Only change element size, not type. + if (VT.isInteger() != OpEltVT.isInteger()) + return false; + uint64_t Imm = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue(); + Imm = (Imm * OpEltVT.getSizeInBits()) / EltSize; + SDValue Op0 = DAG.getBitcast(VT, Op.getOperand(0)); + DCI.AddToWorklist(Op0.getNode()); + // Op1 needs to be bitcasted to a smaller vector with the same element type. + SDValue Op1 = Op.getOperand(1); + MVT Op1VT = MVT::getVectorVT(EltVT, + Op1.getSimpleValueType().getSizeInBits() / EltSize); + Op1 = DAG.getBitcast(Op1VT, Op1); + DCI.AddToWorklist(Op1.getNode()); + DCI.CombineTo(OrigOp.getNode(), + DAG.getNode(Opcode, DL, VT, Op0, Op1, + DAG.getConstant(Imm, DL, MVT::i8))); + return true; + } } return false; @@ -31784,6 +31833,83 @@ static SDValue combineFaddFsub(SDNode *N, SelectionDAG &DAG, return SDValue(); } +/// Attempt to pre-truncate inputs to arithmetic ops if it will simplify +/// the codegen. +/// e.g. 
TRUNC( BINOP( X, Y ) ) --> BINOP( TRUNC( X ), TRUNC( Y ) ) +static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG, + const X86Subtarget &Subtarget, + SDLoc &DL) { + assert(N->getOpcode() == ISD::TRUNCATE && "Wrong opcode"); + SDValue Src = N->getOperand(0); + unsigned Opcode = Src.getOpcode(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + + EVT VT = N->getValueType(0); + EVT SrcVT = Src.getValueType(); + + auto IsRepeatedOpOrOneUseConstant = [](SDValue Op0, SDValue Op1) { + // TODO: Add extra cases where we can truncate both inputs for the + // cost of one (or none). + // e.g. TRUNC( BINOP( EXT( X ), EXT( Y ) ) ) --> BINOP( X, Y ) + if (Op0 == Op1) + return true; + + SDValue BC0 = peekThroughOneUseBitcasts(Op0); + SDValue BC1 = peekThroughOneUseBitcasts(Op1); + return ISD::isBuildVectorOfConstantSDNodes(BC0.getNode()) || + ISD::isBuildVectorOfConstantSDNodes(BC1.getNode()); + }; + + auto TruncateArithmetic = [&](SDValue N0, SDValue N1) { + SDValue Trunc0 = DAG.getNode(ISD::TRUNCATE, DL, VT, N0); + SDValue Trunc1 = DAG.getNode(ISD::TRUNCATE, DL, VT, N1); + return DAG.getNode(Opcode, DL, VT, Trunc0, Trunc1); + }; + + // Don't combine if the operation has other uses. + if (!N->isOnlyUserOf(Src.getNode())) + return SDValue(); + + // Only support vector truncation for now. + // TODO: i64 scalar math would benefit as well. + if (!VT.isVector()) + return SDValue(); + + // In most cases it's only worth pre-truncating if we're only facing the cost + // of one truncation. + // i.e. if one of the inputs will constant fold or the input is repeated. + switch (Opcode) { + case ISD::AND: + case ISD::XOR: + case ISD::OR: { + SDValue Op0 = Src.getOperand(0); + SDValue Op1 = Src.getOperand(1); + if (TLI.isOperationLegalOrPromote(Opcode, VT) && + IsRepeatedOpOrOneUseConstant(Op0, Op1)) + return TruncateArithmetic(Op0, Op1); + break; + } + + case ISD::MUL: + // X86 is rubbish at scalar and vector i64 multiplies (until AVX512DQ) - it's + // better to truncate if we have the chance. + if (SrcVT.getScalarType() == MVT::i64 && TLI.isOperationLegal(Opcode, VT) && + !TLI.isOperationLegal(Opcode, SrcVT)) + return TruncateArithmetic(Src.getOperand(0), Src.getOperand(1)); + LLVM_FALLTHROUGH; + case ISD::ADD: { + SDValue Op0 = Src.getOperand(0); + SDValue Op1 = Src.getOperand(1); + if (TLI.isOperationLegal(Opcode, VT) && + IsRepeatedOpOrOneUseConstant(Op0, Op1)) + return TruncateArithmetic(Op0, Op1); + break; + } + } + + return SDValue(); +} + /// Truncate a group of v4i32 into v16i8/v8i16 using X86ISD::PACKUS. static SDValue combineVectorTruncationWithPACKUS(SDNode *N, SelectionDAG &DAG, @@ -31970,6 +32096,10 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG, SDValue Src = N->getOperand(0); SDLoc DL(N); + // Attempt to pre-truncate inputs to arithmetic ops instead. + if (SDValue V = combineTruncatedArithmetic(N, DAG, Subtarget, DL)) + return V; + // Try to detect AVG pattern first. if (SDValue Avg = detectAVGPattern(Src, VT, DAG, Subtarget, DL)) return Avg; diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index da7437ea0ccb..908053e1342d 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -650,33 +650,6 @@ multiclass vextract_for_size<int Opcode, From.ZSuffix # "rrkz") To.KRCWM:$mask, From.RC:$src1, (EXTRACT_get_vextract_imm To.RC:$ext))>; - - // Intrinsic call with masking.
- def : Pat<(!cast<Intrinsic>("int_x86_avx512_mask_vextract" # To.EltTypeName # - "x" # To.NumElts # "_" # From.Size) - From.RC:$src1, (iPTR imm:$idx), To.RC:$src0, To.MRC:$mask), - (!cast<Instruction>(NAME # To.EltSize # "x" # To.NumElts # - From.ZSuffix # "rrk") - To.RC:$src0, - (COPY_TO_REGCLASS To.MRC:$mask, To.KRCWM), - From.RC:$src1, imm:$idx)>; - - // Intrinsic call with zero-masking. - def : Pat<(!cast<Intrinsic>("int_x86_avx512_mask_vextract" # To.EltTypeName # - "x" # To.NumElts # "_" # From.Size) - From.RC:$src1, (iPTR imm:$idx), To.ImmAllZerosV, To.MRC:$mask), - (!cast<Instruction>(NAME # To.EltSize # "x" # To.NumElts # - From.ZSuffix # "rrkz") - (COPY_TO_REGCLASS To.MRC:$mask, To.KRCWM), - From.RC:$src1, imm:$idx)>; - - // Intrinsic call without masking. - def : Pat<(!cast<Intrinsic>("int_x86_avx512_mask_vextract" # To.EltTypeName # - "x" # To.NumElts # "_" # From.Size) - From.RC:$src1, (iPTR imm:$idx), To.ImmAllZerosV, (i8 -1)), - (!cast<Instruction>(NAME # To.EltSize # "x" # To.NumElts # - From.ZSuffix # "rr") - From.RC:$src1, imm:$idx)>; } // Codegen pattern for the alternative types @@ -6871,18 +6844,18 @@ let Defs = [EFLAGS], Predicates = [HasAVX512] in { VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; } let isCodeGenOnly = 1 in { - defm Int_VUCOMISSZ : sse12_ord_cmp<0x2E, VR128X, X86ucomi, v4f32, f128mem, - load, "ucomiss">, PS, EVEX, VEX_LIG, + defm Int_VUCOMISSZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v4f32, ssmem, + sse_load_f32, "ucomiss">, PS, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>; - defm Int_VUCOMISDZ : sse12_ord_cmp<0x2E, VR128X, X86ucomi, v2f64, f128mem, - load, "ucomisd">, PD, EVEX, + defm Int_VUCOMISDZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v2f64, sdmem, + sse_load_f64, "ucomisd">, PD, EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; - defm Int_VCOMISSZ : sse12_ord_cmp<0x2F, VR128X, X86comi, v4f32, f128mem, - load, "comiss">, PS, EVEX, VEX_LIG, + defm Int_VCOMISSZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v4f32, ssmem, + sse_load_f32, "comiss">, PS, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>; - defm Int_VCOMISDZ : sse12_ord_cmp<0x2F, VR128X, X86comi, v2f64, f128mem, - load, "comisd">, PD, EVEX, + defm Int_VCOMISDZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v2f64, sdmem, + sse_load_f64, "comisd">, PD, EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; } } diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 9d6a89363044..4cd6ae563f03 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -2373,6 +2373,23 @@ multiclass sse12_ord_cmp<bits<8> opc, RegisterClass RC, SDNode OpNode, Sched<[WriteFAddLd, ReadAfterLd]>; } +// sse12_ord_cmp_int - Intrinsic version of sse12_ord_cmp +multiclass sse12_ord_cmp_int<bits<8> opc, RegisterClass RC, SDNode OpNode, + ValueType vt, Operand memop, + ComplexPattern mem_cpat, string OpcodeStr> { + def rr: SI<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2), + !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"), + [(set EFLAGS, (OpNode (vt RC:$src1), RC:$src2))], + IIC_SSE_COMIS_RR>, + Sched<[WriteFAdd]>; + def rm: SI<opc, MRMSrcMem, (outs), (ins RC:$src1, memop:$src2), + !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"), + [(set EFLAGS, (OpNode (vt RC:$src1), + mem_cpat:$src2))], + IIC_SSE_COMIS_RM>, + Sched<[WriteFAddLd, ReadAfterLd]>; +} + let Defs = [EFLAGS] in { defm VUCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32, "ucomiss">, PS, VEX, VEX_LIG; @@ -2386,15 +2403,15 @@ let Defs = [EFLAGS] in { } let isCodeGenOnly = 1 in { - defm Int_VUCOMISS : sse12_ord_cmp<0x2E, VR128, 
X86ucomi, v4f32, f128mem, - load, "ucomiss">, PS, VEX; - defm Int_VUCOMISD : sse12_ord_cmp<0x2E, VR128, X86ucomi, v2f64, f128mem, - load, "ucomisd">, PD, VEX; - - defm Int_VCOMISS : sse12_ord_cmp<0x2F, VR128, X86comi, v4f32, f128mem, - load, "comiss">, PS, VEX; - defm Int_VCOMISD : sse12_ord_cmp<0x2F, VR128, X86comi, v2f64, f128mem, - load, "comisd">, PD, VEX; + defm Int_VUCOMISS : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v4f32, ssmem, + sse_load_f32, "ucomiss">, PS, VEX; + defm Int_VUCOMISD : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v2f64, sdmem, + sse_load_f64, "ucomisd">, PD, VEX; + + defm Int_VCOMISS : sse12_ord_cmp_int<0x2F, VR128, X86comi, v4f32, ssmem, + sse_load_f32, "comiss">, PS, VEX; + defm Int_VCOMISD : sse12_ord_cmp_int<0x2F, VR128, X86comi, v2f64, sdmem, + sse_load_f64, "comisd">, PD, VEX; } defm UCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32, "ucomiss">, PS; @@ -2409,15 +2426,15 @@ let Defs = [EFLAGS] in { } let isCodeGenOnly = 1 in { - defm Int_UCOMISS : sse12_ord_cmp<0x2E, VR128, X86ucomi, v4f32, f128mem, - load, "ucomiss">, PS; - defm Int_UCOMISD : sse12_ord_cmp<0x2E, VR128, X86ucomi, v2f64, f128mem, - load, "ucomisd">, PD; - - defm Int_COMISS : sse12_ord_cmp<0x2F, VR128, X86comi, v4f32, f128mem, load, - "comiss">, PS; - defm Int_COMISD : sse12_ord_cmp<0x2F, VR128, X86comi, v2f64, f128mem, load, - "comisd">, PD; + defm Int_UCOMISS : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v4f32, ssmem, + sse_load_f32, "ucomiss">, PS; + defm Int_UCOMISD : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v2f64, sdmem, + sse_load_f64, "ucomisd">, PD; + + defm Int_COMISS : sse12_ord_cmp_int<0x2F, VR128, X86comi, v4f32, ssmem, + sse_load_f32, "comiss">, PS; + defm Int_COMISD : sse12_ord_cmp_int<0x2F, VR128, X86comi, v2f64, sdmem, + sse_load_f64, "comisd">, PD; } } // Defs = [EFLAGS] diff --git a/lib/Target/X86/X86InstrTablesInfo.h b/lib/Target/X86/X86InstrTablesInfo.h index 5d2af829028a..415a891bfd97 100755 --- a/lib/Target/X86/X86InstrTablesInfo.h +++ b/lib/Target/X86/X86InstrTablesInfo.h @@ -1,4 +1,4 @@ -//===-- X86AVX512Info.h - X86 Instruction Tables Information ----*- C++ -*-===// +//===-- X86InstrTablesInfo.h - X86 Instruction Tables -----------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -25,8 +25,7 @@ struct X86EvexToVexCompressTableEntry { // X86 EVEX encoded instructions that have a VEX 128 encoding // (table format: <EVEX opcode, VEX-128 opcode>). -static const X86EvexToVexCompressTableEntry - X86EvexToVex128CompressTable[] = { +static const X86EvexToVexCompressTableEntry X86EvexToVex128CompressTable[] = { // EVEX scalar with corresponding VEX. 
{ X86::Int_VCOMISDZrm , X86::Int_VCOMISDrm }, { X86::Int_VCOMISDZrr , X86::Int_VCOMISDrr }, @@ -250,20 +249,20 @@ static const X86EvexToVexCompressTableEntry { X86::VUCOMISDZrr , X86::VUCOMISDrr }, { X86::VUCOMISSZrm , X86::VUCOMISSrm }, { X86::VUCOMISSZrr , X86::VUCOMISSrr }, - + { X86::VMOV64toPQIZrr , X86::VMOV64toPQIrr }, { X86::VMOV64toSDZrr , X86::VMOV64toSDrr }, { X86::VMOVDI2PDIZrm , X86::VMOVDI2PDIrm }, { X86::VMOVDI2PDIZrr , X86::VMOVDI2PDIrr }, { X86::VMOVLHPSZrr , X86::VMOVLHPSrr }, - { X86::VMOVHLPSZrr , X86::VMOVHLPSrr }, + { X86::VMOVHLPSZrr , X86::VMOVHLPSrr }, { X86::VMOVPDI2DIZmr , X86::VMOVPDI2DImr }, { X86::VMOVPDI2DIZrr , X86::VMOVPDI2DIrr }, { X86::VMOVPQI2QIZmr , X86::VMOVPQI2QImr }, { X86::VMOVPQIto64Zrr , X86::VMOVPQIto64rr }, { X86::VMOVQI2PQIZrm , X86::VMOVQI2PQIrm }, { X86::VMOVZPQILo2PQIZrr , X86::VMOVZPQILo2PQIrr }, - + { X86::VPEXTRBZmr , X86::VPEXTRBmr }, { X86::VPEXTRBZrr , X86::VPEXTRBrr }, { X86::VPEXTRDZmr , X86::VPEXTRDmr }, @@ -272,7 +271,7 @@ static const X86EvexToVexCompressTableEntry { X86::VPEXTRQZrr , X86::VPEXTRQrr }, { X86::VPEXTRWZmr , X86::VPEXTRWmr }, { X86::VPEXTRWZrr , X86::VPEXTRWri }, - + { X86::VPINSRBZrm , X86::VPINSRBrm }, { X86::VPINSRBZrr , X86::VPINSRBrr }, { X86::VPINSRDZrm , X86::VPINSRDrm }, @@ -294,7 +293,7 @@ static const X86EvexToVexCompressTableEntry { X86::VANDPDZ128rm , X86::VANDPDrm }, { X86::VANDPDZ128rr , X86::VANDPDrr }, { X86::VANDPSZ128rm , X86::VANDPSrm }, - { X86::VANDPSZ128rr , X86::VANDPSrr }, + { X86::VANDPSZ128rr , X86::VANDPSrr }, { X86::VBROADCASTSSZ128m , X86::VBROADCASTSSrm }, { X86::VBROADCASTSSZ128r , X86::VBROADCASTSSrr }, { X86::VBROADCASTSSZ128r_s , X86::VBROADCASTSSrr }, @@ -414,8 +413,8 @@ static const X86EvexToVexCompressTableEntry { X86::VMOVAPDZ128rm , X86::VMOVAPDrm }, { X86::VMOVAPDZ128rr , X86::VMOVAPDrr }, { X86::VMOVAPDZ128rr_REV , X86::VMOVAPDrr_REV }, - { X86::VMOVAPSZ128mr , X86::VMOVAPSmr }, - { X86::VMOVAPSZ128rm , X86::VMOVAPSrm }, + { X86::VMOVAPSZ128mr , X86::VMOVAPSmr }, + { X86::VMOVAPSZ128rm , X86::VMOVAPSrm }, { X86::VMOVAPSZ128rr , X86::VMOVAPSrr }, { X86::VMOVAPSZ128rr_REV , X86::VMOVAPSrr_REV }, { X86::VMOVDDUPZ128rm , X86::VMOVDDUPrm }, @@ -464,8 +463,8 @@ static const X86EvexToVexCompressTableEntry { X86::VMOVUPDZ128rm , X86::VMOVUPDrm }, { X86::VMOVUPDZ128rr , X86::VMOVUPDrr }, { X86::VMOVUPDZ128rr_REV , X86::VMOVUPDrr_REV }, - { X86::VMOVUPSZ128mr , X86::VMOVUPSmr }, - { X86::VMOVUPSZ128rm , X86::VMOVUPSrm }, + { X86::VMOVUPSZ128mr , X86::VMOVUPSmr }, + { X86::VMOVUPSZ128rm , X86::VMOVUPSrm }, { X86::VMOVUPSZ128rr , X86::VMOVUPSrr }, { X86::VMOVUPSZ128rr_REV , X86::VMOVUPSrr_REV }, { X86::VMULPDZ128rm , X86::VMULPDrm }, @@ -520,9 +519,9 @@ static const X86EvexToVexCompressTableEntry { X86::VPBROADCASTBZ128r , X86::VPBROADCASTBrr }, { X86::VPBROADCASTDZ128m , X86::VPBROADCASTDrm }, { X86::VPBROADCASTDZ128r , X86::VPBROADCASTDrr }, - { X86::VPBROADCASTQZ128m , X86::VPBROADCASTQrm }, - { X86::VPBROADCASTQZ128r , X86::VPBROADCASTQrr }, - { X86::VPBROADCASTWZ128m , X86::VPBROADCASTWrm }, + { X86::VPBROADCASTQZ128m , X86::VPBROADCASTQrm }, + { X86::VPBROADCASTQZ128r , X86::VPBROADCASTQrr }, + { X86::VPBROADCASTWZ128m , X86::VPBROADCASTWrm }, { X86::VPBROADCASTWZ128r , X86::VPBROADCASTWrr }, { X86::VPERMILPDZ128mi , X86::VPERMILPDmi }, { X86::VPERMILPDZ128ri , X86::VPERMILPDri }, @@ -583,7 +582,7 @@ static const X86EvexToVexCompressTableEntry { X86::VPMOVZXWDZ128rm , X86::VPMOVZXWDrm }, { X86::VPMOVZXWDZ128rr , X86::VPMOVZXWDrr }, { X86::VPMOVZXWQZ128rm , X86::VPMOVZXWQrm }, - { 
X86::VPMOVZXWQZ128rr , X86::VPMOVZXWQrr }, + { X86::VPMOVZXWQZ128rr , X86::VPMOVZXWQrr }, { X86::VPMULDQZ128rm , X86::VPMULDQrm }, { X86::VPMULDQZ128rr , X86::VPMULDQrr }, { X86::VPMULHRSWZ128rm , X86::VPMULHRSWrm }, @@ -612,10 +611,10 @@ static const X86EvexToVexCompressTableEntry { X86::VPSHUFHWZ128ri , X86::VPSHUFHWri }, { X86::VPSHUFLWZ128mi , X86::VPSHUFLWmi }, { X86::VPSHUFLWZ128ri , X86::VPSHUFLWri }, - { X86::VPSLLDQZ128rr , X86::VPSLLDQri }, + { X86::VPSLLDQZ128rr , X86::VPSLLDQri }, { X86::VPSLLDZ128ri , X86::VPSLLDri }, { X86::VPSLLDZ128rm , X86::VPSLLDrm }, - { X86::VPSLLDZ128rr , X86::VPSLLDrr }, + { X86::VPSLLDZ128rr , X86::VPSLLDrr }, { X86::VPSLLQZ128ri , X86::VPSLLQri }, { X86::VPSLLQZ128rm , X86::VPSLLQrm }, { X86::VPSLLQZ128rr , X86::VPSLLQrr }, @@ -713,8 +712,7 @@ static const X86EvexToVexCompressTableEntry // X86 EVEX encoded instructions that have a VEX 256 encoding // (table format: <EVEX opcode, VEX-256 opcode>). - static const X86EvexToVexCompressTableEntry - X86EvexToVex256CompressTable[] = { + static const X86EvexToVexCompressTableEntry X86EvexToVex256CompressTable[] = { { X86::VADDPDZ256rm , X86::VADDPDYrm }, { X86::VADDPDZ256rr , X86::VADDPDYrr }, { X86::VADDPSZ256rm , X86::VADDPSYrm }, @@ -727,11 +725,11 @@ static const X86EvexToVexCompressTableEntry { X86::VANDPDZ256rr , X86::VANDPDYrr }, { X86::VANDPSZ256rm , X86::VANDPSYrm }, { X86::VANDPSZ256rr , X86::VANDPSYrr }, - { X86::VBROADCASTSDZ256m , X86::VBROADCASTSDYrm }, - { X86::VBROADCASTSDZ256r , X86::VBROADCASTSDYrr }, - { X86::VBROADCASTSDZ256r_s , X86::VBROADCASTSDYrr }, + { X86::VBROADCASTSDZ256m , X86::VBROADCASTSDYrm }, + { X86::VBROADCASTSDZ256r , X86::VBROADCASTSDYrr }, + { X86::VBROADCASTSDZ256r_s , X86::VBROADCASTSDYrr }, { X86::VBROADCASTSSZ256m , X86::VBROADCASTSSYrm }, - { X86::VBROADCASTSSZ256r , X86::VBROADCASTSSYrr }, + { X86::VBROADCASTSSZ256r , X86::VBROADCASTSSYrr }, { X86::VBROADCASTSSZ256r_s , X86::VBROADCASTSSYrr }, { X86::VCVTDQ2PDZ256rm , X86::VCVTDQ2PDYrm }, { X86::VCVTDQ2PDZ256rr , X86::VCVTDQ2PDYrr }, @@ -757,6 +755,14 @@ static const X86EvexToVexCompressTableEntry { X86::VDIVPDZ256rr , X86::VDIVPDYrr }, { X86::VDIVPSZ256rm , X86::VDIVPSYrm }, { X86::VDIVPSZ256rr , X86::VDIVPSYrr }, + { X86::VEXTRACTF32x4Z256mr , X86::VEXTRACTF128mr }, + { X86::VEXTRACTF64x2Z256mr , X86::VEXTRACTF128mr }, + { X86::VEXTRACTF32x4Z256rr , X86::VEXTRACTF128rr }, + { X86::VEXTRACTF64x2Z256rr , X86::VEXTRACTF128rr }, + { X86::VEXTRACTI32x4Z256mr , X86::VEXTRACTI128mr }, + { X86::VEXTRACTI64x2Z256mr , X86::VEXTRACTI128mr }, + { X86::VEXTRACTI32x4Z256rr , X86::VEXTRACTI128rr }, + { X86::VEXTRACTI64x2Z256rr , X86::VEXTRACTI128rr }, { X86::VFMADD132PDZ256m , X86::VFMADD132PDYm }, { X86::VFMADD132PDZ256r , X86::VFMADD132PDYr }, { X86::VFMADD132PSZ256m , X86::VFMADD132PSYm }, @@ -829,6 +835,14 @@ static const X86EvexToVexCompressTableEntry { X86::VFNMSUB231PDZ256r , X86::VFNMSUB231PDYr }, { X86::VFNMSUB231PSZ256m , X86::VFNMSUB231PSYm }, { X86::VFNMSUB231PSZ256r , X86::VFNMSUB231PSYr }, + { X86::VINSERTF32x4Z256rm , X86::VINSERTF128rm }, + { X86::VINSERTF64x2Z256rm , X86::VINSERTF128rm }, + { X86::VINSERTF32x4Z256rr , X86::VINSERTF128rr }, + { X86::VINSERTF64x2Z256rr , X86::VINSERTF128rr }, + { X86::VINSERTI32x4Z256rm , X86::VINSERTI128rm }, + { X86::VINSERTI64x2Z256rm , X86::VINSERTI128rm }, + { X86::VINSERTI32x4Z256rr , X86::VINSERTI128rr }, + { X86::VINSERTI64x2Z256rr , X86::VINSERTI128rr }, { X86::VMAXCPDZ256rm , X86::VMAXCPDYrm }, { X86::VMAXCPDZ256rr , X86::VMAXCPDYrr }, { X86::VMAXCPSZ256rm , 
X86::VMAXCPSYrm }, @@ -849,8 +863,8 @@ static const X86EvexToVexCompressTableEntry { X86::VMOVAPDZ256rm , X86::VMOVAPDYrm }, { X86::VMOVAPDZ256rr , X86::VMOVAPDYrr }, { X86::VMOVAPDZ256rr_REV , X86::VMOVAPDYrr_REV }, - { X86::VMOVAPSZ256mr , X86::VMOVAPSYmr }, - { X86::VMOVAPSZ256rm , X86::VMOVAPSYrm }, + { X86::VMOVAPSZ256mr , X86::VMOVAPSYmr }, + { X86::VMOVAPSZ256rm , X86::VMOVAPSYrm }, { X86::VMOVAPSZ256rr , X86::VMOVAPSYrr }, { X86::VMOVAPSZ256rr_REV , X86::VMOVAPSYrr_REV }, { X86::VMOVDDUPZ256rm , X86::VMOVDDUPYrm }, @@ -943,14 +957,14 @@ static const X86EvexToVexCompressTableEntry { X86::VPAVGBZ256rr , X86::VPAVGBYrr }, { X86::VPAVGWZ256rm , X86::VPAVGWYrm }, { X86::VPAVGWZ256rr , X86::VPAVGWYrr }, - { X86::VPBROADCASTBZ256m , X86::VPBROADCASTBYrm }, - { X86::VPBROADCASTBZ256r , X86::VPBROADCASTBYrr }, - { X86::VPBROADCASTDZ256m , X86::VPBROADCASTDYrm }, - { X86::VPBROADCASTDZ256r , X86::VPBROADCASTDYrr }, - { X86::VPBROADCASTQZ256m , X86::VPBROADCASTQYrm }, - { X86::VPBROADCASTQZ256r , X86::VPBROADCASTQYrr }, - { X86::VPBROADCASTWZ256m , X86::VPBROADCASTWYrm }, - { X86::VPBROADCASTWZ256r , X86::VPBROADCASTWYrr }, + { X86::VPBROADCASTBZ256m , X86::VPBROADCASTBYrm }, + { X86::VPBROADCASTBZ256r , X86::VPBROADCASTBYrr }, + { X86::VPBROADCASTDZ256m , X86::VPBROADCASTDYrm }, + { X86::VPBROADCASTDZ256r , X86::VPBROADCASTDYrr }, + { X86::VPBROADCASTQZ256m , X86::VPBROADCASTQYrm }, + { X86::VPBROADCASTQZ256r , X86::VPBROADCASTQYrr }, + { X86::VPBROADCASTWZ256m , X86::VPBROADCASTWYrm }, + { X86::VPBROADCASTWZ256r , X86::VPBROADCASTWYrr }, { X86::VPERMDZ256rm , X86::VPERMDYrm }, { X86::VPERMDZ256rr , X86::VPERMDYrr }, { X86::VPERMILPDZ256mi , X86::VPERMILPDYmi }, @@ -1050,7 +1064,7 @@ static const X86EvexToVexCompressTableEntry { X86::VPSLLDQZ256rr , X86::VPSLLDQYri }, { X86::VPSLLDZ256ri , X86::VPSLLDYri }, { X86::VPSLLDZ256rm , X86::VPSLLDYrm }, - { X86::VPSLLDZ256rr , X86::VPSLLDYrr }, + { X86::VPSLLDZ256rr , X86::VPSLLDYrr }, { X86::VPSLLQZ256ri , X86::VPSLLQYri }, { X86::VPSLLQZ256rm , X86::VPSLLQYrm }, { X86::VPSLLQZ256rr , X86::VPSLLQYrr }, @@ -1060,7 +1074,7 @@ static const X86EvexToVexCompressTableEntry { X86::VPSLLVQZ256rr , X86::VPSLLVQYrr }, { X86::VPSLLWZ256ri , X86::VPSLLWYri }, { X86::VPSLLWZ256rm , X86::VPSLLWYrm }, - { X86::VPSLLWZ256rr , X86::VPSLLWYrr }, + { X86::VPSLLWZ256rr , X86::VPSLLWYrr }, { X86::VPSRADZ256ri , X86::VPSRADYri }, { X86::VPSRADZ256rm , X86::VPSRADYrm }, { X86::VPSRADZ256rr , X86::VPSRADYrr }, @@ -1072,7 +1086,7 @@ static const X86EvexToVexCompressTableEntry { X86::VPSRLDQZ256rr , X86::VPSRLDQYri }, { X86::VPSRLDZ256ri , X86::VPSRLDYri }, { X86::VPSRLDZ256rm , X86::VPSRLDYrm }, - { X86::VPSRLDZ256rr , X86::VPSRLDYrr }, + { X86::VPSRLDZ256rr , X86::VPSRLDYrr }, { X86::VPSRLQZ256ri , X86::VPSRLQYri }, { X86::VPSRLQZ256rm , X86::VPSRLQYrm }, { X86::VPSRLQZ256rr , X86::VPSRLQYrr }, @@ -1145,4 +1159,4 @@ static const X86EvexToVexCompressTableEntry { X86::VXORPSZ256rr , X86::VXORPSYrr }, }; -#endif
\ No newline at end of file +#endif diff --git a/lib/Target/X86/X86IntrinsicsInfo.h b/lib/Target/X86/X86IntrinsicsInfo.h index df47b4ad583d..63a02af02faa 100644 --- a/lib/Target/X86/X86IntrinsicsInfo.h +++ b/lib/Target/X86/X86IntrinsicsInfo.h @@ -34,7 +34,7 @@ enum IntrinsicType : uint16_t { INTR_TYPE_SCALAR_MASK_RM, INTR_TYPE_3OP_SCALAR_MASK_RM, COMPRESS_EXPAND_IN_REG, COMPRESS_TO_MEM, BRCST_SUBVEC_TO_VEC, BRCST32x2_TO_VEC, TRUNCATE_TO_MEM_VI8, TRUNCATE_TO_MEM_VI16, TRUNCATE_TO_MEM_VI32, - EXPAND_FROM_MEM, INSERT_SUBVEC, + EXPAND_FROM_MEM, TERLOG_OP_MASK, TERLOG_OP_MASKZ, BROADCASTM, KUNPCK, FIXUPIMM, FIXUPIMM_MASKZ, FIXUPIMMS, FIXUPIMMS_MASKZ, CONVERT_MASK_TO_VEC, CONVERT_TO_MASK }; @@ -795,30 +795,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86ISD::VGETMANTS, 0), X86_INTRINSIC_DATA(avx512_mask_getmant_ss, INTR_TYPE_3OP_SCALAR_MASK_RM, X86ISD::VGETMANTS, 0), - X86_INTRINSIC_DATA(avx512_mask_insertf32x4_256, INSERT_SUBVEC, - ISD::INSERT_SUBVECTOR, 0), - X86_INTRINSIC_DATA(avx512_mask_insertf32x4_512, INSERT_SUBVEC, - ISD::INSERT_SUBVECTOR, 0), - X86_INTRINSIC_DATA(avx512_mask_insertf32x8_512, INSERT_SUBVEC, - ISD::INSERT_SUBVECTOR, 0), - X86_INTRINSIC_DATA(avx512_mask_insertf64x2_256, INSERT_SUBVEC, - ISD::INSERT_SUBVECTOR, 0), - X86_INTRINSIC_DATA(avx512_mask_insertf64x2_512, INSERT_SUBVEC, - ISD::INSERT_SUBVECTOR, 0), - X86_INTRINSIC_DATA(avx512_mask_insertf64x4_512, INSERT_SUBVEC, - ISD::INSERT_SUBVECTOR, 0), - X86_INTRINSIC_DATA(avx512_mask_inserti32x4_256, INSERT_SUBVEC, - ISD::INSERT_SUBVECTOR, 0), - X86_INTRINSIC_DATA(avx512_mask_inserti32x4_512, INSERT_SUBVEC, - ISD::INSERT_SUBVECTOR, 0), - X86_INTRINSIC_DATA(avx512_mask_inserti32x8_512, INSERT_SUBVEC, - ISD::INSERT_SUBVECTOR, 0), - X86_INTRINSIC_DATA(avx512_mask_inserti64x2_256, INSERT_SUBVEC, - ISD::INSERT_SUBVECTOR, 0), - X86_INTRINSIC_DATA(avx512_mask_inserti64x2_512, INSERT_SUBVEC, - ISD::INSERT_SUBVECTOR, 0), - X86_INTRINSIC_DATA(avx512_mask_inserti64x4_512, INSERT_SUBVEC, - ISD::INSERT_SUBVECTOR, 0), X86_INTRINSIC_DATA(avx512_mask_lzcnt_d_128, INTR_TYPE_1OP_MASK, ISD::CTLZ, 0), X86_INTRINSIC_DATA(avx512_mask_lzcnt_d_256, INTR_TYPE_1OP_MASK, diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp index 2f69df064e7f..a38a4b30b77d 100644 --- a/lib/Target/X86/X86MCInstLower.cpp +++ b/lib/Target/X86/X86MCInstLower.cpp @@ -1115,56 +1115,6 @@ void X86AsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI, X86MCInstLo OutStreamer->EmitInstruction(TC, getSubtargetInfo()); } -void X86AsmPrinter::EmitXRayTable() { - if (Sleds.empty()) - return; - - auto PrevSection = OutStreamer->getCurrentSectionOnly(); - auto Fn = MF->getFunction(); - MCSection *Section = nullptr; - if (Subtarget->isTargetELF()) { - if (Fn->hasComdat()) { - Section = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS, - ELF::SHF_ALLOC | ELF::SHF_GROUP, 0, - Fn->getComdat()->getName()); - } else { - Section = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS, - ELF::SHF_ALLOC); - } - } else if (Subtarget->isTargetMachO()) { - Section = OutContext.getMachOSection("__DATA", "xray_instr_map", 0, - SectionKind::getReadOnlyWithRel()); - } else { - llvm_unreachable("Unsupported target"); - } - - // Before we switch over, we force a reference to a label inside the - // xray_instr_map section. Since EmitXRayTable() is always called just - // before the function's end, we assume that this is happening after the - // last return instruction. 
- // - // We then align the reference to 16 byte boundaries, which we determined - // experimentally to be beneficial to avoid causing decoder stalls. - MCSymbol *Tmp = OutContext.createTempSymbol("xray_synthetic_", true); - OutStreamer->EmitCodeAlignment(16); - OutStreamer->EmitSymbolValue(Tmp, 8, false); - OutStreamer->SwitchSection(Section); - OutStreamer->EmitLabel(Tmp); - for (const auto &Sled : Sleds) { - OutStreamer->EmitSymbolValue(Sled.Sled, 8); - OutStreamer->EmitSymbolValue(CurrentFnSym, 8); - auto Kind = static_cast<uint8_t>(Sled.Kind); - OutStreamer->EmitBytes( - StringRef(reinterpret_cast<const char *>(&Kind), 1)); - OutStreamer->EmitBytes( - StringRef(reinterpret_cast<const char *>(&Sled.AlwaysInstrument), 1)); - OutStreamer->EmitZeros(14); - } - OutStreamer->SwitchSection(PrevSection); - - Sleds.clear(); -} - // Returns instruction preceding MBBI in MachineFunction. // If MBBI is the first instruction of the first basic block, returns null. static MachineBasicBlock::const_iterator diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp index 2b0e672d56f2..d7792e296a58 100644 --- a/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/lib/Target/X86/X86TargetTransformInfo.cpp @@ -598,198 +598,136 @@ int X86TTIImpl::getArithmeticInstrCost( int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp) { - - if (Kind == TTI::SK_Reverse) { + if (Kind == TTI::SK_Reverse || Kind == TTI::SK_Alternate) { + // 64-bit packed float vectors (v2f32) are widened to type v4f32. + // 64-bit packed integer vectors (v2i32) are promoted to type v2i64. std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp); static const CostTblEntry AVX512VBMIShuffleTbl[] = { - { ISD::VECTOR_SHUFFLE, MVT::v64i8, 1 }, // vpermb - { ISD::VECTOR_SHUFFLE, MVT::v32i8, 1 } // vpermb + { TTI::SK_Reverse, MVT::v64i8, 1 }, // vpermb + { TTI::SK_Reverse, MVT::v32i8, 1 } // vpermb }; if (ST->hasVBMI()) - if (const auto *Entry = CostTableLookup(AVX512VBMIShuffleTbl, - ISD::VECTOR_SHUFFLE, LT.second)) + if (const auto *Entry = + CostTableLookup(AVX512VBMIShuffleTbl, Kind, LT.second)) return LT.first * Entry->Cost; static const CostTblEntry AVX512BWShuffleTbl[] = { - { ISD::VECTOR_SHUFFLE, MVT::v32i16, 1 }, // vpermw - { ISD::VECTOR_SHUFFLE, MVT::v16i16, 1 }, // vpermw - { ISD::VECTOR_SHUFFLE, MVT::v64i8, 6 } // vextracti64x4 + 2*vperm2i128 - // + 2*pshufb + vinserti64x4 + { TTI::SK_Reverse, MVT::v32i16, 1 }, // vpermw + { TTI::SK_Reverse, MVT::v16i16, 1 }, // vpermw + { TTI::SK_Reverse, MVT::v64i8, 6 } // vextracti64x4 + 2*vperm2i128 + // + 2*pshufb + vinserti64x4 }; if (ST->hasBWI()) - if (const auto *Entry = CostTableLookup(AVX512BWShuffleTbl, - ISD::VECTOR_SHUFFLE, LT.second)) + if (const auto *Entry = + CostTableLookup(AVX512BWShuffleTbl, Kind, LT.second)) return LT.first * Entry->Cost; static const CostTblEntry AVX512ShuffleTbl[] = { - { ISD::VECTOR_SHUFFLE, MVT::v8f64, 1 }, // vpermpd - { ISD::VECTOR_SHUFFLE, MVT::v16f32, 1 }, // vpermps - { ISD::VECTOR_SHUFFLE, MVT::v8i64, 1 }, // vpermq - { ISD::VECTOR_SHUFFLE, MVT::v16i32, 1 }, // vpermd + { TTI::SK_Reverse, MVT::v8f64, 1 }, // vpermpd + { TTI::SK_Reverse, MVT::v16f32, 1 }, // vpermps + { TTI::SK_Reverse, MVT::v8i64, 1 }, // vpermq + { TTI::SK_Reverse, MVT::v16i32, 1 }, // vpermd }; if (ST->hasAVX512()) if (const auto *Entry = - CostTableLookup(AVX512ShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second)) + CostTableLookup(AVX512ShuffleTbl, Kind, LT.second)) return LT.first * Entry->Cost; static const 
CostTblEntry AVX2ShuffleTbl[] = { - { ISD::VECTOR_SHUFFLE, MVT::v4f64, 1 }, // vpermpd - { ISD::VECTOR_SHUFFLE, MVT::v8f32, 1 }, // vpermps - { ISD::VECTOR_SHUFFLE, MVT::v4i64, 1 }, // vpermq - { ISD::VECTOR_SHUFFLE, MVT::v8i32, 1 }, // vpermd - { ISD::VECTOR_SHUFFLE, MVT::v16i16, 2 }, // vperm2i128 + pshufb - { ISD::VECTOR_SHUFFLE, MVT::v32i8, 2 } // vperm2i128 + pshufb + { TTI::SK_Reverse, MVT::v4f64, 1 }, // vpermpd + { TTI::SK_Reverse, MVT::v8f32, 1 }, // vpermps + { TTI::SK_Reverse, MVT::v4i64, 1 }, // vpermq + { TTI::SK_Reverse, MVT::v8i32, 1 }, // vpermd + { TTI::SK_Reverse, MVT::v16i16, 2 }, // vperm2i128 + pshufb + { TTI::SK_Reverse, MVT::v32i8, 2 }, // vperm2i128 + pshufb + + { TTI::SK_Alternate, MVT::v16i16, 1 }, // vpblendw + { TTI::SK_Alternate, MVT::v32i8, 1 } // vpblendvb }; if (ST->hasAVX2()) - if (const auto *Entry = - CostTableLookup(AVX2ShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second)) + if (const auto *Entry = CostTableLookup(AVX2ShuffleTbl, Kind, LT.second)) return LT.first * Entry->Cost; static const CostTblEntry AVX1ShuffleTbl[] = { - { ISD::VECTOR_SHUFFLE, MVT::v4f64, 2 }, // vperm2f128 + vpermilpd - { ISD::VECTOR_SHUFFLE, MVT::v8f32, 2 }, // vperm2f128 + vpermilps - { ISD::VECTOR_SHUFFLE, MVT::v4i64, 2 }, // vperm2f128 + vpermilpd - { ISD::VECTOR_SHUFFLE, MVT::v8i32, 2 }, // vperm2f128 + vpermilps - { ISD::VECTOR_SHUFFLE, MVT::v16i16, 4 }, // vextractf128 + 2*pshufb - // + vinsertf128 - { ISD::VECTOR_SHUFFLE, MVT::v32i8, 4 } // vextractf128 + 2*pshufb - // + vinsertf128 + { TTI::SK_Reverse, MVT::v4f64, 2 }, // vperm2f128 + vpermilpd + { TTI::SK_Reverse, MVT::v8f32, 2 }, // vperm2f128 + vpermilps + { TTI::SK_Reverse, MVT::v4i64, 2 }, // vperm2f128 + vpermilpd + { TTI::SK_Reverse, MVT::v8i32, 2 }, // vperm2f128 + vpermilps + { TTI::SK_Reverse, MVT::v16i16, 4 }, // vextractf128 + 2*pshufb + // + vinsertf128 + { TTI::SK_Reverse, MVT::v32i8, 4 }, // vextractf128 + 2*pshufb + // + vinsertf128 + + { TTI::SK_Alternate, MVT::v4i64, 1 }, // vblendpd + { TTI::SK_Alternate, MVT::v4f64, 1 }, // vblendpd + { TTI::SK_Alternate, MVT::v8i32, 1 }, // vblendps + { TTI::SK_Alternate, MVT::v8f32, 1 }, // vblendps + { TTI::SK_Alternate, MVT::v16i16, 3 }, // vpand + vpandn + vpor + { TTI::SK_Alternate, MVT::v32i8, 3 } // vpand + vpandn + vpor }; if (ST->hasAVX()) - if (const auto *Entry = - CostTableLookup(AVX1ShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second)) + if (const auto *Entry = CostTableLookup(AVX1ShuffleTbl, Kind, LT.second)) + return LT.first * Entry->Cost; + + static const CostTblEntry SSE41ShuffleTbl[] = { + { TTI::SK_Alternate, MVT::v2i64, 1 }, // pblendw + { TTI::SK_Alternate, MVT::v2f64, 1 }, // movsd + { TTI::SK_Alternate, MVT::v4i32, 1 }, // pblendw + { TTI::SK_Alternate, MVT::v4f32, 1 }, // blendps + { TTI::SK_Alternate, MVT::v8i16, 1 }, // pblendw + { TTI::SK_Alternate, MVT::v16i8, 1 } // pblendvb + }; + + if (ST->hasSSE41()) + if (const auto *Entry = CostTableLookup(SSE41ShuffleTbl, Kind, LT.second)) return LT.first * Entry->Cost; static const CostTblEntry SSSE3ShuffleTbl[] = { - { ISD::VECTOR_SHUFFLE, MVT::v8i16, 1 }, // pshufb - { ISD::VECTOR_SHUFFLE, MVT::v16i8, 1 } // pshufb + { TTI::SK_Reverse, MVT::v8i16, 1 }, // pshufb + { TTI::SK_Reverse, MVT::v16i8, 1 }, // pshufb + + { TTI::SK_Alternate, MVT::v8i16, 3 }, // pshufb + pshufb + por + { TTI::SK_Alternate, MVT::v16i8, 3 } // pshufb + pshufb + por }; if (ST->hasSSSE3()) - if (const auto *Entry = - CostTableLookup(SSSE3ShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second)) + if (const auto *Entry = CostTableLookup(SSSE3ShuffleTbl, 
Kind, LT.second)) return LT.first * Entry->Cost; static const CostTblEntry SSE2ShuffleTbl[] = { - { ISD::VECTOR_SHUFFLE, MVT::v2f64, 1 }, // shufpd - { ISD::VECTOR_SHUFFLE, MVT::v2i64, 1 }, // pshufd - { ISD::VECTOR_SHUFFLE, MVT::v4i32, 1 }, // pshufd - { ISD::VECTOR_SHUFFLE, MVT::v8i16, 3 }, // pshuflw + pshufhw + pshufd - { ISD::VECTOR_SHUFFLE, MVT::v16i8, 9 } // 2*pshuflw + 2*pshufhw - // + 2*pshufd + 2*unpck + packus + { TTI::SK_Reverse, MVT::v2f64, 1 }, // shufpd + { TTI::SK_Reverse, MVT::v2i64, 1 }, // pshufd + { TTI::SK_Reverse, MVT::v4i32, 1 }, // pshufd + { TTI::SK_Reverse, MVT::v8i16, 3 }, // pshuflw + pshufhw + pshufd + { TTI::SK_Reverse, MVT::v16i8, 9 }, // 2*pshuflw + 2*pshufhw + // + 2*pshufd + 2*unpck + packus + + { TTI::SK_Alternate, MVT::v2i64, 1 }, // movsd + { TTI::SK_Alternate, MVT::v2f64, 1 }, // movsd + { TTI::SK_Alternate, MVT::v4i32, 2 }, // 2*shufps + { TTI::SK_Alternate, MVT::v8i16, 3 }, // pand + pandn + por + { TTI::SK_Alternate, MVT::v16i8, 3 } // pand + pandn + por }; if (ST->hasSSE2()) - if (const auto *Entry = - CostTableLookup(SSE2ShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second)) + if (const auto *Entry = CostTableLookup(SSE2ShuffleTbl, Kind, LT.second)) return LT.first * Entry->Cost; static const CostTblEntry SSE1ShuffleTbl[] = { - { ISD::VECTOR_SHUFFLE, MVT::v4f32, 1 }, // shufps + { TTI::SK_Reverse, MVT::v4f32, 1 }, // shufps + { TTI::SK_Alternate, MVT::v4f32, 2 } // 2*shufps }; if (ST->hasSSE1()) - if (const auto *Entry = - CostTableLookup(SSE1ShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second)) + if (const auto *Entry = CostTableLookup(SSE1ShuffleTbl, Kind, LT.second)) return LT.first * Entry->Cost; - } else if (Kind == TTI::SK_Alternate) { - // 64-bit packed float vectors (v2f32) are widened to type v4f32. - // 64-bit packed integer vectors (v2i32) are promoted to type v2i64. - std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp); - - // The backend knows how to generate a single VEX.256 version of - // instruction VPBLENDW if the target supports AVX2. - if (ST->hasAVX2() && LT.second == MVT::v16i16) - return LT.first; - - static const CostTblEntry AVXAltShuffleTbl[] = { - {ISD::VECTOR_SHUFFLE, MVT::v4i64, 1}, // vblendpd - {ISD::VECTOR_SHUFFLE, MVT::v4f64, 1}, // vblendpd - - {ISD::VECTOR_SHUFFLE, MVT::v8i32, 1}, // vblendps - {ISD::VECTOR_SHUFFLE, MVT::v8f32, 1}, // vblendps - - // This shuffle is custom lowered into a sequence of: - // 2x vextractf128 , 2x vpblendw , 1x vinsertf128 - {ISD::VECTOR_SHUFFLE, MVT::v16i16, 5}, - - // This shuffle is custom lowered into a long sequence of: - // 2x vextractf128 , 4x vpshufb , 2x vpor , 1x vinsertf128 - {ISD::VECTOR_SHUFFLE, MVT::v32i8, 9} - }; - - if (ST->hasAVX()) - if (const auto *Entry = CostTableLookup(AVXAltShuffleTbl, - ISD::VECTOR_SHUFFLE, LT.second)) - return LT.first * Entry->Cost; - - static const CostTblEntry SSE41AltShuffleTbl[] = { - // These are lowered into movsd. - {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1}, - {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1}, - - // packed float vectors with four elements are lowered into BLENDI dag - // nodes. A v4i32/v4f32 BLENDI generates a single 'blendps'/'blendpd'. - {ISD::VECTOR_SHUFFLE, MVT::v4i32, 1}, - {ISD::VECTOR_SHUFFLE, MVT::v4f32, 1}, - - // This shuffle generates a single pshufw. - {ISD::VECTOR_SHUFFLE, MVT::v8i16, 1}, - - // There is no instruction that matches a v16i8 alternate shuffle. - // The backend will expand it into the sequence 'pshufb + pshufb + or'. 
- {ISD::VECTOR_SHUFFLE, MVT::v16i8, 3} - }; - - if (ST->hasSSE41()) - if (const auto *Entry = CostTableLookup(SSE41AltShuffleTbl, ISD::VECTOR_SHUFFLE, - LT.second)) - return LT.first * Entry->Cost; - - static const CostTblEntry SSSE3AltShuffleTbl[] = { - {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1}, // movsd - {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1}, // movsd - - // SSE3 doesn't have 'blendps'. The following shuffles are expanded into - // the sequence 'shufps + pshufd' - {ISD::VECTOR_SHUFFLE, MVT::v4i32, 2}, - {ISD::VECTOR_SHUFFLE, MVT::v4f32, 2}, - - {ISD::VECTOR_SHUFFLE, MVT::v8i16, 3}, // pshufb + pshufb + or - {ISD::VECTOR_SHUFFLE, MVT::v16i8, 3} // pshufb + pshufb + or - }; - - if (ST->hasSSSE3()) - if (const auto *Entry = CostTableLookup(SSSE3AltShuffleTbl, - ISD::VECTOR_SHUFFLE, LT.second)) - return LT.first * Entry->Cost; - - static const CostTblEntry SSEAltShuffleTbl[] = { - {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1}, // movsd - {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1}, // movsd - - {ISD::VECTOR_SHUFFLE, MVT::v4i32, 2}, // shufps + pshufd - {ISD::VECTOR_SHUFFLE, MVT::v4f32, 2}, // shufps + pshufd - - // This is expanded into a long sequence of four extract + four insert. - {ISD::VECTOR_SHUFFLE, MVT::v8i16, 8}, // 4 x pextrw + 4 pinsrw. - - // 8 x (pinsrw + pextrw + and + movb + movzb + or) - {ISD::VECTOR_SHUFFLE, MVT::v16i8, 48} - }; - - // Fall-back (SSE3 and SSE2). - if (const auto *Entry = CostTableLookup(SSEAltShuffleTbl, - ISD::VECTOR_SHUFFLE, LT.second)) - return LT.first * Entry->Cost; - } else if (Kind == TTI::SK_PermuteTwoSrc) { // We assume that source and destination have the same vector type. std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
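To make the cost-table restructuring above concrete: keying the tables on the TTI shuffle kind (TTI::SK_Reverse, TTI::SK_Alternate) rather than on ISD::VECTOR_SHUFFLE lets a single lookup path serve both kinds, which is what allows the separate *AltShuffleTbl arrays to be deleted. A reduced sketch of the lookup pattern, with simplified stand-in enums and costs copied from the SSSE3 entries above:

    #include <cstddef>

    enum ShuffleKind { SK_Reverse, SK_Alternate }; // stand-ins for TTI::SK_*
    enum SimpleVT { v8i16, v16i8 };                // stand-ins for MVT values

    struct CostTblEntry { ShuffleKind Kind; SimpleVT Type; unsigned Cost; };

    // Mirrors CostTableLookup: find the entry matching both the shuffle kind
    // and the legalized type, or return nullptr so the caller can fall
    // through to the next (older ISA) table.
    template <std::size_t N>
    const CostTblEntry *lookup(const CostTblEntry (&Tbl)[N], ShuffleKind Kind,
                               SimpleVT VT) {
      for (const CostTblEntry &E : Tbl)
        if (E.Kind == Kind && E.Type == VT)
          return &E;
      return nullptr;
    }

    // Costs copied from the SSSE3ShuffleTbl entries in the patch above.
    static const CostTblEntry SSSE3ShuffleTbl[] = {
        {SK_Reverse,   v8i16, 1}, // pshufb
        {SK_Reverse,   v16i8, 1}, // pshufb
        {SK_Alternate, v8i16, 3}, // pshufb + pshufb + por
        {SK_Alternate, v16i8, 3}, // pshufb + pshufb + por
    };

    int getShuffleCost(ShuffleKind Kind, SimpleVT VT, int SplitFactor) {
      if (const CostTblEntry *E = lookup(SSSE3ShuffleTbl, Kind, VT))
        return SplitFactor * E->Cost; // LT.first * Entry->Cost above
      return SplitFactor;             // stand-in for the base-class fallback
    }

In the real code, LT.first is the legalization split factor and a miss falls through to progressively older ISA tables (SSE2, then SSE1) before the generic fallback.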