Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/X86/X86CompressEVEX.cpp')
-rw-r--r-- contrib/llvm-project/llvm/lib/Target/X86/X86CompressEVEX.cpp | 296
1 file changed, 296 insertions, 0 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86CompressEVEX.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86CompressEVEX.cpp
new file mode 100644
index 000000000000..b95baddd9dea
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/Target/X86/X86CompressEVEX.cpp
@@ -0,0 +1,296 @@
+//===- X86CompressEVEX.cpp ------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass compresses instructions from EVEX space to legacy/VEX/EVEX space
+// when possible in order to reduce code size or facilitate HW decoding.
+//
+// Possible compression:
+// a. AVX512 instruction (EVEX) -> AVX instruction (VEX)
+// b. Promoted instruction (EVEX) -> pre-promotion instruction (legacy/VEX)
+// c. NDD (EVEX) -> non-NDD (legacy)
+// d. NF_ND (EVEX) -> NF (EVEX)
+//
+// Compressions a, b and c generally reduce code size, with a few exceptions,
+// such as the promoted 16-bit CRC32, which is the same length as the legacy
+// version:
+//
+// legacy:
+// crc32w %si, %eax ## encoding: [0x66,0xf2,0x0f,0x38,0xf1,0xc6]
+// promoted:
+// crc32w %si, %eax ## encoding: [0x62,0xf4,0x7d,0x08,0xf1,0xc6]
+//
+// From a performance perspective, the two encodings should behave the same
+// (same uops, same execution ports). From an FMV (function multi-versioning)
+// perspective, the older legacy encoding is preferred because it can execute
+// on more hardware (a broader install base), so we still perform the
+// compression.
+//
+// Compression d can help the hardware decoder (the HW may skip reading the
+// NDD register), although the instruction length remains unchanged.
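+//
+// As an illustration of compression c (example ours, not from the original
+// comment), an NDD instruction whose new data destination matches one of its
+// sources, e.g.
+//   addl %ecx, %eax, %eax    # EVEX (MAP4, NDD)
+// can be rewritten to the shorter legacy form
+//   addl %ecx, %eax          # legacy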
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/X86BaseInfo.h"
+#include "MCTargetDesc/X86InstComments.h"
+#include "X86.h"
+#include "X86InstrInfo.h"
+#include "X86Subtarget.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/Pass.h"
+#include <atomic>
+#include <cassert>
+#include <cstdint>
+
+using namespace llvm;
+
+// Entry type for the generated EVEX compression tables, included below.
+struct X86CompressEVEXTableEntry {
+ uint16_t OldOpc;
+ uint16_t NewOpc;
+
+ bool operator<(const X86CompressEVEXTableEntry &RHS) const {
+ return OldOpc < RHS.OldOpc;
+ }
+
+ friend bool operator<(const X86CompressEVEXTableEntry &TE, unsigned Opc) {
+ return TE.OldOpc < Opc;
+ }
+};
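+
+// The generated file below defines X86CompressEVEXTable, an array of these
+// entries sorted by OldOpc. Illustrative shape only (not the actual generated
+// contents):
+//   static const X86CompressEVEXTableEntry X86CompressEVEXTable[] = {
+//       {X86::VADDPDZ128rr, X86::VADDPDrr},
+//       ...
+//   };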
+#include "X86GenCompressEVEXTables.inc"
+
+#define COMP_EVEX_DESC "Compressing EVEX instrs when possible"
+#define COMP_EVEX_NAME "x86-compress-evex"
+
+#define DEBUG_TYPE COMP_EVEX_NAME
+
+namespace {
+
+class CompressEVEXPass : public MachineFunctionPass {
+public:
+ static char ID;
+ CompressEVEXPass() : MachineFunctionPass(ID) {}
+ StringRef getPassName() const override { return COMP_EVEX_DESC; }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ // This pass runs after regalloc and doesn't support VReg operands.
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::NoVRegs);
+ }
+};
+
+} // end anonymous namespace
+
+char CompressEVEXPass::ID = 0;
+
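+// Return true if MI uses a register that only the EVEX (or an APX extended)
+// encoding can address; VEX and legacy encodings reach only registers 0-15.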
+static bool usesExtendedRegister(const MachineInstr &MI) {
+ auto isHiRegIdx = [](unsigned Reg) {
+    // Check for an XMM register with index in the range 16-31.
+ if (Reg >= X86::XMM16 && Reg <= X86::XMM31)
+ return true;
+    // Check for a YMM register with index in the range 16-31.
+ if (Reg >= X86::YMM16 && Reg <= X86::YMM31)
+ return true;
+    // Check for an APX extended GPR (index in the range 16-31).
+ if (X86II::isApxExtendedReg(Reg))
+ return true;
+ return false;
+ };
+
+  // Check that no operand is a ZMM register or an XMM/YMM/GPR register with
+  // a high index (16-31).
+ for (const MachineOperand &MO : MI.explicit_operands()) {
+ if (!MO.isReg())
+ continue;
+
+ Register Reg = MO.getReg();
+ assert(!X86II::isZMMReg(Reg) &&
+ "ZMM instructions should not be in the EVEX->VEX tables");
+ if (isHiRegIdx(Reg))
+ return true;
+ }
+
+ return false;
+}
+
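+// Some AVX512VL instructions have VEX equivalents that are gated behind
+// separate ISA extensions (AVX-NE-CONVERT, AVX-VNNI, AVX-IFMA). Compressing
+// to VEX is only valid when the target supports the corresponding feature.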
+static bool checkVEXInstPredicate(unsigned OldOpc, const X86Subtarget &ST) {
+ switch (OldOpc) {
+ default:
+ return true;
+ case X86::VCVTNEPS2BF16Z128rm:
+ case X86::VCVTNEPS2BF16Z128rr:
+ case X86::VCVTNEPS2BF16Z256rm:
+ case X86::VCVTNEPS2BF16Z256rr:
+ return ST.hasAVXNECONVERT();
+ case X86::VPDPBUSDSZ128m:
+ case X86::VPDPBUSDSZ128r:
+ case X86::VPDPBUSDSZ256m:
+ case X86::VPDPBUSDSZ256r:
+ case X86::VPDPBUSDZ128m:
+ case X86::VPDPBUSDZ128r:
+ case X86::VPDPBUSDZ256m:
+ case X86::VPDPBUSDZ256r:
+ case X86::VPDPWSSDSZ128m:
+ case X86::VPDPWSSDSZ128r:
+ case X86::VPDPWSSDSZ256m:
+ case X86::VPDPWSSDSZ256r:
+ case X86::VPDPWSSDZ128m:
+ case X86::VPDPWSSDZ128r:
+ case X86::VPDPWSSDZ256m:
+ case X86::VPDPWSSDZ256r:
+ return ST.hasAVXVNNI();
+ case X86::VPMADD52HUQZ128m:
+ case X86::VPMADD52HUQZ128r:
+ case X86::VPMADD52HUQZ256m:
+ case X86::VPMADD52HUQZ256r:
+ case X86::VPMADD52LUQZ128m:
+ case X86::VPMADD52LUQZ128r:
+ case X86::VPMADD52LUQZ256m:
+ case X86::VPMADD52LUQZ256r:
+ return ST.hasAVXIFMA();
+ }
+}
+
+// Do any custom cleanup needed to finalize the conversion.
+static bool performCustomAdjustments(MachineInstr &MI, unsigned NewOpc) {
+ (void)NewOpc;
+ unsigned Opc = MI.getOpcode();
+ switch (Opc) {
+ case X86::VALIGNDZ128rri:
+ case X86::VALIGNDZ128rmi:
+ case X86::VALIGNQZ128rri:
+ case X86::VALIGNQZ128rmi: {
+ assert((NewOpc == X86::VPALIGNRrri || NewOpc == X86::VPALIGNRrmi) &&
+ "Unexpected new opcode!");
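+    // The VALIGND/VALIGNQ immediate counts elements (4 or 8 bytes), while
+    // the VPALIGNR immediate counts bytes, so scale it accordingly.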
+ unsigned Scale =
+ (Opc == X86::VALIGNQZ128rri || Opc == X86::VALIGNQZ128rmi) ? 8 : 4;
+ MachineOperand &Imm = MI.getOperand(MI.getNumExplicitOperands() - 1);
+ Imm.setImm(Imm.getImm() * Scale);
+ break;
+ }
+ case X86::VSHUFF32X4Z256rmi:
+ case X86::VSHUFF32X4Z256rri:
+ case X86::VSHUFF64X2Z256rmi:
+ case X86::VSHUFF64X2Z256rri:
+ case X86::VSHUFI32X4Z256rmi:
+ case X86::VSHUFI32X4Z256rri:
+ case X86::VSHUFI64X2Z256rmi:
+ case X86::VSHUFI64X2Z256rri: {
+ assert((NewOpc == X86::VPERM2F128rr || NewOpc == X86::VPERM2I128rr ||
+ NewOpc == X86::VPERM2F128rm || NewOpc == X86::VPERM2I128rm) &&
+ "Unexpected new opcode!");
+ MachineOperand &Imm = MI.getOperand(MI.getNumExplicitOperands() - 1);
+ int64_t ImmVal = Imm.getImm();
+ // Set bit 5, move bit 1 to bit 4, copy bit 0.
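+    // In the VSHUF*Z256 forms, imm bit 0 selects the 128-bit lane taken from
+    // src1 (low half) and bit 1 the lane taken from src2 (high half), while
+    // VPERM2?128 uses 2-bit lane selectors in which values 2 and 3 pick from
+    // src2. For example, imm 3 becomes 0x31.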
+ Imm.setImm(0x20 | ((ImmVal & 2) << 3) | (ImmVal & 1));
+ break;
+ }
+ case X86::VRNDSCALEPDZ128rri:
+ case X86::VRNDSCALEPDZ128rmi:
+ case X86::VRNDSCALEPSZ128rri:
+ case X86::VRNDSCALEPSZ128rmi:
+ case X86::VRNDSCALEPDZ256rri:
+ case X86::VRNDSCALEPDZ256rmi:
+ case X86::VRNDSCALEPSZ256rri:
+ case X86::VRNDSCALEPSZ256rmi:
+ case X86::VRNDSCALESDZr:
+ case X86::VRNDSCALESDZm:
+ case X86::VRNDSCALESSZr:
+ case X86::VRNDSCALESSZm:
+ case X86::VRNDSCALESDZr_Int:
+ case X86::VRNDSCALESDZm_Int:
+ case X86::VRNDSCALESSZr_Int:
+ case X86::VRNDSCALESSZm_Int:
+ const MachineOperand &Imm = MI.getOperand(MI.getNumExplicitOperands() - 1);
+ int64_t ImmVal = Imm.getImm();
+ // Ensure that only bits 3:0 of the immediate are used.
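+    // (Bits 7:4 of the VRNDSCALE immediate select a rounding scale that the
+    // VEX-encoded VROUND* instructions cannot express.)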
+ if ((ImmVal & 0xf) != ImmVal)
+ return false;
+ break;
+ }
+
+ return true;
+}
+
+static bool CompressEVEXImpl(MachineInstr &MI, const X86Subtarget &ST) {
+ uint64_t TSFlags = MI.getDesc().TSFlags;
+
+ // Check for EVEX instructions only.
+ if ((TSFlags & X86II::EncodingMask) != X86II::EVEX)
+ return false;
+
+ // Instructions with mask or 512-bit vector can't be converted to VEX.
+ if (TSFlags & (X86II::EVEX_K | X86II::EVEX_L2))
+ return false;
+
+ // EVEX_B has several meanings.
+ // AVX512:
+ // register form: rounding control or SAE
+ // memory form: broadcast
+ //
+ // APX:
+ // MAP4: NDD
+ //
+  // In the AVX512 cases, the EVEX prefix is needed to carry this information,
+  // which prevents the transformation to a VEX encoding.
+ if (TSFlags & X86II::EVEX_B)
+ return false;
+
+ ArrayRef<X86CompressEVEXTableEntry> Table = ArrayRef(X86CompressEVEXTable);
+
+ unsigned Opc = MI.getOpcode();
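+  // The table is sorted by OldOpc, so a binary search finds the candidate.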
+ const auto *I = llvm::lower_bound(Table, Opc);
+ if (I == Table.end() || I->OldOpc != Opc)
+ return false;
+
+ if (usesExtendedRegister(MI) || !checkVEXInstPredicate(Opc, ST) ||
+ !performCustomAdjustments(MI, I->NewOpc))
+ return false;
+
+ const MCInstrDesc &NewDesc = ST.getInstrInfo()->get(I->NewOpc);
+ MI.setDesc(NewDesc);
+ uint64_t Encoding = NewDesc.TSFlags & X86II::EncodingMask;
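+  // Flag the instruction so the assembly printer can emit a comment noting
+  // the compression that was performed.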
+ auto AsmComment =
+ (Encoding == X86II::VEX) ? X86::AC_EVEX_2_VEX : X86::AC_EVEX_2_LEGACY;
+ MI.setAsmPrinterFlag(AsmComment);
+ return true;
+}
+
+bool CompressEVEXPass::runOnMachineFunction(MachineFunction &MF) {
+#ifndef NDEBUG
+ // Make sure the tables are sorted.
+ static std::atomic<bool> TableChecked(false);
+ if (!TableChecked.load(std::memory_order_relaxed)) {
+ assert(llvm::is_sorted(X86CompressEVEXTable) &&
+ "X86CompressEVEXTable is not sorted!");
+ TableChecked.store(true, std::memory_order_relaxed);
+ }
+#endif
+ const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
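+  // Nothing can be compressed unless the subtarget supports AVX512 or the
+  // APX extended-GPR/NDD encodings.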
+ if (!ST.hasAVX512() && !ST.hasEGPR() && !ST.hasNDD())
+ return false;
+
+ bool Changed = false;
+
+ for (MachineBasicBlock &MBB : MF) {
+ // Traverse the basic block.
+ for (MachineInstr &MI : MBB)
+ Changed |= CompressEVEXImpl(MI, ST);
+ }
+
+ return Changed;
+}
+
+INITIALIZE_PASS(CompressEVEXPass, COMP_EVEX_NAME, COMP_EVEX_DESC, false, false)
+
+FunctionPass *llvm::createX86CompressEVEXPass() {
+ return new CompressEVEXPass();
+}