diff options
Diffstat (limited to 'lib/Target/X86/Utils')
-rw-r--r-- | lib/Target/X86/Utils/Makefile | 15 | ||||
-rw-r--r-- | lib/Target/X86/Utils/X86ShuffleDecode.cpp | 162 | ||||
-rw-r--r-- | lib/Target/X86/Utils/X86ShuffleDecode.h | 94 |
3 files changed, 212 insertions, 59 deletions
diff --git a/lib/Target/X86/Utils/Makefile b/lib/Target/X86/Utils/Makefile deleted file mode 100644 index 1df6f0f561d45..0000000000000 --- a/lib/Target/X86/Utils/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -##===- lib/Target/X86/Utils/Makefile -----------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## -LEVEL = ../../../.. -LIBRARYNAME = LLVMX86Utils - -# Hack: we need to include 'main' x86 target directory to grab private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/lib/Target/X86/Utils/X86ShuffleDecode.cpp index 619f7c8d25df9..18f71675437bf 100644 --- a/lib/Target/X86/Utils/X86ShuffleDecode.cpp +++ b/lib/Target/X86/Utils/X86ShuffleDecode.cpp @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #include "X86ShuffleDecode.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/CodeGen/MachineValueType.h" //===----------------------------------------------------------------------===// @@ -44,6 +45,17 @@ void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask) { if (ZMask & 8) ShuffleMask[3] = SM_SentinelZero; } +void DecodeInsertElementMask(MVT VT, unsigned Idx, unsigned Len, + SmallVectorImpl<int> &ShuffleMask) { + unsigned NumElts = VT.getVectorNumElements(); + assert((Idx + Len) <= NumElts && "Insertion out of range"); + + for (unsigned i = 0; i != NumElts; ++i) + ShuffleMask.push_back(i); + for (unsigned i = 0; i != Len; ++i) + ShuffleMask[Idx + i] = NumElts + i; +} + // <3,1> or <6,7,2,3> void DecodeMOVHLPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask) { for (unsigned i = NElts / 2; i != NElts; ++i) @@ -263,6 +275,25 @@ void DecodeUNPCKLMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) { } } +/// Decodes a broadcast of the first element of a vector. +void DecodeVectorBroadcast(MVT DstVT, SmallVectorImpl<int> &ShuffleMask) { + unsigned NumElts = DstVT.getVectorNumElements(); + ShuffleMask.append(NumElts, 0); +} + +/// Decodes a broadcast of a subvector to a larger vector type. +void DecodeSubVectorBroadcast(MVT DstVT, MVT SrcVT, + SmallVectorImpl<int> &ShuffleMask) { + assert(SrcVT.getScalarType() == DstVT.getScalarType() && + "Non matching vector element types"); + unsigned NumElts = SrcVT.getVectorNumElements(); + unsigned Scale = DstVT.getSizeInBits() / SrcVT.getSizeInBits(); + + for (unsigned i = 0; i != Scale; ++i) + for (unsigned j = 0; j != NumElts; ++j) + ShuffleMask.push_back(j); +} + /// \brief Decode a shuffle packed values at 128-bit granularity /// (SHUFF32x4/SHUFF64x2/SHUFI32x4/SHUFI64x2) /// immediate mask into a shuffle mask. @@ -303,9 +334,9 @@ void DecodePSHUFBMask(ArrayRef<uint64_t> RawMask, ShuffleMask.push_back(M); continue; } - // For AVX vectors with 32 bytes the base of the shuffle is the half of - // the vector we're inside. - int Base = i < 16 ? 0 : 16; + // For 256/512-bit vectors the base of the shuffle is the 128-bit + // subvector we're inside. + int Base = (i / 16) * 16; // If the high bit (7) of the byte is set, the element is zeroed. if (M & (1 << 7)) ShuffleMask.push_back(SM_SentinelZero); @@ -331,23 +362,62 @@ void DecodeBLENDMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) { } } -/// DecodeVPERMMask - this decodes the shuffle masks for VPERMQ/VPERMPD. -/// No VT provided since it only works on 256-bit, 4 element vectors. -void DecodeVPERMMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask) { - for (unsigned i = 0; i != 4; ++i) { - ShuffleMask.push_back((Imm >> (2 * i)) & 3); +void DecodeVPPERMMask(ArrayRef<uint64_t> RawMask, + SmallVectorImpl<int> &ShuffleMask) { + assert(RawMask.size() == 16 && "Illegal VPPERM shuffle mask size"); + + // VPPERM Operation + // Bits[4:0] - Byte Index (0 - 31) + // Bits[7:5] - Permute Operation + // + // Permute Operation: + // 0 - Source byte (no logical operation). + // 1 - Invert source byte. + // 2 - Bit reverse of source byte. + // 3 - Bit reverse of inverted source byte. + // 4 - 00h (zero - fill). + // 5 - FFh (ones - fill). + // 6 - Most significant bit of source byte replicated in all bit positions. + // 7 - Invert most significant bit of source byte and replicate in all bit positions. + for (int i = 0, e = RawMask.size(); i < e; ++i) { + uint64_t M = RawMask[i]; + if (M == (uint64_t)SM_SentinelUndef) { + ShuffleMask.push_back(M); + continue; + } + + uint64_t PermuteOp = (M >> 5) & 0x7; + if (PermuteOp == 4) { + ShuffleMask.push_back(SM_SentinelZero); + continue; + } + if (PermuteOp != 0) { + ShuffleMask.clear(); + return; + } + + uint64_t Index = M & 0x1F; + ShuffleMask.push_back((int)Index); } } -void DecodeZeroExtendMask(MVT SrcVT, MVT DstVT, SmallVectorImpl<int> &Mask) { +/// DecodeVPERMMask - this decodes the shuffle masks for VPERMQ/VPERMPD. +void DecodeVPERMMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) { + assert((VT.is256BitVector() || VT.is512BitVector()) && + (VT.getScalarSizeInBits() == 64) && "Unexpected vector value type"); + unsigned NumElts = VT.getVectorNumElements(); + for (unsigned l = 0; l != NumElts; l += 4) + for (unsigned i = 0; i != 4; ++i) + ShuffleMask.push_back(l + ((Imm >> (2 * i)) & 3)); +} + +void DecodeZeroExtendMask(MVT SrcScalarVT, MVT DstVT, SmallVectorImpl<int> &Mask) { unsigned NumDstElts = DstVT.getVectorNumElements(); - unsigned SrcScalarBits = SrcVT.getScalarSizeInBits(); + unsigned SrcScalarBits = SrcScalarVT.getSizeInBits(); unsigned DstScalarBits = DstVT.getScalarSizeInBits(); unsigned Scale = DstScalarBits / SrcScalarBits; assert(SrcScalarBits < DstScalarBits && "Expected zero extension mask to increase scalar size"); - assert(SrcVT.getVectorNumElements() >= NumDstElts && - "Too many zero extension lanes"); for (unsigned i = 0; i != NumDstElts; i++) { Mask.push_back(i); @@ -445,18 +515,78 @@ void DecodeINSERTQIMask(int Len, int Idx, ShuffleMask.push_back(SM_SentinelUndef); } +void DecodeVPERMILPMask(MVT VT, ArrayRef<uint64_t> RawMask, + SmallVectorImpl<int> &ShuffleMask) { + unsigned VecSize = VT.getSizeInBits(); + unsigned EltSize = VT.getScalarSizeInBits(); + unsigned NumLanes = VecSize / 128; + unsigned NumEltsPerLane = VT.getVectorNumElements() / NumLanes; + assert((VecSize == 128 || VecSize == 256 || VecSize == 512) && + "Unexpected vector size"); + assert((EltSize == 32 || EltSize == 64) && "Unexpected element size"); + + for (unsigned i = 0, e = RawMask.size(); i < e; ++i) { + uint64_t M = RawMask[i]; + M = (EltSize == 64 ? ((M >> 1) & 0x1) : (M & 0x3)); + unsigned LaneOffset = i & ~(NumEltsPerLane - 1); + ShuffleMask.push_back((int)(LaneOffset + M)); + } +} + +void DecodeVPERMIL2PMask(MVT VT, unsigned M2Z, ArrayRef<uint64_t> RawMask, + SmallVectorImpl<int> &ShuffleMask) { + unsigned VecSize = VT.getSizeInBits(); + unsigned EltSize = VT.getScalarSizeInBits(); + unsigned NumLanes = VecSize / 128; + unsigned NumEltsPerLane = VT.getVectorNumElements() / NumLanes; + assert((VecSize == 128 || VecSize == 256) && + "Unexpected vector size"); + assert((EltSize == 32 || EltSize == 64) && "Unexpected element size"); + + for (unsigned i = 0, e = RawMask.size(); i < e; ++i) { + // VPERMIL2 Operation. + // Bits[3] - Match Bit. + // Bits[2:1] - (Per Lane) PD Shuffle Mask. + // Bits[2:0] - (Per Lane) PS Shuffle Mask. + uint64_t Selector = RawMask[i]; + unsigned MatchBit = (Selector >> 3) & 0x1; + + // M2Z[0:1] MatchBit + // 0Xb X Source selected by Selector index. + // 10b 0 Source selected by Selector index. + // 10b 1 Zero. + // 11b 0 Zero. + // 11b 1 Source selected by Selector index. + if ((M2Z & 0x2) != 0 && MatchBit != (M2Z & 0x1)) { + ShuffleMask.push_back(SM_SentinelZero); + continue; + } + + unsigned Index = i & ~(NumEltsPerLane - 1); + if (EltSize == 64) + Index += (Selector >> 1) & 0x1; + else + Index += Selector & 0x3; + + unsigned SrcOffset = (Selector >> 2) & 1; + ShuffleMask.push_back((int)(SrcOffset + Index)); + } +} + void DecodeVPERMVMask(ArrayRef<uint64_t> RawMask, SmallVectorImpl<int> &ShuffleMask) { - for (int i = 0, e = RawMask.size(); i < e; ++i) { - uint64_t M = RawMask[i]; + uint64_t EltMaskSize = RawMask.size() - 1; + for (auto M : RawMask) { + M &= EltMaskSize; ShuffleMask.push_back((int)M); } } void DecodeVPERMV3Mask(ArrayRef<uint64_t> RawMask, SmallVectorImpl<int> &ShuffleMask) { - for (int i = 0, e = RawMask.size(); i < e; ++i) { - uint64_t M = RawMask[i]; + uint64_t EltMaskSize = (RawMask.size() * 2) - 1; + for (auto M : RawMask) { + M &= EltMaskSize; ShuffleMask.push_back((int)M); } } diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.h b/lib/Target/X86/Utils/X86ShuffleDecode.h index 72db6a81912b6..dc21c19752c35 100644 --- a/lib/Target/X86/Utils/X86ShuffleDecode.h +++ b/lib/Target/X86/Utils/X86ShuffleDecode.h @@ -16,23 +16,31 @@ #define LLVM_LIB_TARGET_X86_UTILS_X86SHUFFLEDECODE_H #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/ArrayRef.h" //===----------------------------------------------------------------------===// // Vector Mask Decoding //===----------------------------------------------------------------------===// namespace llvm { +template <typename T> class ArrayRef; class MVT; enum { SM_SentinelUndef = -1, SM_SentinelZero = -2 }; +/// Decode a 128-bit INSERTPS instruction as a v4f32 shuffle mask. void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask); -// <3,1> or <6,7,2,3> +// Insert the bottom Len elements from a second source into a vector starting at +// element Idx. +void DecodeInsertElementMask(MVT VT, unsigned Idx, unsigned Len, + SmallVectorImpl<int> &ShuffleMask); + +/// Decode a MOVHLPS instruction as a v2f64/v4f32 shuffle mask. +/// i.e. <3,1> or <6,7,2,3> void DecodeMOVHLPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask); -// <0,2> or <0,1,4,5> +/// Decode a MOVLHPS instruction as a v2f64/v4f32 shuffle mask. +/// i.e. <0,2> or <0,1,4,5> void DecodeMOVLHPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask); void DecodeMOVSLDUPMask(MVT VT, SmallVectorImpl<int> &ShuffleMask); @@ -47,74 +55,104 @@ void DecodePSRLDQMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask); void DecodePALIGNRMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask); +/// Decodes the shuffle masks for pshufd/pshufw/vpermilpd/vpermilps. +/// VT indicates the type of the vector allowing it to handle different +/// datatypes and vector widths. void DecodePSHUFMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask); +/// Decodes the shuffle masks for pshufhw. +/// VT indicates the type of the vector allowing it to handle different +/// datatypes and vector widths. void DecodePSHUFHWMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask); -void DecodePSHUFLWMask(MVT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask); +/// Decodes the shuffle masks for pshuflw. +/// VT indicates the type of the vector allowing it to handle different +/// datatypes and vector widths. +void DecodePSHUFLWMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask); -/// \brief Decodes a PSWAPD 3DNow! instruction. +/// Decodes a PSWAPD 3DNow! instruction. void DecodePSWAPMask(MVT VT, SmallVectorImpl<int> &ShuffleMask); -/// DecodeSHUFPMask - This decodes the shuffle masks for shufp*. VT indicates -/// the type of the vector allowing it to handle different datatypes and vector -/// widths. +/// Decodes the shuffle masks for shufp*. +/// VT indicates the type of the vector allowing it to handle different +/// datatypes and vector widths. void DecodeSHUFPMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask); -/// DecodeUNPCKHMask - This decodes the shuffle masks for unpckhps/unpckhpd -/// and punpckh*. VT indicates the type of the vector allowing it to handle -/// different datatypes and vector widths. +/// Decodes the shuffle masks for unpckhps/unpckhpd and punpckh*. +/// VT indicates the type of the vector allowing it to handle different +/// datatypes and vector widths. void DecodeUNPCKHMask(MVT VT, SmallVectorImpl<int> &ShuffleMask); -/// DecodeUNPCKLMask - This decodes the shuffle masks for unpcklps/unpcklpd -/// and punpckl*. VT indicates the type of the vector allowing it to handle -/// different datatypes and vector widths. +/// Decodes the shuffle masks for unpcklps/unpcklpd and punpckl*. +/// VT indicates the type of the vector allowing it to handle different +/// datatypes and vector widths. void DecodeUNPCKLMask(MVT VT, SmallVectorImpl<int> &ShuffleMask); -/// \brief Decode a PSHUFB mask from a raw array of constants such as from +/// Decodes a broadcast of the first element of a vector. +void DecodeVectorBroadcast(MVT DstVT, SmallVectorImpl<int> &ShuffleMask); + +/// Decodes a broadcast of a subvector to a larger vector type. +void DecodeSubVectorBroadcast(MVT DstVT, MVT SrcVT, + SmallVectorImpl<int> &ShuffleMask); + +/// Decode a PSHUFB mask from a raw array of constants such as from /// BUILD_VECTOR. void DecodePSHUFBMask(ArrayRef<uint64_t> RawMask, SmallVectorImpl<int> &ShuffleMask); -/// \brief Decode a BLEND immediate mask into a shuffle mask. +/// Decode a BLEND immediate mask into a shuffle mask. void DecodeBLENDMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask); void DecodeVPERM2X128Mask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask); -/// \brief Decode a shuffle packed values at 128-bit granularity +/// Decode a shuffle packed values at 128-bit granularity /// immediate mask into a shuffle mask. void decodeVSHUF64x2FamilyMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask); -/// DecodeVPERMMask - this decodes the shuffle masks for VPERMQ/VPERMPD. -/// No VT provided since it only works on 256-bit, 4 element vectors. -void DecodeVPERMMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask); +/// Decodes the shuffle masks for VPERMQ/VPERMPD. +void DecodeVPERMMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask); -/// \brief Decode a zero extension instruction as a shuffle mask. -void DecodeZeroExtendMask(MVT SrcVT, MVT DstVT, +/// Decode a VPPERM mask from a raw array of constants such as from +/// BUILD_VECTOR. +/// This can only basic masks (permutes + zeros), not any of the other +/// operations that VPPERM can perform. +void DecodeVPPERMMask(ArrayRef<uint64_t> RawMask, + SmallVectorImpl<int> &ShuffleMask); + +/// Decode a zero extension instruction as a shuffle mask. +void DecodeZeroExtendMask(MVT SrcScalarVT, MVT DstVT, SmallVectorImpl<int> &ShuffleMask); -/// \brief Decode a move lower and zero upper instruction as a shuffle mask. +/// Decode a move lower and zero upper instruction as a shuffle mask. void DecodeZeroMoveLowMask(MVT VT, SmallVectorImpl<int> &ShuffleMask); -/// \brief Decode a scalar float move instruction as a shuffle mask. +/// Decode a scalar float move instruction as a shuffle mask. void DecodeScalarMoveMask(MVT VT, bool IsLoad, SmallVectorImpl<int> &ShuffleMask); -/// \brief Decode a SSE4A EXTRQ instruction as a v16i8 shuffle mask. +/// Decode a SSE4A EXTRQ instruction as a v16i8 shuffle mask. void DecodeEXTRQIMask(int Len, int Idx, SmallVectorImpl<int> &ShuffleMask); -/// \brief Decode a SSE4A INSERTQ instruction as a v16i8 shuffle mask. +/// Decode a SSE4A INSERTQ instruction as a v16i8 shuffle mask. void DecodeINSERTQIMask(int Len, int Idx, SmallVectorImpl<int> &ShuffleMask); -/// \brief Decode a VPERM W/D/Q/PS/PD mask from a raw array of constants. +/// Decode a VPERMILPD/VPERMILPS variable mask from a raw array of constants. +void DecodeVPERMILPMask(MVT VT, ArrayRef<uint64_t> RawMask, + SmallVectorImpl<int> &ShuffleMask); + +/// Decode a VPERMIL2PD/VPERMIL2PS variable mask from a raw array of constants. +void DecodeVPERMIL2PMask(MVT VT, unsigned M2Z, ArrayRef<uint64_t> RawMask, + SmallVectorImpl<int> &ShuffleMask); + +/// Decode a VPERM W/D/Q/PS/PD mask from a raw array of constants. void DecodeVPERMVMask(ArrayRef<uint64_t> RawMask, SmallVectorImpl<int> &ShuffleMask); -/// \brief Decode a VPERMT2 W/D/Q/PS/PD mask from a raw array of constants. +/// Decode a VPERMT2 W/D/Q/PS/PD mask from a raw array of constants. void DecodeVPERMV3Mask(ArrayRef<uint64_t> RawMask, SmallVectorImpl<int> &ShuffleMask); } // llvm namespace |