summary | refs | log | tree | commit | diff
path: root/lib/Target/X86/X86InstrSSE.td
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/X86/X86InstrSSE.td')
-rw-r--r-- lib/Target/X86/X86InstrSSE.td 57
1 files changed, 54 insertions, 3 deletions
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 384238741b188..cce938baafe1c 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -4462,12 +4462,12 @@ def MOVPDI2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src),
// Move Packed Doubleword Int first element to Doubleword Int
//
let SchedRW = [WriteMove] in {
-def VMOVPQIto64rr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
- "vmov{d|q}\t{$src, $dst|$dst, $src}",
+def VMOVPQIto64rr : VRPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
+ "mov{d|q}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (vector_extract (v2i64 VR128:$src),
(iPTR 0)))],
IIC_SSE_MOVD_ToGP>,
- TB, OpSize, VEX, VEX_W, Requires<[HasAVX, In64BitMode]>;
+ VEX;
def MOVPQIto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
"mov{d|q}\t{$src, $dst|$dst, $src}",
@@ -5094,6 +5094,16 @@ multiclass SS3I_unop_rm_int_y<bits<8> opc, string OpcodeStr,
Sched<[WriteVecALULd]>;
}
+// Helper fragments to match sext vXi1 to vXiY.
+def v16i1sextv16i8 : PatLeaf<(v16i8 (X86pcmpgt (bc_v16i8 (v4i32 immAllZerosV)),
+ VR128:$src))>;
+def v8i1sextv8i16 : PatLeaf<(v8i16 (X86vsrai VR128:$src, (i32 15)))>;
+def v4i1sextv4i32 : PatLeaf<(v4i32 (X86vsrai VR128:$src, (i32 31)))>;
+def v32i1sextv32i8 : PatLeaf<(v32i8 (X86pcmpgt (bc_v32i8 (v8i32 immAllZerosV)),
+ VR256:$src))>;
+def v16i1sextv16i16: PatLeaf<(v16i16 (X86vsrai VR256:$src, (i32 15)))>;
+def v8i1sextv8i32 : PatLeaf<(v8i32 (X86vsrai VR256:$src, (i32 31)))>;
+
let Predicates = [HasAVX] in {
defm VPABSB : SS3I_unop_rm_int<0x1C, "vpabsb",
int_x86_ssse3_pabs_b_128>, VEX;
@@ -5101,6 +5111,19 @@ let Predicates = [HasAVX] in {
int_x86_ssse3_pabs_w_128>, VEX;
defm VPABSD : SS3I_unop_rm_int<0x1E, "vpabsd",
int_x86_ssse3_pabs_d_128>, VEX;
+
+ def : Pat<(xor
+ (bc_v2i64 (v16i1sextv16i8)),
+ (bc_v2i64 (add (v16i8 VR128:$src), (v16i1sextv16i8)))),
+ (VPABSBrr128 VR128:$src)>;
+ def : Pat<(xor
+ (bc_v2i64 (v8i1sextv8i16)),
+ (bc_v2i64 (add (v8i16 VR128:$src), (v8i1sextv8i16)))),
+ (VPABSWrr128 VR128:$src)>;
+ def : Pat<(xor
+ (bc_v2i64 (v4i1sextv4i32)),
+ (bc_v2i64 (add (v4i32 VR128:$src), (v4i1sextv4i32)))),
+ (VPABSDrr128 VR128:$src)>;
}
let Predicates = [HasAVX2] in {
@@ -5110,6 +5133,19 @@ let Predicates = [HasAVX2] in {
int_x86_avx2_pabs_w>, VEX, VEX_L;
defm VPABSD : SS3I_unop_rm_int_y<0x1E, "vpabsd",
int_x86_avx2_pabs_d>, VEX, VEX_L;
+
+ def : Pat<(xor
+ (bc_v4i64 (v32i1sextv32i8)),
+ (bc_v4i64 (add (v32i8 VR256:$src), (v32i1sextv32i8)))),
+ (VPABSBrr256 VR256:$src)>;
+ def : Pat<(xor
+ (bc_v4i64 (v16i1sextv16i16)),
+ (bc_v4i64 (add (v16i16 VR256:$src), (v16i1sextv16i16)))),
+ (VPABSWrr256 VR256:$src)>;
+ def : Pat<(xor
+ (bc_v4i64 (v8i1sextv8i32)),
+ (bc_v4i64 (add (v8i32 VR256:$src), (v8i1sextv8i32)))),
+ (VPABSDrr256 VR256:$src)>;
}
defm PABSB : SS3I_unop_rm_int<0x1C, "pabsb",
@@ -5119,6 +5155,21 @@ defm PABSW : SS3I_unop_rm_int<0x1D, "pabsw",
defm PABSD : SS3I_unop_rm_int<0x1E, "pabsd",
int_x86_ssse3_pabs_d_128>;
+let Predicates = [HasSSSE3] in {
+ def : Pat<(xor
+ (bc_v2i64 (v16i1sextv16i8)),
+ (bc_v2i64 (add (v16i8 VR128:$src), (v16i1sextv16i8)))),
+ (PABSBrr128 VR128:$src)>;
+ def : Pat<(xor
+ (bc_v2i64 (v8i1sextv8i16)),
+ (bc_v2i64 (add (v8i16 VR128:$src), (v8i1sextv8i16)))),
+ (PABSWrr128 VR128:$src)>;
+ def : Pat<(xor
+ (bc_v2i64 (v4i1sextv4i32)),
+ (bc_v2i64 (add (v4i32 VR128:$src), (v4i1sextv4i32)))),
+ (PABSDrr128 VR128:$src)>;
+}
+
//===---------------------------------------------------------------------===//
// SSSE3 - Packed Binary Operator Instructions
//===---------------------------------------------------------------------===//