aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/PowerPC/PPCInstrAltivec.td')
-rw-r--r--llvm/lib/Target/PowerPC/PPCInstrAltivec.td59
1 files changed, 39 insertions, 20 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
index f94816a35f79..920eeed9d41f 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -341,7 +341,7 @@ class VXCR_Int_Ty<bits<11> xo, string opc, Intrinsic IntID, ValueType Ty>
//===----------------------------------------------------------------------===//
// Instruction Definitions.
-def HasAltivec : Predicate<"PPCSubTarget->hasAltivec()">;
+def HasAltivec : Predicate<"Subtarget->hasAltivec()">;
let Predicates = [HasAltivec] in {
def DSS : DSS_Form<0, 822, (outs), (ins u5imm:$STRM),
@@ -491,7 +491,7 @@ let isCommutable = 1 in {
def VADDFP : VXForm_1<10, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
"vaddfp $vD, $vA, $vB", IIC_VecFP,
[(set v4f32:$vD, (fadd v4f32:$vA, v4f32:$vB))]>;
-
+
def VADDUBM : VXForm_1<0, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
"vaddubm $vD, $vA, $vB", IIC_VecGeneral,
[(set v16i8:$vD, (add v16i8:$vA, v16i8:$vB))]>;
@@ -501,7 +501,7 @@ def VADDUHM : VXForm_1<64, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
def VADDUWM : VXForm_1<128, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
"vadduwm $vD, $vA, $vB", IIC_VecGeneral,
[(set v4i32:$vD, (add v4i32:$vA, v4i32:$vB))]>;
-
+
def VADDCUW : VX1_Int_Ty<384, "vaddcuw", int_ppc_altivec_vaddcuw, v4i32>;
def VADDSBS : VX1_Int_Ty<768, "vaddsbs", int_ppc_altivec_vaddsbs, v16i8>;
def VADDSHS : VX1_Int_Ty<832, "vaddshs", int_ppc_altivec_vaddshs, v8i16>;
@@ -635,7 +635,7 @@ def VMULOUB : VX1_Int_Ty2< 8, "vmuloub", int_ppc_altivec_vmuloub,
def VMULOUH : VX1_Int_Ty2< 72, "vmulouh", int_ppc_altivec_vmulouh,
v4i32, v8i16>;
} // isCommutable
-
+
def VREFP : VX2_Int_SP<266, "vrefp", int_ppc_altivec_vrefp>;
def VRFIM : VX2_Int_SP<714, "vrfim", int_ppc_altivec_vrfim>;
def VRFIN : VX2_Int_SP<522, "vrfin", int_ppc_altivec_vrfin>;
@@ -657,7 +657,7 @@ def VSUBUHM : VXForm_1<1088, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
def VSUBUWM : VXForm_1<1152, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
"vsubuwm $vD, $vA, $vB", IIC_VecGeneral,
[(set v4i32:$vD, (sub v4i32:$vA, v4i32:$vB))]>;
-
+
def VSUBSBS : VX1_Int_Ty<1792, "vsubsbs" , int_ppc_altivec_vsubsbs, v16i8>;
def VSUBSHS : VX1_Int_Ty<1856, "vsubshs" , int_ppc_altivec_vsubshs, v8i16>;
def VSUBSWS : VX1_Int_Ty<1920, "vsubsws" , int_ppc_altivec_vsubsws, v4i32>;
@@ -869,6 +869,26 @@ def : Pat<(v8i16 (rotl v8i16:$vA, v8i16:$vB)),
def : Pat<(v4i32 (rotl v4i32:$vA, v4i32:$vB)),
(v4i32 (VRLW v4i32:$vA, v4i32:$vB))>;
+// Multiply
+def : Pat<(mul v8i16:$vA, v8i16:$vB), (VMLADDUHM $vA, $vB, (v8i16(V_SET0H)))>;
+
+// Add
+def : Pat<(add (mul v8i16:$vA, v8i16:$vB), v8i16:$vC), (VMLADDUHM $vA, $vB, $vC)>;
+
+// Saturating adds/subtracts.
+def : Pat<(v16i8 (saddsat v16i8:$vA, v16i8:$vB)), (v16i8 (VADDSBS $vA, $vB))>;
+def : Pat<(v16i8 (uaddsat v16i8:$vA, v16i8:$vB)), (v16i8 (VADDUBS $vA, $vB))>;
+def : Pat<(v8i16 (saddsat v8i16:$vA, v8i16:$vB)), (v8i16 (VADDSHS $vA, $vB))>;
+def : Pat<(v8i16 (uaddsat v8i16:$vA, v8i16:$vB)), (v8i16 (VADDUHS $vA, $vB))>;
+def : Pat<(v4i32 (saddsat v4i32:$vA, v4i32:$vB)), (v4i32 (VADDSWS $vA, $vB))>;
+def : Pat<(v4i32 (uaddsat v4i32:$vA, v4i32:$vB)), (v4i32 (VADDUWS $vA, $vB))>;
+def : Pat<(v16i8 (ssubsat v16i8:$vA, v16i8:$vB)), (v16i8 (VSUBSBS $vA, $vB))>;
+def : Pat<(v16i8 (usubsat v16i8:$vA, v16i8:$vB)), (v16i8 (VSUBUBS $vA, $vB))>;
+def : Pat<(v8i16 (ssubsat v8i16:$vA, v8i16:$vB)), (v8i16 (VSUBSHS $vA, $vB))>;
+def : Pat<(v8i16 (usubsat v8i16:$vA, v8i16:$vB)), (v8i16 (VSUBUHS $vA, $vB))>;
+def : Pat<(v4i32 (ssubsat v4i32:$vA, v4i32:$vB)), (v4i32 (VSUBSWS $vA, $vB))>;
+def : Pat<(v4i32 (usubsat v4i32:$vA, v4i32:$vB)), (v4i32 (VSUBUWS $vA, $vB))>;
+
// Loads.
def : Pat<(v4i32 (load xoaddr:$src)), (LVX xoaddr:$src)>;
@@ -1002,14 +1022,9 @@ def : Pat<(and v4i32:$A, (vnot_ppc v4i32:$B)),
def : Pat<(fmul v4f32:$vA, v4f32:$vB),
(VMADDFP $vA, $vB,
- (v4i32 (VSLW (v4i32 (V_SETALLONES)), (v4i32 (V_SETALLONES)))))>;
+ (v4i32 (VSLW (v4i32 (V_SETALLONES)), (v4i32 (V_SETALLONES)))))>;
-// Fused multiply add and multiply sub for packed float. These are represented
-// separately from the real instructions above, for operations that must have
-// the additional precision, such as Newton-Rhapson (used by divide, sqrt)
-def : Pat<(PPCvmaddfp v4f32:$A, v4f32:$B, v4f32:$C),
- (VMADDFP $A, $B, $C)>;
-def : Pat<(PPCvnmsubfp v4f32:$A, v4f32:$B, v4f32:$C),
+def : Pat<(PPCfnmsub v4f32:$A, v4f32:$B, v4f32:$C),
(VNMSUBFP $A, $B, $C)>;
def : Pat<(int_ppc_altivec_vmaddfp v4f32:$A, v4f32:$B, v4f32:$C),
@@ -1121,8 +1136,8 @@ def : Pat<(v16i8 (srl (sub v16i8:$vA, (v16i8 (bitconvert(vnot_ppc v4i32:$vB)))),
} // end HasAltivec
-def HasP8Altivec : Predicate<"PPCSubTarget->hasP8Altivec()">;
-def HasP8Crypto : Predicate<"PPCSubTarget->hasP8Crypto()">;
+def HasP8Altivec : Predicate<"Subtarget->hasP8Altivec()">;
+def HasP8Crypto : Predicate<"Subtarget->hasP8Crypto()">;
let Predicates = [HasP8Altivec] in {
let isCommutable = 1 in {
@@ -1143,7 +1158,7 @@ def VMINSD : VX1_Int_Ty<962, "vminsd", int_ppc_altivec_vminsd, v2i64>;
def VMINUD : VX1_Int_Ty<706, "vminud", int_ppc_altivec_vminud, v2i64>;
} // isCommutable
-// Vector merge
+// Vector merge
def VMRGEW : VXForm_1<1932, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
"vmrgew $vD, $vA, $vB", IIC_VecFP,
[(set v16i8:$vD,
@@ -1251,16 +1266,16 @@ def VPOPCNTD : VXForm_2<1987, (outs vrrc:$vD), (ins vrrc:$vB),
[(set v2i64:$vD, (ctpop v2i64:$vB))]>;
let isCommutable = 1 in {
-// FIXME: Use AddedComplexity > 400 to ensure these patterns match before the
+// FIXME: Use AddedComplexity > 400 to ensure these patterns match before the
// VSX equivalents. We need to fix this up at some point. Two possible
// solutions for this problem:
// 1. Disable Altivec patterns that compete with VSX patterns using the
-// !HasVSX predicate. This essentially favours VSX over Altivec, in
-// hopes of reducing register pressure (larger register set using VSX
+// !HasVSX predicate. This essentially favours VSX over Altivec, in
+// hopes of reducing register pressure (larger register set using VSX
// instructions than VMX instructions)
// 2. Employ a more disciplined use of AddedComplexity, which would provide
// more fine-grained control than option 1. This would be beneficial
-// if we find situations where Altivec is really preferred over VSX.
+// if we find situations where Altivec is really preferred over VSX.
def VEQV : VXForm_1<1668, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
"veqv $vD, $vA, $vB", IIC_VecGeneral,
[(set v4i32:$vD, (vnot_ppc (xor v4i32:$vA, v4i32:$vB)))]>;
@@ -1339,9 +1354,13 @@ def VSBOX : VXBX_Int_Ty<1480, "vsbox", int_ppc_altivec_crypto_vsbox, v2i64>;
} // HasP8Crypto
// The following altivec instructions were introduced in Power ISA 3.0
-def HasP9Altivec : Predicate<"PPCSubTarget->hasP9Altivec()">;
+def HasP9Altivec : Predicate<"Subtarget->hasP9Altivec()">;
let Predicates = [HasP9Altivec] in {
+// Vector Multiply-Sum
+def VMSUMUDM : VA1a_Int_Ty3<35, "vmsumudm", int_ppc_altivec_vmsumudm,
+ v1i128, v2i64, v1i128>;
+
// i8 element comparisons.
def VCMPNEB : VCMP < 7, "vcmpneb $vD, $vA, $vB" , v16i8>;
def VCMPNEB_rec : VCMPo < 7, "vcmpneb. $vD, $vA, $vB" , v16i8>;