aboutsummaryrefslogtreecommitdiff
path: root/test/CodeGen/X86/recip-fastmath2.ll
diff options
context:
space:
mode:
Diffstat (limited to 'test/CodeGen/X86/recip-fastmath2.ll')
-rw-r--r--test/CodeGen/X86/recip-fastmath2.ll162
1 files changed, 81 insertions, 81 deletions
diff --git a/test/CodeGen/X86/recip-fastmath2.ll b/test/CodeGen/X86/recip-fastmath2.ll
index 440a6f0bef13..c82eab84757f 100644
--- a/test/CodeGen/X86/recip-fastmath2.ll
+++ b/test/CodeGen/X86/recip-fastmath2.ll
@@ -39,8 +39,8 @@ define float @f32_no_step_2(float %x) #3 {
; SANDY-LABEL: f32_no_step_2:
; SANDY: # BB#0:
; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:1.00]
-; SANDY-NEXT: retq # sched: [5:1.00]
+; SANDY-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:1.00]
+; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: f32_no_step_2:
; HASWELL: # BB#0:
@@ -110,12 +110,12 @@ define float @f32_one_step_2(float %x) #1 {
; SANDY: # BB#0:
; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [4:0.50]
+; SANDY-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [6:0.50]
; SANDY-NEXT: vsubss %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
; SANDY-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:1.00]
-; SANDY-NEXT: retq # sched: [5:1.00]
+; SANDY-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:1.00]
+; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: f32_one_step_2:
; HASWELL: # BB#0:
@@ -198,13 +198,13 @@ define float @f32_one_step_2_divs(float %x) #1 {
; SANDY: # BB#0:
; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [4:0.50]
+; SANDY-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [6:0.50]
; SANDY-NEXT: vsubss %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
; SANDY-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 # sched: [9:1.00]
+; SANDY-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 # sched: [11:1.00]
; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT: retq # sched: [5:1.00]
+; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: f32_one_step_2_divs:
; HASWELL: # BB#0:
@@ -305,7 +305,7 @@ define float @f32_two_step_2(float %x) #2 {
; SANDY: # BB#0:
; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm2 # sched: [5:1.00]
-; SANDY-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero sched: [4:0.50]
+; SANDY-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero sched: [6:0.50]
; SANDY-NEXT: vsubss %xmm2, %xmm3, %xmm2 # sched: [3:1.00]
; SANDY-NEXT: vmulss %xmm2, %xmm1, %xmm2 # sched: [5:1.00]
; SANDY-NEXT: vaddss %xmm2, %xmm1, %xmm1 # sched: [3:1.00]
@@ -313,8 +313,8 @@ define float @f32_two_step_2(float %x) #2 {
; SANDY-NEXT: vsubss %xmm0, %xmm3, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
; SANDY-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:1.00]
-; SANDY-NEXT: retq # sched: [5:1.00]
+; SANDY-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:1.00]
+; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: f32_two_step_2:
; HASWELL: # BB#0:
@@ -403,19 +403,19 @@ define <4 x float> @v4f32_one_step2(<4 x float> %x) #1 {
;
; SANDY-LABEL: v4f32_one_step2:
; SANDY: # BB#0:
-; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
+; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [7:3.00]
; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT: vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50]
+; SANDY-NEXT: vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [6:0.50]
; SANDY-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:1.00]
-; SANDY-NEXT: retq # sched: [5:1.00]
+; SANDY-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:1.00]
+; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: v4f32_one_step2:
; HASWELL: # BB#0:
; HASWELL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
-; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50]
+; HASWELL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [4:0.50]
; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0
; HASWELL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0
; HASWELL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50]
@@ -425,7 +425,7 @@ define <4 x float> @v4f32_one_step2(<4 x float> %x) #1 {
; HASWELL-NO-FMA: # BB#0:
; HASWELL-NO-FMA-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
; HASWELL-NO-FMA-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50]
+; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [4:0.50]
; HASWELL-NO-FMA-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
; HASWELL-NO-FMA-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
; HASWELL-NO-FMA-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
@@ -435,7 +435,7 @@ define <4 x float> @v4f32_one_step2(<4 x float> %x) #1 {
; KNL-LABEL: v4f32_one_step2:
; KNL: # BB#0:
; KNL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
-; KNL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50]
+; KNL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [4:0.50]
; KNL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0
; KNL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0
; KNL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50]
@@ -501,20 +501,20 @@ define <4 x float> @v4f32_one_step_2_divs(<4 x float> %x) #1 {
;
; SANDY-LABEL: v4f32_one_step_2_divs:
; SANDY: # BB#0:
-; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
+; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [7:3.00]
; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT: vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50]
+; SANDY-NEXT: vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [6:0.50]
; SANDY-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [9:1.00]
+; SANDY-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [11:1.00]
; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT: retq # sched: [5:1.00]
+; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: v4f32_one_step_2_divs:
; HASWELL: # BB#0:
; HASWELL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
-; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50]
+; HASWELL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [4:0.50]
; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0
; HASWELL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0
; HASWELL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [9:0.50]
@@ -525,7 +525,7 @@ define <4 x float> @v4f32_one_step_2_divs(<4 x float> %x) #1 {
; HASWELL-NO-FMA: # BB#0:
; HASWELL-NO-FMA-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
; HASWELL-NO-FMA-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50]
+; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [4:0.50]
; HASWELL-NO-FMA-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
; HASWELL-NO-FMA-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
; HASWELL-NO-FMA-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
@@ -536,7 +536,7 @@ define <4 x float> @v4f32_one_step_2_divs(<4 x float> %x) #1 {
; KNL-LABEL: v4f32_one_step_2_divs:
; KNL: # BB#0:
; KNL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
-; KNL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50]
+; KNL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [4:0.50]
; KNL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0
; KNL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0
; KNL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [9:0.50]
@@ -619,9 +619,9 @@ define <4 x float> @v4f32_two_step2(<4 x float> %x) #2 {
;
; SANDY-LABEL: v4f32_two_step2:
; SANDY: # BB#0:
-; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
+; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [7:3.00]
; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm2 # sched: [5:1.00]
-; SANDY-NEXT: vmovaps {{.*#+}} xmm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50]
+; SANDY-NEXT: vmovaps {{.*#+}} xmm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [6:0.50]
; SANDY-NEXT: vsubps %xmm2, %xmm3, %xmm2 # sched: [3:1.00]
; SANDY-NEXT: vmulps %xmm2, %xmm1, %xmm2 # sched: [5:1.00]
; SANDY-NEXT: vaddps %xmm2, %xmm1, %xmm1 # sched: [3:1.00]
@@ -629,13 +629,13 @@ define <4 x float> @v4f32_two_step2(<4 x float> %x) #2 {
; SANDY-NEXT: vsubps %xmm0, %xmm3, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:1.00]
-; SANDY-NEXT: retq # sched: [5:1.00]
+; SANDY-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:1.00]
+; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: v4f32_two_step2:
; HASWELL: # BB#0:
; HASWELL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
-; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50]
+; HASWELL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [4:0.50]
; HASWELL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3
; HASWELL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm3
@@ -648,7 +648,7 @@ define <4 x float> @v4f32_two_step2(<4 x float> %x) #2 {
; HASWELL-NO-FMA: # BB#0:
; HASWELL-NO-FMA-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
; HASWELL-NO-FMA-NEXT: vmulps %xmm1, %xmm0, %xmm2 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %xmm3 # sched: [4:0.50]
+; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} xmm3 = [1,1,1,1] sched: [4:0.50]
; HASWELL-NO-FMA-NEXT: vsubps %xmm2, %xmm3, %xmm2 # sched: [3:1.00]
; HASWELL-NO-FMA-NEXT: vmulps %xmm2, %xmm1, %xmm2 # sched: [5:0.50]
; HASWELL-NO-FMA-NEXT: vaddps %xmm2, %xmm1, %xmm1 # sched: [3:1.00]
@@ -662,7 +662,7 @@ define <4 x float> @v4f32_two_step2(<4 x float> %x) #2 {
; KNL-LABEL: v4f32_two_step2:
; KNL: # BB#0:
; KNL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
-; KNL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50]
+; KNL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [4:0.50]
; KNL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
; KNL-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3
; KNL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm3
@@ -674,7 +674,7 @@ define <4 x float> @v4f32_two_step2(<4 x float> %x) #2 {
; SKX-LABEL: v4f32_two_step2:
; SKX: # BB#0:
; SKX-NEXT: vrcp14ps %xmm0, %xmm1
-; SKX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50]
+; SKX-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [4:0.50]
; SKX-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
; SKX-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3
; SKX-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm3
@@ -729,29 +729,29 @@ define <8 x float> @v8f32_one_step2(<8 x float> %x) #1 {
; BTVER2-LABEL: v8f32_one_step2:
; BTVER2: # BB#0:
; BTVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [5:1.00]
-; BTVER2-NEXT: vrcpps %ymm0, %ymm1 # sched: [2:1.00]
-; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [2:1.00]
-; BTVER2-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00]
-; BTVER2-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [2:1.00]
-; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; BTVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [7:1.00]
+; BTVER2-NEXT: vrcpps %ymm0, %ymm1 # sched: [2:2.00]
+; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
+; BTVER2-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:2.00]
+; BTVER2-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [2:2.00]
+; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:2.00]
+; BTVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [7:2.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: v8f32_one_step2:
; SANDY: # BB#0:
; SANDY-NEXT: vrcpps %ymm0, %ymm1 # sched: [5:1.00]
; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
-; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50]
+; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [7:0.50]
; SANDY-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00]
; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00]
-; SANDY-NEXT: retq # sched: [5:1.00]
+; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:1.00]
+; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: v8f32_one_step2:
; HASWELL: # BB#0:
; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
-; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
+; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0
; HASWELL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0
; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00]
@@ -761,7 +761,7 @@ define <8 x float> @v8f32_one_step2(<8 x float> %x) #1 {
; HASWELL-NO-FMA: # BB#0:
; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
-; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
+; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
; HASWELL-NO-FMA-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00]
; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
; HASWELL-NO-FMA-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
@@ -771,7 +771,7 @@ define <8 x float> @v8f32_one_step2(<8 x float> %x) #1 {
; KNL-LABEL: v8f32_one_step2:
; KNL: # BB#0:
; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
-; KNL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
+; KNL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
; KNL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0
; KNL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0
; KNL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00]
@@ -835,31 +835,31 @@ define <8 x float> @v8f32_one_step_2_divs(<8 x float> %x) #1 {
; BTVER2-LABEL: v8f32_one_step_2_divs:
; BTVER2: # BB#0:
; BTVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [5:1.00]
-; BTVER2-NEXT: vrcpps %ymm0, %ymm1 # sched: [2:1.00]
-; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [2:1.00]
-; BTVER2-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00]
-; BTVER2-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [2:1.00]
-; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; BTVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [7:1.00]
-; BTVER2-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [2:1.00]
+; BTVER2-NEXT: vrcpps %ymm0, %ymm1 # sched: [2:2.00]
+; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
+; BTVER2-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:2.00]
+; BTVER2-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [2:2.00]
+; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:2.00]
+; BTVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [7:2.00]
+; BTVER2-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [2:2.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: v8f32_one_step_2_divs:
; SANDY: # BB#0:
; SANDY-NEXT: vrcpps %ymm0, %ymm1 # sched: [5:1.00]
; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
-; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50]
+; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [7:0.50]
; SANDY-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00]
; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [9:1.00]
+; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [12:1.00]
; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
-; SANDY-NEXT: retq # sched: [5:1.00]
+; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: v8f32_one_step_2_divs:
; HASWELL: # BB#0:
; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
-; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
+; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0
; HASWELL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0
; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [9:1.00]
@@ -870,7 +870,7 @@ define <8 x float> @v8f32_one_step_2_divs(<8 x float> %x) #1 {
; HASWELL-NO-FMA: # BB#0:
; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
-; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
+; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
; HASWELL-NO-FMA-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00]
; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
; HASWELL-NO-FMA-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
@@ -881,7 +881,7 @@ define <8 x float> @v8f32_one_step_2_divs(<8 x float> %x) #1 {
; KNL-LABEL: v8f32_one_step_2_divs:
; KNL: # BB#0:
; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
-; KNL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
+; KNL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
; KNL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0
; KNL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0
; KNL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [9:1.00]
@@ -964,23 +964,23 @@ define <8 x float> @v8f32_two_step2(<8 x float> %x) #2 {
; BTVER2-LABEL: v8f32_two_step2:
; BTVER2: # BB#0:
; BTVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [5:1.00]
-; BTVER2-NEXT: vrcpps %ymm0, %ymm1 # sched: [2:1.00]
-; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm2 # sched: [2:1.00]
-; BTVER2-NEXT: vsubps %ymm2, %ymm3, %ymm2 # sched: [3:1.00]
-; BTVER2-NEXT: vmulps %ymm2, %ymm1, %ymm2 # sched: [2:1.00]
-; BTVER2-NEXT: vaddps %ymm2, %ymm1, %ymm1 # sched: [3:1.00]
-; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [2:1.00]
-; BTVER2-NEXT: vsubps %ymm0, %ymm3, %ymm0 # sched: [3:1.00]
-; BTVER2-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [2:1.00]
-; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; BTVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [7:1.00]
+; BTVER2-NEXT: vrcpps %ymm0, %ymm1 # sched: [2:2.00]
+; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm2 # sched: [2:2.00]
+; BTVER2-NEXT: vsubps %ymm2, %ymm3, %ymm2 # sched: [3:2.00]
+; BTVER2-NEXT: vmulps %ymm2, %ymm1, %ymm2 # sched: [2:2.00]
+; BTVER2-NEXT: vaddps %ymm2, %ymm1, %ymm1 # sched: [3:2.00]
+; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
+; BTVER2-NEXT: vsubps %ymm0, %ymm3, %ymm0 # sched: [3:2.00]
+; BTVER2-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [2:2.00]
+; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:2.00]
+; BTVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [7:2.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: v8f32_two_step2:
; SANDY: # BB#0:
; SANDY-NEXT: vrcpps %ymm0, %ymm1 # sched: [5:1.00]
; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm2 # sched: [5:1.00]
-; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50]
+; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [7:0.50]
; SANDY-NEXT: vsubps %ymm2, %ymm3, %ymm2 # sched: [3:1.00]
; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm2 # sched: [5:1.00]
; SANDY-NEXT: vaddps %ymm2, %ymm1, %ymm1 # sched: [3:1.00]
@@ -988,13 +988,13 @@ define <8 x float> @v8f32_two_step2(<8 x float> %x) #2 {
; SANDY-NEXT: vsubps %ymm0, %ymm3, %ymm0 # sched: [3:1.00]
; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00]
-; SANDY-NEXT: retq # sched: [5:1.00]
+; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:1.00]
+; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: v8f32_two_step2:
; HASWELL: # BB#0:
; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
-; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
+; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
; HASWELL-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:1.00]
; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3
; HASWELL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm3
@@ -1007,7 +1007,7 @@ define <8 x float> @v8f32_two_step2(<8 x float> %x) #2 {
; HASWELL-NO-FMA: # BB#0:
; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm2 # sched: [5:1.00]
-; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %ymm3 # sched: [5:1.00]
+; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
; HASWELL-NO-FMA-NEXT: vsubps %ymm2, %ymm3, %ymm2 # sched: [3:1.00]
; HASWELL-NO-FMA-NEXT: vmulps %ymm2, %ymm1, %ymm2 # sched: [5:1.00]
; HASWELL-NO-FMA-NEXT: vaddps %ymm2, %ymm1, %ymm1 # sched: [3:1.00]
@@ -1021,7 +1021,7 @@ define <8 x float> @v8f32_two_step2(<8 x float> %x) #2 {
; KNL-LABEL: v8f32_two_step2:
; KNL: # BB#0:
; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
-; KNL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
+; KNL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
; KNL-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:1.00]
; KNL-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3
; KNL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm3
@@ -1033,7 +1033,7 @@ define <8 x float> @v8f32_two_step2(<8 x float> %x) #2 {
; SKX-LABEL: v8f32_two_step2:
; SKX: # BB#0:
; SKX-NEXT: vrcp14ps %ymm0, %ymm1
-; SKX-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
+; SKX-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
; SKX-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:1.00]
; SKX-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3
; SKX-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm3
@@ -1064,13 +1064,13 @@ define <8 x float> @v8f32_no_step(<8 x float> %x) #3 {
;
; BTVER2-LABEL: v8f32_no_step:
; BTVER2: # BB#0:
-; BTVER2-NEXT: vrcpps %ymm0, %ymm0 # sched: [2:1.00]
+; BTVER2-NEXT: vrcpps %ymm0, %ymm0 # sched: [2:2.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: v8f32_no_step:
; SANDY: # BB#0:
; SANDY-NEXT: vrcpps %ymm0, %ymm0 # sched: [5:1.00]
-; SANDY-NEXT: retq # sched: [5:1.00]
+; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: v8f32_no_step:
; HASWELL: # BB#0:
@@ -1118,15 +1118,15 @@ define <8 x float> @v8f32_no_step2(<8 x float> %x) #3 {
;
; BTVER2-LABEL: v8f32_no_step2:
; BTVER2: # BB#0:
-; BTVER2-NEXT: vrcpps %ymm0, %ymm0 # sched: [2:1.00]
-; BTVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [7:1.00]
+; BTVER2-NEXT: vrcpps %ymm0, %ymm0 # sched: [2:2.00]
+; BTVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [7:2.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: v8f32_no_step2:
; SANDY: # BB#0:
; SANDY-NEXT: vrcpps %ymm0, %ymm0 # sched: [5:1.00]
-; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00]
-; SANDY-NEXT: retq # sched: [5:1.00]
+; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:1.00]
+; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: v8f32_no_step2:
; HASWELL: # BB#0: