diff options
Diffstat (limited to 'test/CodeGen/X86/recip-fastmath.ll')
-rw-r--r-- | test/CodeGen/X86/recip-fastmath.ll | 116 |
1 files changed, 58 insertions, 58 deletions
diff --git a/test/CodeGen/X86/recip-fastmath.ll b/test/CodeGen/X86/recip-fastmath.ll index 16e261bf3c5e0..02a968c6f27d1 100644 --- a/test/CodeGen/X86/recip-fastmath.ll +++ b/test/CodeGen/X86/recip-fastmath.ll @@ -45,9 +45,9 @@ define float @f32_no_estimate(float %x) #0 { ; ; SANDY-LABEL: f32_no_estimate: ; SANDY: # BB#0: -; SANDY-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [4:0.50] -; SANDY-NEXT: vdivss %xmm0, %xmm1, %xmm0 # sched: [12:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50] +; SANDY-NEXT: vdivss %xmm0, %xmm1, %xmm0 # sched: [14:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: f32_no_estimate: ; HASWELL: # BB#0: @@ -113,11 +113,11 @@ define float @f32_one_step(float %x) #1 { ; SANDY: # BB#0: ; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00] ; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [4:0.50] +; SANDY-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [6:0.50] ; SANDY-NEXT: vsubss %xmm0, %xmm2, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: f32_one_step: ; HASWELL: # BB#0: @@ -207,7 +207,7 @@ define float @f32_two_step(float %x) #2 { ; SANDY: # BB#0: ; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00] ; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm2 # sched: [5:1.00] -; SANDY-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero sched: [4:0.50] +; SANDY-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero sched: [6:0.50] ; SANDY-NEXT: vsubss %xmm2, %xmm3, %xmm2 # sched: [3:1.00] ; SANDY-NEXT: vmulss %xmm2, %xmm1, %xmm2 # sched: [5:1.00] ; SANDY-NEXT: vaddss %xmm2, %xmm1, %xmm1 # sched: [3:1.00] @@ -215,7 +215,7 @@ define float @f32_two_step(float %x) #2 { ; SANDY-NEXT: vsubss %xmm0, %xmm3, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: f32_two_step: ; HASWELL: # BB#0: @@ -284,25 +284,25 @@ define <4 x float> @v4f32_no_estimate(<4 x float> %x) #0 { ; ; SANDY-LABEL: v4f32_no_estimate: ; SANDY: # BB#0: -; SANDY-NEXT: vmovaps {{.*#+}} xmm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50] -; SANDY-NEXT: vdivps %xmm0, %xmm1, %xmm0 # sched: [12:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmovaps {{.*#+}} xmm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [6:0.50] +; SANDY-NEXT: vdivps %xmm0, %xmm1, %xmm0 # sched: [14:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: v4f32_no_estimate: ; HASWELL: # BB#0: -; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %xmm1 # sched: [4:0.50] +; HASWELL-NEXT: vbroadcastss {{.*#+}} xmm1 = [1,1,1,1] sched: [4:0.50] ; HASWELL-NEXT: vdivps %xmm0, %xmm1, %xmm0 # sched: [12:1.00] ; HASWELL-NEXT: retq # sched: [1:1.00] ; ; HASWELL-NO-FMA-LABEL: v4f32_no_estimate: ; HASWELL-NO-FMA: # BB#0: -; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %xmm1 +; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} xmm1 = [1,1,1,1] ; HASWELL-NO-FMA-NEXT: vdivps %xmm0, %xmm1, %xmm0 ; HASWELL-NO-FMA-NEXT: retq ; ; AVX512-LABEL: v4f32_no_estimate: ; AVX512: # BB#0: -; AVX512-NEXT: vbroadcastss {{.*}}(%rip), %xmm1 # sched: [4:0.50] +; AVX512-NEXT: vbroadcastss {{.*#+}} xmm1 = [1,1,1,1] sched: [4:0.50] ; AVX512-NEXT: vdivps %xmm0, %xmm1, %xmm0 # sched: [12:1.00] ; AVX512-NEXT: retq # sched: [1:1.00] %div = fdiv fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x @@ -350,18 +350,18 @@ define <4 x float> @v4f32_one_step(<4 x float> %x) #1 { ; ; SANDY-LABEL: v4f32_one_step: ; SANDY: # BB#0: -; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] +; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [7:3.00] ; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50] +; SANDY-NEXT: vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [6:0.50] ; SANDY-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: v4f32_one_step: ; HASWELL: # BB#0: ; HASWELL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] -; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50] +; HASWELL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [4:0.50] ; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 ; HASWELL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0 ; HASWELL-NEXT: retq # sched: [1:1.00] @@ -370,7 +370,7 @@ define <4 x float> @v4f32_one_step(<4 x float> %x) #1 { ; HASWELL-NO-FMA: # BB#0: ; HASWELL-NO-FMA-NEXT: vrcpps %xmm0, %xmm1 ; HASWELL-NO-FMA-NEXT: vmulps %xmm1, %xmm0, %xmm0 -; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 +; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] ; HASWELL-NO-FMA-NEXT: vsubps %xmm0, %xmm2, %xmm0 ; HASWELL-NO-FMA-NEXT: vmulps %xmm0, %xmm1, %xmm0 ; HASWELL-NO-FMA-NEXT: vaddps %xmm0, %xmm1, %xmm0 @@ -379,7 +379,7 @@ define <4 x float> @v4f32_one_step(<4 x float> %x) #1 { ; KNL-LABEL: v4f32_one_step: ; KNL: # BB#0: ; KNL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] -; KNL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50] +; KNL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [4:0.50] ; KNL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 ; KNL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0 ; KNL-NEXT: retq # sched: [1:1.00] @@ -453,9 +453,9 @@ define <4 x float> @v4f32_two_step(<4 x float> %x) #2 { ; ; SANDY-LABEL: v4f32_two_step: ; SANDY: # BB#0: -; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] +; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [7:3.00] ; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm2 # sched: [5:1.00] -; SANDY-NEXT: vmovaps {{.*#+}} xmm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50] +; SANDY-NEXT: vmovaps {{.*#+}} xmm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [6:0.50] ; SANDY-NEXT: vsubps %xmm2, %xmm3, %xmm2 # sched: [3:1.00] ; SANDY-NEXT: vmulps %xmm2, %xmm1, %xmm2 # sched: [5:1.00] ; SANDY-NEXT: vaddps %xmm2, %xmm1, %xmm1 # sched: [3:1.00] @@ -463,12 +463,12 @@ define <4 x float> @v4f32_two_step(<4 x float> %x) #2 { ; SANDY-NEXT: vsubps %xmm0, %xmm3, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: v4f32_two_step: ; HASWELL: # BB#0: ; HASWELL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] -; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50] +; HASWELL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [4:0.50] ; HASWELL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00] ; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3 ; HASWELL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm3 @@ -480,7 +480,7 @@ define <4 x float> @v4f32_two_step(<4 x float> %x) #2 { ; HASWELL-NO-FMA: # BB#0: ; HASWELL-NO-FMA-NEXT: vrcpps %xmm0, %xmm1 ; HASWELL-NO-FMA-NEXT: vmulps %xmm1, %xmm0, %xmm2 -; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %xmm3 +; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} xmm3 = [1,1,1,1] ; HASWELL-NO-FMA-NEXT: vsubps %xmm2, %xmm3, %xmm2 ; HASWELL-NO-FMA-NEXT: vmulps %xmm2, %xmm1, %xmm2 ; HASWELL-NO-FMA-NEXT: vaddps %xmm2, %xmm1, %xmm1 @@ -493,7 +493,7 @@ define <4 x float> @v4f32_two_step(<4 x float> %x) #2 { ; KNL-LABEL: v4f32_two_step: ; KNL: # BB#0: ; KNL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] -; KNL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50] +; KNL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [4:0.50] ; KNL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00] ; KNL-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3 ; KNL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm3 @@ -504,7 +504,7 @@ define <4 x float> @v4f32_two_step(<4 x float> %x) #2 { ; SKX-LABEL: v4f32_two_step: ; SKX: # BB#0: ; SKX-NEXT: vrcp14ps %xmm0, %xmm1 -; SKX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50] +; SKX-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [4:0.50] ; SKX-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00] ; SKX-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3 ; SKX-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm3 @@ -541,30 +541,30 @@ define <8 x float> @v8f32_no_estimate(<8 x float> %x) #0 { ; BTVER2-LABEL: v8f32_no_estimate: ; BTVER2: # BB#0: ; BTVER2-NEXT: vmovaps {{.*#+}} ymm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [5:1.00] -; BTVER2-NEXT: vdivps %ymm0, %ymm1, %ymm0 # sched: [19:19.00] +; BTVER2-NEXT: vdivps %ymm0, %ymm1, %ymm0 # sched: [38:38.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; SANDY-LABEL: v8f32_no_estimate: ; SANDY: # BB#0: -; SANDY-NEXT: vmovaps {{.*#+}} ymm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50] -; SANDY-NEXT: vdivps %ymm0, %ymm1, %ymm0 # sched: [12:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmovaps {{.*#+}} ymm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [7:0.50] +; SANDY-NEXT: vdivps %ymm0, %ymm1, %ymm0 # sched: [29:3.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: v8f32_no_estimate: ; HASWELL: # BB#0: -; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %ymm1 # sched: [5:1.00] +; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1] sched: [5:1.00] ; HASWELL-NEXT: vdivps %ymm0, %ymm1, %ymm0 # sched: [19:2.00] ; HASWELL-NEXT: retq # sched: [1:1.00] ; ; HASWELL-NO-FMA-LABEL: v8f32_no_estimate: ; HASWELL-NO-FMA: # BB#0: -; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %ymm1 +; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1] ; HASWELL-NO-FMA-NEXT: vdivps %ymm0, %ymm1, %ymm0 ; HASWELL-NO-FMA-NEXT: retq ; ; AVX512-LABEL: v8f32_no_estimate: ; AVX512: # BB#0: -; AVX512-NEXT: vbroadcastss {{.*}}(%rip), %ymm1 # sched: [5:1.00] +; AVX512-NEXT: vbroadcastss {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1] sched: [5:1.00] ; AVX512-NEXT: vdivps %ymm0, %ymm1, %ymm0 # sched: [19:2.00] ; AVX512-NEXT: retq # sched: [1:1.00] %div = fdiv fast <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x @@ -610,27 +610,27 @@ define <8 x float> @v8f32_one_step(<8 x float> %x) #1 { ; BTVER2-LABEL: v8f32_one_step: ; BTVER2: # BB#0: ; BTVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [5:1.00] -; BTVER2-NEXT: vrcpps %ymm0, %ymm1 # sched: [2:1.00] -; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [2:1.00] -; BTVER2-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00] -; BTVER2-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [2:1.00] -; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] +; BTVER2-NEXT: vrcpps %ymm0, %ymm1 # sched: [2:2.00] +; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [2:2.00] +; BTVER2-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:2.00] +; BTVER2-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [2:2.00] +; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; SANDY-LABEL: v8f32_one_step: ; SANDY: # BB#0: ; SANDY-NEXT: vrcpps %ymm0, %ymm1 # sched: [5:1.00] ; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00] -; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50] +; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [7:0.50] ; SANDY-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00] ; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00] ; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: v8f32_one_step: ; HASWELL: # BB#0: ; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00] -; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00] +; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00] ; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0 ; HASWELL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0 ; HASWELL-NEXT: retq # sched: [1:1.00] @@ -639,7 +639,7 @@ define <8 x float> @v8f32_one_step(<8 x float> %x) #1 { ; HASWELL-NO-FMA: # BB#0: ; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm1 ; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm0 -; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 +; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] ; HASWELL-NO-FMA-NEXT: vsubps %ymm0, %ymm2, %ymm0 ; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm1, %ymm0 ; HASWELL-NO-FMA-NEXT: vaddps %ymm0, %ymm1, %ymm0 @@ -648,7 +648,7 @@ define <8 x float> @v8f32_one_step(<8 x float> %x) #1 { ; KNL-LABEL: v8f32_one_step: ; KNL: # BB#0: ; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00] -; KNL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00] +; KNL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00] ; KNL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0 ; KNL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0 ; KNL-NEXT: retq # sched: [1:1.00] @@ -722,22 +722,22 @@ define <8 x float> @v8f32_two_step(<8 x float> %x) #2 { ; BTVER2-LABEL: v8f32_two_step: ; BTVER2: # BB#0: ; BTVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [5:1.00] -; BTVER2-NEXT: vrcpps %ymm0, %ymm1 # sched: [2:1.00] -; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm2 # sched: [2:1.00] -; BTVER2-NEXT: vsubps %ymm2, %ymm3, %ymm2 # sched: [3:1.00] -; BTVER2-NEXT: vmulps %ymm2, %ymm1, %ymm2 # sched: [2:1.00] -; BTVER2-NEXT: vaddps %ymm2, %ymm1, %ymm1 # sched: [3:1.00] -; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [2:1.00] -; BTVER2-NEXT: vsubps %ymm0, %ymm3, %ymm0 # sched: [3:1.00] -; BTVER2-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [2:1.00] -; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] +; BTVER2-NEXT: vrcpps %ymm0, %ymm1 # sched: [2:2.00] +; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm2 # sched: [2:2.00] +; BTVER2-NEXT: vsubps %ymm2, %ymm3, %ymm2 # sched: [3:2.00] +; BTVER2-NEXT: vmulps %ymm2, %ymm1, %ymm2 # sched: [2:2.00] +; BTVER2-NEXT: vaddps %ymm2, %ymm1, %ymm1 # sched: [3:2.00] +; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [2:2.00] +; BTVER2-NEXT: vsubps %ymm0, %ymm3, %ymm0 # sched: [3:2.00] +; BTVER2-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [2:2.00] +; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; SANDY-LABEL: v8f32_two_step: ; SANDY: # BB#0: ; SANDY-NEXT: vrcpps %ymm0, %ymm1 # sched: [5:1.00] ; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm2 # sched: [5:1.00] -; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50] +; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [7:0.50] ; SANDY-NEXT: vsubps %ymm2, %ymm3, %ymm2 # sched: [3:1.00] ; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm2 # sched: [5:1.00] ; SANDY-NEXT: vaddps %ymm2, %ymm1, %ymm1 # sched: [3:1.00] @@ -745,12 +745,12 @@ define <8 x float> @v8f32_two_step(<8 x float> %x) #2 { ; SANDY-NEXT: vsubps %ymm0, %ymm3, %ymm0 # sched: [3:1.00] ; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00] ; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: v8f32_two_step: ; HASWELL: # BB#0: ; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00] -; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00] +; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00] ; HASWELL-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:1.00] ; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3 ; HASWELL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm3 @@ -762,7 +762,7 @@ define <8 x float> @v8f32_two_step(<8 x float> %x) #2 { ; HASWELL-NO-FMA: # BB#0: ; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm1 ; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm2 -; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %ymm3 +; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1] ; HASWELL-NO-FMA-NEXT: vsubps %ymm2, %ymm3, %ymm2 ; HASWELL-NO-FMA-NEXT: vmulps %ymm2, %ymm1, %ymm2 ; HASWELL-NO-FMA-NEXT: vaddps %ymm2, %ymm1, %ymm1 @@ -775,7 +775,7 @@ define <8 x float> @v8f32_two_step(<8 x float> %x) #2 { ; KNL-LABEL: v8f32_two_step: ; KNL: # BB#0: ; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00] -; KNL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00] +; KNL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00] ; KNL-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:1.00] ; KNL-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3 ; KNL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm3 @@ -786,7 +786,7 @@ define <8 x float> @v8f32_two_step(<8 x float> %x) #2 { ; SKX-LABEL: v8f32_two_step: ; SKX: # BB#0: ; SKX-NEXT: vrcp14ps %ymm0, %ymm1 -; SKX-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00] +; SKX-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00] ; SKX-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:1.00] ; SKX-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3 ; SKX-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm3 |