diff options
Diffstat (limited to 'test/CodeGen/X86/stack-folding-fp-sse42.ll')
-rw-r--r-- | test/CodeGen/X86/stack-folding-fp-sse42.ll | 28 |
1 files changed, 18 insertions, 10 deletions
diff --git a/test/CodeGen/X86/stack-folding-fp-sse42.ll b/test/CodeGen/X86/stack-folding-fp-sse42.ll index f9fcbaabdebb..105115bc7d25 100644 --- a/test/CodeGen/X86/stack-folding-fp-sse42.ll +++ b/test/CodeGen/X86/stack-folding-fp-sse42.ll @@ -1,4 +1,4 @@ -; RUN: llc -O3 -disable-peephole -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.2 < %s | FileCheck %s +; RUN: llc -O3 -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 < %s | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-unknown" @@ -314,7 +314,7 @@ define i64 @stack_fold_cvtsd2si64_int(<2 x double> %a0) { } declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>) nounwind readnone -define float @stack_fold_cvtsd2ss(double %a0) optsize { +define float @stack_fold_cvtsd2ss(double %a0) minsize { ;CHECK-LABEL: stack_fold_cvtsd2ss ;CHECK: cvtsd2ss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() @@ -331,7 +331,7 @@ define <4 x float> @stack_fold_cvtsd2ss_int(<2 x double> %a0) optsize { } declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind readnone -define double @stack_fold_cvtsi2sd(i32 %a0) optsize { +define double @stack_fold_cvtsi2sd(i32 %a0) minsize { ;CHECK-LABEL: stack_fold_cvtsi2sd ;CHECK: cvtsi2sdl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() @@ -365,7 +365,7 @@ define <2 x double> @stack_fold_cvtsi642sd_int(i64 %a0) { } declare <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double>, i64) nounwind readnone -define float @stack_fold_cvtsi2ss(i32 %a0) optsize { +define float @stack_fold_cvtsi2ss(i32 %a0) minsize { ;CHECK-LABEL: stack_fold_cvtsi2ss ;CHECK: cvtsi2ssl {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() @@ -399,7 +399,7 @@ define <4 x float> @stack_fold_cvtsi642ss_int(i64 %a0) { } declare <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float>, i64) nounwind readnone -define double @stack_fold_cvtss2sd(float %a0) optsize { +define double @stack_fold_cvtss2sd(float %a0) minsize { ;CHECK-LABEL: stack_fold_cvtss2sd ;CHECK: cvtss2sd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() @@ -637,7 +637,15 @@ define <4 x float> @stack_fold_hsubps(<4 x float> %a0, <4 x float> %a1) { } declare <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float>, <4 x float>) nounwind readnone -; TODO stack_fold_insertps +define <4 x float> @stack_fold_insertps(<4 x float> %a0, <4 x float> %a1) { + ;CHECK-LABEL: stack_fold_insertps + ;CHECK: insertps $17, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload + ;CHECK-NEXT: {{.*#+}} xmm0 = zero,mem[0],xmm0[2,3] + %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() + %2 = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i8 209) + ret <4 x float> %2 +} +declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone define <2 x double> @stack_fold_maxpd(<2 x double> %a0, <2 x double> %a1) { ;CHECK-LABEL: stack_fold_maxpd @@ -886,7 +894,7 @@ declare <4 x float> @llvm.x86.sse41.round.ps(<4 x float>, i32) nounwind readnone define double @stack_fold_roundsd(double %a0) optsize { ;CHECK-LABEL: stack_fold_roundsd - ;CHECK: roundsd $1, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload + ;CHECK: roundsd $9, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() %2 = call double @llvm.floor.f64(double %a0) ret double %2 @@ -896,9 +904,9 @@ declare double @llvm.floor.f64(double) nounwind readnone ; TODO stack_fold_roundsd_int declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone -define float @stack_fold_roundss(float %a0) optsize { +define float @stack_fold_roundss(float %a0) minsize { ;CHECK-LABEL: stack_fold_roundss - ;CHECK: roundss $1, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload + ;CHECK: roundss $9, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() %2 = call float @llvm.floor.f32(float %a0) ret float %2 @@ -968,7 +976,7 @@ declare double @llvm.sqrt.f64(double) nounwind readnone ; TODO stack_fold_sqrtsd_int declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone -define float @stack_fold_sqrtss(float %a0) optsize { +define float @stack_fold_sqrtss(float %a0) minsize { ;CHECK-LABEL: stack_fold_sqrtss ;CHECK: sqrtss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() |