| field     | value                                                                     | date                      |
|-----------|---------------------------------------------------------------------------|---------------------------|
| author    | Dimitry Andric <dim@FreeBSD.org>                                          | 2010-09-17 15:48:55 +0000 |
| committer | Dimitry Andric <dim@FreeBSD.org>                                          | 2010-09-17 15:48:55 +0000 |
| commit    | d39c594d39df7f283c2fb8a704a3f31c501180d9                                  |                           |
| tree      | 36453626c792cccd91f783a38a169d610a6b9db9 (lib/Target/X86/README-SSE.txt)  |                           |
| parent    | 6144c1de6a7674dad94290650e4e14f24d42e421                                  |                           |
Diffstat (limited to 'lib/Target/X86/README-SSE.txt')

| mode       | path                          | lines changed |
|------------|-------------------------------|---------------|
| -rw-r--r-- | lib/Target/X86/README-SSE.txt | 42            |

1 file changed, 40 insertions, 2 deletions
diff --git a/lib/Target/X86/README-SSE.txt b/lib/Target/X86/README-SSE.txt
index b6aba93f3738..f96b22f1e204 100644
--- a/lib/Target/X86/README-SSE.txt
+++ b/lib/Target/X86/README-SSE.txt
@@ -2,8 +2,46 @@
 // Random ideas for the X86 backend: SSE-specific stuff.
 //===---------------------------------------------------------------------===//
 
-- Consider eliminating the unaligned SSE load intrinsics, replacing them with
-  unaligned LLVM load instructions.
+//===---------------------------------------------------------------------===//
+
+SSE Variable shift can be custom lowered to something like this, which uses a
+small table + unaligned load + shuffle instead of going through memory.
+
+__m128i_shift_right:
+	.byte	 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15
+	.byte	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
+
+...
+__m128i shift_right(__m128i value, unsigned long offset) {
+  return _mm_shuffle_epi8(value,
+               _mm_loadu_si128((__m128i *) (__m128i_shift_right + offset)));
+}
+
+//===---------------------------------------------------------------------===//
+
+SSE has instructions for doing operations on complex numbers; we should
+pattern match them. Compiling this:
+
+_Complex float f32(_Complex float A, _Complex float B) {
+  return A+B;
+}
+
+into:
+
+_f32:
+	movdqa	%xmm0, %xmm2
+	addss	%xmm1, %xmm2
+	pshufd	$16, %xmm2, %xmm2
+	pshufd	$1, %xmm1, %xmm1
+	pshufd	$1, %xmm0, %xmm0
+	addss	%xmm1, %xmm0
+	pshufd	$16, %xmm0, %xmm1
+	movdqa	%xmm2, %xmm0
+	unpcklps	%xmm1, %xmm0
+	ret
+
+seems silly.
+
 //===---------------------------------------------------------------------===//
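For anyone wanting to try the variable-shift note above outside the backend, here is a minimal, self-contained C sketch of the same table + unaligned load + pshufb trick. The table contents follow the note; the names `shift_right_table` and `shift_right_bytes`, and the SSSE3 intrinsics packaging, are illustrative assumptions rather than anything from the commit.

```c
#include <stdint.h>
#include <tmmintrin.h>   /* SSSE3, for _mm_shuffle_epi8; build with -mssse3 */

/* 32-byte control table: identity indices 0..15, then sixteen -1 bytes.
 * pshufb zeroes any result lane whose control byte has the high bit set,
 * which is exactly what the -1 (0xFF) entries provide. */
static const int8_t shift_right_table[32] = {
     0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
};

/* Byte-wise logical shift right of a 128-bit value, 0 <= offset <= 16:
 * a variable-count psrldq. The unaligned load grabs the 16-byte window
 * of the table starting at `offset`; pshufb then pulls value[offset+i]
 * into lane i and zero-fills the lanes that ran off the end. */
static __m128i shift_right_bytes(__m128i value, unsigned long offset) {
    __m128i mask =
        _mm_loadu_si128((const __m128i *)(shift_right_table + offset));
    return _mm_shuffle_epi8(value, mask);
}
```

The net effect is `_mm_srli_si128` with a runtime byte count: one unaligned load and one shuffle, with no round trip through a stack slot.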
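On the complex-number entry: assuming the x86-64 SysV convention, a `_Complex float` is passed with its real and imaginary parts packed into the low 64 bits of a single xmm register, so the addition the note wants to pattern match is one lane-wise add (essentially a single `addps %xmm1, %xmm0`). A sketch of that target shape under that ABI assumption, with hypothetical names:

```c
#include <xmmintrin.h>   /* SSE */

/* Hypothetical pattern-matched shape: if A = (re, im, x, x) and
 * B = (re, im, x, x) each arrive in one xmm register, complex addition
 * is one addps; the upper two lanes hold don't-care values. */
static __m128 complex_add_f32(__m128 a, __m128 b) {
    return _mm_add_ps(a, b);   /* (a.re+b.re, a.im+b.im, junk, junk) */
}
```

Read this way, the shuffles in the generated code above appear to exist only because the frontend splits the complex value into scalar halves and the backend never re-fuses them into a vector add.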