author    Dimitry Andric <dim@FreeBSD.org>  2010-09-17 15:48:55 +0000
committer Dimitry Andric <dim@FreeBSD.org>  2010-09-17 15:48:55 +0000
commit    d39c594d39df7f283c2fb8a704a3f31c501180d9 (patch)
tree      36453626c792cccd91f783a38a169d610a6b9db9 /lib/Target/X86/README-SSE.txt
parent    6144c1de6a7674dad94290650e4e14f24d42e421 (diff)
Diffstat (limited to 'lib/Target/X86/README-SSE.txt')
-rw-r--r--  lib/Target/X86/README-SSE.txt | 42
1 file changed, 40 insertions, 2 deletions
diff --git a/lib/Target/X86/README-SSE.txt b/lib/Target/X86/README-SSE.txt
index b6aba93f3738..f96b22f1e204 100644
--- a/lib/Target/X86/README-SSE.txt
+++ b/lib/Target/X86/README-SSE.txt
@@ -2,8 +2,46 @@
// Random ideas for the X86 backend: SSE-specific stuff.
//===---------------------------------------------------------------------===//
-- Consider eliminating the unaligned SSE load intrinsics, replacing them with
- unaligned LLVM load instructions.
+//===---------------------------------------------------------------------===//
+
+SSE variable shifts can be custom lowered to something like this, which uses
+a small table + unaligned load + shuffle instead of going through memory.
+
+__m128i_shift_right:
+ .byte 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
+  .byte -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
+
+...
+__m128i shift_right(__m128i value, unsigned long offset) {
+ return _mm_shuffle_epi8(value,
+      _mm_loadu_si128((const __m128i *) (__m128i_shift_right + offset)));
+}
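+
+For reference, a self-contained C sketch of the same trick (the table here is
+a hypothetical C array standing in for the asm label above, and the harness
+is purely illustrative, not part of the proposed lowering): pshufb zeroes any
+lane whose mask byte has its high bit set, so mask bytes offset..15 select the
+surviving bytes and the trailing -1s clear the vacated lanes.
+
+#include <stdio.h>
+#include <tmmintrin.h>  // SSSE3 _mm_shuffle_epi8; compile with -mssse3
+
+static const char shift_tbl[32] = {
+   0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
+  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
+};
+
+static __m128i shift_right_bytes(__m128i value, unsigned long offset) {
+  // Load a 16-byte pshufb mask starting 'offset' bytes into the table.
+  return _mm_shuffle_epi8(value,
+      _mm_loadu_si128((const __m128i *)(shift_tbl + offset)));
+}
+
+int main(void) {
+  unsigned char in[16], out[16];
+  for (int i = 0; i < 16; i++) in[i] = (unsigned char)(i + 1);
+  _mm_storeu_si128((__m128i *)out,
+      shift_right_bytes(_mm_loadu_si128((const __m128i *)in), 3));
+  for (int i = 0; i < 16; i++) printf("%d ", out[i]);  // 4..16, then 0 0 0
+  printf("\n");
+  return 0;
+}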
+
+//===---------------------------------------------------------------------===//
+
+SSE has instructions for doing operations on complex numbers; we should
+pattern match them. Compiling this:
+
+_Complex float f32(_Complex float A, _Complex float B) {
+ return A+B;
+}
+
+into:
+
+_f32:
+ movdqa %xmm0, %xmm2
+ addss %xmm1, %xmm2
+ pshufd $16, %xmm2, %xmm2
+ pshufd $1, %xmm1, %xmm1
+ pshufd $1, %xmm0, %xmm0
+ addss %xmm1, %xmm0
+ pshufd $16, %xmm0, %xmm1
+ movdqa %xmm2, %xmm0
+ unpcklps %xmm1, %xmm0
+ ret
+
+seems silly.
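+
+For reference, the lowering one might hope for (a sketch, assuming the SysV
+x86-64 convention where a _Complex float is passed and returned packed in the
+low 64 bits of an XMM register, as the pshufd $1 extracts above already show)
+is a single packed add, since addps sums the real and imaginary lanes at once:
+
+_f32:
+  addps %xmm1, %xmm0
+  ret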
+
//===---------------------------------------------------------------------===//