summary | refs | log | tree | commit | diff
path: root/lib/Target/README.txt
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/README.txt')
-rw-r--r-- lib/Target/README.txt 125
1 files changed, 46 insertions, 79 deletions
diff --git a/lib/Target/README.txt b/lib/Target/README.txt
index f68cf0e40df0..89ea9d0afc42 100644
--- a/lib/Target/README.txt
+++ b/lib/Target/README.txt
@@ -197,13 +197,6 @@ _bar: addic r3,r3,-1
//===---------------------------------------------------------------------===//
-Legalize should lower ctlz like this:
- ctlz(x) = popcnt((x-1) & ~x)
-
-on targets that have popcnt but not ctlz. itanium, what else?
-
-//===---------------------------------------------------------------------===//
-
quantum_sigma_x in 462.libquantum contains the following loop:
for(i=0; i<reg->size; i++)
@@ -227,7 +220,20 @@ so cool to turn it into something like:
... which would only do one 32-bit XOR per loop iteration instead of two.
It would also be nice to recognize the reg->size doesn't alias reg->node[i], but
-alas...
+alas.
+
+//===---------------------------------------------------------------------===//
+
+This should be optimized to one 'and' and one 'or', from PR4216:
+
+define i32 @test_bitfield(i32 %bf.prev.low) nounwind ssp {
+entry:
+ %bf.prev.lo.cleared10 = or i32 %bf.prev.low, 32962 ; <i32> [#uses=1]
+ %0 = and i32 %bf.prev.low, -65536 ; <i32> [#uses=1]
+ %1 = and i32 %bf.prev.lo.cleared10, 40186 ; <i32> [#uses=1]
+ %2 = or i32 %1, %0 ; <i32> [#uses=1]
+ ret i32 %2
+}
//===---------------------------------------------------------------------===//
@@ -335,11 +341,6 @@ when it is declared U32.
//===---------------------------------------------------------------------===//
-Promote for i32 bswap can use i64 bswap + shr. Useful on targets with 64-bit
-regs and bswap, like itanium.
-
-//===---------------------------------------------------------------------===//
-
LSR should know what GPR types a target has. This code:
volatile short X, Y; // globals
@@ -349,24 +350,22 @@ void foo(int N) {
for (i = 0; i < N; i++) { X = i; Y = i*4; }
}
-produces two identical IV's (after promotion) on PPC/ARM:
+produces two nearly identical IVs (after promotion) on PPC/ARM:
-LBB1_1: @bb.preheader
- mov r3, #0
- mov r2, r3
- mov r1, r3
-LBB1_2: @bb
- ldr r12, LCPI1_0
- ldr r12, [r12]
- strh r2, [r12]
- ldr r12, LCPI1_1
- ldr r12, [r12]
- strh r3, [r12]
- add r1, r1, #1 <- [0,+,1]
- add r3, r3, #4
- add r2, r2, #1 <- [0,+,1]
- cmp r1, r0
- bne LBB1_2 @bb
+LBB1_2:
+ ldr r3, LCPI1_0
+ ldr r3, [r3]
+ strh r2, [r3]
+ ldr r3, LCPI1_1
+ ldr r3, [r3]
+ strh r1, [r3]
+ add r1, r1, #4
+ add r2, r2, #1 <- [0,+,1]
+ sub r0, r0, #1 <- [0,-,1]
+ cmp r0, #0
+ bne LBB1_2
+
+LSR should reuse the "+" IV for the exit test.
//===---------------------------------------------------------------------===//
@@ -600,25 +599,6 @@ implementations of ceil/floor/rint.
//===---------------------------------------------------------------------===//
-This GCC bug: http://gcc.gnu.org/bugzilla/show_bug.cgi?id=34043
-contains a testcase that compiles down to:
-
- %struct.XMM128 = type { <4 x float> }
-..
- %src = alloca %struct.XMM128
-..
- %tmp6263 = bitcast %struct.XMM128* %src to <2 x i64>*
- %tmp65 = getelementptr %struct.XMM128* %src, i32 0, i32 0
- store <2 x i64> %tmp5899, <2 x i64>* %tmp6263, align 16
- %tmp66 = load <4 x float>* %tmp65, align 16
- %tmp71 = add <4 x float> %tmp66, %tmp66
-
-If the mid-level optimizer turned the bitcast of pointer + store of tmp5899
-into a bitcast of the vector value and a store to the pointer, then the
-store->load could be easily removed.
-
-//===---------------------------------------------------------------------===//
-
Consider:
int test() {
@@ -1123,16 +1103,6 @@ optimized with "clang -emit-llvm-bc | opt -std-compile-opts".
//===---------------------------------------------------------------------===//
-We would like to do the following transform in the instcombiner:
-
- -X/C -> X/-C
-
-However, this isn't valid if (-X) overflows. We can implement this when we
-have the concept of a "C signed subtraction" operator that which is undefined
-on overflow.
-
-//===---------------------------------------------------------------------===//
-
This was noticed in the entryblock for grokdeclarator in 403.gcc:
%tmp = icmp eq i32 %decl_context, 4
@@ -1311,6 +1281,8 @@ http://gcc.gnu.org/bugzilla/show_bug.cgi?id=35287 [LPRE crit edge splitting]
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=34677 (licm does this, LPRE crit edge)
llvm-gcc t2.c -S -o - -O0 -emit-llvm | llvm-as | opt -mem2reg -simplifycfg -gvn | llvm-dis
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=16799 [BITCAST PHI TRANS]
+
//===---------------------------------------------------------------------===//
Type based alias analysis:
@@ -1318,31 +1290,25 @@ http://gcc.gnu.org/bugzilla/show_bug.cgi?id=14705
//===---------------------------------------------------------------------===//
-When GVN/PRE finds a store of float* to a must aliases pointer when expecting
-an int*, it should turn it into a bitcast. This is a nice generalization of
-the SROA hack that would apply to other cases, e.g.:
-
-int foo(int C, int *P, float X) {
- if (C) {
- bar();
- *P = 42;
- } else
- *(float*)P = X;
-
- return *P;
-}
-
-
-One example (that requires crazy phi translation) is:
-http://gcc.gnu.org/bugzilla/show_bug.cgi?id=16799 [BITCAST PHI TRANS]
-
-//===---------------------------------------------------------------------===//
-
A/B get pinned to the stack because we turn an if/then into a select instead
of PRE'ing the load/store. This may be fixable in instcombine:
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=37892
+struct X { int i; };
+int foo (int x) {
+ struct X a;
+ struct X b;
+ struct X *p;
+ a.i = 1;
+ b.i = 2;
+ if (x)
+ p = &a;
+ else
+ p = &b;
+ return p->i;
+}
+//===---------------------------------------------------------------------===//
Interesting missed case because of control flow flattening (should be 2 loads):
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=26629
@@ -1675,5 +1641,6 @@ entry:
Instcombine should be able to optimize away the loads (and thus the globals).
+See also PR4973.
//===---------------------------------------------------------------------===//