diff options
author | Roman Divacky <rdivacky@FreeBSD.org> | 2009-10-14 17:57:32 +0000 |
---|---|---|
committer | Roman Divacky <rdivacky@FreeBSD.org> | 2009-10-14 17:57:32 +0000 |
commit | 59850d0874429601812bc13408cb1f776649027c (patch) | |
tree | b21f6de4e08b89bb7931806bab798fc2a5e3a686 /lib/Target/README.txt | |
parent | 18f153bdb9db52e7089a2d5293b96c45a3124a26 (diff) |
Notes
Diffstat (limited to 'lib/Target/README.txt')
-rw-r--r-- | lib/Target/README.txt | 125 |
1 files changed, 46 insertions, 79 deletions
diff --git a/lib/Target/README.txt b/lib/Target/README.txt index f68cf0e40df0..89ea9d0afc42 100644 --- a/lib/Target/README.txt +++ b/lib/Target/README.txt @@ -197,13 +197,6 @@ _bar: addic r3,r3,-1 //===---------------------------------------------------------------------===// -Legalize should lower ctlz like this: - ctlz(x) = popcnt((x-1) & ~x) - -on targets that have popcnt but not ctlz. itanium, what else? - -//===---------------------------------------------------------------------===// - quantum_sigma_x in 462.libquantum contains the following loop: for(i=0; i<reg->size; i++) @@ -227,7 +220,20 @@ so cool to turn it into something like: ... which would only do one 32-bit XOR per loop iteration instead of two. It would also be nice to recognize the reg->size doesn't alias reg->node[i], but -alas... +alas. + +//===---------------------------------------------------------------------===// + +This should be optimized to one 'and' and one 'or', from PR4216: + +define i32 @test_bitfield(i32 %bf.prev.low) nounwind ssp { +entry: + %bf.prev.lo.cleared10 = or i32 %bf.prev.low, 32962 ; <i32> [#uses=1] + %0 = and i32 %bf.prev.low, -65536 ; <i32> [#uses=1] + %1 = and i32 %bf.prev.lo.cleared10, 40186 ; <i32> [#uses=1] + %2 = or i32 %1, %0 ; <i32> [#uses=1] + ret i32 %2 +} //===---------------------------------------------------------------------===// @@ -335,11 +341,6 @@ when it is declared U32. //===---------------------------------------------------------------------===// -Promote for i32 bswap can use i64 bswap + shr. Useful on targets with 64-bit -regs and bswap, like itanium. - -//===---------------------------------------------------------------------===// - LSR should know what GPR types a target has. This code: volatile short X, Y; // globals @@ -349,24 +350,22 @@ void foo(int N) { for (i = 0; i < N; i++) { X = i; Y = i*4; } } -produces two identical IV's (after promotion) on PPC/ARM: +produces two near identical IV's (after promotion) on PPC/ARM: -LBB1_1: @bb.preheader - mov r3, #0 - mov r2, r3 - mov r1, r3 -LBB1_2: @bb - ldr r12, LCPI1_0 - ldr r12, [r12] - strh r2, [r12] - ldr r12, LCPI1_1 - ldr r12, [r12] - strh r3, [r12] - add r1, r1, #1 <- [0,+,1] - add r3, r3, #4 - add r2, r2, #1 <- [0,+,1] - cmp r1, r0 - bne LBB1_2 @bb +LBB1_2: + ldr r3, LCPI1_0 + ldr r3, [r3] + strh r2, [r3] + ldr r3, LCPI1_1 + ldr r3, [r3] + strh r1, [r3] + add r1, r1, #4 + add r2, r2, #1 <- [0,+,1] + sub r0, r0, #1 <- [0,-,1] + cmp r0, #0 + bne LBB1_2 + +LSR should reuse the "+" IV for the exit test. //===---------------------------------------------------------------------===// @@ -600,25 +599,6 @@ implementations of ceil/floor/rint. //===---------------------------------------------------------------------===// -This GCC bug: http://gcc.gnu.org/bugzilla/show_bug.cgi?id=34043 -contains a testcase that compiles down to: - - %struct.XMM128 = type { <4 x float> } -.. - %src = alloca %struct.XMM128 -.. - %tmp6263 = bitcast %struct.XMM128* %src to <2 x i64>* - %tmp65 = getelementptr %struct.XMM128* %src, i32 0, i32 0 - store <2 x i64> %tmp5899, <2 x i64>* %tmp6263, align 16 - %tmp66 = load <4 x float>* %tmp65, align 16 - %tmp71 = add <4 x float> %tmp66, %tmp66 - -If the mid-level optimizer turned the bitcast of pointer + store of tmp5899 -into a bitcast of the vector value and a store to the pointer, then the -store->load could be easily removed. - -//===---------------------------------------------------------------------===// - Consider: int test() { @@ -1123,16 +1103,6 @@ optimized with "clang -emit-llvm-bc | opt -std-compile-opts". //===---------------------------------------------------------------------===// -We would like to do the following transform in the instcombiner: - - -X/C -> X/-C - -However, this isn't valid if (-X) overflows. We can implement this when we -have the concept of a "C signed subtraction" operator that which is undefined -on overflow. - -//===---------------------------------------------------------------------===// - This was noticed in the entryblock for grokdeclarator in 403.gcc: %tmp = icmp eq i32 %decl_context, 4 @@ -1311,6 +1281,8 @@ http://gcc.gnu.org/bugzilla/show_bug.cgi?id=35287 [LPRE crit edge splitting] http://gcc.gnu.org/bugzilla/show_bug.cgi?id=34677 (licm does this, LPRE crit edge) llvm-gcc t2.c -S -o - -O0 -emit-llvm | llvm-as | opt -mem2reg -simplifycfg -gvn | llvm-dis +http://gcc.gnu.org/bugzilla/show_bug.cgi?id=16799 [BITCAST PHI TRANS] + //===---------------------------------------------------------------------===// Type based alias analysis: @@ -1318,31 +1290,25 @@ http://gcc.gnu.org/bugzilla/show_bug.cgi?id=14705 //===---------------------------------------------------------------------===// -When GVN/PRE finds a store of float* to a must aliases pointer when expecting -an int*, it should turn it into a bitcast. This is a nice generalization of -the SROA hack that would apply to other cases, e.g.: - -int foo(int C, int *P, float X) { - if (C) { - bar(); - *P = 42; - } else - *(float*)P = X; - - return *P; -} - - -One example (that requires crazy phi translation) is: -http://gcc.gnu.org/bugzilla/show_bug.cgi?id=16799 [BITCAST PHI TRANS] - -//===---------------------------------------------------------------------===// - A/B get pinned to the stack because we turn an if/then into a select instead of PRE'ing the load/store. This may be fixable in instcombine: http://gcc.gnu.org/bugzilla/show_bug.cgi?id=37892 +struct X { int i; }; +int foo (int x) { + struct X a; + struct X b; + struct X *p; + a.i = 1; + b.i = 2; + if (x) + p = &a; + else + p = &b; + return p->i; +} +//===---------------------------------------------------------------------===// Interesting missed case because of control flow flattening (should be 2 loads): http://gcc.gnu.org/bugzilla/show_bug.cgi?id=26629 @@ -1675,5 +1641,6 @@ entry: Instcombine should be able to optimize away the loads (and thus the globals). +See also PR4973 //===---------------------------------------------------------------------===// |