src - FreeBSD source tree

diff options


context:
space:
mode:

author	Roman Divacky <rdivacky@FreeBSD.org>	2009-10-14 17:57:32 +0000
committer	Roman Divacky <rdivacky@FreeBSD.org>	2009-10-14 17:57:32 +0000
commit	59850d0874429601812bc13408cb1f776649027c (patch)
tree	b21f6de4e08b89bb7931806bab798fc2a5e3a686 /lib/Target/README.txt
parent	18f153bdb9db52e7089a2d5293b96c45a3124a26 (diff)

vendor/llvm/llvm-r84119

Notes

Diffstat (limited to 'lib/Target/README.txt')

-rw-r--r--

lib/Target/README.txt

125

1 files changed, 46 insertions, 79 deletions

diff --git a/lib/Target/README.txt b/lib/Target/README.txt
index f68cf0e40df0..89ea9d0afc42 100644
--- a/lib/Target/README.txt
+++ b/lib/Target/README.txt

@@ -197,13 +197,6 @@ _bar: addic r3,r3,-1

//===---------------------------------------------------------------------===//

-Legalize should lower ctlz like this:

- ctlz(x) = popcnt((x-1) & ~x)

-on targets that have popcnt but not ctlz. itanium, what else?

-//===---------------------------------------------------------------------===//

quantum_sigma_x in 462.libquantum contains the following loop:

for(i=0; i<reg->size; i++)

@@ -227,7 +220,20 @@ so cool to turn it into something like:

... which would only do one 32-bit XOR per loop iteration instead of two.

It would also be nice to recognize that reg->size doesn't alias reg->node[i], but

-alas...

+alas.

+//===---------------------------------------------------------------------===//

+This should be optimized to one 'and' and one 'or', from PR4216:

+define i32 @test_bitfield(i32 %bf.prev.low) nounwind ssp {

+entry:

+ %bf.prev.lo.cleared10 = or i32 %bf.prev.low, 32962 ; <i32> [#uses=1]

+ %0 = and i32 %bf.prev.low, -65536 ; <i32> [#uses=1]

+ %1 = and i32 %bf.prev.lo.cleared10, 40186 ; <i32> [#uses=1]

+ %2 = or i32 %1, %0 ; <i32> [#uses=1]

+ ret i32 %2

+}

//===---------------------------------------------------------------------===//

@@ -335,11 +341,6 @@ when it is declared U32.

//===---------------------------------------------------------------------===//

-Promote for i32 bswap can use i64 bswap + shr. Useful on targets with 64-bit

-regs and bswap, like itanium.

-//===---------------------------------------------------------------------===//

LSR should know what GPR types a target has. This code:

volatile short X, Y; // globals

@@ -349,24 +350,22 @@ void foo(int N) {

for (i = 0; i < N; i++) { X = i; Y = i*4; }

}

-produces two identical IV's (after promotion) on PPC/ARM:

+produces two nearly identical IVs (after promotion) on PPC/ARM:

-LBB1_1: @bb.preheader

- mov r3, #0

- mov r2, r3

- mov r1, r3

-LBB1_2: @bb

- ldr r12, LCPI1_0

- ldr r12, [r12]

- strh r2, [r12]

- ldr r12, LCPI1_1

- ldr r12, [r12]

- strh r3, [r12]

- add r1, r1, #1 <- [0,+,1]

- add r3, r3, #4

- add r2, r2, #1 <- [0,+,1]

- cmp r1, r0

- bne LBB1_2 @bb

+LBB1_2:

+ ldr r3, LCPI1_0

+ ldr r3, [r3]

+ strh r2, [r3]

+ ldr r3, LCPI1_1

+ ldr r3, [r3]

+ strh r1, [r3]

+ add r1, r1, #4

+ add r2, r2, #1 <- [0,+,1]

+ sub r0, r0, #1 <- [0,-,1]

+ cmp r0, #0

+ bne LBB1_2

+LSR should reuse the "+" IV for the exit test.

//===---------------------------------------------------------------------===//

@@ -600,25 +599,6 @@ implementations of ceil/floor/rint.

//===---------------------------------------------------------------------===//

-This GCC bug: http://gcc.gnu.org/bugzilla/show_bug.cgi?id=34043

-contains a testcase that compiles down to:

- %struct.XMM128 = type { <4 x float> }

-..

- %src = alloca %struct.XMM128

-..

- %tmp6263 = bitcast %struct.XMM128* %src to <2 x i64>*

- %tmp65 = getelementptr %struct.XMM128* %src, i32 0, i32 0

- store <2 x i64> %tmp5899, <2 x i64>* %tmp6263, align 16

- %tmp66 = load <4 x float>* %tmp65, align 16

- %tmp71 = add <4 x float> %tmp66, %tmp66

-If the mid-level optimizer turned the bitcast of pointer + store of tmp5899

-into a bitcast of the vector value and a store to the pointer, then the

-store->load could be easily removed.

-//===---------------------------------------------------------------------===//

Consider:

int test() {

@@ -1123,16 +1103,6 @@ optimized with "clang -emit-llvm-bc | opt -std-compile-opts".

//===---------------------------------------------------------------------===//

-We would like to do the following transform in the instcombiner:

- -X/C -> X/-C

-However, this isn't valid if (-X) overflows. We can implement this when we

-have the concept of a "C signed subtraction" operator that which is undefined

-on overflow.

-//===---------------------------------------------------------------------===//

This was noticed in the entryblock for grokdeclarator in 403.gcc:

%tmp = icmp eq i32 %decl_context, 4

@@ -1311,6 +1281,8 @@ http://gcc.gnu.org/bugzilla/show_bug.cgi?id=35287 [LPRE crit edge splitting]

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=34677 (licm does this, LPRE crit edge)

llvm-gcc t2.c -S -o - -O0 -emit-llvm | llvm-as | opt -mem2reg -simplifycfg -gvn | llvm-dis

+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=16799 [BITCAST PHI TRANS]

//===---------------------------------------------------------------------===//

Type based alias analysis:

@@ -1318,31 +1290,25 @@ http://gcc.gnu.org/bugzilla/show_bug.cgi?id=14705

//===---------------------------------------------------------------------===//

-When GVN/PRE finds a store of float* to a must aliases pointer when expecting

-an int*, it should turn it into a bitcast. This is a nice generalization of

-the SROA hack that would apply to other cases, e.g.:

-int foo(int C, int *P, float X) {

- if (C) {

- bar();

- *P = 42;

- } else

- *(float*)P = X;

- return *P;

-One example (that requires crazy phi translation) is:

-http://gcc.gnu.org/bugzilla/show_bug.cgi?id=16799 [BITCAST PHI TRANS]

-//===---------------------------------------------------------------------===//

A/B get pinned to the stack because we turn an if/then into a select instead

of PRE'ing the load/store. This may be fixable in instcombine:

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=37892

+struct X { int i; };

+int foo (int x) {

+ struct X a;

+ struct X b;

+ struct X *p;

+ a.i = 1;

+ b.i = 2;

+ if (x)

+ p = &a;

+ else

+ p = &b;

+ return p->i;

+}

+//===---------------------------------------------------------------------===//

Interesting missed case because of control flow flattening (should be 2 loads):

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=26629

@@ -1675,5 +1641,6 @@ entry:

Instcombine should be able to optimize away the loads (and thus the globals).

+See also PR4973

//===---------------------------------------------------------------------===//