Remove upstream files and directories from vendor/llvm/dist that we do - src

diff options


context:
space:
mode:

author	Dimitry Andric <dim@FreeBSD.org>	2019-08-20 17:58:59 +0000
committer	Dimitry Andric <dim@FreeBSD.org>	2019-08-20 17:58:59 +0000
commit	1a56a5ead7a2e84bee8240f5f6b033b5f1707154 (patch)
tree	2f526c9cfcb089e51c33d6e1f0d51b10bda34714 /lib/Target/PowerPC
parent	d8e91e46262bc44006913e6796843909f1ac7bcd (diff)

Notes

Diffstat (limited to 'lib/Target/PowerPC')

-rw-r--r--

lib/Target/PowerPC/AsmParser/CMakeLists.txt

-rw-r--r--

lib/Target/PowerPC/AsmParser/LLVMBuild.txt

-rw-r--r--

lib/Target/PowerPC/CMakeLists.txt

-rw-r--r--

lib/Target/PowerPC/Disassembler/CMakeLists.txt

-rw-r--r--

lib/Target/PowerPC/Disassembler/LLVMBuild.txt

-rw-r--r--

lib/Target/PowerPC/InstPrinter/CMakeLists.txt

-rw-r--r--

lib/Target/PowerPC/InstPrinter/LLVMBuild.txt

-rw-r--r--

lib/Target/PowerPC/LLVMBuild.txt

-rw-r--r--

lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt

-rw-r--r--

lib/Target/PowerPC/MCTargetDesc/LLVMBuild.txt

-rw-r--r--

lib/Target/PowerPC/README.txt

665

-rw-r--r--

lib/Target/PowerPC/README_ALTIVEC.txt

343

-rw-r--r--

lib/Target/PowerPC/TargetInfo/CMakeLists.txt

-rw-r--r--

lib/Target/PowerPC/TargetInfo/LLVMBuild.txt

14 files changed, 0 insertions, 1235 deletions

diff --git a/lib/Target/PowerPC/AsmParser/CMakeLists.txt b/lib/Target/PowerPC/AsmParser/CMakeLists.txt
deleted file mode 100644
index 408858e424d5..000000000000
--- a/lib/Target/PowerPC/AsmParser/CMakeLists.txt
+++ /dev/null

@@ -1,3 +0,0 @@

-add_llvm_library(LLVMPowerPCAsmParser

- PPCAsmParser.cpp

- )

diff --git a/lib/Target/PowerPC/AsmParser/LLVMBuild.txt b/lib/Target/PowerPC/AsmParser/LLVMBuild.txt
deleted file mode 100644
index 801f27bb7bc3..000000000000
--- a/lib/Target/PowerPC/AsmParser/LLVMBuild.txt
+++ /dev/null

@@ -1,23 +0,0 @@

-;===- ./lib/Target/PowerPC/AsmParser/LLVMBuild.txt -------------*- Conf -*--===;

-; The LLVM Compiler Infrastructure

-; This file is distributed under the University of Illinois Open Source

-; License. See LICENSE.TXT for details.

-;===------------------------------------------------------------------------===;

-; This is an LLVMBuild description file for the components in this subdirectory.

-; For more information on the LLVMBuild system, please see:

-; http://llvm.org/docs/LLVMBuild.html

-;===------------------------------------------------------------------------===;

-[component_0]

-type = Library

-name = PowerPCAsmParser

-parent = PowerPC

-required_libraries = MC MCParser PowerPCDesc PowerPCInfo Support

-add_to_library_groups = PowerPC

diff --git a/lib/Target/PowerPC/CMakeLists.txt b/lib/Target/PowerPC/CMakeLists.txt
deleted file mode 100644
index 3130d10fa5ed..000000000000
--- a/lib/Target/PowerPC/CMakeLists.txt
+++ /dev/null

@@ -1,55 +0,0 @@

-set(LLVM_TARGET_DEFINITIONS PPC.td)

-tablegen(LLVM PPCGenAsmMatcher.inc -gen-asm-matcher)

-tablegen(LLVM PPCGenAsmWriter.inc -gen-asm-writer)

-tablegen(LLVM PPCGenCallingConv.inc -gen-callingconv)

-tablegen(LLVM PPCGenDAGISel.inc -gen-dag-isel)

-tablegen(LLVM PPCGenDisassemblerTables.inc -gen-disassembler)

-tablegen(LLVM PPCGenFastISel.inc -gen-fast-isel)

-tablegen(LLVM PPCGenInstrInfo.inc -gen-instr-info)

-tablegen(LLVM PPCGenMCCodeEmitter.inc -gen-emitter)

-tablegen(LLVM PPCGenRegisterInfo.inc -gen-register-info)

-tablegen(LLVM PPCGenSubtargetInfo.inc -gen-subtarget)

-tablegen(LLVM PPCGenExegesis.inc -gen-exegesis)

-add_public_tablegen_target(PowerPCCommonTableGen)

-add_llvm_target(PowerPCCodeGen

- PPCBoolRetToInt.cpp

- PPCAsmPrinter.cpp

- PPCBranchSelector.cpp

- PPCBranchCoalescing.cpp

- PPCCCState.cpp

- PPCCTRLoops.cpp

- PPCHazardRecognizers.cpp

- PPCInstrInfo.cpp

- PPCISelDAGToDAG.cpp

- PPCISelLowering.cpp

- PPCEarlyReturn.cpp

- PPCFastISel.cpp

- PPCFrameLowering.cpp

- PPCLoopPreIncPrep.cpp

- PPCMCInstLower.cpp

- PPCMachineFunctionInfo.cpp

- PPCMIPeephole.cpp

- PPCRegisterInfo.cpp

- PPCQPXLoadSplat.cpp

- PPCSubtarget.cpp

- PPCTargetMachine.cpp

- PPCTargetObjectFile.cpp

- PPCTargetTransformInfo.cpp

- PPCTOCRegDeps.cpp

- PPCTLSDynamicCall.cpp

- PPCVSXCopy.cpp

- PPCReduceCRLogicals.cpp

- PPCVSXFMAMutate.cpp

- PPCVSXSwapRemoval.cpp

- PPCExpandISEL.cpp

- PPCPreEmitPeephole.cpp

- )

-add_subdirectory(AsmParser)

-add_subdirectory(Disassembler)

-add_subdirectory(InstPrinter)

-add_subdirectory(MCTargetDesc)

-add_subdirectory(TargetInfo)

diff --git a/lib/Target/PowerPC/Disassembler/CMakeLists.txt b/lib/Target/PowerPC/Disassembler/CMakeLists.txt
deleted file mode 100644
index ca457df88d3e..000000000000
--- a/lib/Target/PowerPC/Disassembler/CMakeLists.txt
+++ /dev/null

@@ -1,3 +0,0 @@

-add_llvm_library(LLVMPowerPCDisassembler

- PPCDisassembler.cpp

- )

diff --git a/lib/Target/PowerPC/Disassembler/LLVMBuild.txt b/lib/Target/PowerPC/Disassembler/LLVMBuild.txt
deleted file mode 100644
index ea3e7eaf839d..000000000000
--- a/lib/Target/PowerPC/Disassembler/LLVMBuild.txt
+++ /dev/null

@@ -1,23 +0,0 @@

-;===-- ./lib/Target/PowerPC/Disassembler/LLVMBuild.txt ---------*- Conf -*--===;

-; The LLVM Compiler Infrastructure

-; This file is distributed under the University of Illinois Open Source

-; License. See LICENSE.TXT for details.

-;===------------------------------------------------------------------------===;

-; This is an LLVMBuild description file for the components in this subdirectory.

-; For more information on the LLVMBuild system, please see:

-; http://llvm.org/docs/LLVMBuild.html

-;===------------------------------------------------------------------------===;

-[component_0]

-type = Library

-name = PowerPCDisassembler

-parent = PowerPC

-required_libraries = MCDisassembler PowerPCInfo Support

-add_to_library_groups = PowerPC

diff --git a/lib/Target/PowerPC/InstPrinter/CMakeLists.txt b/lib/Target/PowerPC/InstPrinter/CMakeLists.txt
deleted file mode 100644
index ab30a110f40e..000000000000
--- a/lib/Target/PowerPC/InstPrinter/CMakeLists.txt
+++ /dev/null

@@ -1,3 +0,0 @@

-add_llvm_library(LLVMPowerPCAsmPrinter

- PPCInstPrinter.cpp

- )

diff --git a/lib/Target/PowerPC/InstPrinter/LLVMBuild.txt b/lib/Target/PowerPC/InstPrinter/LLVMBuild.txt
deleted file mode 100644
index 7c691deafccf..000000000000
--- a/lib/Target/PowerPC/InstPrinter/LLVMBuild.txt
+++ /dev/null

@@ -1,23 +0,0 @@

-;===- ./lib/Target/PowerPC/InstPrinter/LLVMBuild.txt -----------*- Conf -*--===;

-; The LLVM Compiler Infrastructure

-; This file is distributed under the University of Illinois Open Source

-; License. See LICENSE.TXT for details.

-;===------------------------------------------------------------------------===;

-; This is an LLVMBuild description file for the components in this subdirectory.

-; For more information on the LLVMBuild system, please see:

-; http://llvm.org/docs/LLVMBuild.html

-;===------------------------------------------------------------------------===;

-[component_0]

-type = Library

-name = PowerPCAsmPrinter

-parent = PowerPC

-required_libraries = MC Support

-add_to_library_groups = PowerPC

diff --git a/lib/Target/PowerPC/LLVMBuild.txt b/lib/Target/PowerPC/LLVMBuild.txt
deleted file mode 100644
index fd5fa560912f..000000000000
--- a/lib/Target/PowerPC/LLVMBuild.txt
+++ /dev/null

@@ -1,35 +0,0 @@

-;===- ./lib/Target/PowerPC/LLVMBuild.txt -----------------------*- Conf -*--===;

-; The LLVM Compiler Infrastructure

-; This file is distributed under the University of Illinois Open Source

-; License. See LICENSE.TXT for details.

-;===------------------------------------------------------------------------===;

-; This is an LLVMBuild description file for the components in this subdirectory.

-; For more information on the LLVMBuild system, please see:

-; http://llvm.org/docs/LLVMBuild.html

-;===------------------------------------------------------------------------===;

-[common]

-subdirectories = AsmParser Disassembler InstPrinter MCTargetDesc TargetInfo

-[component_0]

-type = TargetGroup

-name = PowerPC

-parent = Target

-has_asmparser = 1

-has_asmprinter = 1

-has_disassembler = 1

-has_jit = 1

-[component_1]

-type = Library

-name = PowerPCCodeGen

-parent = PowerPC

-required_libraries = Analysis AsmPrinter CodeGen Core MC PowerPCAsmPrinter PowerPCDesc PowerPCInfo Scalar SelectionDAG Support Target TransformUtils

-add_to_library_groups = PowerPC

diff --git a/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt b/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt
deleted file mode 100644
index 3cea65ee4de6..000000000000
--- a/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt
+++ /dev/null

@@ -1,10 +0,0 @@

-add_llvm_library(LLVMPowerPCDesc

- PPCAsmBackend.cpp

- PPCMCTargetDesc.cpp

- PPCMCAsmInfo.cpp

- PPCMCCodeEmitter.cpp

- PPCMCExpr.cpp

- PPCPredicates.cpp

- PPCMachObjectWriter.cpp

- PPCELFObjectWriter.cpp

- )

diff --git a/lib/Target/PowerPC/MCTargetDesc/LLVMBuild.txt b/lib/Target/PowerPC/MCTargetDesc/LLVMBuild.txt
deleted file mode 100644
index d3a567d1581d..000000000000
--- a/lib/Target/PowerPC/MCTargetDesc/LLVMBuild.txt
+++ /dev/null

@@ -1,23 +0,0 @@

-;===- ./lib/Target/PowerPC/MCTargetDesc/LLVMBuild.txt ----------*- Conf -*--===;

-; The LLVM Compiler Infrastructure

-; This file is distributed under the University of Illinois Open Source

-; License. See LICENSE.TXT for details.

-;===------------------------------------------------------------------------===;

-; This is an LLVMBuild description file for the components in this subdirectory.

-; For more information on the LLVMBuild system, please see:

-; http://llvm.org/docs/LLVMBuild.html

-;===------------------------------------------------------------------------===;

-[component_0]

-type = Library

-name = PowerPCDesc

-parent = PowerPC

-required_libraries = MC PowerPCAsmPrinter PowerPCInfo Support

-add_to_library_groups = PowerPC

diff --git a/lib/Target/PowerPC/README.txt b/lib/Target/PowerPC/README.txt
deleted file mode 100644
index b4bf635dc2c7..000000000000
--- a/lib/Target/PowerPC/README.txt
+++ /dev/null

@@ -1,665 +0,0 @@

-//===- README.txt - Notes for improving PowerPC-specific code gen ---------===//

-TODO:

-* lmw/stmw pass a la arm load store optimizer for prolog/epilog

-===-------------------------------------------------------------------------===

-This code:

-unsigned add32carry(unsigned sum, unsigned x) {

- unsigned z = sum + x;

- if (sum + x < x)

- z++;

- return z;

-Should compile to something like:

- addc r3,r3,r4

- addze r3,r3

-instead we get:

- add r3, r4, r3

- cmplw cr7, r3, r4

- mfcr r4 ; 1

- rlwinm r4, r4, 29, 31, 31

- add r3, r3, r4

-Ick.

-===-------------------------------------------------------------------------===

-We compile the hottest inner loop of viterbi to:

- li r6, 0

- b LBB1_84 ;bb432.i

-LBB1_83: ;bb420.i

- lbzx r8, r5, r7

- addi r6, r7, 1

- stbx r8, r4, r7

-LBB1_84: ;bb432.i

- mr r7, r6

- cmplwi cr0, r7, 143

- bne cr0, LBB1_83 ;bb420.i

-The CBE manages to produce:

- li r0, 143

- mtctr r0

-loop:

- lbzx r2, r2, r11

- stbx r0, r2, r9

- addi r2, r2, 1

- bdz later

- b loop

-This could be much better (bdnz instead of bdz) but it still beats us. If we

-produced this with bdnz, the loop would be a single dispatch group.

-===-------------------------------------------------------------------------===

-Lump the constant pool for each function into ONE pic object, and reference

-pieces of it as offsets from the start. For functions like this (contrived

-to have lots of constants obviously):

-double X(double Y) { return (Y*1.23 + 4.512)*2.34 + 14.38; }

-We generate:

-_X:

- lis r2, ha16(.CPI_X_0)

- lfd f0, lo16(.CPI_X_0)(r2)

- lis r2, ha16(.CPI_X_1)

- lfd f2, lo16(.CPI_X_1)(r2)

- fmadd f0, f1, f0, f2

- lis r2, ha16(.CPI_X_2)

- lfd f1, lo16(.CPI_X_2)(r2)

- lis r2, ha16(.CPI_X_3)

- lfd f2, lo16(.CPI_X_3)(r2)

- fmadd f1, f0, f1, f2

- blr

-It would be better to materialize .CPI_X into a register, then use immediates

-off of the register to avoid the lis's. This is even more important in PIC

-mode.

-Note that this (and the static variable version) is discussed here for GCC:

-http://gcc.gnu.org/ml/gcc-patches/2006-02/msg00133.html

-Here's another example (the sgn function):

-double testf(double a) {

- return a == 0.0 ? 0.0 : (a > 0.0 ? 1.0 : -1.0);

-it produces a BB like this:

-LBB1_1: ; cond_true

- lis r2, ha16(LCPI1_0)

- lfs f0, lo16(LCPI1_0)(r2)

- lis r2, ha16(LCPI1_1)

- lis r3, ha16(LCPI1_2)

- lfs f2, lo16(LCPI1_2)(r3)

- lfs f3, lo16(LCPI1_1)(r2)

- fsub f0, f0, f1

- fsel f1, f0, f2, f3

- blr

-===-------------------------------------------------------------------------===

-PIC Code Gen IPO optimization:

-Squish small scalar globals together into a single global struct, allowing the

-address of the struct to be CSE'd, avoiding PIC accesses (also reduces the size

-of the GOT on targets with one).

-Note that this is discussed here for GCC:

-http://gcc.gnu.org/ml/gcc-patches/2006-02/msg00133.html

-===-------------------------------------------------------------------------===

-Darwin Stub removal:

-We still generate calls to foo$stub, and stubs, on Darwin. This is not

-necessary when building with the Leopard (10.5) or later linker, as stubs are

-generated by ld when necessary. Parameterizing this based on the deployment

-target (-mmacosx-version-min) is probably enough. x86-32 does this right, see

-its logic.

-===-------------------------------------------------------------------------===

-Darwin Stub LICM optimization:

-Loops like this:

- for (...) bar();

-Have to go through an indirect stub if bar is external or linkonce. It would

-be better to compile it as:

- fp = &bar;

- for (...) fp();

-which only computes the address of bar once (instead of each time through the

-stub). This is Darwin specific and would have to be done in the code generator.

-Probably not a win on x86.

-===-------------------------------------------------------------------------===

-Simple IPO for argument passing, change:

- void foo(int X, double Y, int Z) -> void foo(int X, int Z, double Y)

-the Darwin ABI specifies that any integer arguments in the first 32 bytes worth

-of arguments get assigned to r3 through r10. That is, if you have a function

-foo(int, double, int) you get r3, f1, r6, since the 64 bit double ate up the

-argument bytes for r4 and r5. The trick then would be to shuffle the argument

-order for functions we can internalize so that the maximum number of

-integers/pointers get passed in regs before you see any of the fp arguments.

-Instead of implementing this, it would actually probably be easier to just

-implement a PPC fastcc, where we could do whatever we wanted to the CC,

-including having this work sanely.

-===-------------------------------------------------------------------------===

-Fix Darwin FP-In-Integer Registers ABI

-Darwin passes doubles in structures in integer registers, which is very very

-bad. Add something like a BITCAST to LLVM, then do an i-p transformation that

-percolates these things out of functions.

-Check out how horrible this is:

-http://gcc.gnu.org/ml/gcc/2005-10/msg01036.html

-This is an extension of "interprocedural CC unmunging" that can't be done with

-just fastcc.

-===-------------------------------------------------------------------------===

-Fold add and sub with constant into non-extern, non-weak addresses so this:

-static int a;

-void bar(int b) { a = b; }

-void foo(unsigned char *c) {

- *c = a;

-So that

-_foo:

- lis r2, ha16(_a)

- la r2, lo16(_a)(r2)

- lbz r2, 3(r2)

- stb r2, 0(r3)

- blr

-Becomes

-_foo:

- lis r2, ha16(_a+3)

- lbz r2, lo16(_a+3)(r2)

- stb r2, 0(r3)

- blr

-===-------------------------------------------------------------------------===

-We should compile these two functions to the same thing:

-#include <stdlib.h>

-void f(int a, int b, int *P) {

- *P = (a-b)>=0?(a-b):(b-a);

-void g(int a, int b, int *P) {

- *P = abs(a-b);

-Further, they should compile to something better than:

-_g:

- subf r2, r4, r3

- subfic r3, r2, 0

- cmpwi cr0, r2, -1

- bgt cr0, LBB2_2 ; entry

-LBB2_1: ; entry

- mr r2, r3

-LBB2_2: ; entry

- stw r2, 0(r5)

- blr

-GCC produces:

-_g:

- subf r4,r4,r3

- srawi r2,r4,31

- xor r0,r2,r4

- subf r0,r2,r0

- stw r0,0(r5)

- blr

-... which is much nicer.

-This theoretically may help improve twolf slightly (used in dimbox.c:142?).

-===-------------------------------------------------------------------------===

-PR5945: This:

-define i32 @clamp0g(i32 %a) {

-entry:

- %cmp = icmp slt i32 %a, 0

- %sel = select i1 %cmp, i32 0, i32 %a

- ret i32 %sel

-Is compiled to this with the PowerPC (32-bit) backend:

-_clamp0g:

- cmpwi cr0, r3, 0

- li r2, 0

- blt cr0, LBB1_2

-; %bb.1: ; %entry

- mr r2, r3

-LBB1_2: ; %entry

- mr r3, r2

- blr

-This could be reduced to the much simpler:

-_clamp0g:

- srawi r2, r3, 31

- andc r3, r3, r2

- blr

-===-------------------------------------------------------------------------===

-int foo(int N, int ***W, int **TK, int X) {

- int t, i;

- for (t = 0; t < N; ++t)

- for (i = 0; i < 4; ++i)

- W[t / X][i][t % X] = TK[i][t];

- return 5;

-We generate relatively atrocious code for this loop compared to gcc.

-We could also strength reduce the rem and the div:

-http://www.lcs.mit.edu/pubs/pdf/MIT-LCS-TM-600.pdf

-===-------------------------------------------------------------------------===

-We generate ugly code for this:

-void func(unsigned int *ret, float dx, float dy, float dz, float dw) {

- unsigned code = 0;

- if(dx < -dw) code |= 1;

- if(dx > dw) code |= 2;

- if(dy < -dw) code |= 4;

- if(dy > dw) code |= 8;

- if(dz < -dw) code |= 16;

- if(dz > dw) code |= 32;

- *ret = code;

-===-------------------------------------------------------------------------===

-%struct.B = type { i8, [3 x i8] }

-define void @bar(%struct.B* %b) {

-entry:

- %tmp = bitcast %struct.B* %b to i32* ; <uint*> [#uses=1]

- %tmp = load i32* %tmp ; <uint> [#uses=1]

- %tmp3 = bitcast %struct.B* %b to i32* ; <uint*> [#uses=1]

- %tmp4 = load i32* %tmp3 ; <uint> [#uses=1]

- %tmp8 = bitcast %struct.B* %b to i32* ; <uint*> [#uses=2]

- %tmp9 = load i32* %tmp8 ; <uint> [#uses=1]

- %tmp4.mask17 = shl i32 %tmp4, i8 1 ; <uint> [#uses=1]

- %tmp1415 = and i32 %tmp4.mask17, 2147483648 ; <uint> [#uses=1]

- %tmp.masked = and i32 %tmp, 2147483648 ; <uint> [#uses=1]

- %tmp11 = or i32 %tmp1415, %tmp.masked ; <uint> [#uses=1]

- %tmp12 = and i32 %tmp9, 2147483647 ; <uint> [#uses=1]

- %tmp13 = or i32 %tmp12, %tmp11 ; <uint> [#uses=1]

- store i32 %tmp13, i32* %tmp8

- ret void

-We emit:

-_foo:

- lwz r2, 0(r3)

- slwi r4, r2, 1

- or r4, r4, r2

- rlwimi r2, r4, 0, 0, 0

- stw r2, 0(r3)

- blr

-We could collapse a bunch of those ORs and ANDs and generate the following

-equivalent code:

-_foo:

- lwz r2, 0(r3)

- rlwinm r4, r2, 1, 0, 0

- or r2, r2, r4

- stw r2, 0(r3)

- blr

-===-------------------------------------------------------------------------===

-Consider a function like this:

-float foo(float X) { return X + 1234.4123f; }

-The FP constant ends up in the constant pool, so we need to get the LR register.

- This ends up producing code like this:

-_foo:

-.LBB_foo_0: ; entry

- mflr r11

-*** stw r11, 8(r1)

- bl "L00000$pb"

-"L00000$pb":

- mflr r2

- addis r2, r2, ha16(.CPI_foo_0-"L00000$pb")

- lfs f0, lo16(.CPI_foo_0-"L00000$pb")(r2)

- fadds f1, f1, f0

-*** lwz r11, 8(r1)

- mtlr r11

- blr

-This is functional, but there is no reason to spill the LR register all the way

-to the stack (the two marked instrs): spilling it to a GPR is quite enough.

-Implementing this will require some codegen improvements. Nate writes:

-"So basically what we need to support the "no stack frame save and restore" is a

-generalization of the LR optimization to "callee-save regs".

-Currently, we have LR marked as a callee-save reg. The register allocator sees

-that it's callee save, and spills it directly to the stack.

-Ideally, something like this would happen:

-LR would be in a separate register class from the GPRs. The class of LR would be

-marked "unspillable". When the register allocator came across an unspillable

-reg, it would ask "what is the best class to copy this into that I *can* spill"

-If it gets a class back, which it will in this case (the gprs), it grabs a free

-register of that class. If it is then later necessary to spill that reg, so be

-it.

-===-------------------------------------------------------------------------===

-We compile this:

-int test(_Bool X) {

- return X ? 524288 : 0;

-to:

-_test:

- cmplwi cr0, r3, 0

- lis r2, 8

- li r3, 0

- beq cr0, LBB1_2 ;entry

-LBB1_1: ;entry

- mr r3, r2

-LBB1_2: ;entry

- blr

-instead of:

-_test:

- addic r2,r3,-1

- subfe r0,r2,r3

- slwi r3,r0,19

- blr

-This sort of thing occurs a lot due to globalopt.

-===-------------------------------------------------------------------------===

-We compile:

-define i32 @bar(i32 %x) nounwind readnone ssp {

-entry:

- %0 = icmp eq i32 %x, 0 ; <i1> [#uses=1]

- %neg = sext i1 %0 to i32 ; <i32> [#uses=1]

- ret i32 %neg

-to:

-_bar:

- cntlzw r2, r3

- slwi r2, r2, 26

- srawi r3, r2, 31

- blr

-it would be better to produce:

-_bar:

- addic r3,r3,-1

- subfe r3,r3,r3

- blr

-===-------------------------------------------------------------------------===

-We generate horrible ppc code for this:

-#define N 2000000

-double a[N],c[N];

-void simpleloop() {

- int j;

- for (j=0; j<N; j++)

- c[j] = a[j];

-LBB1_1: ;bb

- lfdx f0, r3, r4

- addi r5, r5, 1 ;; Extra IV for the exit value compare.

- stfdx f0, r2, r4

- addi r4, r4, 8

- xoris r6, r5, 30 ;; This is due to a large immediate.

- cmplwi cr0, r6, 33920

- bne cr0, LBB1_1

-//===---------------------------------------------------------------------===//

-This:

- #include <algorithm>

- inline std::pair<unsigned, bool> full_add(unsigned a, unsigned b)

- { return std::make_pair(a + b, a + b < a); }

- bool no_overflow(unsigned a, unsigned b)

- { return !full_add(a, b).second; }

-Should compile to:

-__Z11no_overflowjj:

- add r4,r3,r4

- subfc r3,r3,r4

- li r3,0

- adde r3,r3,r3

- blr

-(or better) not:

-__Z11no_overflowjj:

- add r2, r4, r3

- cmplw cr7, r2, r3

- mfcr r2

- rlwinm r2, r2, 29, 31, 31

- xori r3, r2, 1

- blr

-//===---------------------------------------------------------------------===//

-We compile some FP comparisons into an mfcr with two rlwinms and an or. For

-example:

-#include <math.h>

-int test(double x, double y) { return islessequal(x, y);}

-int test2(double x, double y) { return islessgreater(x, y);}

-int test3(double x, double y) { return !islessequal(x, y);}

-Compiles into (all three are similar, but the bits differ):

-_test:

- fcmpu cr7, f1, f2

- mfcr r2

- rlwinm r3, r2, 29, 31, 31

- rlwinm r2, r2, 31, 31, 31

- or r3, r2, r3

- blr

-GCC compiles this into:

- _test:

- fcmpu cr7,f1,f2

- cror 30,28,30

- mfcr r3

- rlwinm r3,r3,31,1

- blr

-which is more efficient and can use mfocr. See PR642 for some more context.

-//===---------------------------------------------------------------------===//

-void foo(float *data, float d) {

- long i;

- for (i = 0; i < 8000; i++)

- data[i] = d;

-void foo2(float *data, float d) {

- long i;

- data--;

- for (i = 0; i < 8000; i++) {

- data[1] = d;

- data++;

- }

-These compile to:

-_foo:

- li r2, 0

-LBB1_1: ; bb

- addi r4, r2, 4

- stfsx f1, r3, r2

- cmplwi cr0, r4, 32000

- mr r2, r4

- bne cr0, LBB1_1 ; bb

- blr

-_foo2:

- li r2, 0

-LBB2_1: ; bb

- addi r4, r2, 4

- stfsx f1, r3, r2

- cmplwi cr0, r4, 32000

- mr r2, r4

- bne cr0, LBB2_1 ; bb

- blr

-The 'mr' could be eliminated by folding the add into the cmp better.

-//===---------------------------------------------------------------------===//

-Codegen for the following (low-probability) case deteriorated considerably

-when the correctness fixes for unordered comparisons went in (PR 642, 58871).

-It should be possible to recover the code quality described in the comments.

-; RUN: llvm-as < %s | llc -march=ppc32 | grep or | count 3

-; This should produce one 'or' or 'cror' instruction per function.

-; RUN: llvm-as < %s | llc -march=ppc32 | grep mfcr | count 3

-; PR2964

-define i32 @test(double %x, double %y) nounwind {

-entry:

- %tmp3 = fcmp ole double %x, %y ; <i1> [#uses=1]

- %tmp345 = zext i1 %tmp3 to i32 ; <i32> [#uses=1]

- ret i32 %tmp345

-define i32 @test2(double %x, double %y) nounwind {

-entry:

- %tmp3 = fcmp one double %x, %y ; <i1> [#uses=1]

- %tmp345 = zext i1 %tmp3 to i32 ; <i32> [#uses=1]

- ret i32 %tmp345

-define i32 @test3(double %x, double %y) nounwind {

-entry:

- %tmp3 = fcmp ugt double %x, %y ; <i1> [#uses=1]

- %tmp34 = zext i1 %tmp3 to i32 ; <i32> [#uses=1]

- ret i32 %tmp34

-//===---------------------------------------------------------------------===//

-for the following code:

-void foo (float *__restrict__ a, int *__restrict__ b, int n) {

- a[n] = b[n] * 2.321;

-we load b[n] to a GPR, then move it to a VSX register and convert it to float. We should

-use vsx scalar integer load instructions to avoid direct moves

-//===----------------------------------------------------------------------===//

-; RUN: llvm-as < %s | llc -march=ppc32 | not grep fneg

-; This could generate FSEL with appropriate flags (FSEL is not IEEE-safe, and

-; should not be generated except with -enable-finite-only-fp-math or the like).

-; With the correctness fixes for PR642 (58871) LowerSELECT_CC would need to

-; recognize a more elaborate tree than a simple SETxx.

-define double @test_FNEG_sel(double %A, double %B, double %C) {

- %D = fsub double -0.000000e+00, %A ; <double> [#uses=1]

- %Cond = fcmp ugt double %D, -0.000000e+00 ; <i1> [#uses=1]

- %E = select i1 %Cond, double %B, double %C ; <double> [#uses=1]

- ret double %E

-//===----------------------------------------------------------------------===//

-The save/restore sequence for CR in prolog/epilog is terrible:

-- Each CR subreg is saved individually, rather than doing one save as a unit.

-- On Darwin, the save is done after the decrement of SP, which means the offset

-from SP of the save slot can be too big for a store instruction, which means we

-need an additional register (currently hacked in 96015+96020; the solution there

-is correct, but poor).

-- On SVR4 the same thing can happen, and I don't think saving before the SP

-decrement is safe on that target, as there is no red zone. This is currently

-broken AFAIK, although it's not a target I can exercise.

-The following demonstrates the problem:

-extern void bar(char *p);

-void foo() {

- char x[100000];

- bar(x);

- __asm__("" ::: "cr2");

-//===-------------------------------------------------------------------------===

-Naming convention for instruction formats is very haphazard.

-We have agreed on a naming scheme as follows:

-<INST_form>{_<OP_type><OP_len>}+

-Where:

-INST_form is the instruction format (X-form, etc.)

-OP_type is the operand type - one of OPC (opcode), RD (register destination),

- RS (register source),

- RDp (destination register pair),

- RSp (source register pair), IM (immediate),

- XO (extended opcode)

-OP_len is the length of the operand in bits

-VSX register operands would be of length 6 (split across two fields),

-condition register fields of length 3.

-We would not need to denote reserved fields in names of instruction formats.

-//===----------------------------------------------------------------------===//

-Instruction fusion was introduced in ISA 2.06 and more opportunities added in

-ISA 2.07. LLVM needs to add infrastructure to recognize fusion opportunities

-and force instruction pairs to be scheduled together.

------------------------------------------------------------------------------

-More general handling of any_extend and zero_extend:

-See https://reviews.llvm.org/D24924#555306

diff --git a/lib/Target/PowerPC/README_ALTIVEC.txt b/lib/Target/PowerPC/README_ALTIVEC.txt
deleted file mode 100644
index c38e01923161..000000000000
--- a/lib/Target/PowerPC/README_ALTIVEC.txt
+++ /dev/null

@@ -1,343 +0,0 @@

-//===- README_ALTIVEC.txt - Notes for improving Altivec code gen ----------===//

-Implement PPCInstrInfo::isLoadFromStackSlot/isStoreToStackSlot for vector

-registers, to generate better spill code.

-//===----------------------------------------------------------------------===//

-The first should be a single lvx from the constant pool, the second should be

-a xor/stvx:

-void foo(void) {

- int x[8] __attribute__((aligned(128))) = { 1, 1, 1, 17, 1, 1, 1, 1 };

- bar (x);

-#include <string.h>

-void foo(void) {

- int x[8] __attribute__((aligned(128)));

- memset (x, 0, sizeof (x));

- bar (x);

-//===----------------------------------------------------------------------===//

-Altivec: Codegen'ing MUL with vector FMADD should add -0.0, not 0.0:

-http://gcc.gnu.org/bugzilla/show_bug.cgi?id=8763

-When -ffast-math is on, we can use 0.0.

-//===----------------------------------------------------------------------===//

- Consider this:

- v4f32 Vector;

- v4f32 Vector2 = { Vector.X, Vector.X, Vector.X, Vector.X };

-Since we know that "Vector" is 16-byte aligned and we know the element offset

-of ".X", we should change the load into a lve*x instruction, instead of doing

-a load/store/lve*x sequence.

-//===----------------------------------------------------------------------===//

-For functions that use altivec AND have calls, we are VRSAVE'ing all call

-clobbered regs.

-//===----------------------------------------------------------------------===//

-Implement passing vectors by value into calls and receiving them as arguments.

-//===----------------------------------------------------------------------===//

-GCC apparently tries to codegen { C1, C2, Variable, C3 } as a constant pool load

-of C1/C2/C3, then a load and vperm of Variable.

-//===----------------------------------------------------------------------===//

-We need a way to teach tblgen that some operands of an intrinsic are required to

-be constants. The verifier should enforce this constraint.

-//===----------------------------------------------------------------------===//

-We currently codegen SCALAR_TO_VECTOR as a store of the scalar to a 16-byte

-aligned stack slot, followed by a load/vperm. We should probably just store it

-to a scalar stack slot, then use lvsl/vperm to load it. If the value is already

-in memory this is a big win.

-//===----------------------------------------------------------------------===//

-extract_vector_elt of an arbitrary constant vector can be done with the

-following instructions:

-vTemp = vec_splat(v0,2); // 2 is the element the src is in.

-vec_ste(&destloc,0,vTemp);

-We can do an arbitrary non-constant value by using lvsr/perm/ste.

-//===----------------------------------------------------------------------===//

-If we want to tie instruction selection into the scheduler, we can do some

-constant formation with different instructions. For example, we can generate

-"vsplti -1" with "vcmpequw R,R" and 1,1,1,1 with "vsubcuw R,R", and 0,0,0,0 with

-"vsplti 0" or "vxor", each of which use different execution units, thus could

-help scheduling.

-This is probably only reasonable for a post-pass scheduler.

-//===----------------------------------------------------------------------===//

-For this function:

-void test(vector float *A, vector float *B) {

- vector float C = (vector float)vec_cmpeq(*A, *B);

- if (!vec_any_eq(*A, *B))

- *B = (vector float){0,0,0,0};

- *A = C;

-we get the following basic block:

- ...

- lvx v2, 0, r4

- lvx v3, 0, r3

- vcmpeqfp v4, v3, v2

- vcmpeqfp. v2, v3, v2

- bne cr6, LBB1_2 ; cond_next

-The vcmpeqfp/vcmpeqfp. instructions currently cannot be merged when the

-vcmpeqfp. result is used by a branch. This can be improved.

-//===----------------------------------------------------------------------===//

-The code generated for this is truly awful:

-vector float test(float a, float b) {

- return (vector float){ 0.0, a, 0.0, 0.0};

-LCPI1_0: ; float

- .space 4

- .text

- .globl _test

- .align 4

-_test:

- mfspr r2, 256

- oris r3, r2, 4096

- mtspr 256, r3

- lis r3, ha16(LCPI1_0)

- addi r4, r1, -32

- stfs f1, -16(r1)

- addi r5, r1, -16

- lfs f0, lo16(LCPI1_0)(r3)

- stfs f0, -32(r1)

- lvx v2, 0, r4

- lvx v3, 0, r5

- vmrghw v3, v3, v2

- vspltw v2, v2, 0

- vmrghw v2, v2, v3

- mtspr 256, r2

- blr

-//===----------------------------------------------------------------------===//

-int foo(vector float *x, vector float *y) {

- if (vec_all_eq(*x,*y)) return 3245;

- else return 12;

-A predicate compare being used in a select_cc should have the same peephole

-applied to it as a predicate compare used by a br_cc. There should be no

-mfcr here:

-_foo:

- mfspr r2, 256

- oris r5, r2, 12288

- mtspr 256, r5

- li r5, 12

- li r6, 3245

- lvx v2, 0, r4

- lvx v3, 0, r3

- vcmpeqfp. v2, v3, v2

- mfcr r3, 2

- rlwinm r3, r3, 25, 31, 31

- cmpwi cr0, r3, 0

- bne cr0, LBB1_2 ; entry

-LBB1_1: ; entry

- mr r6, r5

-LBB1_2: ; entry

- mr r3, r6

- mtspr 256, r2

- blr

-//===----------------------------------------------------------------------===//

-CodeGen/PowerPC/vec_constants.ll has an and operation that should be

-codegen'd to andc. The issue is that the 'all ones' build vector is

-SelectNodeTo'd a VSPLTISB instruction node before the and/xor is selected

-which prevents the vnot pattern from matching.

-//===----------------------------------------------------------------------===//

-An alternative to the store/store/load approach for illegal insert element

-lowering would be:

-1. store element to any ol' slot

-2. lvx the slot

-3. lvsl 0; splat index; vcmpeq to generate a select mask

-4. lvsl slot + x; vperm to rotate result into correct slot

-5. vsel result together.

-//===----------------------------------------------------------------------===//

-Should codegen branches on vec_any/vec_all to avoid mfcr. Two examples:

-#include <altivec.h>

- int f(vector float a, vector float b)

- {

- int aa = 0;

- if (vec_all_ge(a, b))

- aa |= 0x1;

- if (vec_any_ge(a,b))

- aa |= 0x2;

- return aa;

-vector float f(vector float a, vector float b) {

- if (vec_any_eq(a, b))

- return a;

- else

- return b;

-//===----------------------------------------------------------------------===//

-We should do a little better with eliminating dead stores.

-The stores to the stack are dead since %a and %b are not needed

-; Function Attrs: nounwind

-define <16 x i8> @test_vpmsumb() #0 {

- entry:

- %a = alloca <16 x i8>, align 16

- %b = alloca <16 x i8>, align 16

- store <16 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16>, <16 x i8>* %a, align 16

- store <16 x i8> <i8 113, i8 114, i8 115, i8 116, i8 117, i8 118, i8 119, i8 120, i8 121, i8 122, i8 123, i8 124, i8 125, i8 126, i8 127, i8 112>, <16 x i8>* %b, align 16

- %0 = load <16 x i8>* %a, align 16

- %1 = load <16 x i8>* %b, align 16

- %2 = call <16 x i8> @llvm.ppc.altivec.crypto.vpmsumb(<16 x i8> %0, <16 x i8> %1)

- ret <16 x i8> %2

-; Function Attrs: nounwind readnone

-declare <16 x i8> @llvm.ppc.altivec.crypto.vpmsumb(<16 x i8>, <16 x i8>) #1

-Produces the following code with -mtriple=powerpc64-unknown-linux-gnu:

-# %bb.0: # %entry

- addis 3, 2, .LCPI0_0@toc@ha

- addis 4, 2, .LCPI0_1@toc@ha

- addi 3, 3, .LCPI0_0@toc@l

- addi 4, 4, .LCPI0_1@toc@l

- lxvw4x 0, 0, 3

- addi 3, 1, -16

- lxvw4x 35, 0, 4

- stxvw4x 0, 0, 3

- ori 2, 2, 0

- lxvw4x 34, 0, 3

- addi 3, 1, -32

- stxvw4x 35, 0, 3

- vpmsumb 2, 2, 3

- blr

- .long 0

- .quad 0

-The two stxvw4x instructions are not needed.

-With -mtriple=powerpc64le-unknown-linux-gnu, the associated permutes

-are present too.

-//===----------------------------------------------------------------------===//

-The following example is found in test/CodeGen/PowerPC/vec_add_sub_doubleword.ll:

-define <2 x i64> @increment_by_val(<2 x i64> %x, i64 %val) nounwind {

- %tmpvec = insertelement <2 x i64> <i64 0, i64 0>, i64 %val, i32 0

- %tmpvec2 = insertelement <2 x i64> %tmpvec, i64 %val, i32 1

- %result = add <2 x i64> %x, %tmpvec2

- ret <2 x i64> %result

-This will generate the following instruction sequence:

- std 5, -8(1)

- std 5, -16(1)

- addi 3, 1, -16

- ori 2, 2, 0

- lxvd2x 35, 0, 3

- vaddudm 2, 2, 3

- blr

-This will almost certainly cause a load-hit-store hazard.

-Since val is a value parameter, it should not need to be saved onto

-the stack, unless it's being done to set up the vector register. Instead,

-it would be better to splat the value into a vector register, and then

-remove the (dead) stores to the stack.

-//===----------------------------------------------------------------------===//

-At the moment we always generate a lxsdx in preference to lfd, or stxsdx in

-preference to stfd. When we have a reg-immediate addressing mode, this is a

-poor choice, since we have to load the address into an index register. This

-should be fixed for P7/P8.

-//===----------------------------------------------------------------------===//

-Right now, ShuffleKind 0 is supported only on BE, and ShuffleKind 2 only on LE.

-However, we could actually support both kinds on either endianness, if we check

-for the appropriate shufflevector pattern for each case ... this would cause

-some additional shufflevectors to be recognized and implemented via the

-"swapped" form.

-//===----------------------------------------------------------------------===//

-There is a utility program called PerfectShuffle that generates a table of the

-shortest instruction sequence for implementing a shufflevector operation on

-PowerPC. However, this was designed for big-endian code generation. We could

-modify this program to create a little endian version of the table. The table

-is used in PPCISelLowering.cpp, PPCTargetLowering::LowerVECTOR_SHUFFLE().

-//===----------------------------------------------------------------------===//

-Opportunities to use instructions from PPCInstrVSX.td during code gen

- - Conversion instructions (Sections 7.6.1.5 and 7.6.1.6 of ISA 2.07)

- - Scalar comparisons (xscmpodp and xscmpudp)

- - Min and max (xsmaxdp, xsmindp, xvmaxdp, xvmindp, xvmaxsp, xvminsp)

-Related to this: we currently do not generate the lxvw4x instruction for either

-v4f32 or v4i32, probably because adding a dag pattern to the recognizer requires

-a single target type. This should probably be addressed in the PPCISelDAGToDAG logic.

-//===----------------------------------------------------------------------===//

-Currently EXTRACT_VECTOR_ELT and INSERT_VECTOR_ELT are type-legal only

-for v2f64 with VSX available. We should create custom lowering

-support for the other vector types. Without this support, we generate

-sequences with load-hit-store hazards.

-v4f32 can be supported with VSX by shifting the correct element into

-big-endian lane 0, using xscvspdpn to produce a double-precision

-representation of the single-precision value in big-endian

-double-precision lane 0, and reinterpreting lane 0 as an FPR or

-vector-scalar register.

-v2i64 can be supported with VSX and P8Vector in the same manner as

-v2f64, followed by a direct move to a GPR.

-v4i32 can be supported with VSX and P8Vector by shifting the correct

-element into big-endian lane 1, using a direct move to a GPR, and

-sign-extending the 32-bit result to 64 bits.

-v8i16 can be supported with VSX and P8Vector by shifting the correct

-element into big-endian lane 3, using a direct move to a GPR, and

-sign-extending the 16-bit result to 64 bits.

-v16i8 can be supported with VSX and P8Vector by shifting the correct

-element into big-endian lane 7, using a direct move to a GPR, and

-sign-extending the 8-bit result to 64 bits.

diff --git a/lib/Target/PowerPC/TargetInfo/CMakeLists.txt b/lib/Target/PowerPC/TargetInfo/CMakeLists.txt
deleted file mode 100644
index c9548c7fe0cd..000000000000
--- a/lib/Target/PowerPC/TargetInfo/CMakeLists.txt
+++ /dev/null

@@ -1,3 +0,0 @@

-add_llvm_library(LLVMPowerPCInfo

- PowerPCTargetInfo.cpp

- )

diff --git a/lib/Target/PowerPC/TargetInfo/LLVMBuild.txt b/lib/Target/PowerPC/TargetInfo/LLVMBuild.txt
deleted file mode 100644
index 410234686400..000000000000
--- a/lib/Target/PowerPC/TargetInfo/LLVMBuild.txt
+++ /dev/null

@@ -1,23 +0,0 @@

-;===- ./lib/Target/PowerPC/TargetInfo/LLVMBuild.txt ------------*- Conf -*--===;

-; The LLVM Compiler Infrastructure

-; This file is distributed under the University of Illinois Open Source

-; License. See LICENSE.TXT for details.

-;===------------------------------------------------------------------------===;

-; This is an LLVMBuild description file for the components in this subdirectory.

-; For more information on the LLVMBuild system, please see:

-; http://llvm.org/docs/LLVMBuild.html

-;===------------------------------------------------------------------------===;

-[component_0]

-type = Library

-name = PowerPCInfo

-parent = PowerPC

-required_libraries = Support

-add_to_library_groups = PowerPC