summaryrefslogtreecommitdiff
path: root/lib/Target/PowerPC
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2019-08-20 17:58:59 +0000
committerDimitry Andric <dim@FreeBSD.org>2019-08-20 17:58:59 +0000
commit1a56a5ead7a2e84bee8240f5f6b033b5f1707154 (patch)
tree2f526c9cfcb089e51c33d6e1f0d51b10bda34714 /lib/Target/PowerPC
parentd8e91e46262bc44006913e6796843909f1ac7bcd (diff)
Notes
Diffstat (limited to 'lib/Target/PowerPC')
-rw-r--r--lib/Target/PowerPC/AsmParser/CMakeLists.txt3
-rw-r--r--lib/Target/PowerPC/AsmParser/LLVMBuild.txt23
-rw-r--r--lib/Target/PowerPC/CMakeLists.txt55
-rw-r--r--lib/Target/PowerPC/Disassembler/CMakeLists.txt3
-rw-r--r--lib/Target/PowerPC/Disassembler/LLVMBuild.txt23
-rw-r--r--lib/Target/PowerPC/InstPrinter/CMakeLists.txt3
-rw-r--r--lib/Target/PowerPC/InstPrinter/LLVMBuild.txt23
-rw-r--r--lib/Target/PowerPC/LLVMBuild.txt35
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt10
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/LLVMBuild.txt23
-rw-r--r--lib/Target/PowerPC/README.txt665
-rw-r--r--lib/Target/PowerPC/README_ALTIVEC.txt343
-rw-r--r--lib/Target/PowerPC/TargetInfo/CMakeLists.txt3
-rw-r--r--lib/Target/PowerPC/TargetInfo/LLVMBuild.txt23
14 files changed, 0 insertions, 1235 deletions
diff --git a/lib/Target/PowerPC/AsmParser/CMakeLists.txt b/lib/Target/PowerPC/AsmParser/CMakeLists.txt
deleted file mode 100644
index 408858e424d5..000000000000
--- a/lib/Target/PowerPC/AsmParser/CMakeLists.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-add_llvm_library(LLVMPowerPCAsmParser
- PPCAsmParser.cpp
- )
diff --git a/lib/Target/PowerPC/AsmParser/LLVMBuild.txt b/lib/Target/PowerPC/AsmParser/LLVMBuild.txt
deleted file mode 100644
index 801f27bb7bc3..000000000000
--- a/lib/Target/PowerPC/AsmParser/LLVMBuild.txt
+++ /dev/null
@@ -1,23 +0,0 @@
-;===- ./lib/Target/PowerPC/AsmParser/LLVMBuild.txt -------------*- Conf -*--===;
-;
-; The LLVM Compiler Infrastructure
-;
-; This file is distributed under the University of Illinois Open Source
-; License. See LICENSE.TXT for details.
-;
-;===------------------------------------------------------------------------===;
-;
-; This is an LLVMBuild description file for the components in this subdirectory.
-;
-; For more information on the LLVMBuild system, please see:
-;
-; http://llvm.org/docs/LLVMBuild.html
-;
-;===------------------------------------------------------------------------===;
-
-[component_0]
-type = Library
-name = PowerPCAsmParser
-parent = PowerPC
-required_libraries = MC MCParser PowerPCDesc PowerPCInfo Support
-add_to_library_groups = PowerPC
diff --git a/lib/Target/PowerPC/CMakeLists.txt b/lib/Target/PowerPC/CMakeLists.txt
deleted file mode 100644
index 3130d10fa5ed..000000000000
--- a/lib/Target/PowerPC/CMakeLists.txt
+++ /dev/null
@@ -1,55 +0,0 @@
-set(LLVM_TARGET_DEFINITIONS PPC.td)
-
-tablegen(LLVM PPCGenAsmMatcher.inc -gen-asm-matcher)
-tablegen(LLVM PPCGenAsmWriter.inc -gen-asm-writer)
-tablegen(LLVM PPCGenCallingConv.inc -gen-callingconv)
-tablegen(LLVM PPCGenDAGISel.inc -gen-dag-isel)
-tablegen(LLVM PPCGenDisassemblerTables.inc -gen-disassembler)
-tablegen(LLVM PPCGenFastISel.inc -gen-fast-isel)
-tablegen(LLVM PPCGenInstrInfo.inc -gen-instr-info)
-tablegen(LLVM PPCGenMCCodeEmitter.inc -gen-emitter)
-tablegen(LLVM PPCGenRegisterInfo.inc -gen-register-info)
-tablegen(LLVM PPCGenSubtargetInfo.inc -gen-subtarget)
-tablegen(LLVM PPCGenExegesis.inc -gen-exegesis)
-
-add_public_tablegen_target(PowerPCCommonTableGen)
-
-add_llvm_target(PowerPCCodeGen
- PPCBoolRetToInt.cpp
- PPCAsmPrinter.cpp
- PPCBranchSelector.cpp
- PPCBranchCoalescing.cpp
- PPCCCState.cpp
- PPCCTRLoops.cpp
- PPCHazardRecognizers.cpp
- PPCInstrInfo.cpp
- PPCISelDAGToDAG.cpp
- PPCISelLowering.cpp
- PPCEarlyReturn.cpp
- PPCFastISel.cpp
- PPCFrameLowering.cpp
- PPCLoopPreIncPrep.cpp
- PPCMCInstLower.cpp
- PPCMachineFunctionInfo.cpp
- PPCMIPeephole.cpp
- PPCRegisterInfo.cpp
- PPCQPXLoadSplat.cpp
- PPCSubtarget.cpp
- PPCTargetMachine.cpp
- PPCTargetObjectFile.cpp
- PPCTargetTransformInfo.cpp
- PPCTOCRegDeps.cpp
- PPCTLSDynamicCall.cpp
- PPCVSXCopy.cpp
- PPCReduceCRLogicals.cpp
- PPCVSXFMAMutate.cpp
- PPCVSXSwapRemoval.cpp
- PPCExpandISEL.cpp
- PPCPreEmitPeephole.cpp
- )
-
-add_subdirectory(AsmParser)
-add_subdirectory(Disassembler)
-add_subdirectory(InstPrinter)
-add_subdirectory(MCTargetDesc)
-add_subdirectory(TargetInfo)
diff --git a/lib/Target/PowerPC/Disassembler/CMakeLists.txt b/lib/Target/PowerPC/Disassembler/CMakeLists.txt
deleted file mode 100644
index ca457df88d3e..000000000000
--- a/lib/Target/PowerPC/Disassembler/CMakeLists.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-add_llvm_library(LLVMPowerPCDisassembler
- PPCDisassembler.cpp
- )
diff --git a/lib/Target/PowerPC/Disassembler/LLVMBuild.txt b/lib/Target/PowerPC/Disassembler/LLVMBuild.txt
deleted file mode 100644
index ea3e7eaf839d..000000000000
--- a/lib/Target/PowerPC/Disassembler/LLVMBuild.txt
+++ /dev/null
@@ -1,23 +0,0 @@
-;===-- ./lib/Target/PowerPC/Disassembler/LLVMBuild.txt ---------*- Conf -*--===;
-;
-; The LLVM Compiler Infrastructure
-;
-; This file is distributed under the University of Illinois Open Source
-; License. See LICENSE.TXT for details.
-;
-;===------------------------------------------------------------------------===;
-;
-; This is an LLVMBuild description file for the components in this subdirectory.
-;
-; For more information on the LLVMBuild system, please see:
-;
-; http://llvm.org/docs/LLVMBuild.html
-;
-;===------------------------------------------------------------------------===;
-
-[component_0]
-type = Library
-name = PowerPCDisassembler
-parent = PowerPC
-required_libraries = MCDisassembler PowerPCInfo Support
-add_to_library_groups = PowerPC
diff --git a/lib/Target/PowerPC/InstPrinter/CMakeLists.txt b/lib/Target/PowerPC/InstPrinter/CMakeLists.txt
deleted file mode 100644
index ab30a110f40e..000000000000
--- a/lib/Target/PowerPC/InstPrinter/CMakeLists.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-add_llvm_library(LLVMPowerPCAsmPrinter
- PPCInstPrinter.cpp
- )
diff --git a/lib/Target/PowerPC/InstPrinter/LLVMBuild.txt b/lib/Target/PowerPC/InstPrinter/LLVMBuild.txt
deleted file mode 100644
index 7c691deafccf..000000000000
--- a/lib/Target/PowerPC/InstPrinter/LLVMBuild.txt
+++ /dev/null
@@ -1,23 +0,0 @@
-;===- ./lib/Target/PowerPC/InstPrinter/LLVMBuild.txt -----------*- Conf -*--===;
-;
-; The LLVM Compiler Infrastructure
-;
-; This file is distributed under the University of Illinois Open Source
-; License. See LICENSE.TXT for details.
-;
-;===------------------------------------------------------------------------===;
-;
-; This is an LLVMBuild description file for the components in this subdirectory.
-;
-; For more information on the LLVMBuild system, please see:
-;
-; http://llvm.org/docs/LLVMBuild.html
-;
-;===------------------------------------------------------------------------===;
-
-[component_0]
-type = Library
-name = PowerPCAsmPrinter
-parent = PowerPC
-required_libraries = MC Support
-add_to_library_groups = PowerPC
diff --git a/lib/Target/PowerPC/LLVMBuild.txt b/lib/Target/PowerPC/LLVMBuild.txt
deleted file mode 100644
index fd5fa560912f..000000000000
--- a/lib/Target/PowerPC/LLVMBuild.txt
+++ /dev/null
@@ -1,35 +0,0 @@
-;===- ./lib/Target/PowerPC/LLVMBuild.txt -----------------------*- Conf -*--===;
-;
-; The LLVM Compiler Infrastructure
-;
-; This file is distributed under the University of Illinois Open Source
-; License. See LICENSE.TXT for details.
-;
-;===------------------------------------------------------------------------===;
-;
-; This is an LLVMBuild description file for the components in this subdirectory.
-;
-; For more information on the LLVMBuild system, please see:
-;
-; http://llvm.org/docs/LLVMBuild.html
-;
-;===------------------------------------------------------------------------===;
-
-[common]
-subdirectories = AsmParser Disassembler InstPrinter MCTargetDesc TargetInfo
-
-[component_0]
-type = TargetGroup
-name = PowerPC
-parent = Target
-has_asmparser = 1
-has_asmprinter = 1
-has_disassembler = 1
-has_jit = 1
-
-[component_1]
-type = Library
-name = PowerPCCodeGen
-parent = PowerPC
-required_libraries = Analysis AsmPrinter CodeGen Core MC PowerPCAsmPrinter PowerPCDesc PowerPCInfo Scalar SelectionDAG Support Target TransformUtils
-add_to_library_groups = PowerPC
diff --git a/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt b/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt
deleted file mode 100644
index 3cea65ee4de6..000000000000
--- a/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt
+++ /dev/null
@@ -1,10 +0,0 @@
-add_llvm_library(LLVMPowerPCDesc
- PPCAsmBackend.cpp
- PPCMCTargetDesc.cpp
- PPCMCAsmInfo.cpp
- PPCMCCodeEmitter.cpp
- PPCMCExpr.cpp
- PPCPredicates.cpp
- PPCMachObjectWriter.cpp
- PPCELFObjectWriter.cpp
- )
diff --git a/lib/Target/PowerPC/MCTargetDesc/LLVMBuild.txt b/lib/Target/PowerPC/MCTargetDesc/LLVMBuild.txt
deleted file mode 100644
index d3a567d1581d..000000000000
--- a/lib/Target/PowerPC/MCTargetDesc/LLVMBuild.txt
+++ /dev/null
@@ -1,23 +0,0 @@
-;===- ./lib/Target/PowerPC/MCTargetDesc/LLVMBuild.txt ----------*- Conf -*--===;
-;
-; The LLVM Compiler Infrastructure
-;
-; This file is distributed under the University of Illinois Open Source
-; License. See LICENSE.TXT for details.
-;
-;===------------------------------------------------------------------------===;
-;
-; This is an LLVMBuild description file for the components in this subdirectory.
-;
-; For more information on the LLVMBuild system, please see:
-;
-; http://llvm.org/docs/LLVMBuild.html
-;
-;===------------------------------------------------------------------------===;
-
-[component_0]
-type = Library
-name = PowerPCDesc
-parent = PowerPC
-required_libraries = MC PowerPCAsmPrinter PowerPCInfo Support
-add_to_library_groups = PowerPC
diff --git a/lib/Target/PowerPC/README.txt b/lib/Target/PowerPC/README.txt
deleted file mode 100644
index b4bf635dc2c7..000000000000
--- a/lib/Target/PowerPC/README.txt
+++ /dev/null
@@ -1,665 +0,0 @@
-//===- README.txt - Notes for improving PowerPC-specific code gen ---------===//
-
-TODO:
-* lmw/stmw pass a la arm load store optimizer for prolog/epilog
-
-===-------------------------------------------------------------------------===
-
-This code:
-
-unsigned add32carry(unsigned sum, unsigned x) {
- unsigned z = sum + x;
- if (sum + x < x)
- z++;
- return z;
-}
-
-Should compile to something like:
-
- addc r3,r3,r4
- addze r3,r3
-
-instead we get:
-
- add r3, r4, r3
- cmplw cr7, r3, r4
- mfcr r4 ; 1
- rlwinm r4, r4, 29, 31, 31
- add r3, r3, r4
-
-Ick.
-
-===-------------------------------------------------------------------------===
-
-We compile the hottest inner loop of viterbi to:
-
- li r6, 0
- b LBB1_84 ;bb432.i
-LBB1_83: ;bb420.i
- lbzx r8, r5, r7
- addi r6, r7, 1
- stbx r8, r4, r7
-LBB1_84: ;bb432.i
- mr r7, r6
- cmplwi cr0, r7, 143
- bne cr0, LBB1_83 ;bb420.i
-
-The CBE manages to produce:
-
- li r0, 143
- mtctr r0
-loop:
- lbzx r2, r2, r11
- stbx r0, r2, r9
- addi r2, r2, 1
- bdz later
- b loop
-
-This could be much better (bdnz instead of bdz) but it still beats us. If we
-produced this with bdnz, the loop would be a single dispatch group.
-
-===-------------------------------------------------------------------------===
-
-Lump the constant pool for each function into ONE pic object, and reference
-pieces of it as offsets from the start. For functions like this (contrived
-to have lots of constants obviously):
-
-double X(double Y) { return (Y*1.23 + 4.512)*2.34 + 14.38; }
-
-We generate:
-
-_X:
- lis r2, ha16(.CPI_X_0)
- lfd f0, lo16(.CPI_X_0)(r2)
- lis r2, ha16(.CPI_X_1)
- lfd f2, lo16(.CPI_X_1)(r2)
- fmadd f0, f1, f0, f2
- lis r2, ha16(.CPI_X_2)
- lfd f1, lo16(.CPI_X_2)(r2)
- lis r2, ha16(.CPI_X_3)
- lfd f2, lo16(.CPI_X_3)(r2)
- fmadd f1, f0, f1, f2
- blr
-
-It would be better to materialize .CPI_X into a register, then use immediates
-off of the register to avoid the lis's. This is even more important in PIC
-mode.
-
-Note that this (and the static variable version) is discussed here for GCC:
-http://gcc.gnu.org/ml/gcc-patches/2006-02/msg00133.html
-
-Here's another example (the sgn function):
-double testf(double a) {
- return a == 0.0 ? 0.0 : (a > 0.0 ? 1.0 : -1.0);
-}
-
-it produces a BB like this:
-LBB1_1: ; cond_true
- lis r2, ha16(LCPI1_0)
- lfs f0, lo16(LCPI1_0)(r2)
- lis r2, ha16(LCPI1_1)
- lis r3, ha16(LCPI1_2)
- lfs f2, lo16(LCPI1_2)(r3)
- lfs f3, lo16(LCPI1_1)(r2)
- fsub f0, f0, f1
- fsel f1, f0, f2, f3
- blr
-
-===-------------------------------------------------------------------------===
-
-PIC Code Gen IPO optimization:
-
-Squish small scalar globals together into a single global struct, allowing the
-address of the struct to be CSE'd, avoiding PIC accesses (also reduces the size
-of the GOT on targets with one).
-
-Note that this is discussed here for GCC:
-http://gcc.gnu.org/ml/gcc-patches/2006-02/msg00133.html
-
-===-------------------------------------------------------------------------===
-
-Darwin Stub removal:
-
-We still generate calls to foo$stub, and stubs, on Darwin. This is not
-necessary when building with the Leopard (10.5) or later linker, as stubs are
-generated by ld when necessary. Parameterizing this based on the deployment
-target (-mmacosx-version-min) is probably enough. x86-32 does this right, see
-its logic.
-
-===-------------------------------------------------------------------------===
-
-Darwin Stub LICM optimization:
-
-Loops like this:
-
- for (...) bar();
-
-Have to go through an indirect stub if bar is external or linkonce. It would
-be better to compile it as:
-
- fp = &bar;
- for (...) fp();
-
-which only computes the address of bar once (instead of each time through the
-stub). This is Darwin specific and would have to be done in the code generator.
-Probably not a win on x86.
-
-===-------------------------------------------------------------------------===
-
-Simple IPO for argument passing, change:
- void foo(int X, double Y, int Z) -> void foo(int X, int Z, double Y)
-
-the Darwin ABI specifies that any integer arguments in the first 32 bytes worth
-of arguments get assigned to r3 through r10. That is, if you have a function
-foo(int, double, int) you get r3, f1, r6, since the 64 bit double ate up the
-argument bytes for r4 and r5. The trick then would be to shuffle the argument
-order for functions we can internalize so that the maximum number of
-integers/pointers get passed in regs before you see any of the fp arguments.
-
-Instead of implementing this, it would actually probably be easier to just
-implement a PPC fastcc, where we could do whatever we wanted to the CC,
-including having this work sanely.
-
-===-------------------------------------------------------------------------===
-
-Fix Darwin FP-In-Integer Registers ABI
-
-Darwin passes doubles in structures in integer registers, which is very very
-bad. Add something like a BITCAST to LLVM, then do an i-p transformation that
-percolates these things out of functions.
-
-Check out how horrible this is:
-http://gcc.gnu.org/ml/gcc/2005-10/msg01036.html
-
-This is an extension of "interprocedural CC unmunging" that can't be done with
-just fastcc.
-
-===-------------------------------------------------------------------------===
-
-Fold add and sub with constant into non-extern, non-weak addresses so this:
-
-static int a;
-void bar(int b) { a = b; }
-void foo(unsigned char *c) {
- *c = a;
-}
-
-So that
-
-_foo:
- lis r2, ha16(_a)
- la r2, lo16(_a)(r2)
- lbz r2, 3(r2)
- stb r2, 0(r3)
- blr
-
-Becomes
-
-_foo:
- lis r2, ha16(_a+3)
- lbz r2, lo16(_a+3)(r2)
- stb r2, 0(r3)
- blr
-
-===-------------------------------------------------------------------------===
-
-We should compile these two functions to the same thing:
-
-#include <stdlib.h>
-void f(int a, int b, int *P) {
- *P = (a-b)>=0?(a-b):(b-a);
-}
-void g(int a, int b, int *P) {
- *P = abs(a-b);
-}
-
-Further, they should compile to something better than:
-
-_g:
- subf r2, r4, r3
- subfic r3, r2, 0
- cmpwi cr0, r2, -1
- bgt cr0, LBB2_2 ; entry
-LBB2_1: ; entry
- mr r2, r3
-LBB2_2: ; entry
- stw r2, 0(r5)
- blr
-
-GCC produces:
-
-_g:
- subf r4,r4,r3
- srawi r2,r4,31
- xor r0,r2,r4
- subf r0,r2,r0
- stw r0,0(r5)
- blr
-
-... which is much nicer.
-
-This theoretically may help improve twolf slightly (used in dimbox.c:142?).
-
-===-------------------------------------------------------------------------===
-
-PR5945: This:
-define i32 @clamp0g(i32 %a) {
-entry:
- %cmp = icmp slt i32 %a, 0
- %sel = select i1 %cmp, i32 0, i32 %a
- ret i32 %sel
-}
-
-Is compiled to this with the PowerPC (32-bit) backend:
-
-_clamp0g:
- cmpwi cr0, r3, 0
- li r2, 0
- blt cr0, LBB1_2
-; %bb.1: ; %entry
- mr r2, r3
-LBB1_2: ; %entry
- mr r3, r2
- blr
-
-This could be reduced to the much simpler:
-
-_clamp0g:
- srawi r2, r3, 31
- andc r3, r3, r2
- blr
-
-===-------------------------------------------------------------------------===
-
-int foo(int N, int ***W, int **TK, int X) {
- int t, i;
-
- for (t = 0; t < N; ++t)
- for (i = 0; i < 4; ++i)
- W[t / X][i][t % X] = TK[i][t];
-
- return 5;
-}
-
-We generate relatively atrocious code for this loop compared to gcc.
-
-We could also strength reduce the rem and the div:
-http://www.lcs.mit.edu/pubs/pdf/MIT-LCS-TM-600.pdf
-
-===-------------------------------------------------------------------------===
-
-We generate ugly code for this:
-
-void func(unsigned int *ret, float dx, float dy, float dz, float dw) {
- unsigned code = 0;
- if(dx < -dw) code |= 1;
- if(dx > dw) code |= 2;
- if(dy < -dw) code |= 4;
- if(dy > dw) code |= 8;
- if(dz < -dw) code |= 16;
- if(dz > dw) code |= 32;
- *ret = code;
-}
-
-===-------------------------------------------------------------------------===
-
-%struct.B = type { i8, [3 x i8] }
-
-define void @bar(%struct.B* %b) {
-entry:
- %tmp = bitcast %struct.B* %b to i32* ; <uint*> [#uses=1]
- %tmp = load i32* %tmp ; <uint> [#uses=1]
- %tmp3 = bitcast %struct.B* %b to i32* ; <uint*> [#uses=1]
- %tmp4 = load i32* %tmp3 ; <uint> [#uses=1]
- %tmp8 = bitcast %struct.B* %b to i32* ; <uint*> [#uses=2]
- %tmp9 = load i32* %tmp8 ; <uint> [#uses=1]
- %tmp4.mask17 = shl i32 %tmp4, i8 1 ; <uint> [#uses=1]
- %tmp1415 = and i32 %tmp4.mask17, 2147483648 ; <uint> [#uses=1]
- %tmp.masked = and i32 %tmp, 2147483648 ; <uint> [#uses=1]
- %tmp11 = or i32 %tmp1415, %tmp.masked ; <uint> [#uses=1]
- %tmp12 = and i32 %tmp9, 2147483647 ; <uint> [#uses=1]
- %tmp13 = or i32 %tmp12, %tmp11 ; <uint> [#uses=1]
- store i32 %tmp13, i32* %tmp8
- ret void
-}
-
-We emit:
-
-_foo:
- lwz r2, 0(r3)
- slwi r4, r2, 1
- or r4, r4, r2
- rlwimi r2, r4, 0, 0, 0
- stw r2, 0(r3)
- blr
-
-We could collapse a bunch of those ORs and ANDs and generate the following
-equivalent code:
-
-_foo:
- lwz r2, 0(r3)
- rlwinm r4, r2, 1, 0, 0
- or r2, r2, r4
- stw r2, 0(r3)
- blr
-
-===-------------------------------------------------------------------------===
-
-Consider a function like this:
-
-float foo(float X) { return X + 1234.4123f; }
-
-The FP constant ends up in the constant pool, so we need to get the LR register.
- This ends up producing code like this:
-
-_foo:
-.LBB_foo_0: ; entry
- mflr r11
-*** stw r11, 8(r1)
- bl "L00000$pb"
-"L00000$pb":
- mflr r2
- addis r2, r2, ha16(.CPI_foo_0-"L00000$pb")
- lfs f0, lo16(.CPI_foo_0-"L00000$pb")(r2)
- fadds f1, f1, f0
-*** lwz r11, 8(r1)
- mtlr r11
- blr
-
-This is functional, but there is no reason to spill the LR register all the way
-to the stack (the two marked instrs): spilling it to a GPR is quite enough.
-
-Implementing this will require some codegen improvements. Nate writes:
-
-"So basically what we need to support the "no stack frame save and restore" is a
-generalization of the LR optimization to "callee-save regs".
-
-Currently, we have LR marked as a callee-save reg. The register allocator sees
-that it's callee save, and spills it directly to the stack.
-
-Ideally, something like this would happen:
-
-LR would be in a separate register class from the GPRs. The class of LR would be
-marked "unspillable". When the register allocator came across an unspillable
-reg, it would ask "what is the best class to copy this into that I *can* spill"
-If it gets a class back, which it will in this case (the gprs), it grabs a free
-register of that class. If it is then later necessary to spill that reg, so be
-it."
-
-===-------------------------------------------------------------------------===
-
-We compile this:
-int test(_Bool X) {
- return X ? 524288 : 0;
-}
-
-to:
-_test:
- cmplwi cr0, r3, 0
- lis r2, 8
- li r3, 0
- beq cr0, LBB1_2 ;entry
-LBB1_1: ;entry
- mr r3, r2
-LBB1_2: ;entry
- blr
-
-instead of:
-_test:
- addic r2,r3,-1
- subfe r0,r2,r3
- slwi r3,r0,19
- blr
-
-This sort of thing occurs a lot due to globalopt.
-
-===-------------------------------------------------------------------------===
-
-We compile:
-
-define i32 @bar(i32 %x) nounwind readnone ssp {
-entry:
- %0 = icmp eq i32 %x, 0 ; <i1> [#uses=1]
- %neg = sext i1 %0 to i32 ; <i32> [#uses=1]
- ret i32 %neg
-}
-
-to:
-
-_bar:
- cntlzw r2, r3
- slwi r2, r2, 26
- srawi r3, r2, 31
- blr
-
-it would be better to produce:
-
-_bar:
- addic r3,r3,-1
- subfe r3,r3,r3
- blr
-
-===-------------------------------------------------------------------------===
-
-We generate horrible ppc code for this:
-
-#define N 2000000
-double a[N],c[N];
-void simpleloop() {
- int j;
- for (j=0; j<N; j++)
- c[j] = a[j];
-}
-
-LBB1_1: ;bb
- lfdx f0, r3, r4
- addi r5, r5, 1 ;; Extra IV for the exit value compare.
- stfdx f0, r2, r4
- addi r4, r4, 8
-
- xoris r6, r5, 30 ;; This is due to a large immediate.
- cmplwi cr0, r6, 33920
- bne cr0, LBB1_1
-
-//===---------------------------------------------------------------------===//
-
-This:
- #include <algorithm>
- inline std::pair<unsigned, bool> full_add(unsigned a, unsigned b)
- { return std::make_pair(a + b, a + b < a); }
- bool no_overflow(unsigned a, unsigned b)
- { return !full_add(a, b).second; }
-
-Should compile to:
-
-__Z11no_overflowjj:
- add r4,r3,r4
- subfc r3,r3,r4
- li r3,0
- adde r3,r3,r3
- blr
-
-(or better) not:
-
-__Z11no_overflowjj:
- add r2, r4, r3
- cmplw cr7, r2, r3
- mfcr r2
- rlwinm r2, r2, 29, 31, 31
- xori r3, r2, 1
- blr
-
-//===---------------------------------------------------------------------===//
-
-We compile some FP comparisons into an mfcr with two rlwinms and an or. For
-example:
-#include <math.h>
-int test(double x, double y) { return islessequal(x, y);}
-int test2(double x, double y) { return islessgreater(x, y);}
-int test3(double x, double y) { return !islessequal(x, y);}
-
-Compiles into (all three are similar, but the bits differ):
-
-_test:
- fcmpu cr7, f1, f2
- mfcr r2
- rlwinm r3, r2, 29, 31, 31
- rlwinm r2, r2, 31, 31, 31
- or r3, r2, r3
- blr
-
-GCC compiles this into:
-
- _test:
- fcmpu cr7,f1,f2
- cror 30,28,30
- mfcr r3
- rlwinm r3,r3,31,1
- blr
-
-which is more efficient and can use mfocr. See PR642 for some more context.
-
-//===---------------------------------------------------------------------===//
-
-void foo(float *data, float d) {
- long i;
- for (i = 0; i < 8000; i++)
- data[i] = d;
-}
-void foo2(float *data, float d) {
- long i;
- data--;
- for (i = 0; i < 8000; i++) {
- data[1] = d;
- data++;
- }
-}
-
-These compile to:
-
-_foo:
- li r2, 0
-LBB1_1: ; bb
- addi r4, r2, 4
- stfsx f1, r3, r2
- cmplwi cr0, r4, 32000
- mr r2, r4
- bne cr0, LBB1_1 ; bb
- blr
-_foo2:
- li r2, 0
-LBB2_1: ; bb
- addi r4, r2, 4
- stfsx f1, r3, r2
- cmplwi cr0, r4, 32000
- mr r2, r4
- bne cr0, LBB2_1 ; bb
- blr
-
-The 'mr' could be eliminated by folding the add into the cmp better.
-
-//===---------------------------------------------------------------------===//
-Codegen for the following (low-probability) case deteriorated considerably
-when the correctness fixes for unordered comparisons went in (PR 642, 58871).
-It should be possible to recover the code quality described in the comments.
-
-; RUN: llvm-as < %s | llc -march=ppc32 | grep or | count 3
-; This should produce one 'or' or 'cror' instruction per function.
-
-; RUN: llvm-as < %s | llc -march=ppc32 | grep mfcr | count 3
-; PR2964
-
-define i32 @test(double %x, double %y) nounwind {
-entry:
- %tmp3 = fcmp ole double %x, %y ; <i1> [#uses=1]
- %tmp345 = zext i1 %tmp3 to i32 ; <i32> [#uses=1]
- ret i32 %tmp345
-}
-
-define i32 @test2(double %x, double %y) nounwind {
-entry:
- %tmp3 = fcmp one double %x, %y ; <i1> [#uses=1]
- %tmp345 = zext i1 %tmp3 to i32 ; <i32> [#uses=1]
- ret i32 %tmp345
-}
-
-define i32 @test3(double %x, double %y) nounwind {
-entry:
- %tmp3 = fcmp ugt double %x, %y ; <i1> [#uses=1]
- %tmp34 = zext i1 %tmp3 to i32 ; <i32> [#uses=1]
- ret i32 %tmp34
-}
-
-//===---------------------------------------------------------------------===//
-for the following code:
-
-void foo (float *__restrict__ a, int *__restrict__ b, int n) {
- a[n] = b[n] * 2.321;
-}
-
-we load b[n] into a GPR, then move it to a VSX register and convert it to float.
-We should use VSX scalar integer load instructions to avoid direct moves.
-
-//===----------------------------------------------------------------------===//
-; RUN: llvm-as < %s | llc -march=ppc32 | not grep fneg
-
-; This could generate FSEL with appropriate flags (FSEL is not IEEE-safe, and
-; should not be generated except with -enable-finite-only-fp-math or the like).
-; With the correctness fixes for PR642 (58871) LowerSELECT_CC would need to
-; recognize a more elaborate tree than a simple SETxx.
-
-define double @test_FNEG_sel(double %A, double %B, double %C) {
- %D = fsub double -0.000000e+00, %A ; <double> [#uses=1]
- %Cond = fcmp ugt double %D, -0.000000e+00 ; <i1> [#uses=1]
- %E = select i1 %Cond, double %B, double %C ; <double> [#uses=1]
- ret double %E
-}
-
-//===----------------------------------------------------------------------===//
-The save/restore sequence for CR in prolog/epilog is terrible:
-- Each CR subreg is saved individually, rather than doing one save as a unit.
-- On Darwin, the save is done after the decrement of SP, which means the offset
-from SP of the save slot can be too big for a store instruction, which means we
-need an additional register (currently hacked in 96015+96020; the solution there
-is correct, but poor).
-- On SVR4 the same thing can happen, and I don't think saving before the SP
-decrement is safe on that target, as there is no red zone. This is currently
-broken AFAIK, although it's not a target I can exercise.
-The following demonstrates the problem:
-extern void bar(char *p);
-void foo() {
- char x[100000];
- bar(x);
- __asm__("" ::: "cr2");
-}
-
-//===-------------------------------------------------------------------------===
-Naming convention for instruction formats is very haphazard.
-We have agreed on a naming scheme as follows:
-
-<INST_form>{_<OP_type><OP_len>}+
-
-Where:
-INST_form is the instruction format (X-form, etc.)
-OP_type is the operand type - one of OPC (opcode), RD (register destination),
- RS (register source),
- RDp (destination register pair),
- RSp (source register pair), IM (immediate),
- XO (extended opcode)
-OP_len is the length of the operand in bits
-
-VSX register operands would be of length 6 (split across two fields),
-condition register fields of length 3.
-We would not need to denote reserved fields in names of instruction formats.
-
-//===----------------------------------------------------------------------===//
-
-Instruction fusion was introduced in ISA 2.06 and more opportunities added in
-ISA 2.07. LLVM needs to add infrastructure to recognize fusion opportunities
-and force instruction pairs to be scheduled together.
-
------------------------------------------------------------------------------
-
-More general handling of any_extend and zero_extend:
-
-See https://reviews.llvm.org/D24924#555306
diff --git a/lib/Target/PowerPC/README_ALTIVEC.txt b/lib/Target/PowerPC/README_ALTIVEC.txt
deleted file mode 100644
index c38e01923161..000000000000
--- a/lib/Target/PowerPC/README_ALTIVEC.txt
+++ /dev/null
@@ -1,343 +0,0 @@
-//===- README_ALTIVEC.txt - Notes for improving Altivec code gen ----------===//
-
-Implement PPCInstrInfo::isLoadFromStackSlot/isStoreToStackSlot for vector
-registers, to generate better spill code.
-
-//===----------------------------------------------------------------------===//
-
-The first should be a single lvx from the constant pool, the second should be
-a xor/stvx:
-
-void foo(void) {
- int x[8] __attribute__((aligned(128))) = { 1, 1, 1, 17, 1, 1, 1, 1 };
- bar (x);
-}
-
-#include <string.h>
-void foo(void) {
- int x[8] __attribute__((aligned(128)));
- memset (x, 0, sizeof (x));
- bar (x);
-}
-
-//===----------------------------------------------------------------------===//
-
-Altivec: Codegen'ing MUL with vector FMADD should add -0.0, not 0.0:
-http://gcc.gnu.org/bugzilla/show_bug.cgi?id=8763
-
-When -ffast-math is on, we can use 0.0.
-
-//===----------------------------------------------------------------------===//
-
- Consider this:
- v4f32 Vector;
- v4f32 Vector2 = { Vector.X, Vector.X, Vector.X, Vector.X };
-
-Since we know that "Vector" is 16-byte aligned and we know the element offset
-of ".X", we should change the load into a lve*x instruction, instead of doing
-a load/store/lve*x sequence.
-
-//===----------------------------------------------------------------------===//
-
-For functions that use altivec AND have calls, we are VRSAVE'ing all call
-clobbered regs.
-
-//===----------------------------------------------------------------------===//
-
-Implement passing vectors by value into calls and receiving them as arguments.
-
-//===----------------------------------------------------------------------===//
-
-GCC apparently tries to codegen { C1, C2, Variable, C3 } as a constant pool load
-of C1/C2/C3, then a load and vperm of Variable.
-
-//===----------------------------------------------------------------------===//
-
-We need a way to teach tblgen that some operands of an intrinsic are required to
-be constants. The verifier should enforce this constraint.
-
-//===----------------------------------------------------------------------===//
-
-We currently codegen SCALAR_TO_VECTOR as a store of the scalar to a 16-byte
-aligned stack slot, followed by a load/vperm. We should probably just store it
-to a scalar stack slot, then use lvsl/vperm to load it. If the value is already
-in memory this is a big win.
-
-//===----------------------------------------------------------------------===//
-
-extract_vector_elt of an arbitrary constant vector can be done with the
-following instructions:
-
-vTemp = vec_splat(v0,2); // 2 is the element the src is in.
-vec_ste(&destloc,0,vTemp);
-
-We can do an arbitrary non-constant value by using lvsr/perm/ste.
-
-//===----------------------------------------------------------------------===//
-
-If we want to tie instruction selection into the scheduler, we can do some
-constant formation with different instructions. For example, we can generate
-"vsplti -1" with "vcmpequw R,R" and 1,1,1,1 with "vsubcuw R,R", and 0,0,0,0 with
-"vsplti 0" or "vxor", each of which uses a different execution unit, and thus could
-help scheduling.
-
-This is probably only reasonable for a post-pass scheduler.
-
-//===----------------------------------------------------------------------===//
-
-For this function:
-
-void test(vector float *A, vector float *B) {
- vector float C = (vector float)vec_cmpeq(*A, *B);
- if (!vec_any_eq(*A, *B))
- *B = (vector float){0,0,0,0};
- *A = C;
-}
-
-we get the following basic block:
-
- ...
- lvx v2, 0, r4
- lvx v3, 0, r3
- vcmpeqfp v4, v3, v2
- vcmpeqfp. v2, v3, v2
- bne cr6, LBB1_2 ; cond_next
-
-The vcmpeqfp/vcmpeqfp. instructions currently cannot be merged when the
-vcmpeqfp. result is used by a branch. This can be improved.
-
-//===----------------------------------------------------------------------===//
-
-The code generated for this is truly awful:
-
-vector float test(float a, float b) {
- return (vector float){ 0.0, a, 0.0, 0.0};
-}
-
-LCPI1_0: ; float
- .space 4
- .text
- .globl _test
- .align 4
-_test:
- mfspr r2, 256
- oris r3, r2, 4096
- mtspr 256, r3
- lis r3, ha16(LCPI1_0)
- addi r4, r1, -32
- stfs f1, -16(r1)
- addi r5, r1, -16
- lfs f0, lo16(LCPI1_0)(r3)
- stfs f0, -32(r1)
- lvx v2, 0, r4
- lvx v3, 0, r5
- vmrghw v3, v3, v2
- vspltw v2, v2, 0
- vmrghw v2, v2, v3
- mtspr 256, r2
- blr
-
-//===----------------------------------------------------------------------===//
-
-int foo(vector float *x, vector float *y) {
- if (vec_all_eq(*x,*y)) return 3245;
- else return 12;
-}
-
-A predicate compare being used in a select_cc should have the same peephole
-applied to it as a predicate compare used by a br_cc. There should be no
-mfcr here:
-
-_foo:
- mfspr r2, 256
- oris r5, r2, 12288
- mtspr 256, r5
- li r5, 12
- li r6, 3245
- lvx v2, 0, r4
- lvx v3, 0, r3
- vcmpeqfp. v2, v3, v2
- mfcr r3, 2
- rlwinm r3, r3, 25, 31, 31
- cmpwi cr0, r3, 0
- bne cr0, LBB1_2 ; entry
-LBB1_1: ; entry
- mr r6, r5
-LBB1_2: ; entry
- mr r3, r6
- mtspr 256, r2
- blr
-
-//===----------------------------------------------------------------------===//
-
-CodeGen/PowerPC/vec_constants.ll has an and operation that should be
-codegen'd to andc. The issue is that the 'all ones' build vector is
-SelectNodeTo'd a VSPLTISB instruction node before the and/xor is selected
-which prevents the vnot pattern from matching.
-
-
-//===----------------------------------------------------------------------===//
-
-An alternative to the store/store/load approach for illegal insert element
-lowering would be:
-
-1. store element to any ol' slot
-2. lvx the slot
-3. lvsl 0; splat index; vcmpeq to generate a select mask
-4. lvsl slot + x; vperm to rotate result into correct slot
-5. vsel result together.
-
-//===----------------------------------------------------------------------===//
-
-Should codegen branches on vec_any/vec_all to avoid mfcr. Two examples:
-
-#include <altivec.h>
- int f(vector float a, vector float b)
- {
- int aa = 0;
- if (vec_all_ge(a, b))
- aa |= 0x1;
- if (vec_any_ge(a,b))
- aa |= 0x2;
- return aa;
-}
-
-vector float f(vector float a, vector float b) {
- if (vec_any_eq(a, b))
- return a;
- else
- return b;
-}
-
-//===----------------------------------------------------------------------===//
-
-We should do a little better with eliminating dead stores.
-The stores to the stack are dead since %a and %b are not needed
-
-; Function Attrs: nounwind
-define <16 x i8> @test_vpmsumb() #0 {
- entry:
- %a = alloca <16 x i8>, align 16
- %b = alloca <16 x i8>, align 16
- store <16 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16>, <16 x i8>* %a, align 16
- store <16 x i8> <i8 113, i8 114, i8 115, i8 116, i8 117, i8 118, i8 119, i8 120, i8 121, i8 122, i8 123, i8 124, i8 125, i8 126, i8 127, i8 112>, <16 x i8>* %b, align 16
- %0 = load <16 x i8>* %a, align 16
- %1 = load <16 x i8>* %b, align 16
- %2 = call <16 x i8> @llvm.ppc.altivec.crypto.vpmsumb(<16 x i8> %0, <16 x i8> %1)
- ret <16 x i8> %2
-}
-
-
-; Function Attrs: nounwind readnone
-declare <16 x i8> @llvm.ppc.altivec.crypto.vpmsumb(<16 x i8>, <16 x i8>) #1
-
-
-Produces the following code with -mtriple=powerpc64-unknown-linux-gnu:
-# %bb.0: # %entry
- addis 3, 2, .LCPI0_0@toc@ha
- addis 4, 2, .LCPI0_1@toc@ha
- addi 3, 3, .LCPI0_0@toc@l
- addi 4, 4, .LCPI0_1@toc@l
- lxvw4x 0, 0, 3
- addi 3, 1, -16
- lxvw4x 35, 0, 4
- stxvw4x 0, 0, 3
- ori 2, 2, 0
- lxvw4x 34, 0, 3
- addi 3, 1, -32
- stxvw4x 35, 0, 3
- vpmsumb 2, 2, 3
- blr
- .long 0
- .quad 0
-
-The two stxvw4x instructions are not needed.
-With -mtriple=powerpc64le-unknown-linux-gnu, the associated permutes
-are present too.
-
-//===----------------------------------------------------------------------===//
-
-The following example is found in test/CodeGen/PowerPC/vec_add_sub_doubleword.ll:
-
-define <2 x i64> @increment_by_val(<2 x i64> %x, i64 %val) nounwind {
- %tmpvec = insertelement <2 x i64> <i64 0, i64 0>, i64 %val, i32 0
- %tmpvec2 = insertelement <2 x i64> %tmpvec, i64 %val, i32 1
- %result = add <2 x i64> %x, %tmpvec2
- ret <2 x i64> %result
-
-This will generate the following instruction sequence:
- std 5, -8(1)
- std 5, -16(1)
- addi 3, 1, -16
- ori 2, 2, 0
- lxvd2x 35, 0, 3
- vaddudm 2, 2, 3
- blr
-
-This will almost certainly cause a load-hit-store hazard.
-Since val is a value parameter, it should not need to be saved onto
-the stack, unless it's being done to set up the vector register. Instead,
-it would be better to splat the value into a vector register, and then
-remove the (dead) stores to the stack.
-
-//===----------------------------------------------------------------------===//
-
-At the moment we always generate a lxsdx in preference to lfd, or stxsdx in
-preference to stfd. When we have a reg-immediate addressing mode, this is a
-poor choice, since we have to load the address into an index register. This
-should be fixed for P7/P8.
-
-//===----------------------------------------------------------------------===//
-
-Right now, ShuffleKind 0 is supported only on BE, and ShuffleKind 2 only on LE.
-However, we could actually support both kinds on either endianness, if we check
-for the appropriate shufflevector pattern for each case ... this would cause
-some additional shufflevectors to be recognized and implemented via the
-"swapped" form.
-
-//===----------------------------------------------------------------------===//
-
-There is a utility program called PerfectShuffle that generates a table of the
-shortest instruction sequence for implementing a shufflevector operation on
-PowerPC. However, this was designed for big-endian code generation. We could
-modify this program to create a little endian version of the table. The table
-is used in PPCISelLowering.cpp, PPCTargetLowering::LowerVECTOR_SHUFFLE().
-
-//===----------------------------------------------------------------------===//
-
-Opportunities to use instructions from PPCInstrVSX.td during code gen
- - Conversion instructions (Sections 7.6.1.5 and 7.6.1.6 of ISA 2.07)
- - Scalar comparisons (xscmpodp and xscmpudp)
- - Min and max (xsmaxdp, xsmindp, xvmaxdp, xvmindp, xvmaxsp, xvminsp)
-
-Related to this: we currently do not generate the lxvw4x instruction for either
-v4f32 or v4i32, probably because adding a dag pattern to the recognizer requires
-a single target type. This should probably be addressed in the PPCISelDAGToDAG logic.
-
-//===----------------------------------------------------------------------===//
-
-Currently EXTRACT_VECTOR_ELT and INSERT_VECTOR_ELT are type-legal only
-for v2f64 with VSX available. We should create custom lowering
-support for the other vector types. Without this support, we generate
-sequences with load-hit-store hazards.
-
-v4f32 can be supported with VSX by shifting the correct element into
-big-endian lane 0, using xscvspdpn to produce a double-precision
-representation of the single-precision value in big-endian
-double-precision lane 0, and reinterpreting lane 0 as an FPR or
-vector-scalar register.
-
-v2i64 can be supported with VSX and P8Vector in the same manner as
-v2f64, followed by a direct move to a GPR.
-
-v4i32 can be supported with VSX and P8Vector by shifting the correct
-element into big-endian lane 1, using a direct move to a GPR, and
-sign-extending the 32-bit result to 64 bits.
-
-v8i16 can be supported with VSX and P8Vector by shifting the correct
-element into big-endian lane 3, using a direct move to a GPR, and
-sign-extending the 16-bit result to 64 bits.
-
-v16i8 can be supported with VSX and P8Vector by shifting the correct
-element into big-endian lane 7, using a direct move to a GPR, and
-sign-extending the 8-bit result to 64 bits.
diff --git a/lib/Target/PowerPC/TargetInfo/CMakeLists.txt b/lib/Target/PowerPC/TargetInfo/CMakeLists.txt
deleted file mode 100644
index c9548c7fe0cd..000000000000
--- a/lib/Target/PowerPC/TargetInfo/CMakeLists.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-add_llvm_library(LLVMPowerPCInfo
- PowerPCTargetInfo.cpp
- )
diff --git a/lib/Target/PowerPC/TargetInfo/LLVMBuild.txt b/lib/Target/PowerPC/TargetInfo/LLVMBuild.txt
deleted file mode 100644
index 410234686400..000000000000
--- a/lib/Target/PowerPC/TargetInfo/LLVMBuild.txt
+++ /dev/null
@@ -1,23 +0,0 @@
-;===- ./lib/Target/PowerPC/TargetInfo/LLVMBuild.txt ------------*- Conf -*--===;
-;
-; The LLVM Compiler Infrastructure
-;
-; This file is distributed under the University of Illinois Open Source
-; License. See LICENSE.TXT for details.
-;
-;===------------------------------------------------------------------------===;
-;
-; This is an LLVMBuild description file for the components in this subdirectory.
-;
-; For more information on the LLVMBuild system, please see:
-;
-; http://llvm.org/docs/LLVMBuild.html
-;
-;===------------------------------------------------------------------------===;
-
-[component_0]
-type = Library
-name = PowerPCInfo
-parent = PowerPC
-required_libraries = Support
-add_to_library_groups = PowerPC