diff options
| author | Dimitry Andric <dim@FreeBSD.org> | 2019-08-20 17:58:59 +0000 |
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2019-08-20 17:58:59 +0000 |
| commit | 1a56a5ead7a2e84bee8240f5f6b033b5f1707154 (patch) | |
| tree | 2f526c9cfcb089e51c33d6e1f0d51b10bda34714 /lib/Target/PowerPC | |
| parent | d8e91e46262bc44006913e6796843909f1ac7bcd (diff) | |
Notes
Diffstat (limited to 'lib/Target/PowerPC')
| -rw-r--r-- | lib/Target/PowerPC/AsmParser/CMakeLists.txt | 3 | ||||
| -rw-r--r-- | lib/Target/PowerPC/AsmParser/LLVMBuild.txt | 23 | ||||
| -rw-r--r-- | lib/Target/PowerPC/CMakeLists.txt | 55 | ||||
| -rw-r--r-- | lib/Target/PowerPC/Disassembler/CMakeLists.txt | 3 | ||||
| -rw-r--r-- | lib/Target/PowerPC/Disassembler/LLVMBuild.txt | 23 | ||||
| -rw-r--r-- | lib/Target/PowerPC/InstPrinter/CMakeLists.txt | 3 | ||||
| -rw-r--r-- | lib/Target/PowerPC/InstPrinter/LLVMBuild.txt | 23 | ||||
| -rw-r--r-- | lib/Target/PowerPC/LLVMBuild.txt | 35 | ||||
| -rw-r--r-- | lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt | 10 | ||||
| -rw-r--r-- | lib/Target/PowerPC/MCTargetDesc/LLVMBuild.txt | 23 | ||||
| -rw-r--r-- | lib/Target/PowerPC/README.txt | 665 | ||||
| -rw-r--r-- | lib/Target/PowerPC/README_ALTIVEC.txt | 343 | ||||
| -rw-r--r-- | lib/Target/PowerPC/TargetInfo/CMakeLists.txt | 3 | ||||
| -rw-r--r-- | lib/Target/PowerPC/TargetInfo/LLVMBuild.txt | 23 |
14 files changed, 0 insertions, 1235 deletions
diff --git a/lib/Target/PowerPC/AsmParser/CMakeLists.txt b/lib/Target/PowerPC/AsmParser/CMakeLists.txt deleted file mode 100644 index 408858e424d5..000000000000 --- a/lib/Target/PowerPC/AsmParser/CMakeLists.txt +++ /dev/null @@ -1,3 +0,0 @@ -add_llvm_library(LLVMPowerPCAsmParser - PPCAsmParser.cpp - ) diff --git a/lib/Target/PowerPC/AsmParser/LLVMBuild.txt b/lib/Target/PowerPC/AsmParser/LLVMBuild.txt deleted file mode 100644 index 801f27bb7bc3..000000000000 --- a/lib/Target/PowerPC/AsmParser/LLVMBuild.txt +++ /dev/null @@ -1,23 +0,0 @@ -;===- ./lib/Target/PowerPC/AsmParser/LLVMBuild.txt -------------*- Conf -*--===; -; -; The LLVM Compiler Infrastructure -; -; This file is distributed under the University of Illinois Open Source -; License. See LICENSE.TXT for details. -; -;===------------------------------------------------------------------------===; -; -; This is an LLVMBuild description file for the components in this subdirectory. -; -; For more information on the LLVMBuild system, please see: -; -; http://llvm.org/docs/LLVMBuild.html -; -;===------------------------------------------------------------------------===; - -[component_0] -type = Library -name = PowerPCAsmParser -parent = PowerPC -required_libraries = MC MCParser PowerPCDesc PowerPCInfo Support -add_to_library_groups = PowerPC diff --git a/lib/Target/PowerPC/CMakeLists.txt b/lib/Target/PowerPC/CMakeLists.txt deleted file mode 100644 index 3130d10fa5ed..000000000000 --- a/lib/Target/PowerPC/CMakeLists.txt +++ /dev/null @@ -1,55 +0,0 @@ -set(LLVM_TARGET_DEFINITIONS PPC.td) - -tablegen(LLVM PPCGenAsmMatcher.inc -gen-asm-matcher) -tablegen(LLVM PPCGenAsmWriter.inc -gen-asm-writer) -tablegen(LLVM PPCGenCallingConv.inc -gen-callingconv) -tablegen(LLVM PPCGenDAGISel.inc -gen-dag-isel) -tablegen(LLVM PPCGenDisassemblerTables.inc -gen-disassembler) -tablegen(LLVM PPCGenFastISel.inc -gen-fast-isel) -tablegen(LLVM PPCGenInstrInfo.inc -gen-instr-info) -tablegen(LLVM PPCGenMCCodeEmitter.inc -gen-emitter) 
-tablegen(LLVM PPCGenRegisterInfo.inc -gen-register-info) -tablegen(LLVM PPCGenSubtargetInfo.inc -gen-subtarget) -tablegen(LLVM PPCGenExegesis.inc -gen-exegesis) - -add_public_tablegen_target(PowerPCCommonTableGen) - -add_llvm_target(PowerPCCodeGen - PPCBoolRetToInt.cpp - PPCAsmPrinter.cpp - PPCBranchSelector.cpp - PPCBranchCoalescing.cpp - PPCCCState.cpp - PPCCTRLoops.cpp - PPCHazardRecognizers.cpp - PPCInstrInfo.cpp - PPCISelDAGToDAG.cpp - PPCISelLowering.cpp - PPCEarlyReturn.cpp - PPCFastISel.cpp - PPCFrameLowering.cpp - PPCLoopPreIncPrep.cpp - PPCMCInstLower.cpp - PPCMachineFunctionInfo.cpp - PPCMIPeephole.cpp - PPCRegisterInfo.cpp - PPCQPXLoadSplat.cpp - PPCSubtarget.cpp - PPCTargetMachine.cpp - PPCTargetObjectFile.cpp - PPCTargetTransformInfo.cpp - PPCTOCRegDeps.cpp - PPCTLSDynamicCall.cpp - PPCVSXCopy.cpp - PPCReduceCRLogicals.cpp - PPCVSXFMAMutate.cpp - PPCVSXSwapRemoval.cpp - PPCExpandISEL.cpp - PPCPreEmitPeephole.cpp - ) - -add_subdirectory(AsmParser) -add_subdirectory(Disassembler) -add_subdirectory(InstPrinter) -add_subdirectory(MCTargetDesc) -add_subdirectory(TargetInfo) diff --git a/lib/Target/PowerPC/Disassembler/CMakeLists.txt b/lib/Target/PowerPC/Disassembler/CMakeLists.txt deleted file mode 100644 index ca457df88d3e..000000000000 --- a/lib/Target/PowerPC/Disassembler/CMakeLists.txt +++ /dev/null @@ -1,3 +0,0 @@ -add_llvm_library(LLVMPowerPCDisassembler - PPCDisassembler.cpp - ) diff --git a/lib/Target/PowerPC/Disassembler/LLVMBuild.txt b/lib/Target/PowerPC/Disassembler/LLVMBuild.txt deleted file mode 100644 index ea3e7eaf839d..000000000000 --- a/lib/Target/PowerPC/Disassembler/LLVMBuild.txt +++ /dev/null @@ -1,23 +0,0 @@ -;===-- ./lib/Target/PowerPC/Disassembler/LLVMBuild.txt ---------*- Conf -*--===; -; -; The LLVM Compiler Infrastructure -; -; This file is distributed under the University of Illinois Open Source -; License. See LICENSE.TXT for details. 
-; -;===------------------------------------------------------------------------===; -; -; This is an LLVMBuild description file for the components in this subdirectory. -; -; For more information on the LLVMBuild system, please see: -; -; http://llvm.org/docs/LLVMBuild.html -; -;===------------------------------------------------------------------------===; - -[component_0] -type = Library -name = PowerPCDisassembler -parent = PowerPC -required_libraries = MCDisassembler PowerPCInfo Support -add_to_library_groups = PowerPC diff --git a/lib/Target/PowerPC/InstPrinter/CMakeLists.txt b/lib/Target/PowerPC/InstPrinter/CMakeLists.txt deleted file mode 100644 index ab30a110f40e..000000000000 --- a/lib/Target/PowerPC/InstPrinter/CMakeLists.txt +++ /dev/null @@ -1,3 +0,0 @@ -add_llvm_library(LLVMPowerPCAsmPrinter - PPCInstPrinter.cpp - ) diff --git a/lib/Target/PowerPC/InstPrinter/LLVMBuild.txt b/lib/Target/PowerPC/InstPrinter/LLVMBuild.txt deleted file mode 100644 index 7c691deafccf..000000000000 --- a/lib/Target/PowerPC/InstPrinter/LLVMBuild.txt +++ /dev/null @@ -1,23 +0,0 @@ -;===- ./lib/Target/PowerPC/InstPrinter/LLVMBuild.txt -----------*- Conf -*--===; -; -; The LLVM Compiler Infrastructure -; -; This file is distributed under the University of Illinois Open Source -; License. See LICENSE.TXT for details. -; -;===------------------------------------------------------------------------===; -; -; This is an LLVMBuild description file for the components in this subdirectory. 
-; -; For more information on the LLVMBuild system, please see: -; -; http://llvm.org/docs/LLVMBuild.html -; -;===------------------------------------------------------------------------===; - -[component_0] -type = Library -name = PowerPCAsmPrinter -parent = PowerPC -required_libraries = MC Support -add_to_library_groups = PowerPC diff --git a/lib/Target/PowerPC/LLVMBuild.txt b/lib/Target/PowerPC/LLVMBuild.txt deleted file mode 100644 index fd5fa560912f..000000000000 --- a/lib/Target/PowerPC/LLVMBuild.txt +++ /dev/null @@ -1,35 +0,0 @@ -;===- ./lib/Target/PowerPC/LLVMBuild.txt -----------------------*- Conf -*--===; -; -; The LLVM Compiler Infrastructure -; -; This file is distributed under the University of Illinois Open Source -; License. See LICENSE.TXT for details. -; -;===------------------------------------------------------------------------===; -; -; This is an LLVMBuild description file for the components in this subdirectory. -; -; For more information on the LLVMBuild system, please see: -; -; http://llvm.org/docs/LLVMBuild.html -; -;===------------------------------------------------------------------------===; - -[common] -subdirectories = AsmParser Disassembler InstPrinter MCTargetDesc TargetInfo - -[component_0] -type = TargetGroup -name = PowerPC -parent = Target -has_asmparser = 1 -has_asmprinter = 1 -has_disassembler = 1 -has_jit = 1 - -[component_1] -type = Library -name = PowerPCCodeGen -parent = PowerPC -required_libraries = Analysis AsmPrinter CodeGen Core MC PowerPCAsmPrinter PowerPCDesc PowerPCInfo Scalar SelectionDAG Support Target TransformUtils -add_to_library_groups = PowerPC diff --git a/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt b/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt deleted file mode 100644 index 3cea65ee4de6..000000000000 --- a/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt +++ /dev/null @@ -1,10 +0,0 @@ -add_llvm_library(LLVMPowerPCDesc - PPCAsmBackend.cpp - PPCMCTargetDesc.cpp - PPCMCAsmInfo.cpp - 
PPCMCCodeEmitter.cpp - PPCMCExpr.cpp - PPCPredicates.cpp - PPCMachObjectWriter.cpp - PPCELFObjectWriter.cpp - ) diff --git a/lib/Target/PowerPC/MCTargetDesc/LLVMBuild.txt b/lib/Target/PowerPC/MCTargetDesc/LLVMBuild.txt deleted file mode 100644 index d3a567d1581d..000000000000 --- a/lib/Target/PowerPC/MCTargetDesc/LLVMBuild.txt +++ /dev/null @@ -1,23 +0,0 @@ -;===- ./lib/Target/PowerPC/MCTargetDesc/LLVMBuild.txt ----------*- Conf -*--===; -; -; The LLVM Compiler Infrastructure -; -; This file is distributed under the University of Illinois Open Source -; License. See LICENSE.TXT for details. -; -;===------------------------------------------------------------------------===; -; -; This is an LLVMBuild description file for the components in this subdirectory. -; -; For more information on the LLVMBuild system, please see: -; -; http://llvm.org/docs/LLVMBuild.html -; -;===------------------------------------------------------------------------===; - -[component_0] -type = Library -name = PowerPCDesc -parent = PowerPC -required_libraries = MC PowerPCAsmPrinter PowerPCInfo Support -add_to_library_groups = PowerPC diff --git a/lib/Target/PowerPC/README.txt b/lib/Target/PowerPC/README.txt deleted file mode 100644 index b4bf635dc2c7..000000000000 --- a/lib/Target/PowerPC/README.txt +++ /dev/null @@ -1,665 +0,0 @@ -//===- README.txt - Notes for improving PowerPC-specific code gen ---------===// - -TODO: -* lmw/stmw pass a la arm load store optimizer for prolog/epilog - -===-------------------------------------------------------------------------=== - -This code: - -unsigned add32carry(unsigned sum, unsigned x) { - unsigned z = sum + x; - if (sum + x < x) - z++; - return z; -} - -Should compile to something like: - - addc r3,r3,r4 - addze r3,r3 - -instead we get: - - add r3, r4, r3 - cmplw cr7, r3, r4 - mfcr r4 ; 1 - rlwinm r4, r4, 29, 31, 31 - add r3, r3, r4 - -Ick. 
- -===-------------------------------------------------------------------------=== - -We compile the hottest inner loop of viterbi to: - - li r6, 0 - b LBB1_84 ;bb432.i -LBB1_83: ;bb420.i - lbzx r8, r5, r7 - addi r6, r7, 1 - stbx r8, r4, r7 -LBB1_84: ;bb432.i - mr r7, r6 - cmplwi cr0, r7, 143 - bne cr0, LBB1_83 ;bb420.i - -The CBE manages to produce: - - li r0, 143 - mtctr r0 -loop: - lbzx r2, r2, r11 - stbx r0, r2, r9 - addi r2, r2, 1 - bdz later - b loop - -This could be much better (bdnz instead of bdz) but it still beats us. If we -produced this with bdnz, the loop would be a single dispatch group. - -===-------------------------------------------------------------------------=== - -Lump the constant pool for each function into ONE pic object, and reference -pieces of it as offsets from the start. For functions like this (contrived -to have lots of constants obviously): - -double X(double Y) { return (Y*1.23 + 4.512)*2.34 + 14.38; } - -We generate: - -_X: - lis r2, ha16(.CPI_X_0) - lfd f0, lo16(.CPI_X_0)(r2) - lis r2, ha16(.CPI_X_1) - lfd f2, lo16(.CPI_X_1)(r2) - fmadd f0, f1, f0, f2 - lis r2, ha16(.CPI_X_2) - lfd f1, lo16(.CPI_X_2)(r2) - lis r2, ha16(.CPI_X_3) - lfd f2, lo16(.CPI_X_3)(r2) - fmadd f1, f0, f1, f2 - blr - -It would be better to materialize .CPI_X into a register, then use immediates -off of the register to avoid the lis's. This is even more important in PIC -mode. - -Note that this (and the static variable version) is discussed here for GCC: -http://gcc.gnu.org/ml/gcc-patches/2006-02/msg00133.html - -Here's another example (the sgn function): -double testf(double a) { - return a == 0.0 ? 0.0 : (a > 0.0 ? 
1.0 : -1.0); -} - -it produces a BB like this: -LBB1_1: ; cond_true - lis r2, ha16(LCPI1_0) - lfs f0, lo16(LCPI1_0)(r2) - lis r2, ha16(LCPI1_1) - lis r3, ha16(LCPI1_2) - lfs f2, lo16(LCPI1_2)(r3) - lfs f3, lo16(LCPI1_1)(r2) - fsub f0, f0, f1 - fsel f1, f0, f2, f3 - blr - -===-------------------------------------------------------------------------=== - -PIC Code Gen IPO optimization: - -Squish small scalar globals together into a single global struct, allowing the -address of the struct to be CSE'd, avoiding PIC accesses (also reduces the size -of the GOT on targets with one). - -Note that this is discussed here for GCC: -http://gcc.gnu.org/ml/gcc-patches/2006-02/msg00133.html - -===-------------------------------------------------------------------------=== - -Darwin Stub removal: - -We still generate calls to foo$stub, and stubs, on Darwin. This is not -necessary when building with the Leopard (10.5) or later linker, as stubs are -generated by ld when necessary. Parameterizing this based on the deployment -target (-mmacosx-version-min) is probably enough. x86-32 does this right, see -its logic. - -===-------------------------------------------------------------------------=== - -Darwin Stub LICM optimization: - -Loops like this: - - for (...) bar(); - -Have to go through an indirect stub if bar is external or linkonce. It would -be better to compile it as: - - fp = &bar; - for (...) fp(); - -which only computes the address of bar once (instead of each time through the -stub). This is Darwin specific and would have to be done in the code generator. -Probably not a win on x86. - -===-------------------------------------------------------------------------=== - -Simple IPO for argument passing, change: - void foo(int X, double Y, int Z) -> void foo(int X, int Z, double Y) - -the Darwin ABI specifies that any integer arguments in the first 32 bytes worth -of arguments get assigned to r3 through r10. 
That is, if you have a function -foo(int, double, int) you get r3, f1, r6, since the 64 bit double ate up the -argument bytes for r4 and r5. The trick then would be to shuffle the argument -order for functions we can internalize so that the maximum number of -integers/pointers get passed in regs before you see any of the fp arguments. - -Instead of implementing this, it would actually probably be easier to just -implement a PPC fastcc, where we could do whatever we wanted to the CC, -including having this work sanely. - -===-------------------------------------------------------------------------=== - -Fix Darwin FP-In-Integer Registers ABI - -Darwin passes doubles in structures in integer registers, which is very very -bad. Add something like a BITCAST to LLVM, then do an i-p transformation that -percolates these things out of functions. - -Check out how horrible this is: -http://gcc.gnu.org/ml/gcc/2005-10/msg01036.html - -This is an extension of "interprocedural CC unmunging" that can't be done with -just fastcc. 
- -===-------------------------------------------------------------------------=== - -Fold add and sub with constant into non-extern, non-weak addresses so this: - -static int a; -void bar(int b) { a = b; } -void foo(unsigned char *c) { - *c = a; -} - -So that - -_foo: - lis r2, ha16(_a) - la r2, lo16(_a)(r2) - lbz r2, 3(r2) - stb r2, 0(r3) - blr - -Becomes - -_foo: - lis r2, ha16(_a+3) - lbz r2, lo16(_a+3)(r2) - stb r2, 0(r3) - blr - -===-------------------------------------------------------------------------=== - -We should compile these two functions to the same thing: - -#include <stdlib.h> -void f(int a, int b, int *P) { - *P = (a-b)>=0?(a-b):(b-a); -} -void g(int a, int b, int *P) { - *P = abs(a-b); -} - -Further, they should compile to something better than: - -_g: - subf r2, r4, r3 - subfic r3, r2, 0 - cmpwi cr0, r2, -1 - bgt cr0, LBB2_2 ; entry -LBB2_1: ; entry - mr r2, r3 -LBB2_2: ; entry - stw r2, 0(r5) - blr - -GCC produces: - -_g: - subf r4,r4,r3 - srawi r2,r4,31 - xor r0,r2,r4 - subf r0,r2,r0 - stw r0,0(r5) - blr - -... which is much nicer. - -This theoretically may help improve twolf slightly (used in dimbox.c:142?). - -===-------------------------------------------------------------------------=== - -PR5945: This: -define i32 @clamp0g(i32 %a) { -entry: - %cmp = icmp slt i32 %a, 0 - %sel = select i1 %cmp, i32 0, i32 %a - ret i32 %sel -} - -Is compiled to this with the PowerPC (32-bit) backend: - -_clamp0g: - cmpwi cr0, r3, 0 - li r2, 0 - blt cr0, LBB1_2 -; %bb.1: ; %entry - mr r2, r3 -LBB1_2: ; %entry - mr r3, r2 - blr - -This could be reduced to the much simpler: - -_clamp0g: - srawi r2, r3, 31 - andc r3, r3, r2 - blr - -===-------------------------------------------------------------------------=== - -int foo(int N, int ***W, int **TK, int X) { - int t, i; - - for (t = 0; t < N; ++t) - for (i = 0; i < 4; ++i) - W[t / X][i][t % X] = TK[i][t]; - - return 5; -} - -We generate relatively atrocious code for this loop compared to gcc.
- -We could also strength reduce the rem and the div: -http://www.lcs.mit.edu/pubs/pdf/MIT-LCS-TM-600.pdf - -===-------------------------------------------------------------------------=== - -We generate ugly code for this: - -void func(unsigned int *ret, float dx, float dy, float dz, float dw) { - unsigned code = 0; - if(dx < -dw) code |= 1; - if(dx > dw) code |= 2; - if(dy < -dw) code |= 4; - if(dy > dw) code |= 8; - if(dz < -dw) code |= 16; - if(dz > dw) code |= 32; - *ret = code; -} - -===-------------------------------------------------------------------------=== - -%struct.B = type { i8, [3 x i8] } - -define void @bar(%struct.B* %b) { -entry: - %tmp = bitcast %struct.B* %b to i32* ; <uint*> [#uses=1] - %tmp = load i32* %tmp ; <uint> [#uses=1] - %tmp3 = bitcast %struct.B* %b to i32* ; <uint*> [#uses=1] - %tmp4 = load i32* %tmp3 ; <uint> [#uses=1] - %tmp8 = bitcast %struct.B* %b to i32* ; <uint*> [#uses=2] - %tmp9 = load i32* %tmp8 ; <uint> [#uses=1] - %tmp4.mask17 = shl i32 %tmp4, i8 1 ; <uint> [#uses=1] - %tmp1415 = and i32 %tmp4.mask17, 2147483648 ; <uint> [#uses=1] - %tmp.masked = and i32 %tmp, 2147483648 ; <uint> [#uses=1] - %tmp11 = or i32 %tmp1415, %tmp.masked ; <uint> [#uses=1] - %tmp12 = and i32 %tmp9, 2147483647 ; <uint> [#uses=1] - %tmp13 = or i32 %tmp12, %tmp11 ; <uint> [#uses=1] - store i32 %tmp13, i32* %tmp8 - ret void -} - -We emit: - -_foo: - lwz r2, 0(r3) - slwi r4, r2, 1 - or r4, r4, r2 - rlwimi r2, r4, 0, 0, 0 - stw r2, 0(r3) - blr - -We could collapse a bunch of those ORs and ANDs and generate the following -equivalent code: - -_foo: - lwz r2, 0(r3) - rlwinm r4, r2, 1, 0, 0 - or r2, r2, r4 - stw r2, 0(r3) - blr - -===-------------------------------------------------------------------------=== - -Consider a function like this: - -float foo(float X) { return X + 1234.4123f; } - -The FP constant ends up in the constant pool, so we need to get the LR register. 
- This ends up producing code like this: - -_foo: -.LBB_foo_0: ; entry - mflr r11 -*** stw r11, 8(r1) - bl "L00000$pb" -"L00000$pb": - mflr r2 - addis r2, r2, ha16(.CPI_foo_0-"L00000$pb") - lfs f0, lo16(.CPI_foo_0-"L00000$pb")(r2) - fadds f1, f1, f0 -*** lwz r11, 8(r1) - mtlr r11 - blr - -This is functional, but there is no reason to spill the LR register all the way -to the stack (the two marked instrs): spilling it to a GPR is quite enough. - -Implementing this will require some codegen improvements. Nate writes: - -"So basically what we need to support the "no stack frame save and restore" is a -generalization of the LR optimization to "callee-save regs". - -Currently, we have LR marked as a callee-save reg. The register allocator sees -that it's callee save, and spills it directly to the stack. - -Ideally, something like this would happen: - -LR would be in a separate register class from the GPRs. The class of LR would be -marked "unspillable". When the register allocator came across an unspillable -reg, it would ask "what is the best class to copy this into that I *can* spill" -If it gets a class back, which it will in this case (the gprs), it grabs a free -register of that class. If it is then later necessary to spill that reg, so be -it. - -===-------------------------------------------------------------------------=== - -We compile this: -int test(_Bool X) { - return X ? 524288 : 0; -} - -to: -_test: - cmplwi cr0, r3, 0 - lis r2, 8 - li r3, 0 - beq cr0, LBB1_2 ;entry -LBB1_1: ;entry - mr r3, r2 -LBB1_2: ;entry - blr - -instead of: -_test: - addic r2,r3,-1 - subfe r0,r2,r3 - slwi r3,r0,19 - blr - -This sort of thing occurs a lot due to globalopt. 
- -===-------------------------------------------------------------------------=== - -We compile: - -define i32 @bar(i32 %x) nounwind readnone ssp { -entry: - %0 = icmp eq i32 %x, 0 ; <i1> [#uses=1] - %neg = sext i1 %0 to i32 ; <i32> [#uses=1] - ret i32 %neg -} - -to: - -_bar: - cntlzw r2, r3 - slwi r2, r2, 26 - srawi r3, r2, 31 - blr - -it would be better to produce: - -_bar: - addic r3,r3,-1 - subfe r3,r3,r3 - blr - -===-------------------------------------------------------------------------=== - -We generate horrible ppc code for this: - -#define N 2000000 -double a[N],c[N]; -void simpleloop() { - int j; - for (j=0; j<N; j++) - c[j] = a[j]; -} - -LBB1_1: ;bb - lfdx f0, r3, r4 - addi r5, r5, 1 ;; Extra IV for the exit value compare. - stfdx f0, r2, r4 - addi r4, r4, 8 - - xoris r6, r5, 30 ;; This is due to a large immediate. - cmplwi cr0, r6, 33920 - bne cr0, LBB1_1 - -//===---------------------------------------------------------------------===// - -This: - #include <algorithm> - inline std::pair<unsigned, bool> full_add(unsigned a, unsigned b) - { return std::make_pair(a + b, a + b < a); } - bool no_overflow(unsigned a, unsigned b) - { return !full_add(a, b).second; } - -Should compile to: - -__Z11no_overflowjj: - add r4,r3,r4 - subfc r3,r3,r4 - li r3,0 - adde r3,r3,r3 - blr - -(or better) not: - -__Z11no_overflowjj: - add r2, r4, r3 - cmplw cr7, r2, r3 - mfcr r2 - rlwinm r2, r2, 29, 31, 31 - xori r3, r2, 1 - blr - -//===---------------------------------------------------------------------===// - -We compile some FP comparisons into an mfcr with two rlwinms and an or. 
For -example: -#include <math.h> -int test(double x, double y) { return islessequal(x, y);} -int test2(double x, double y) { return islessgreater(x, y);} -int test3(double x, double y) { return !islessequal(x, y);} - -Compiles into (all three are similar, but the bits differ): - -_test: - fcmpu cr7, f1, f2 - mfcr r2 - rlwinm r3, r2, 29, 31, 31 - rlwinm r2, r2, 31, 31, 31 - or r3, r2, r3 - blr - -GCC compiles this into: - - _test: - fcmpu cr7,f1,f2 - cror 30,28,30 - mfcr r3 - rlwinm r3,r3,31,1 - blr - -which is more efficient and can use mfocr. See PR642 for some more context. - -//===---------------------------------------------------------------------===// - -void foo(float *data, float d) { - long i; - for (i = 0; i < 8000; i++) - data[i] = d; -} -void foo2(float *data, float d) { - long i; - data--; - for (i = 0; i < 8000; i++) { - data[1] = d; - data++; - } -} - -These compile to: - -_foo: - li r2, 0 -LBB1_1: ; bb - addi r4, r2, 4 - stfsx f1, r3, r2 - cmplwi cr0, r4, 32000 - mr r2, r4 - bne cr0, LBB1_1 ; bb - blr -_foo2: - li r2, 0 -LBB2_1: ; bb - addi r4, r2, 4 - stfsx f1, r3, r2 - cmplwi cr0, r4, 32000 - mr r2, r4 - bne cr0, LBB2_1 ; bb - blr - -The 'mr' could be eliminated by folding the add into the cmp better. - -//===---------------------------------------------------------------------===// -Codegen for the following (low-probability) case deteriorated considerably -when the correctness fixes for unordered comparisons went in (PR 642, 58871). -It should be possible to recover the code quality described in the comments. - -; RUN: llvm-as < %s | llc -march=ppc32 | grep or | count 3 -; This should produce one 'or' or 'cror' instruction per function.
- -; RUN: llvm-as < %s | llc -march=ppc32 | grep mfcr | count 3 -; PR2964 - -define i32 @test(double %x, double %y) nounwind { -entry: - %tmp3 = fcmp ole double %x, %y ; <i1> [#uses=1] - %tmp345 = zext i1 %tmp3 to i32 ; <i32> [#uses=1] - ret i32 %tmp345 -} - -define i32 @test2(double %x, double %y) nounwind { -entry: - %tmp3 = fcmp one double %x, %y ; <i1> [#uses=1] - %tmp345 = zext i1 %tmp3 to i32 ; <i32> [#uses=1] - ret i32 %tmp345 -} - -define i32 @test3(double %x, double %y) nounwind { -entry: - %tmp3 = fcmp ugt double %x, %y ; <i1> [#uses=1] - %tmp34 = zext i1 %tmp3 to i32 ; <i32> [#uses=1] - ret i32 %tmp34 -} - -//===---------------------------------------------------------------------===// -for the following code: - -void foo (float *__restrict__ a, int *__restrict__ b, int n) { - a[n] = b[n] * 2.321; -} - -we load b[n] to a GPR, then move it to a VSX register and convert it to float. We should -use vsx scalar integer load instructions to avoid direct moves - -//===----------------------------------------------------------------------===// -; RUN: llvm-as < %s | llc -march=ppc32 | not grep fneg - -; This could generate FSEL with appropriate flags (FSEL is not IEEE-safe, and -; should not be generated except with -enable-finite-only-fp-math or the like). -; With the correctness fixes for PR642 (58871) LowerSELECT_CC would need to -; recognize a more elaborate tree than a simple SETxx. - -define double @test_FNEG_sel(double %A, double %B, double %C) { - %D = fsub double -0.000000e+00, %A ; <double> [#uses=1] - %Cond = fcmp ugt double %D, -0.000000e+00 ; <i1> [#uses=1] - %E = select i1 %Cond, double %B, double %C ; <double> [#uses=1] - ret double %E -} - -//===----------------------------------------------------------------------===// -The save/restore sequence for CR in prolog/epilog is terrible: -- Each CR subreg is saved individually, rather than doing one save as a unit.
-- On Darwin, the save is done after the decrement of SP, which means the offset -from SP of the save slot can be too big for a store instruction, which means we -need an additional register (currently hacked in 96015+96020; the solution there -is correct, but poor). -- On SVR4 the same thing can happen, and I don't think saving before the SP -decrement is safe on that target, as there is no red zone. This is currently -broken AFAIK, although it's not a target I can exercise. -The following demonstrates the problem: -extern void bar(char *p); -void foo() { - char x[100000]; - bar(x); - __asm__("" ::: "cr2"); -} - -//===-------------------------------------------------------------------------=== -Naming convention for instruction formats is very haphazard. -We have agreed on a naming scheme as follows: - -<INST_form>{_<OP_type><OP_len>}+ - -Where: -INST_form is the instruction format (X-form, etc.) -OP_type is the operand type - one of OPC (opcode), RD (register destination), - RS (register source), - RDp (destination register pair), - RSp (source register pair), IM (immediate), - XO (extended opcode) -OP_len is the length of the operand in bits - -VSX register operands would be of length 6 (split across two fields), -condition register fields of length 3. -We would not need to denote reserved fields in names of instruction formats. - -//===----------------------------------------------------------------------===// - -Instruction fusion was introduced in ISA 2.06 and more opportunities added in -ISA 2.07. LLVM needs to add infrastructure to recognize fusion opportunities -and force instruction pairs to be scheduled together.
- ------------------------------------------------------------------------------ - -More general handling of any_extend and zero_extend: - -See https://reviews.llvm.org/D24924#555306 diff --git a/lib/Target/PowerPC/README_ALTIVEC.txt b/lib/Target/PowerPC/README_ALTIVEC.txt deleted file mode 100644 index c38e01923161..000000000000 --- a/lib/Target/PowerPC/README_ALTIVEC.txt +++ /dev/null @@ -1,343 +0,0 @@ -//===- README_ALTIVEC.txt - Notes for improving Altivec code gen ----------===// - -Implement PPCInstrInfo::isLoadFromStackSlot/isStoreToStackSlot for vector -registers, to generate better spill code. - -//===----------------------------------------------------------------------===// - -The first should be a single lvx from the constant pool, the second should be -a xor/stvx: - -void foo(void) { - int x[8] __attribute__((aligned(128))) = { 1, 1, 1, 17, 1, 1, 1, 1 }; - bar (x); -} - -#include <string.h> -void foo(void) { - int x[8] __attribute__((aligned(128))); - memset (x, 0, sizeof (x)); - bar (x); -} - -//===----------------------------------------------------------------------===// - -Altivec: Codegen'ing MUL with vector FMADD should add -0.0, not 0.0: -http://gcc.gnu.org/bugzilla/show_bug.cgi?id=8763 - -When -ffast-math is on, we can use 0.0. - -//===----------------------------------------------------------------------===// - - Consider this: - v4f32 Vector; - v4f32 Vector2 = { Vector.X, Vector.X, Vector.X, Vector.X }; - -Since we know that "Vector" is 16-byte aligned and we know the element offset -of ".X", we should change the load into a lve*x instruction, instead of doing -a load/store/lve*x sequence. - -//===----------------------------------------------------------------------===// - -For functions that use altivec AND have calls, we are VRSAVE'ing all call -clobbered regs. - -//===----------------------------------------------------------------------===// - -Implement passing vectors by value into calls and receiving them as arguments. 
- -//===----------------------------------------------------------------------===// - -GCC apparently tries to codegen { C1, C2, Variable, C3 } as a constant pool load -of C1/C2/C3, then a load and vperm of Variable. - -//===----------------------------------------------------------------------===// - -We need a way to teach tblgen that some operands of an intrinsic are required to -be constants. The verifier should enforce this constraint. - -//===----------------------------------------------------------------------===// - -We currently codegen SCALAR_TO_VECTOR as a store of the scalar to a 16-byte -aligned stack slot, followed by a load/vperm. We should probably just store it -to a scalar stack slot, then use lvsl/vperm to load it. If the value is already -in memory this is a big win. - -//===----------------------------------------------------------------------===// - -extract_vector_elt of an arbitrary constant vector can be done with the -following instructions: - -vTemp = vec_splat(v0,2); // 2 is the element the src is in. -vec_ste(&destloc,0,vTemp); - -We can do an arbitrary non-constant value by using lvsr/perm/ste. - -//===----------------------------------------------------------------------===// - -If we want to tie instruction selection into the scheduler, we can do some -constant formation with different instructions. For example, we can generate -"vsplti -1" with "vcmpequw R,R" and 1,1,1,1 with "vsubcuw R,R", and 0,0,0,0 with -"vsplti 0" or "vxor", each of which use different execution units, thus could -help scheduling. - -This is probably only reasonable for a post-pass scheduler. - -//===----------------------------------------------------------------------===// - -For this function: - -void test(vector float *A, vector float *B) { - vector float C = (vector float)vec_cmpeq(*A, *B); - if (!vec_any_eq(*A, *B)) - *B = (vector float){0,0,0,0}; - *A = C; -} - -we get the following basic block: - - ... 
- lvx v2, 0, r4 - lvx v3, 0, r3 - vcmpeqfp v4, v3, v2 - vcmpeqfp. v2, v3, v2 - bne cr6, LBB1_2 ; cond_next - -The vcmpeqfp/vcmpeqfp. instructions currently cannot be merged when the -vcmpeqfp. result is used by a branch. This can be improved. - -//===----------------------------------------------------------------------===// - -The code generated for this is truly awful: - -vector float test(float a, float b) { - return (vector float){ 0.0, a, 0.0, 0.0}; -} - -LCPI1_0: ; float - .space 4 - .text - .globl _test - .align 4 -_test: - mfspr r2, 256 - oris r3, r2, 4096 - mtspr 256, r3 - lis r3, ha16(LCPI1_0) - addi r4, r1, -32 - stfs f1, -16(r1) - addi r5, r1, -16 - lfs f0, lo16(LCPI1_0)(r3) - stfs f0, -32(r1) - lvx v2, 0, r4 - lvx v3, 0, r5 - vmrghw v3, v3, v2 - vspltw v2, v2, 0 - vmrghw v2, v2, v3 - mtspr 256, r2 - blr - -//===----------------------------------------------------------------------===// - -int foo(vector float *x, vector float *y) { - if (vec_all_eq(*x,*y)) return 3245; - else return 12; -} - -A predicate compare being used in a select_cc should have the same peephole -applied to it as a predicate compare used by a br_cc. There should be no -mfcr here: - -_foo: - mfspr r2, 256 - oris r5, r2, 12288 - mtspr 256, r5 - li r5, 12 - li r6, 3245 - lvx v2, 0, r4 - lvx v3, 0, r3 - vcmpeqfp. v2, v3, v2 - mfcr r3, 2 - rlwinm r3, r3, 25, 31, 31 - cmpwi cr0, r3, 0 - bne cr0, LBB1_2 ; entry -LBB1_1: ; entry - mr r6, r5 -LBB1_2: ; entry - mr r3, r6 - mtspr 256, r2 - blr - -//===----------------------------------------------------------------------===// - -CodeGen/PowerPC/vec_constants.ll has an and operation that should be -codegen'd to andc. The issue is that the 'all ones' build vector is -SelectNodeTo'd a VSPLTISB instruction node before the and/xor is selected -which prevents the vnot pattern from matching.
- - -//===----------------------------------------------------------------------===// - -An alternative to the store/store/load approach for illegal insert element -lowering would be: - -1. store element to any ol' slot -2. lvx the slot -3. lvsl 0; splat index; vcmpeq to generate a select mask -4. lvsl slot + x; vperm to rotate result into correct slot -5. vsel result together. - -//===----------------------------------------------------------------------===// - -Should codegen branches on vec_any/vec_all to avoid mfcr. Two examples: - -#include <altivec.h> - int f(vector float a, vector float b) - { - int aa = 0; - if (vec_all_ge(a, b)) - aa |= 0x1; - if (vec_any_ge(a,b)) - aa |= 0x2; - return aa; -} - -vector float f(vector float a, vector float b) { - if (vec_any_eq(a, b)) - return a; - else - return b; -} - -//===----------------------------------------------------------------------===// - -We should do a little better with eliminating dead stores. -The stores to the stack are dead since %a and %b are not needed - -; Function Attrs: nounwind -define <16 x i8> @test_vpmsumb() #0 { - entry: - %a = alloca <16 x i8>, align 16 - %b = alloca <16 x i8>, align 16 - store <16 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16>, <16 x i8>* %a, align 16 - store <16 x i8> <i8 113, i8 114, i8 115, i8 116, i8 117, i8 118, i8 119, i8 120, i8 121, i8 122, i8 123, i8 124, i8 125, i8 126, i8 127, i8 112>, <16 x i8>* %b, align 16 - %0 = load <16 x i8>* %a, align 16 - %1 = load <16 x i8>* %b, align 16 - %2 = call <16 x i8> @llvm.ppc.altivec.crypto.vpmsumb(<16 x i8> %0, <16 x i8> %1) - ret <16 x i8> %2 -} - - -; Function Attrs: nounwind readnone -declare <16 x i8> @llvm.ppc.altivec.crypto.vpmsumb(<16 x i8>, <16 x i8>) #1 - - -Produces the following code with -mtriple=powerpc64-unknown-linux-gnu: -# %bb.0: # %entry - addis 3, 2, .LCPI0_0@toc@ha - addis 4, 2, .LCPI0_1@toc@ha - addi 3, 3, .LCPI0_0@toc@l - addi 4, 4, 
.LCPI0_1@toc@l - lxvw4x 0, 0, 3 - addi 3, 1, -16 - lxvw4x 35, 0, 4 - stxvw4x 0, 0, 3 - ori 2, 2, 0 - lxvw4x 34, 0, 3 - addi 3, 1, -32 - stxvw4x 35, 0, 3 - vpmsumb 2, 2, 3 - blr - .long 0 - .quad 0 - -The two stxvw4x instructions are not needed. -With -mtriple=powerpc64le-unknown-linux-gnu, the associated permutes -are present too. - -//===----------------------------------------------------------------------===// - -The following example is found in test/CodeGen/PowerPC/vec_add_sub_doubleword.ll: - -define <2 x i64> @increment_by_val(<2 x i64> %x, i64 %val) nounwind { - %tmpvec = insertelement <2 x i64> <i64 0, i64 0>, i64 %val, i32 0 - %tmpvec2 = insertelement <2 x i64> %tmpvec, i64 %val, i32 1 - %result = add <2 x i64> %x, %tmpvec2 - ret <2 x i64> %result - -This will generate the following instruction sequence: - std 5, -8(1) - std 5, -16(1) - addi 3, 1, -16 - ori 2, 2, 0 - lxvd2x 35, 0, 3 - vaddudm 2, 2, 3 - blr - -This will almost certainly cause a load-hit-store hazard. -Since val is a value parameter, it should not need to be saved onto -the stack, unless it's being done to set up the vector register. Instead, -it would be better to splat the value into a vector register, and then -remove the (dead) stores to the stack. - -//===----------------------------------------------------------------------===// - -At the moment we always generate a lxsdx in preference to lfd, or stxsdx in -preference to stfd. When we have a reg-immediate addressing mode, this is a -poor choice, since we have to load the address into an index register. This -should be fixed for P7/P8. - -//===----------------------------------------------------------------------===// - -Right now, ShuffleKind 0 is supported only on BE, and ShuffleKind 2 only on LE. -However, we could actually support both kinds on either endianness, if we check -for the appropriate shufflevector pattern for each case ... 
this would cause -some additional shufflevectors to be recognized and implemented via the -"swapped" form. - -//===----------------------------------------------------------------------===// - -There is a utility program called PerfectShuffle that generates a table of the -shortest instruction sequence for implementing a shufflevector operation on -PowerPC. However, this was designed for big-endian code generation. We could -modify this program to create a little endian version of the table. The table -is used in PPCISelLowering.cpp, PPCTargetLowering::LOWERVECTOR_SHUFFLE(). - -//===----------------------------------------------------------------------===// - -Opportunities to use instructions from PPCInstrVSX.td during code gen - - Conversion instructions (Sections 7.6.1.5 and 7.6.1.6 of ISA 2.07) - - Scalar comparisons (xscmpodp and xscmpudp) - - Min and max (xsmaxdp, xsmindp, xvmaxdp, xvmindp, xvmaxsp, xvminsp) - -Related to this: we currently do not generate the lxvw4x instruction for either -v4f32 or v4i32, probably because adding a dag pattern to the recognizer requires -a single target type. This should probably be addressed in the PPCISelDAGToDAG logic. - -//===----------------------------------------------------------------------===// - -Currently EXTRACT_VECTOR_ELT and INSERT_VECTOR_ELT are type-legal only -for v2f64 with VSX available. We should create custom lowering -support for the other vector types. Without this support, we generate -sequences with load-hit-store hazards. - -v4f32 can be supported with VSX by shifting the correct element into -big-endian lane 0, using xscvspdpn to produce a double-precision -representation of the single-precision value in big-endian -double-precision lane 0, and reinterpreting lane 0 as an FPR or -vector-scalar register. - -v2i64 can be supported with VSX and P8Vector in the same manner as -v2f64, followed by a direct move to a GPR. 
- -v4i32 can be supported with VSX and P8Vector by shifting the correct -element into big-endian lane 1, using a direct move to a GPR, and -sign-extending the 32-bit result to 64 bits. - -v8i16 can be supported with VSX and P8Vector by shifting the correct -element into big-endian lane 3, using a direct move to a GPR, and -sign-extending the 16-bit result to 64 bits. - -v16i8 can be supported with VSX and P8Vector by shifting the correct -element into big-endian lane 7, using a direct move to a GPR, and -sign-extending the 8-bit result to 64 bits. diff --git a/lib/Target/PowerPC/TargetInfo/CMakeLists.txt b/lib/Target/PowerPC/TargetInfo/CMakeLists.txt deleted file mode 100644 index c9548c7fe0cd..000000000000 --- a/lib/Target/PowerPC/TargetInfo/CMakeLists.txt +++ /dev/null @@ -1,3 +0,0 @@ -add_llvm_library(LLVMPowerPCInfo - PowerPCTargetInfo.cpp - ) diff --git a/lib/Target/PowerPC/TargetInfo/LLVMBuild.txt b/lib/Target/PowerPC/TargetInfo/LLVMBuild.txt deleted file mode 100644 index 410234686400..000000000000 --- a/lib/Target/PowerPC/TargetInfo/LLVMBuild.txt +++ /dev/null @@ -1,23 +0,0 @@ -;===- ./lib/Target/PowerPC/TargetInfo/LLVMBuild.txt ------------*- Conf -*--===; -; -; The LLVM Compiler Infrastructure -; -; This file is distributed under the University of Illinois Open Source -; License. See LICENSE.TXT for details. -; -;===------------------------------------------------------------------------===; -; -; This is an LLVMBuild description file for the components in this subdirectory. -; -; For more information on the LLVMBuild system, please see: -; -; http://llvm.org/docs/LLVMBuild.html -; -;===------------------------------------------------------------------------===; - -[component_0] -type = Library -name = PowerPCInfo -parent = PowerPC -required_libraries = Support -add_to_library_groups = PowerPC |
