path: root/lib/Target/PowerPC/PPCISelLowering.cpp
author	Dimitry Andric <dim@FreeBSD.org>	2014-11-24 09:08:18 +0000
committer	Dimitry Andric <dim@FreeBSD.org>	2014-11-24 09:08:18 +0000
commit	5ca98fd98791947eba83a1ed3f2c8191ef7afa6c (patch)
tree	f5944309621cee4fe0976be6f9ac619b7ebfc4c2 /lib/Target/PowerPC/PPCISelLowering.cpp
parent	68bcb7db193e4bc81430063148253d30a791023e (diff)
Diffstat (limited to 'lib/Target/PowerPC/PPCISelLowering.cpp')
-rw-r--r--	lib/Target/PowerPC/PPCISelLowering.cpp	2699
1 file changed, 2019 insertions(+), 680 deletions(-)
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 25a7ca7f59a7..708d36f6f978 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -18,6 +18,8 @@
#include "PPCTargetMachine.h"
#include "PPCTargetObjectFile.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -46,20 +48,21 @@ cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hi
static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
-static TargetLoweringObjectFile *CreateTLOF(const PPCTargetMachine &TM) {
- if (TM.getSubtargetImpl()->isDarwin())
- return new TargetLoweringObjectFileMachO();
+// FIXME: Remove this once the bug has been fixed!
+extern cl::opt<bool> ANDIGlueBug;
- if (TM.getSubtargetImpl()->isSVR4ABI())
- return new PPC64LinuxTargetObjectFile();
+static TargetLoweringObjectFile *createTLOF(const Triple &TT) {
+ // If it isn't a Mach-O file then it's going to be a linux ELF
+ // object file.
+ if (TT.isOSDarwin())
+ return new TargetLoweringObjectFileMachO();
- return new TargetLoweringObjectFileELF();
+ return new PPC64LinuxTargetObjectFile();
}
PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
- : TargetLowering(TM, CreateTLOF(TM)), PPCSubTarget(*TM.getSubtargetImpl()) {
- const PPCSubtarget *Subtarget = &TM.getSubtarget<PPCSubtarget>();
-
+ : TargetLowering(TM, createTLOF(Triple(TM.getTargetTriple()))),
+ Subtarget(*TM.getSubtargetImpl()) {
setPow2DivIsCheap();
// Use _setjmp/_longjmp instead of setjmp/longjmp.
@@ -68,7 +71,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
// On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
// arguments are at least 4/8 bytes aligned.
- bool isPPC64 = Subtarget->isPPC64();
+ bool isPPC64 = Subtarget.isPPC64();
setMinStackArgumentAlignment(isPPC64 ? 8:4);
// Set up the register classes.
@@ -94,6 +97,39 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);
+ if (Subtarget.useCRBits()) {
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+
+ if (isPPC64 || Subtarget.hasFPCVT()) {
+ setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
+ AddPromotedToType (ISD::SINT_TO_FP, MVT::i1,
+ isPPC64 ? MVT::i64 : MVT::i32);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
+ AddPromotedToType (ISD::UINT_TO_FP, MVT::i1,
+ isPPC64 ? MVT::i64 : MVT::i32);
+ } else {
+ setOperationAction(ISD::SINT_TO_FP, MVT::i1, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom);
+ }
+
+ // PowerPC does not support direct load / store of condition registers
+ setOperationAction(ISD::LOAD, MVT::i1, Custom);
+ setOperationAction(ISD::STORE, MVT::i1, Custom);
+
+ // FIXME: Remove this once the ANDI glue bug is fixed:
+ if (ANDIGlueBug)
+ setOperationAction(ISD::TRUNCATE, MVT::i1, Custom);
+
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
+ setTruncStoreAction(MVT::i64, MVT::i1, Expand);
+ setTruncStoreAction(MVT::i32, MVT::i1, Expand);
+ setTruncStoreAction(MVT::i16, MVT::i1, Expand);
+ setTruncStoreAction(MVT::i8, MVT::i1, Expand);
+
+ addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
+ }
+
// This is used in the ppcf128->int sequence. Note it has different semantics
// from FP_ROUND: that rounds to nearest, this rounds to zero.
setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom);
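A minimal standalone sketch (plain C++, not part of the patch) of roughly what the Promote action above amounts to for i1 SINT_TO_FP/UINT_TO_FP: the 1-bit value is first widened to i32 or i64 (sign-extended for the signed conversion, zero-extended for the unsigned one) and only then converted to floating point, so a "true" i1 becomes -1.0 via the signed path and 1.0 via the unsigned path.

#include <cassert>
#include <cstdint>

// Signed path: sign-extend the single bit, then convert.
static double i1SintToFp(bool B) { return static_cast<double>(B ? int32_t(-1) : 0); }
// Unsigned path: zero-extend the single bit, then convert.
static double i1UintToFp(bool B) { return static_cast<double>(B ? uint32_t(1) : 0); }

int main() {
  assert(i1SintToFp(true) == -1.0 && i1SintToFp(false) == 0.0);
  assert(i1UintToFp(true) == 1.0 && i1UintToFp(false) == 0.0);
}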
@@ -139,17 +175,17 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
// If we're enabling GP optimizations, use hardware square root
- if (!Subtarget->hasFSQRT() &&
+ if (!Subtarget.hasFSQRT() &&
!(TM.Options.UnsafeFPMath &&
- Subtarget->hasFRSQRTE() && Subtarget->hasFRE()))
+ Subtarget.hasFRSQRTE() && Subtarget.hasFRE()))
setOperationAction(ISD::FSQRT, MVT::f64, Expand);
- if (!Subtarget->hasFSQRT() &&
+ if (!Subtarget.hasFSQRT() &&
!(TM.Options.UnsafeFPMath &&
- Subtarget->hasFRSQRTES() && Subtarget->hasFRES()))
+ Subtarget.hasFRSQRTES() && Subtarget.hasFRES()))
setOperationAction(ISD::FSQRT, MVT::f32, Expand);
- if (Subtarget->hasFCPSGN()) {
+ if (Subtarget.hasFCPSGN()) {
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal);
} else {
@@ -157,7 +193,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
}
- if (Subtarget->hasFPRND()) {
+ if (Subtarget.hasFPRND()) {
setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
setOperationAction(ISD::FCEIL, MVT::f64, Legal);
setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
@@ -179,7 +215,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);
- if (Subtarget->hasPOPCNTD()) {
+ if (Subtarget.hasPOPCNTD()) {
setOperationAction(ISD::CTPOP, MVT::i32 , Legal);
setOperationAction(ISD::CTPOP, MVT::i64 , Legal);
} else {
@@ -191,21 +227,25 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::ROTR, MVT::i32 , Expand);
setOperationAction(ISD::ROTR, MVT::i64 , Expand);
- // PowerPC does not have Select
- setOperationAction(ISD::SELECT, MVT::i32, Expand);
- setOperationAction(ISD::SELECT, MVT::i64, Expand);
- setOperationAction(ISD::SELECT, MVT::f32, Expand);
- setOperationAction(ISD::SELECT, MVT::f64, Expand);
+ if (!Subtarget.useCRBits()) {
+ // PowerPC does not have Select
+ setOperationAction(ISD::SELECT, MVT::i32, Expand);
+ setOperationAction(ISD::SELECT, MVT::i64, Expand);
+ setOperationAction(ISD::SELECT, MVT::f32, Expand);
+ setOperationAction(ISD::SELECT, MVT::f64, Expand);
+ }
// PowerPC wants to turn select_cc of FP into fsel when possible.
setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
// PowerPC wants to optimize integer setcc a bit
- setOperationAction(ISD::SETCC, MVT::i32, Custom);
+ if (!Subtarget.useCRBits())
+ setOperationAction(ISD::SETCC, MVT::i32, Custom);
// PowerPC does not have BRCOND which requires SetCC
- setOperationAction(ISD::BRCOND, MVT::Other, Expand);
+ if (!Subtarget.useCRBits())
+ setOperationAction(ISD::BRCOND, MVT::Other, Expand);
setOperationAction(ISD::BR_JT, MVT::Other, Expand);
@@ -256,7 +296,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
// VASTART needs to be custom lowered to use the VarArgsFrameIndex
setOperationAction(ISD::VASTART , MVT::Other, Custom);
- if (Subtarget->isSVR4ABI()) {
+ if (Subtarget.isSVR4ABI()) {
if (isPPC64) {
// VAARG always uses double-word chunks, so promote anything smaller.
setOperationAction(ISD::VAARG, MVT::i1, Promote);
@@ -276,7 +316,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
} else
setOperationAction(ISD::VAARG, MVT::Other, Expand);
- if (Subtarget->isSVR4ABI() && !isPPC64)
+ if (Subtarget.isSVR4ABI() && !isPPC64)
// VACOPY is custom lowered with the 32-bit SVR4 ABI.
setOperationAction(ISD::VACOPY , MVT::Other, Custom);
else
@@ -309,7 +349,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
setCondCodeAction(ISD::SETONE, MVT::f64, Expand);
- if (Subtarget->has64BitSupport()) {
+ if (Subtarget.has64BitSupport()) {
// They also have instructions for converting between i64 and fp.
setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
@@ -319,7 +359,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
// We cannot do this with Promote because i64 is not a legal type.
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
- if (PPCSubTarget.hasLFIWAX() || Subtarget->isPPC64())
+ if (Subtarget.hasLFIWAX() || Subtarget.isPPC64())
setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
} else {
// PowerPC does not have FP_TO_UINT on 32-bit implementations.
@@ -327,8 +367,8 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
}
// With the instructions enabled under FPCVT, we can do everything.
- if (PPCSubTarget.hasFPCVT()) {
- if (Subtarget->has64BitSupport()) {
+ if (Subtarget.hasFPCVT()) {
+ if (Subtarget.has64BitSupport()) {
setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
@@ -341,7 +381,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
}
- if (Subtarget->use64BitRegs()) {
+ if (Subtarget.use64BitRegs()) {
// 64-bit PowerPC implementations can support i64 types directly
addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
// BUILD_PAIR can't be handled natively, and should be expanded to shl/or
@@ -357,7 +397,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
}
- if (Subtarget->hasAltivec()) {
+ if (Subtarget.hasAltivec()) {
// First set operation action for all vector types to expand. Then we
// will selectively turn on ones that can be effectively codegen'd.
for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
@@ -413,12 +453,15 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
setOperationAction(ISD::BUILD_VECTOR, VT, Expand);
+ setOperationAction(ISD::MULHU, VT, Expand);
+ setOperationAction(ISD::MULHS, VT, Expand);
setOperationAction(ISD::UMUL_LOHI, VT, Expand);
setOperationAction(ISD::SMUL_LOHI, VT, Expand);
setOperationAction(ISD::UDIVREM, VT, Expand);
setOperationAction(ISD::SDIVREM, VT, Expand);
setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
setOperationAction(ISD::FPOW, VT, Expand);
+ setOperationAction(ISD::BSWAP, VT, Expand);
setOperationAction(ISD::CTPOP, VT, Expand);
setOperationAction(ISD::CTLZ, VT, Expand);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
@@ -445,7 +488,8 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::OR , MVT::v4i32, Legal);
setOperationAction(ISD::XOR , MVT::v4i32, Legal);
setOperationAction(ISD::LOAD , MVT::v4i32, Legal);
- setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
+ setOperationAction(ISD::SELECT, MVT::v4i32,
+ Subtarget.useCRBits() ? Legal : Expand);
setOperationAction(ISD::STORE , MVT::v4i32, Legal);
setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
@@ -464,7 +508,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::MUL, MVT::v4f32, Legal);
setOperationAction(ISD::FMA, MVT::v4f32, Legal);
- if (TM.Options.UnsafeFPMath) {
+ if (TM.Options.UnsafeFPMath || Subtarget.hasVSX()) {
setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
}
@@ -484,16 +528,83 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
// Altivec does not contain unordered floating-point compare instructions
setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
- setCondCodeAction(ISD::SETUGT, MVT::v4f32, Expand);
- setCondCodeAction(ISD::SETUGE, MVT::v4f32, Expand);
- setCondCodeAction(ISD::SETULT, MVT::v4f32, Expand);
- setCondCodeAction(ISD::SETULE, MVT::v4f32, Expand);
-
setCondCodeAction(ISD::SETO, MVT::v4f32, Expand);
setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);
+
+ if (Subtarget.hasVSX()) {
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
+
+ setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
+ setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
+ setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
+
+ setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
+
+ setOperationAction(ISD::MUL, MVT::v2f64, Legal);
+ setOperationAction(ISD::FMA, MVT::v2f64, Legal);
+
+ setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
+ setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
+
+ setOperationAction(ISD::VSELECT, MVT::v16i8, Legal);
+ setOperationAction(ISD::VSELECT, MVT::v8i16, Legal);
+ setOperationAction(ISD::VSELECT, MVT::v4i32, Legal);
+ setOperationAction(ISD::VSELECT, MVT::v4f32, Legal);
+ setOperationAction(ISD::VSELECT, MVT::v2f64, Legal);
+
+ // Share the Altivec comparison restrictions.
+ setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
+ setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
+ setCondCodeAction(ISD::SETO, MVT::v2f64, Expand);
+ setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);
+
+ setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
+ setOperationAction(ISD::STORE, MVT::v2f64, Legal);
+
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Legal);
+
+ addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);
+
+ addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
+ addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);
+
+ // VSX v2i64 only supports non-arithmetic operations.
+ setOperationAction(ISD::ADD, MVT::v2i64, Expand);
+ setOperationAction(ISD::SUB, MVT::v2i64, Expand);
+
+ setOperationAction(ISD::SHL, MVT::v2i64, Expand);
+ setOperationAction(ISD::SRA, MVT::v2i64, Expand);
+ setOperationAction(ISD::SRL, MVT::v2i64, Expand);
+
+ setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
+
+ setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
+ AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
+ setOperationAction(ISD::STORE, MVT::v2i64, Promote);
+ AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64);
+
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Legal);
+
+ setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
+
+ // Vector operation legalization checks the result type of
+ // SIGN_EXTEND_INREG, overall legalization checks the inner type.
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Custom);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Custom);
+
+ addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
+ }
}
- if (Subtarget->has64BitSupport()) {
+ if (Subtarget.has64BitSupport()) {
setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal);
}
@@ -507,6 +618,13 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
// Altivec instructions set fields to all zeros or all ones.
setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
+ if (!isPPC64) {
+ // These libcalls are not available in 32-bit.
+ setLibcallName(RTLIB::SHL_I128, nullptr);
+ setLibcallName(RTLIB::SRL_I128, nullptr);
+ setLibcallName(RTLIB::SRA_I128, nullptr);
+ }
+
if (isPPC64) {
setStackPointerRegisterToSaveRestore(PPC::X1);
setExceptionPointerRegister(PPC::X3);
@@ -522,9 +640,21 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setTargetDAGCombine(ISD::LOAD);
setTargetDAGCombine(ISD::STORE);
setTargetDAGCombine(ISD::BR_CC);
+ if (Subtarget.useCRBits())
+ setTargetDAGCombine(ISD::BRCOND);
setTargetDAGCombine(ISD::BSWAP);
setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
+ setTargetDAGCombine(ISD::SIGN_EXTEND);
+ setTargetDAGCombine(ISD::ZERO_EXTEND);
+ setTargetDAGCombine(ISD::ANY_EXTEND);
+
+ if (Subtarget.useCRBits()) {
+ setTargetDAGCombine(ISD::TRUNCATE);
+ setTargetDAGCombine(ISD::SETCC);
+ setTargetDAGCombine(ISD::SELECT_CC);
+ }
+
// Use reciprocal estimates.
if (TM.Options.UnsafeFPMath) {
setTargetDAGCombine(ISD::FDIV);
@@ -532,7 +662,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
}
// Darwin long double math library functions have $LDBL128 appended.
- if (Subtarget->isDarwin()) {
+ if (Subtarget.isDarwin()) {
setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128");
setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128");
@@ -545,18 +675,23 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setLibcallName(RTLIB::EXP2_PPCF128, "exp2l$LDBL128");
}
+ // With 32 condition bits, we don't need to sink (and duplicate) compares
+ // aggressively in CodeGenPrep.
+ if (Subtarget.useCRBits())
+ setHasMultipleConditionRegisters();
+
setMinFunctionAlignment(2);
- if (PPCSubTarget.isDarwin())
+ if (Subtarget.isDarwin())
setPrefFunctionAlignment(4);
- if (isPPC64 && Subtarget->isJITCodeModel())
+ if (isPPC64 && Subtarget.isJITCodeModel())
// Temporary workaround for the inability of PPC64 JIT to handle jump
// tables.
setSupportJumpTables(false);
setInsertFencesForAtomic(true);
- if (Subtarget->enableMachineScheduler())
+ if (Subtarget.enableMachineScheduler())
setSchedulingPreference(Sched::Source);
else
setSchedulingPreference(Sched::Hybrid);
@@ -565,8 +700,8 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
// The Freescale cores do better with aggressive inlining of memcpy and
// friends. GCC uses the same threshold of 128 bytes (= 32 word stores).
- if (Subtarget->getDarwinDirective() == PPC::DIR_E500mc ||
- Subtarget->getDarwinDirective() == PPC::DIR_E5500) {
+ if (Subtarget.getDarwinDirective() == PPC::DIR_E500mc ||
+ Subtarget.getDarwinDirective() == PPC::DIR_E5500) {
MaxStoresPerMemset = 32;
MaxStoresPerMemsetOptSize = 16;
MaxStoresPerMemcpy = 32;
@@ -610,20 +745,20 @@ static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign,
/// function arguments in the caller parameter area.
unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty) const {
// Darwin passes everything on 4 byte boundary.
- if (PPCSubTarget.isDarwin())
+ if (Subtarget.isDarwin())
return 4;
// 16byte and wider vectors are passed on 16byte boundary.
// The rest is 8 on PPC64 and 4 on PPC32 boundary.
- unsigned Align = PPCSubTarget.isPPC64() ? 8 : 4;
- if (PPCSubTarget.hasAltivec() || PPCSubTarget.hasQPX())
- getMaxByValAlign(Ty, Align, PPCSubTarget.hasQPX() ? 32 : 16);
+ unsigned Align = Subtarget.isPPC64() ? 8 : 4;
+ if (Subtarget.hasAltivec() || Subtarget.hasQPX())
+ getMaxByValAlign(Ty, Align, Subtarget.hasQPX() ? 32 : 16);
return Align;
}
const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
switch (Opcode) {
- default: return 0;
+ default: return nullptr;
case PPCISD::FSEL: return "PPCISD::FSEL";
case PPCISD::FCFID: return "PPCISD::FCFID";
case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ";
@@ -637,7 +772,6 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::Hi: return "PPCISD::Hi";
case PPCISD::Lo: return "PPCISD::Lo";
case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY";
- case PPCISD::TOC_RESTORE: return "PPCISD::TOC_RESTORE";
case PPCISD::LOAD: return "PPCISD::LOAD";
case PPCISD::LOAD_TOC: return "PPCISD::LOAD_TOC";
case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC";
@@ -670,6 +804,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::ADDIS_TOC_HA: return "PPCISD::ADDIS_TOC_HA";
case PPCISD::LD_TOC_L: return "PPCISD::LD_TOC_L";
case PPCISD::ADDI_TOC_L: return "PPCISD::ADDI_TOC_L";
+ case PPCISD::PPC32_GOT: return "PPCISD::PPC32_GOT";
case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
case PPCISD::LD_GOT_TPREL_L: return "PPCISD::LD_GOT_TPREL_L";
case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS";
@@ -688,7 +823,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
EVT PPCTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
if (!VT.isVector())
- return MVT::i32;
+ return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
return VT.changeVectorElementTypeToInteger();
}
@@ -717,15 +852,29 @@ static bool isConstantOrUndef(int Op, int Val) {
/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUHUM instruction.
-bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary) {
- if (!isUnary) {
+/// The ShuffleKind distinguishes between big-endian operations with
+/// two different inputs (0), either-endian operations with two identical
+/// inputs (1), and little-endian operations with two different inputs (2).
+/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
+bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
+ SelectionDAG &DAG) {
+ if (ShuffleKind == 0) {
+ if (DAG.getTarget().getDataLayout()->isLittleEndian())
+ return false;
for (unsigned i = 0; i != 16; ++i)
- if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
+ if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
return false;
- } else {
+ } else if (ShuffleKind == 2) {
+ if (!DAG.getTarget().getDataLayout()->isLittleEndian())
+ return false;
+ for (unsigned i = 0; i != 16; ++i)
+ if (!isConstantOrUndef(N->getMaskElt(i), i*2))
+ return false;
+ } else if (ShuffleKind == 1) {
+ unsigned j = DAG.getTarget().getDataLayout()->isLittleEndian() ? 0 : 1;
for (unsigned i = 0; i != 8; ++i)
- if (!isConstantOrUndef(N->getMaskElt(i), i*2+1) ||
- !isConstantOrUndef(N->getMaskElt(i+8), i*2+1))
+ if (!isConstantOrUndef(N->getMaskElt(i), i*2+j) ||
+ !isConstantOrUndef(N->getMaskElt(i+8), i*2+j))
return false;
}
return true;
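A hedged, standalone illustration (plain C++, not LLVM code) of the byte pattern accepted above for the big-endian two-input case (ShuffleKind 0): element i of the v16i8 mask must select byte 2*i+1, i.e. the odd bytes of the concatenated 32-byte input, which is how vpkuhum packs halfwords down to bytes. Undef lanes are modeled as -1.

#include <array>
#include <cassert>

static bool looksLikeBEVPKUHUM(const std::array<int, 16> &Mask) {
  for (int i = 0; i != 16; ++i)
    if (Mask[i] >= 0 && Mask[i] != 2 * i + 1)  // -1 stands for an undef lane
      return false;
  return true;
}

int main() {
  std::array<int, 16> Mask;
  for (int i = 0; i != 16; ++i)
    Mask[i] = 2 * i + 1;                       // <1,3,5,...,31>
  Mask[5] = -1;                                // an undef lane is still accepted
  assert(looksLikeBEVPKUHUM(Mask));
}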
@@ -733,18 +882,33 @@ bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary) {
/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUWUM instruction.
-bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary) {
- if (!isUnary) {
+/// The ShuffleKind distinguishes between big-endian operations with
+/// two different inputs (0), either-endian operations with two identical
+/// inputs (1), and little-endian operations with two different inputs (2).
+/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
+bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
+ SelectionDAG &DAG) {
+ if (ShuffleKind == 0) {
+ if (DAG.getTarget().getDataLayout()->isLittleEndian())
+ return false;
for (unsigned i = 0; i != 16; i += 2)
if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) ||
!isConstantOrUndef(N->getMaskElt(i+1), i*2+3))
return false;
- } else {
+ } else if (ShuffleKind == 2) {
+ if (!DAG.getTarget().getDataLayout()->isLittleEndian())
+ return false;
+ for (unsigned i = 0; i != 16; i += 2)
+ if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
+ !isConstantOrUndef(N->getMaskElt(i+1), i*2+1))
+ return false;
+ } else if (ShuffleKind == 1) {
+ unsigned j = DAG.getTarget().getDataLayout()->isLittleEndian() ? 0 : 2;
for (unsigned i = 0; i != 8; i += 2)
- if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) ||
- !isConstantOrUndef(N->getMaskElt(i+1), i*2+3) ||
- !isConstantOrUndef(N->getMaskElt(i+8), i*2+2) ||
- !isConstantOrUndef(N->getMaskElt(i+9), i*2+3))
+ if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
+ !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
+ !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
+ !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1))
return false;
}
return true;
@@ -754,8 +918,8 @@ bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary) {
///
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
unsigned LHSStart, unsigned RHSStart) {
- assert(N->getValueType(0) == MVT::v16i8 &&
- "PPC only supports shuffles by bytes!");
+ if (N->getValueType(0) != MVT::v16i8)
+ return false;
assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
"Unsupported merge size!");
@@ -771,29 +935,66 @@ static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
}
/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
-/// a VRGL* instruction with the specified unit size (1,2 or 4 bytes).
+/// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
+/// The ShuffleKind distinguishes between big-endian merges with two
+/// different inputs (0), either-endian merges with two identical inputs (1),
+/// and little-endian merges with two different inputs (2). For the latter,
+/// the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
- bool isUnary) {
- if (!isUnary)
- return isVMerge(N, UnitSize, 8, 24);
- return isVMerge(N, UnitSize, 8, 8);
+ unsigned ShuffleKind, SelectionDAG &DAG) {
+ if (DAG.getTarget().getDataLayout()->isLittleEndian()) {
+ if (ShuffleKind == 1) // unary
+ return isVMerge(N, UnitSize, 0, 0);
+ else if (ShuffleKind == 2) // swapped
+ return isVMerge(N, UnitSize, 0, 16);
+ else
+ return false;
+ } else {
+ if (ShuffleKind == 1) // unary
+ return isVMerge(N, UnitSize, 8, 8);
+ else if (ShuffleKind == 0) // normal
+ return isVMerge(N, UnitSize, 8, 24);
+ else
+ return false;
+ }
}
/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
-/// a VRGH* instruction with the specified unit size (1,2 or 4 bytes).
+/// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
+/// The ShuffleKind distinguishes between big-endian merges with two
+/// different inputs (0), either-endian merges with two identical inputs (1),
+/// and little-endian merges with two different inputs (2). For the latter,
+/// the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
- bool isUnary) {
- if (!isUnary)
- return isVMerge(N, UnitSize, 0, 16);
- return isVMerge(N, UnitSize, 0, 0);
+ unsigned ShuffleKind, SelectionDAG &DAG) {
+ if (DAG.getTarget().getDataLayout()->isLittleEndian()) {
+ if (ShuffleKind == 1) // unary
+ return isVMerge(N, UnitSize, 8, 8);
+ else if (ShuffleKind == 2) // swapped
+ return isVMerge(N, UnitSize, 8, 24);
+ else
+ return false;
+ } else {
+ if (ShuffleKind == 1) // unary
+ return isVMerge(N, UnitSize, 0, 0);
+ else if (ShuffleKind == 0) // normal
+ return isVMerge(N, UnitSize, 0, 16);
+ else
+ return false;
+ }
}
/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
/// amount, otherwise return -1.
-int PPC::isVSLDOIShuffleMask(SDNode *N, bool isUnary) {
- assert(N->getValueType(0) == MVT::v16i8 &&
- "PPC only supports shuffles by bytes!");
+/// The ShuffleKind distinguishes between big-endian operations with two
+/// different inputs (0), either-endian operations with two identical inputs
+/// (1), and little-endian operations with two different inputs (2). For the
+/// latter, the input operands are swapped (see PPCInstrAltivec.td).
+int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
+ SelectionDAG &DAG) {
+ if (N->getValueType(0) != MVT::v16i8)
+ return -1;
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
@@ -808,19 +1009,26 @@ int PPC::isVSLDOIShuffleMask(SDNode *N, bool isUnary) {
// numbered from this value.
unsigned ShiftAmt = SVOp->getMaskElt(i);
if (ShiftAmt < i) return -1;
+
ShiftAmt -= i;
+ bool isLE = DAG.getTarget().getDataLayout()->isLittleEndian();
- if (!isUnary) {
+ if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
// Check the rest of the elements to see if they are consecutive.
for (++i; i != 16; ++i)
if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
return -1;
- } else {
+ } else if (ShuffleKind == 1) {
// Check the rest of the elements to see if they are consecutive.
for (++i; i != 16; ++i)
if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
return -1;
- }
+ } else
+ return -1;
+
+ if (ShuffleKind == 2 && isLE)
+ ShiftAmt = 16 - ShiftAmt;
+
return ShiftAmt;
}
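A simplified sketch (plain C++, ignoring undef elements) of the shift-amount recovery above: a vsldoi-style mask has the form <s, s+1, ..., s+15>, and in the little-endian swapped-operand case (ShuffleKind 2) the returned amount is mirrored to 16 - s.

#include <array>
#include <cassert>

static int vsldoiShift(const std::array<int, 16> &Mask, bool SwappedLE) {
  int Shift = Mask[0];
  for (int i = 1; i != 16; ++i)
    if (Mask[i] != Shift + i)   // elements must be consecutive
      return -1;
  return SwappedLE ? 16 - Shift : Shift;
}

int main() {
  std::array<int, 16> Mask;
  for (int i = 0; i != 16; ++i)
    Mask[i] = 3 + i;                        // <3,4,...,18>
  assert(vsldoiShift(Mask, false) == 3);    // big-endian / unary result
  assert(vsldoiShift(Mask, true) == 13);    // little-endian, swapped inputs
}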
@@ -872,10 +1080,14 @@ bool PPC::isAllNegativeZeroVector(SDNode *N) {
/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
/// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
-unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize) {
+unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize,
+ SelectionDAG &DAG) {
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
assert(isSplatShuffleMask(SVOp, EltSize));
- return SVOp->getMaskElt(0) / EltSize;
+ if (DAG.getTarget().getDataLayout()->isLittleEndian())
+ return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
+ else
+ return SVOp->getMaskElt(0) / EltSize;
}
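The little-endian adjustment above is plain index mirroring; a small worked sketch (not LLVM code) of the arithmetic:

#include <cassert>

static unsigned vspltImmediate(unsigned FirstMaskElt, unsigned EltSize, bool LE) {
  unsigned Index = FirstMaskElt / EltSize;          // element being splatted
  return LE ? (16 / EltSize) - 1 - Index : Index;   // mirror the lane numbering on LE
}

int main() {
  // Splat of mask element 4 with 4-byte units: word 1 on big-endian,
  // but the mirrored word 2 when the vector is numbered little-endian.
  assert(vspltImmediate(4, 4, /*LE=*/false) == 1);
  assert(vspltImmediate(4, 4, /*LE=*/true) == 2);
}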
/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
@@ -883,7 +1095,7 @@ unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize) {
/// the constant being splatted. The ByteSize field indicates the number of
/// bytes of each element [124] -> [bhw].
SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
- SDValue OpVal(0, 0);
+ SDValue OpVal(nullptr, 0);
// If ByteSize of the splat is bigger than the element size of the
// build_vector, then we have a case where we are checking for a splat where
@@ -902,7 +1114,7 @@ SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();
- if (UniquedVals[i&(Multiple-1)].getNode() == 0)
+ if (!UniquedVals[i&(Multiple-1)].getNode())
UniquedVals[i&(Multiple-1)] = N->getOperand(i);
else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
return SDValue(); // no match.
@@ -917,21 +1129,21 @@ SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
bool LeadingZero = true;
bool LeadingOnes = true;
for (unsigned i = 0; i != Multiple-1; ++i) {
- if (UniquedVals[i].getNode() == 0) continue; // Must have been undefs.
+ if (!UniquedVals[i].getNode()) continue; // Must have been undefs.
LeadingZero &= cast<ConstantSDNode>(UniquedVals[i])->isNullValue();
LeadingOnes &= cast<ConstantSDNode>(UniquedVals[i])->isAllOnesValue();
}
// Finally, check the least significant entry.
if (LeadingZero) {
- if (UniquedVals[Multiple-1].getNode() == 0)
+ if (!UniquedVals[Multiple-1].getNode())
return DAG.getTargetConstant(0, MVT::i32); // 0,0,0,undef
int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue();
if (Val < 16)
return DAG.getTargetConstant(Val, MVT::i32); // 0,0,0,4 -> vspltisw(4)
}
if (LeadingOnes) {
- if (UniquedVals[Multiple-1].getNode() == 0)
+ if (!UniquedVals[Multiple-1].getNode())
return DAG.getTargetConstant(~0U, MVT::i32); // -1,-1,-1,undef
int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2)
@@ -944,13 +1156,13 @@ SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
// Check to see if this buildvec has a single non-undef value in its elements.
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
- if (OpVal.getNode() == 0)
+ if (!OpVal.getNode())
OpVal = N->getOperand(i);
else if (OpVal != N->getOperand(i))
return SDValue();
}
- if (OpVal.getNode() == 0) return SDValue(); // All UNDEF: use implicit def.
+ if (!OpVal.getNode()) return SDValue(); // All UNDEF: use implicit def.
unsigned ValSizeInBytes = EltSize;
uint64_t Value = 0;
@@ -999,7 +1211,7 @@ SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
/// sign extension from a 16-bit value. If so, this returns true and the
/// immediate.
static bool isIntS16Immediate(SDNode *N, short &Imm) {
- if (N->getOpcode() != ISD::Constant)
+ if (!isa<ConstantSDNode>(N))
return false;
Imm = (short)cast<ConstantSDNode>(N)->getZExtValue();
@@ -1038,12 +1250,12 @@ bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
// disjoint.
APInt LHSKnownZero, LHSKnownOne;
APInt RHSKnownZero, RHSKnownOne;
- DAG.ComputeMaskedBits(N.getOperand(0),
- LHSKnownZero, LHSKnownOne);
+ DAG.computeKnownBits(N.getOperand(0),
+ LHSKnownZero, LHSKnownOne);
if (LHSKnownZero.getBoolValue()) {
- DAG.ComputeMaskedBits(N.getOperand(1),
- RHSKnownZero, RHSKnownOne);
+ DAG.computeKnownBits(N.getOperand(1),
+ RHSKnownZero, RHSKnownOne);
// If all of the bits are known zero on the LHS or RHS, the add won't
// carry.
if (~(LHSKnownZero | RHSKnownZero) == 0) {
@@ -1143,12 +1355,18 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
// (for better address arithmetic) if the LHS and RHS of the OR are
// provably disjoint.
APInt LHSKnownZero, LHSKnownOne;
- DAG.ComputeMaskedBits(N.getOperand(0), LHSKnownZero, LHSKnownOne);
+ DAG.computeKnownBits(N.getOperand(0), LHSKnownZero, LHSKnownOne);
if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
// If all of the bits are known zero on the LHS or RHS, the add won't
// carry.
- Base = N.getOperand(0);
+ if (FrameIndexSDNode *FI =
+ dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
+ Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
+ fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
+ } else {
+ Base = N.getOperand(0);
+ }
Disp = DAG.getTargetConstant(imm, N.getValueType());
return true;
}
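A short standalone illustration (plain C++) of the known-bits reasoning used above: when the set bits of the two OR operands are provably disjoint, no carries can occur, so OR and ADD agree and the OR can be folded into the reg+imm addressing form.

#include <cassert>
#include <cstdint>

int main() {
  uint64_t Base = 0xFFFF0000;        // low 16 bits known to be zero
  uint64_t Imm  = 0x00001234;        // fits entirely in those low 16 bits
  assert((Base | Imm) == (Base + Imm));
}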
@@ -1161,7 +1379,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
short Imm;
if (isIntS16Immediate(CN, Imm) && (!Aligned || (Imm & 3) == 0)) {
Disp = DAG.getTargetConstant(Imm, CN->getValueType(0));
- Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
+ Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
CN->getValueType(0));
return true;
}
@@ -1212,7 +1430,7 @@ bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
}
// Otherwise, do it the hard way, using R0 as the base register.
- Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
+ Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
N.getValueType());
Index = N;
return true;
@@ -1303,14 +1521,14 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
/// GetLabelAccessInfo - Return true if we should reference labels using a
/// PICBase, set the HiOpFlags and LoOpFlags to the target MO flags.
static bool GetLabelAccessInfo(const TargetMachine &TM, unsigned &HiOpFlags,
- unsigned &LoOpFlags, const GlobalValue *GV = 0) {
+ unsigned &LoOpFlags,
+ const GlobalValue *GV = nullptr) {
HiOpFlags = PPCII::MO_HA;
LoOpFlags = PPCII::MO_LO;
- // Don't use the pic base if not in PIC relocation model. Or if we are on a
- // non-darwin platform. We don't support PIC on other platforms yet.
- bool isPIC = TM.getRelocationModel() == Reloc::PIC_ &&
- TM.getSubtarget<PPCSubtarget>().isDarwin();
+ // Don't use the pic base if not in PIC relocation model.
+ bool isPIC = TM.getRelocationModel() == Reloc::PIC_;
+
if (isPIC) {
HiOpFlags |= PPCII::MO_PIC_FLAG;
LoOpFlags |= PPCII::MO_PIC_FLAG;
@@ -1358,7 +1576,7 @@ SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
// 64-bit SVR4 ABI code is always position-independent.
// The actual address of the GlobalValue is stored in the TOC.
- if (PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) {
+ if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0);
return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(CP), MVT::i64, GA,
DAG.getRegister(PPC::X2, MVT::i64));
@@ -1366,6 +1584,15 @@ SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
unsigned MOHiFlag, MOLoFlag;
bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);
+
+ if (isPIC && Subtarget.isSVR4ABI()) {
+ SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(),
+ PPCII::MO_PIC_FLAG);
+ SDLoc DL(CP);
+ return DAG.getNode(PPCISD::TOC_ENTRY, DL, MVT::i32, GA,
+ DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT));
+ }
+
SDValue CPIHi =
DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOHiFlag);
SDValue CPILo =
@@ -1379,7 +1606,7 @@ SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
// 64-bit SVR4 ABI code is always position-independent.
// The actual address of the GlobalValue is stored in the TOC.
- if (PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) {
+ if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(JT), MVT::i64, GA,
DAG.getRegister(PPC::X2, MVT::i64));
@@ -1387,6 +1614,15 @@ SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
unsigned MOHiFlag, MOLoFlag;
bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);
+
+ if (isPIC && Subtarget.isSVR4ABI()) {
+ SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
+ PPCII::MO_PIC_FLAG);
+ SDLoc DL(GA);
+ return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(JT), PtrVT, GA,
+ DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT));
+ }
+
SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
return LowerLabelRef(JTIHi, JTILo, isPIC, DAG);
@@ -1416,7 +1652,7 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
SDLoc dl(GA);
const GlobalValue *GV = GA->getGlobal();
EVT PtrVT = getPointerTy();
- bool is64bit = PPCSubTarget.isPPC64();
+ bool is64bit = Subtarget.isPPC64();
TLSModel::Model Model = getTargetMachine().getTLSModel(GV);
@@ -1431,18 +1667,19 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
}
- if (!is64bit)
- llvm_unreachable("only local-exec is currently supported for ppc32");
-
if (Model == TLSModel::InitialExec) {
SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
SDValue TGATLS = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
PPCII::MO_TLS);
- SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
- SDValue TPOffsetHi = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl,
- PtrVT, GOTReg, TGA);
+ SDValue GOTPtr;
+ if (is64bit) {
+ SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
+ GOTPtr = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl,
+ PtrVT, GOTReg, TGA);
+ } else
+ GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl,
- PtrVT, TGA, TPOffsetHi);
+ PtrVT, TGA, GOTPtr);
return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
}
@@ -1506,7 +1743,7 @@ SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
// 64-bit SVR4 ABI code is always position-independent.
// The actual address of the GlobalValue is stored in the TOC.
- if (PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) {
+ if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
return DAG.getNode(PPCISD::TOC_ENTRY, DL, MVT::i64, GA,
DAG.getRegister(PPC::X2, MVT::i64));
@@ -1515,6 +1752,14 @@ SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
unsigned MOHiFlag, MOLoFlag;
bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag, GV);
+ if (isPIC && Subtarget.isSVR4ABI()) {
+ SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
+ GSDN->getOffset(),
+ PPCII::MO_PIC_FLAG);
+ return DAG.getNode(PPCISD::TOC_ENTRY, DL, MVT::i32, GA,
+ DAG.getNode(PPCISD::GlobalBaseReg, DL, MVT::i32));
+ }
+
SDValue GAHi =
DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
SDValue GALo =
@@ -1534,6 +1779,27 @@ SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
SDLoc dl(Op);
+ if (Op.getValueType() == MVT::v2i64) {
+ // When the operands themselves are v2i64 values, we need to do something
+ // special because VSX has no underlying comparison operations for these.
+ if (Op.getOperand(0).getValueType() == MVT::v2i64) {
+ // Equality can be handled by casting to the legal type for Altivec
+ // comparisons, everything else needs to be expanded.
+ if (CC == ISD::SETEQ || CC == ISD::SETNE) {
+ return DAG.getNode(ISD::BITCAST, dl, MVT::v2i64,
+ DAG.getSetCC(dl, MVT::v4i32,
+ DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0)),
+ DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(1)),
+ CC));
+ }
+
+ return SDValue();
+ }
+
+ // We handle most of these in the usual way.
+ return Op;
+ }
+
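A hedged aside (plain C++, not part of the patch) on why casting to v4i32 is legitimate groundwork for SETEQ/SETNE: two 64-bit lanes are equal exactly when both of their 32-bit halves are equal, so an Altivec word compare carries enough information to decide doubleword equality.

#include <cassert>
#include <cstdint>

static bool eqViaWordHalves(uint64_t A, uint64_t B) {
  bool LoEq = uint32_t(A) == uint32_t(B);
  bool HiEq = uint32_t(A >> 32) == uint32_t(B >> 32);
  return LoEq && HiEq;                 // equivalent to A == B
}

int main() {
  assert(eqViaWordHalves(0x123456789ULL, 0x123456789ULL));
  assert(!eqViaWordHalves(0x100000000ULL, 0x1ULL));
}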
// If we're comparing for equality to zero, expose the fact that this is
// implemented as a ctlz/srl pair on ppc, so that the dag combiner can
// fold the new nodes.
@@ -1727,17 +1993,13 @@ SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
Entry.Node = Nest; Args.push_back(Entry);
// Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
- TargetLowering::CallLoweringInfo CLI(Chain,
- Type::getVoidTy(*DAG.getContext()),
- false, false, false, false, 0,
- CallingConv::C,
- /*isTailCall=*/false,
- /*doesNotRet=*/false,
- /*isReturnValueUsed=*/true,
- DAG.getExternalSymbol("__trampoline_setup", PtrVT),
- Args, DAG, dl);
- std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(dl).setChain(Chain)
+ .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
+ DAG.getExternalSymbol("__trampoline_setup", PtrVT),
+ std::move(Args), 0);
+ std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
return CallResult.second;
}
@@ -1858,7 +2120,7 @@ bool llvm::CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
CCValAssign::LocInfo &LocInfo,
ISD::ArgFlagsTy &ArgFlags,
CCState &State) {
- static const uint16_t ArgRegs[] = {
+ static const MCPhysReg ArgRegs[] = {
PPC::R3, PPC::R4, PPC::R5, PPC::R6,
PPC::R7, PPC::R8, PPC::R9, PPC::R10,
};
@@ -1885,7 +2147,7 @@ bool llvm::CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
CCValAssign::LocInfo &LocInfo,
ISD::ArgFlagsTy &ArgFlags,
CCState &State) {
- static const uint16_t ArgRegs[] = {
+ static const MCPhysReg ArgRegs[] = {
PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
PPC::F8
};
@@ -1909,8 +2171,8 @@ bool llvm::CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
/// GetFPR - Get the set of FP registers that should be allocated for arguments,
/// on Darwin.
-static const uint16_t *GetFPR() {
- static const uint16_t FPR[] = {
+static const MCPhysReg *GetFPR() {
+ static const MCPhysReg FPR[] = {
PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13
};
@@ -1922,14 +2184,119 @@ static const uint16_t *GetFPR() {
/// the stack.
static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
unsigned PtrByteSize) {
- unsigned ArgSize = ArgVT.getSizeInBits()/8;
+ unsigned ArgSize = ArgVT.getStoreSize();
if (Flags.isByVal())
ArgSize = Flags.getByValSize();
- ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
+
+ // Round up to multiples of the pointer size, except for array members,
+ // which are always packed.
+ if (!Flags.isInConsecutiveRegs())
+ ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
return ArgSize;
}
+/// CalculateStackSlotAlignment - Calculates the alignment of this argument
+/// on the stack.
+static unsigned CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
+ ISD::ArgFlagsTy Flags,
+ unsigned PtrByteSize) {
+ unsigned Align = PtrByteSize;
+
+ // Altivec parameters are padded to a 16 byte boundary.
+ if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
+ ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
+ ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64)
+ Align = 16;
+
+ // ByVal parameters are aligned as requested.
+ if (Flags.isByVal()) {
+ unsigned BVAlign = Flags.getByValAlign();
+ if (BVAlign > PtrByteSize) {
+ if (BVAlign % PtrByteSize != 0)
+ llvm_unreachable(
+ "ByVal alignment is not a multiple of the pointer size");
+
+ Align = BVAlign;
+ }
+ }
+
+ // Array members are always packed to their original alignment.
+ if (Flags.isInConsecutiveRegs()) {
+ // If the array member was split into multiple registers, the first
+ // needs to be aligned to the size of the full type. (Except for
+ // ppcf128, which is only aligned as its f64 components.)
+ if (Flags.isSplit() && OrigVT != MVT::ppcf128)
+ Align = OrigVT.getStoreSize();
+ else
+ Align = ArgVT.getStoreSize();
+ }
+
+ return Align;
+}
+
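A worked sketch (plain C++, hypothetical argument list, 64-bit ELF assumptions with PtrByteSize == 8 and the linkage area omitted) of how these helpers advance the argument offset: scalars keep pointer-size alignment while vector arguments are padded to 16 bytes.

#include <cassert>

// Same round-up formula the patch uses for ArgOffset.
static unsigned alignTo(unsigned Offset, unsigned Align) {
  return ((Offset + Align - 1) / Align) * Align;
}

int main() {
  unsigned Offset = 0;
  Offset = alignTo(Offset, 8);   assert(Offset == 0);   Offset += 8;   // i64
  Offset = alignTo(Offset, 16);  assert(Offset == 16);  Offset += 16;  // v4i32
  Offset = alignTo(Offset, 8);   assert(Offset == 32);  Offset += 8;   // f64
  assert(Offset == 40);
}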
+/// CalculateStackSlotUsed - Return whether this argument will use its
+/// stack slot (instead of being passed in registers). ArgOffset,
+/// AvailableFPRs, and AvailableVRs must hold the current argument
+/// position, and will be updated to account for this argument.
+static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT,
+ ISD::ArgFlagsTy Flags,
+ unsigned PtrByteSize,
+ unsigned LinkageSize,
+ unsigned ParamAreaSize,
+ unsigned &ArgOffset,
+ unsigned &AvailableFPRs,
+ unsigned &AvailableVRs) {
+ bool UseMemory = false;
+
+ // Respect alignment of argument on the stack.
+ unsigned Align =
+ CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
+ ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
+ // If there's no space left in the argument save area, we must
+ // use memory (this check also catches zero-sized arguments).
+ if (ArgOffset >= LinkageSize + ParamAreaSize)
+ UseMemory = true;
+
+ // Allocate argument on the stack.
+ ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
+ if (Flags.isInConsecutiveRegsLast())
+ ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
+ // If we overran the argument save area, we must use memory
+ // (this check catches arguments passed partially in memory)
+ if (ArgOffset > LinkageSize + ParamAreaSize)
+ UseMemory = true;
+
+ // However, if the argument is actually passed in an FPR or a VR,
+ // we don't use memory after all.
+ if (!Flags.isByVal()) {
+ if (ArgVT == MVT::f32 || ArgVT == MVT::f64)
+ if (AvailableFPRs > 0) {
+ --AvailableFPRs;
+ return false;
+ }
+ if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
+ ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
+ ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64)
+ if (AvailableVRs > 0) {
+ --AvailableVRs;
+ return false;
+ }
+ }
+
+ return UseMemory;
+}
+
+/// EnsureStackAlignment - Round stack frame size up from NumBytes to
+/// ensure minimum alignment required for target.
+static unsigned EnsureStackAlignment(const TargetMachine &Target,
+ unsigned NumBytes) {
+ unsigned TargetAlign = Target.getFrameLowering()->getStackAlignment();
+ unsigned AlignMask = TargetAlign - 1;
+ NumBytes = (NumBytes + AlignMask) & ~AlignMask;
+ return NumBytes;
+}
+
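A small aside (plain C++): for a power-of-two alignment, the mask form used by EnsureStackAlignment is equivalent to the divide/multiply round-up used elsewhere in this file.

#include <cassert>

int main() {
  unsigned NumBytes = 116, TargetAlign = 16, AlignMask = TargetAlign - 1;
  unsigned Masked  = (NumBytes + AlignMask) & ~AlignMask;
  unsigned Divided = ((NumBytes + TargetAlign - 1) / TargetAlign) * TargetAlign;
  assert(Masked == 128 && Masked == Divided);
}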
SDValue
PPCTargetLowering::LowerFormalArguments(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
@@ -1938,8 +2305,8 @@ PPCTargetLowering::LowerFormalArguments(SDValue Chain,
SDLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals)
const {
- if (PPCSubTarget.isSVR4ABI()) {
- if (PPCSubTarget.isPPC64())
+ if (Subtarget.isSVR4ABI()) {
+ if (Subtarget.isPPC64())
return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins,
dl, DAG, InVals);
else
@@ -2005,7 +2372,8 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
getTargetMachine(), ArgLocs, *DAG.getContext());
// Reserve space for the linkage area on the stack.
- CCInfo.AllocateStack(PPCFrameLowering::getLinkageSize(false, false), PtrByteSize);
+ unsigned LinkageSize = PPCFrameLowering::getLinkageSize(false, false, false);
+ CCInfo.AllocateStack(LinkageSize, PtrByteSize);
CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
@@ -2020,6 +2388,7 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
switch (ValVT.getSimpleVT().SimpleTy) {
default:
llvm_unreachable("ValVT not supported by formal arguments Lowering");
+ case MVT::i1:
case MVT::i32:
RC = &PPC::GPRCRegClass;
break;
@@ -2027,7 +2396,10 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
RC = &PPC::F4RCRegClass;
break;
case MVT::f64:
- RC = &PPC::F8RCRegClass;
+ if (Subtarget.hasVSX())
+ RC = &PPC::VSFRCRegClass;
+ else
+ RC = &PPC::F8RCRegClass;
break;
case MVT::v16i8:
case MVT::v8i16:
@@ -2035,18 +2407,26 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
case MVT::v4f32:
RC = &PPC::VRRCRegClass;
break;
+ case MVT::v2f64:
+ case MVT::v2i64:
+ RC = &PPC::VSHRCRegClass;
+ break;
}
// Transform the arguments stored in physical registers into virtual ones.
unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
- SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, ValVT);
+ SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
+ ValVT == MVT::i1 ? MVT::i32 : ValVT);
+
+ if (ValVT == MVT::i1)
+ ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
InVals.push_back(ArgValue);
} else {
// Argument stored in memory.
assert(VA.isMemLoc());
- unsigned ArgSize = VA.getLocVT().getSizeInBits() / 8;
+ unsigned ArgSize = VA.getLocVT().getStoreSize();
int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(),
isImmutable);
@@ -2072,36 +2452,27 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
// Area that is at least reserved in the caller of this function.
unsigned MinReservedArea = CCByValInfo.getNextStackOffset();
+ MinReservedArea = std::max(MinReservedArea, LinkageSize);
// Set the size that is at least reserved in caller of this function. Tail
// call optimized function's reserved stack space needs to be aligned so that
// taking the difference between two stack areas will result in an aligned
// stack.
- PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
-
- MinReservedArea =
- std::max(MinReservedArea,
- PPCFrameLowering::getMinCallFrameSize(false, false));
-
- unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameLowering()->
- getStackAlignment();
- unsigned AlignMask = TargetAlign-1;
- MinReservedArea = (MinReservedArea + AlignMask) & ~AlignMask;
-
- FI->setMinReservedArea(MinReservedArea);
+ MinReservedArea = EnsureStackAlignment(MF.getTarget(), MinReservedArea);
+ FuncInfo->setMinReservedArea(MinReservedArea);
SmallVector<SDValue, 8> MemOps;
// If the function takes variable number of arguments, make a frame index for
// the start of the first vararg value... for expansion of llvm.va_start.
if (isVarArg) {
- static const uint16_t GPArgRegs[] = {
+ static const MCPhysReg GPArgRegs[] = {
PPC::R3, PPC::R4, PPC::R5, PPC::R6,
PPC::R7, PPC::R8, PPC::R9, PPC::R10,
};
const unsigned NumGPArgRegs = array_lengthof(GPArgRegs);
- static const uint16_t FPArgRegs[] = {
+ static const MCPhysReg FPArgRegs[] = {
PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
PPC::F8
};
@@ -2163,8 +2534,7 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
}
if (!MemOps.empty())
- Chain = DAG.getNode(ISD::TokenFactor, dl,
- MVT::Other, &MemOps[0], MemOps.size());
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
return Chain;
}
@@ -2182,33 +2552,7 @@ PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags, EVT ObjectVT,
ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
DAG.getValueType(ObjectVT));
- return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
-}
-
-// Set the size that is at least reserved in caller of this function. Tail
-// call optimized functions' reserved stack space needs to be aligned so that
-// taking the difference between two stack areas will result in an aligned
-// stack.
-void
-PPCTargetLowering::setMinReservedArea(MachineFunction &MF, SelectionDAG &DAG,
- unsigned nAltivecParamsAtEnd,
- unsigned MinReservedArea,
- bool isPPC64) const {
- PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
- // Add the Altivec parameters at the end, if needed.
- if (nAltivecParamsAtEnd) {
- MinReservedArea = ((MinReservedArea+15)/16)*16;
- MinReservedArea += 16*nAltivecParamsAtEnd;
- }
- MinReservedArea =
- std::max(MinReservedArea,
- PPCFrameLowering::getMinCallFrameSize(isPPC64, true));
- unsigned TargetAlign
- = DAG.getMachineFunction().getTarget().getFrameLowering()->
- getStackAlignment();
- unsigned AlignMask = TargetAlign-1;
- MinReservedArea = (MinReservedArea + AlignMask) & ~AlignMask;
- FI->setMinReservedArea(MinReservedArea);
+ return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
}
SDValue
@@ -2221,6 +2565,8 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
SmallVectorImpl<SDValue> &InVals) const {
// TODO: add description of PPC stack frame format, or at least some docs.
//
+ bool isELFv2ABI = Subtarget.isELFv2ABI();
+ bool isLittleEndian = Subtarget.isLittleEndian();
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
@@ -2231,63 +2577,75 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
(CallConv == CallingConv::Fast));
unsigned PtrByteSize = 8;
- unsigned ArgOffset = PPCFrameLowering::getLinkageSize(true, true);
- // Area that is at least reserved in caller of this function.
- unsigned MinReservedArea = ArgOffset;
+ unsigned LinkageSize = PPCFrameLowering::getLinkageSize(true, false,
+ isELFv2ABI);
- static const uint16_t GPR[] = {
+ static const MCPhysReg GPR[] = {
PPC::X3, PPC::X4, PPC::X5, PPC::X6,
PPC::X7, PPC::X8, PPC::X9, PPC::X10,
};
- static const uint16_t *FPR = GetFPR();
+ static const MCPhysReg *FPR = GetFPR();
- static const uint16_t VR[] = {
+ static const MCPhysReg VR[] = {
PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
};
+ static const MCPhysReg VSRH[] = {
+ PPC::VSH2, PPC::VSH3, PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7, PPC::VSH8,
+ PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13
+ };
const unsigned Num_GPR_Regs = array_lengthof(GPR);
const unsigned Num_FPR_Regs = 13;
const unsigned Num_VR_Regs = array_lengthof(VR);
- unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
+ // Do a first pass over the arguments to determine whether the ABI
+ // guarantees that our caller has allocated the parameter save area
+ // on its stack frame. In the ELFv1 ABI, this is always the case;
+ // in the ELFv2 ABI, it is true if this is a vararg function or if
+ // any parameter is located in a stack slot.
+
+ bool HasParameterArea = !isELFv2ABI || isVarArg;
+ unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
+ unsigned NumBytes = LinkageSize;
+ unsigned AvailableFPRs = Num_FPR_Regs;
+ unsigned AvailableVRs = Num_VR_Regs;
+ for (unsigned i = 0, e = Ins.size(); i != e; ++i)
+ if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags,
+ PtrByteSize, LinkageSize, ParamAreaSize,
+ NumBytes, AvailableFPRs, AvailableVRs))
+ HasParameterArea = true;
// Add DAG nodes to load the arguments or copy them out of registers. On
// entry to a function on PPC, the arguments start after the linkage area,
// although the first ones are often in registers.
+ unsigned ArgOffset = LinkageSize;
+ unsigned GPR_idx, FPR_idx = 0, VR_idx = 0;
SmallVector<SDValue, 8> MemOps;
- unsigned nAltivecParamsAtEnd = 0;
Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
unsigned CurArgIdx = 0;
for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
SDValue ArgVal;
bool needsLoad = false;
EVT ObjectVT = Ins[ArgNo].VT;
- unsigned ObjSize = ObjectVT.getSizeInBits()/8;
+ EVT OrigVT = Ins[ArgNo].ArgVT;
+ unsigned ObjSize = ObjectVT.getStoreSize();
unsigned ArgSize = ObjSize;
ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
std::advance(FuncArg, Ins[ArgNo].OrigArgIndex - CurArgIdx);
CurArgIdx = Ins[ArgNo].OrigArgIndex;
+ /* Respect alignment of argument on the stack. */
+ unsigned Align =
+ CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
+ ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
unsigned CurArgOffset = ArgOffset;
- // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary.
- if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 ||
- ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) {
- if (isVarArg) {
- MinReservedArea = ((MinReservedArea+15)/16)*16;
- MinReservedArea += CalculateStackSlotSize(ObjectVT,
- Flags,
- PtrByteSize);
- } else
- nAltivecParamsAtEnd++;
- } else
- // Calculate min reserved area.
- MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT,
- Flags,
- PtrByteSize);
+ /* Compute GPR index associated with argument offset. */
+ GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
+ GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
// FIXME the codegen can be much improved in some cases.
// We do not have to keep everything in memory.
@@ -2309,21 +2667,31 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
continue;
}
- unsigned BVAlign = Flags.getByValAlign();
- if (BVAlign > 8) {
- ArgOffset = ((ArgOffset+BVAlign-1)/BVAlign)*BVAlign;
- CurArgOffset = ArgOffset;
- }
-
- // All aggregates smaller than 8 bytes must be passed right-justified.
- if (ObjSize < PtrByteSize)
- CurArgOffset = CurArgOffset + (PtrByteSize - ObjSize);
- // The value of the object is its address.
- int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, true);
+ // Create a stack object covering all stack doublewords occupied
+ // by the argument. If the argument is (fully or partially) on
+ // the stack, or if the argument is fully in registers but the
+ // caller has allocated the parameter save area anyway, we can refer
+ // directly to the caller's stack frame. Otherwise, create a
+ // local copy in our own frame.
+ int FI;
+ if (HasParameterArea ||
+ ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
+ FI = MFI->CreateFixedObject(ArgSize, ArgOffset, true);
+ else
+ FI = MFI->CreateStackObject(ArgSize, Align, false);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
- InVals.push_back(FIN);
- if (ObjSize < 8) {
+ // Handle aggregates smaller than 8 bytes.
+ if (ObjSize < PtrByteSize) {
+ // The value of the object is its address, which differs from the
+ // address of the enclosing doubleword on big-endian systems.
+ SDValue Arg = FIN;
+ if (!isLittleEndian) {
+ SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, PtrVT);
+ Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
+ }
+ InVals.push_back(Arg);
+
if (GPR_idx != Num_GPR_Regs) {
unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
@@ -2332,25 +2700,19 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
if (ObjSize==1 || ObjSize==2 || ObjSize==4) {
EVT ObjType = (ObjSize == 1 ? MVT::i8 :
(ObjSize == 2 ? MVT::i16 : MVT::i32));
- Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
+ Store = DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
MachinePointerInfo(FuncArg),
ObjType, false, false, 0);
} else {
// For sizes that don't fit a truncating store (3, 5, 6, 7),
// store the whole register as-is to the parameter save area
- // slot. The address of the parameter was already calculated
- // above (InVals.push_back(FIN)) to be the right-justified
- // offset within the slot. For this store, we need a new
- // frame index that points at the beginning of the slot.
- int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
- SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+ // slot.
Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
MachinePointerInfo(FuncArg),
false, false, 0);
}
MemOps.push_back(Store);
- ++GPR_idx;
}
// Whether we copied from a register or not, advance the offset
// into the parameter save area by a full doubleword.
@@ -2358,44 +2720,48 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
continue;
}
+ // The value of the object is its address, which is the address of
+ // its first stack doubleword.
+ InVals.push_back(FIN);
+
+ // Store whatever pieces of the object are in registers to memory.
for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
- // Store whatever pieces of the object are in registers
- // to memory. ArgOffset will be the address of the beginning
- // of the object.
- if (GPR_idx != Num_GPR_Regs) {
- unsigned VReg;
- VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
- int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
- SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
- SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
- SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
- MachinePointerInfo(FuncArg, j),
- false, false, 0);
- MemOps.push_back(Store);
- ++GPR_idx;
- ArgOffset += PtrByteSize;
- } else {
- ArgOffset += ArgSize - j;
+ if (GPR_idx == Num_GPR_Regs)
break;
+
+ unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
+ SDValue Addr = FIN;
+ if (j) {
+ SDValue Off = DAG.getConstant(j, PtrVT);
+ Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
}
+ SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, Addr,
+ MachinePointerInfo(FuncArg, j),
+ false, false, 0);
+ MemOps.push_back(Store);
+ ++GPR_idx;
}
+ ArgOffset += ArgSize;
continue;
}
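The right-justification applied to small aggregates above reduces to a single address adjustment; a hedged sketch, assuming 8-byte parameter slots:

    #include <cstdint>
    // A 3-byte aggregate stored right-justified in an 8-byte slot occupies
    // bytes 5..7 on big-endian, so its address is SlotAddr + (8 - ObjSize);
    // on little-endian it starts at the slot address itself.
    uint64_t aggregateAddr(uint64_t SlotAddr, unsigned ObjSize,
                           bool IsLittleEndian) {
      return IsLittleEndian ? SlotAddr : SlotAddr + (8 - ObjSize);
    }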
switch (ObjectVT.getSimpleVT().SimpleTy) {
default: llvm_unreachable("Unhandled argument type!");
+ case MVT::i1:
case MVT::i32:
case MVT::i64:
+ // These can be scalar arguments or elements of an integer array type
+ // passed directly. Clang may use those instead of "byval" aggregate
+ // types to avoid forcing arguments to memory unnecessarily.
if (GPR_idx != Num_GPR_Regs) {
unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
- if (ObjectVT == MVT::i32)
+ if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
// PPC64 passes i8, i16, and i32 values in i64 registers. Promote
// value to MVT::i64 and then truncate to the correct register size.
ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
-
- ++GPR_idx;
} else {
needsLoad = true;
ArgSize = PtrByteSize;
@@ -2405,63 +2771,76 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
case MVT::f32:
case MVT::f64:
- // Every 8 bytes of argument space consumes one of the GPRs available for
- // argument passing.
- if (GPR_idx != Num_GPR_Regs) {
- ++GPR_idx;
- }
+ // These can be scalar arguments or elements of a float array type
+ // passed directly. The latter are used to implement ELFv2 homogeneous
+ // float aggregates.
if (FPR_idx != Num_FPR_Regs) {
unsigned VReg;
if (ObjectVT == MVT::f32)
VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
else
- VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass);
+ VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX() ?
+ &PPC::VSFRCRegClass :
+ &PPC::F8RCRegClass);
ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
++FPR_idx;
+ } else if (GPR_idx != Num_GPR_Regs) {
+ // This can only ever happen in the presence of f32 array types,
+ // since otherwise we never run out of FPRs before running out
+ // of GPRs.
+ unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
+ ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
+
+ if (ObjectVT == MVT::f32) {
+ if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
+ ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
+ DAG.getConstant(32, MVT::i32));
+ ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
+ }
+
+ ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
} else {
needsLoad = true;
- ArgSize = PtrByteSize;
}
- ArgOffset += 8;
+ // When passing an array of floats, the array occupies consecutive
+ // space in the argument area; only round up to the next doubleword
+ // at the end of the array. Otherwise, each float takes 8 bytes.
+ ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
+ ArgOffset += ArgSize;
+ if (Flags.isInConsecutiveRegsLast())
+ ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
break;
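The f32-in-GPR path above can be spelled out as ordinary bit manipulation; an illustrative scalar sketch (not the SelectionDAG form used by the patch):

    #include <cstdint>
    #include <cstring>
    // Recover an f32 array element from the 64-bit GPR image that holds it.
    // The element sits in the high 32 bits when its offset is doubleword-
    // aligned on big-endian, or is the second word on little-endian.
    float f32FromGPR(uint64_t GPRBits, unsigned ArgOffset, bool IsLittleEndian) {
      bool InHighHalf = (ArgOffset % 8) == (IsLittleEndian ? 4u : 0u);
      uint32_t Word = InHighHalf ? uint32_t(GPRBits >> 32) : uint32_t(GPRBits);
      float F;
      std::memcpy(&F, &Word, sizeof(F));  // reinterpret the bits as f32
      return F;
    }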
case MVT::v4f32:
case MVT::v4i32:
case MVT::v8i16:
case MVT::v16i8:
- // Note that vector arguments in registers don't reserve stack space,
- // except in varargs functions.
+ case MVT::v2f64:
+ case MVT::v2i64:
+ // These can be scalar arguments or elements of a vector array type
+ // passed directly. The latter are used to implement ELFv2 homogeneous
+ // vector aggregates.
if (VR_idx != Num_VR_Regs) {
- unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
+ unsigned VReg = (ObjectVT == MVT::v2f64 || ObjectVT == MVT::v2i64) ?
+ MF.addLiveIn(VSRH[VR_idx], &PPC::VSHRCRegClass) :
+ MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
- if (isVarArg) {
- while ((ArgOffset % 16) != 0) {
- ArgOffset += PtrByteSize;
- if (GPR_idx != Num_GPR_Regs)
- GPR_idx++;
- }
- ArgOffset += 16;
- GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64?
- }
++VR_idx;
} else {
- // Vectors are aligned.
- ArgOffset = ((ArgOffset+15)/16)*16;
- CurArgOffset = ArgOffset;
- ArgOffset += 16;
needsLoad = true;
}
+ ArgOffset += 16;
break;
}
// We need to load the argument to a virtual register if we determined
// above that we ran out of physical registers of the appropriate type.
if (needsLoad) {
- int FI = MFI->CreateFixedObject(ObjSize,
- CurArgOffset + (ArgSize - ObjSize),
- isImmutable);
+ if (ObjSize < ArgSize && !isLittleEndian)
+ CurArgOffset += ArgSize - ObjSize;
+ int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(),
false, false, false, 0);
@@ -2470,11 +2849,19 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
InVals.push_back(ArgVal);
}
+ // Area that is at least reserved in the caller of this function.
+ unsigned MinReservedArea;
+ if (HasParameterArea)
+ MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
+ else
+ MinReservedArea = LinkageSize;
+
// Set the size that is at least reserved in caller of this function. Tail
// call optimized functions' reserved stack space needs to be aligned so that
// taking the difference between two stack areas will result in an aligned
// stack.
- setMinReservedArea(MF, DAG, nAltivecParamsAtEnd, MinReservedArea, true);
+ MinReservedArea = EnsureStackAlignment(MF.getTarget(), MinReservedArea);
+ FuncInfo->setMinReservedArea(MinReservedArea);
// If the function takes variable number of arguments, make a frame index for
// the start of the first vararg value... for expansion of llvm.va_start.
@@ -2488,7 +2875,8 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
// If this function is vararg, store any remaining integer argument regs
// to their spots on the stack so that they may be loaded by dereferencing the
// result of va_next.
- for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
+ for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
+ GPR_idx < Num_GPR_Regs; ++GPR_idx) {
unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
@@ -2501,8 +2889,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
}
if (!MemOps.empty())
- Chain = DAG.getNode(ISD::TokenFactor, dl,
- MVT::Other, &MemOps[0], MemOps.size());
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
return Chain;
}
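For context, the "float array type" and "vector array type" cases threaded through the routine above are what ELFv2 homogeneous aggregates lower to; a hedged source-level illustration (example types only, not taken from the patch):

    // Under ELFv2, a small aggregate whose members all share one floating-point
    // (or vector) type is passed in consecutive FPRs (or VRs) rather than as a
    // byval memory blob, which is why the scalar cases above accept array
    // elements as well.
    struct Vec2 { float x, y; };                 // homogeneous f32 aggregate
    double dot(Vec2 a, Vec2 b) {
      // a.x, a.y, b.x, b.y arrive in FPRs; they spill to GPRs or the stack
      // only once the 13 parameter FPRs run out.
      return double(a.x) * b.x + double(a.y) * b.y;
    }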
@@ -2528,22 +2915,24 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
(CallConv == CallingConv::Fast));
unsigned PtrByteSize = isPPC64 ? 8 : 4;
- unsigned ArgOffset = PPCFrameLowering::getLinkageSize(isPPC64, true);
+ unsigned LinkageSize = PPCFrameLowering::getLinkageSize(isPPC64, true,
+ false);
+ unsigned ArgOffset = LinkageSize;
// Area that is at least reserved in caller of this function.
unsigned MinReservedArea = ArgOffset;
- static const uint16_t GPR_32[] = { // 32-bit registers.
+ static const MCPhysReg GPR_32[] = { // 32-bit registers.
PPC::R3, PPC::R4, PPC::R5, PPC::R6,
PPC::R7, PPC::R8, PPC::R9, PPC::R10,
};
- static const uint16_t GPR_64[] = { // 64-bit registers.
+ static const MCPhysReg GPR_64[] = { // 64-bit registers.
PPC::X3, PPC::X4, PPC::X5, PPC::X6,
PPC::X7, PPC::X8, PPC::X9, PPC::X10,
};
- static const uint16_t *FPR = GetFPR();
+ static const MCPhysReg *FPR = GetFPR();
- static const uint16_t VR[] = {
+ static const MCPhysReg VR[] = {
PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
};
@@ -2554,7 +2943,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
- const uint16_t *GPR = isPPC64 ? GPR_64 : GPR_32;
+ const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;
// In 32-bit non-varargs functions, the stack space for vectors is after the
// stack space for non-vectors. We do not use this space unless we have
@@ -2581,6 +2970,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
switch(ObjectVT.getSimpleVT().SimpleTy) {
default: llvm_unreachable("Unhandled argument type!");
+ case MVT::i1:
case MVT::i32:
case MVT::f32:
VecArgOffset += 4;
@@ -2704,11 +3094,16 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
switch (ObjectVT.getSimpleVT().SimpleTy) {
default: llvm_unreachable("Unhandled argument type!");
+ case MVT::i1:
case MVT::i32:
if (!isPPC64) {
if (GPR_idx != Num_GPR_Regs) {
unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
+
+ if (ObjectVT == MVT::i1)
+ ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgVal);
+
++GPR_idx;
} else {
needsLoad = true;
@@ -2724,7 +3119,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
- if (ObjectVT == MVT::i32)
+ if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
// PPC64 passes i8, i16, and i32 values in i64 registers. Promote
// value to MVT::i64 and then truncate to the correct register size.
ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
@@ -2813,11 +3208,21 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
InVals.push_back(ArgVal);
}
+ // Allow for Altivec parameters at the end, if needed.
+ if (nAltivecParamsAtEnd) {
+ MinReservedArea = ((MinReservedArea+15)/16)*16;
+ MinReservedArea += 16*nAltivecParamsAtEnd;
+ }
+
+ // Area that is at least reserved in the caller of this function.
+ MinReservedArea = std::max(MinReservedArea, LinkageSize + 8 * PtrByteSize);
+
// Set the size that is at least reserved in caller of this function. Tail
// call optimized functions' reserved stack space needs to be aligned so that
// taking the difference between two stack areas will result in an aligned
// stack.
- setMinReservedArea(MF, DAG, nAltivecParamsAtEnd, MinReservedArea, isPPC64);
+ MinReservedArea = EnsureStackAlignment(MF.getTarget(), MinReservedArea);
+ FuncInfo->setMinReservedArea(MinReservedArea);
// If the function takes variable number of arguments, make a frame index for
// the start of the first vararg value... for expansion of llvm.va_start.
@@ -2851,80 +3256,11 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
}
if (!MemOps.empty())
- Chain = DAG.getNode(ISD::TokenFactor, dl,
- MVT::Other, &MemOps[0], MemOps.size());
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
return Chain;
}
-/// CalculateParameterAndLinkageAreaSize - Get the size of the parameter plus
-/// linkage area for the Darwin ABI, or the 64-bit SVR4 ABI.
-static unsigned
-CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG,
- bool isPPC64,
- bool isVarArg,
- unsigned CC,
- const SmallVectorImpl<ISD::OutputArg>
- &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- unsigned &nAltivecParamsAtEnd) {
- // Count how many bytes are to be pushed on the stack, including the linkage
- // area, and parameter passing area. We start with 24/48 bytes, which is
- // prereserved space for [SP][CR][LR][3 x unused].
- unsigned NumBytes = PPCFrameLowering::getLinkageSize(isPPC64, true);
- unsigned NumOps = Outs.size();
- unsigned PtrByteSize = isPPC64 ? 8 : 4;
-
- // Add up all the space actually used.
- // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually
- // they all go in registers, but we must reserve stack space for them for
- // possible use by the caller. In varargs or 64-bit calls, parameters are
- // assigned stack space in order, with padding so Altivec parameters are
- // 16-byte aligned.
- nAltivecParamsAtEnd = 0;
- for (unsigned i = 0; i != NumOps; ++i) {
- ISD::ArgFlagsTy Flags = Outs[i].Flags;
- EVT ArgVT = Outs[i].VT;
- // Varargs Altivec parameters are padded to a 16 byte boundary.
- if (ArgVT==MVT::v4f32 || ArgVT==MVT::v4i32 ||
- ArgVT==MVT::v8i16 || ArgVT==MVT::v16i8) {
- if (!isVarArg && !isPPC64) {
- // Non-varargs Altivec parameters go after all the non-Altivec
- // parameters; handle those later so we know how much padding we need.
- nAltivecParamsAtEnd++;
- continue;
- }
- // Varargs and 64-bit Altivec parameters are padded to 16 byte boundary.
- NumBytes = ((NumBytes+15)/16)*16;
- }
- NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
- }
-
- // Allow for Altivec parameters at the end, if needed.
- if (nAltivecParamsAtEnd) {
- NumBytes = ((NumBytes+15)/16)*16;
- NumBytes += 16*nAltivecParamsAtEnd;
- }
-
- // The prolog code of the callee may store up to 8 GPR argument registers to
- // the stack, allowing va_start to index over them in memory if its varargs.
- // Because we cannot tell if this is needed on the caller side, we have to
- // conservatively assume that it is needed. As such, make sure we have at
- // least enough stack space for the caller to store the 8 GPRs.
- NumBytes = std::max(NumBytes,
- PPCFrameLowering::getMinCallFrameSize(isPPC64, true));
-
- // Tail call needs the stack to be aligned.
- if (CC == CallingConv::Fast && DAG.getTarget().Options.GuaranteedTailCallOpt){
- unsigned TargetAlign = DAG.getMachineFunction().getTarget().
- getFrameLowering()->getStackAlignment();
- unsigned AlignMask = TargetAlign-1;
- NumBytes = (NumBytes + AlignMask) & ~AlignMask;
- }
-
- return NumBytes;
-}
-
/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
/// adjusted to accommodate the arguments for the tailcall.
static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
@@ -2967,7 +3303,7 @@ PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
if (Flags.isByVal()) return false;
}
- // Non PIC/GOT tail calls are supported.
+ // Non-PIC/GOT tail calls are supported.
if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
return true;
@@ -2985,12 +3321,12 @@ PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
/// 32-bit value is representable in the immediate field of a BxA instruction.
static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
- if (!C) return 0;
+ if (!C) return nullptr;
int Addr = C->getZExtValue();
if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
SignExtend32<26>(Addr) != Addr)
- return 0; // Top 6 bits have to be sext of immediate.
+ return nullptr; // Top 6 bits have to be sext of immediate.
return DAG.getConstant((int)C->getZExtValue() >> 2,
DAG.getTargetLoweringInfo().getPointerTy()).getNode();
@@ -3096,7 +3432,7 @@ SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG,
SDLoc dl) const {
if (SPDiff) {
// Load the LR and FP stack slot for later adjusting.
- EVT VT = PPCSubTarget.isPPC64() ? MVT::i64 : MVT::i32;
+ EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
LROpOut = getReturnAddrFrameIndex(DAG);
LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo(),
false, false, false, 0);
@@ -3126,8 +3462,8 @@ CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
SDLoc dl) {
SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
- false, false, MachinePointerInfo(0),
- MachinePointerInfo(0));
+ false, false, MachinePointerInfo(),
+ MachinePointerInfo());
}
/// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
@@ -3172,8 +3508,7 @@ void PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,
StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
MemOpChains2, dl);
if (!MemOpChains2.empty())
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- &MemOpChains2[0], MemOpChains2.size());
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
// Store the return address to the appropriate stack slot.
Chain = EmitTailCallStoreFPAndRetAddr(DAG, MF, Chain, LROp, FPOp, SPDiff,
@@ -3190,10 +3525,11 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
SDValue &Chain, SDLoc dl, int SPDiff, bool isTailCall,
SmallVectorImpl<std::pair<unsigned, SDValue> > &RegsToPass,
SmallVectorImpl<SDValue> &Ops, std::vector<EVT> &NodeTys,
- const PPCSubtarget &PPCSubTarget) {
+ const PPCSubtarget &Subtarget) {
- bool isPPC64 = PPCSubTarget.isPPC64();
- bool isSVR4ABI = PPCSubTarget.isSVR4ABI();
+ bool isPPC64 = Subtarget.isPPC64();
+ bool isSVR4ABI = Subtarget.isSVR4ABI();
+ bool isELFv2ABI = Subtarget.isELFv2ABI();
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
NodeTys.push_back(MVT::Other); // Returns a chain
@@ -3202,11 +3538,12 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
unsigned CallOpc = PPCISD::CALL;
bool needIndirectCall = true;
- if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) {
- // If this is an absolute destination address, use the munged value.
- Callee = SDValue(Dest, 0);
- needIndirectCall = false;
- }
+ if (!isSVR4ABI || !isPPC64)
+ if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) {
+ // If this is an absolute destination address, use the munged value.
+ Callee = SDValue(Dest, 0);
+ needIndirectCall = false;
+ }
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
// XXX Work around for http://llvm.org/bugs/show_bug.cgi?id=5201
@@ -3214,15 +3551,18 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
// far-call stubs may be outside relocation limits for a BL instruction.
if (!DAG.getTarget().getSubtarget<PPCSubtarget>().isJITCodeModel()) {
unsigned OpFlags = 0;
- if (DAG.getTarget().getRelocationModel() != Reloc::Static &&
- (PPCSubTarget.getTargetTriple().isMacOSX() &&
- PPCSubTarget.getTargetTriple().isMacOSXVersionLT(10, 5)) &&
+ if ((DAG.getTarget().getRelocationModel() != Reloc::Static &&
+ (Subtarget.getTargetTriple().isMacOSX() &&
+ Subtarget.getTargetTriple().isMacOSXVersionLT(10, 5)) &&
(G->getGlobal()->isDeclaration() ||
- G->getGlobal()->isWeakForLinker())) {
+ G->getGlobal()->isWeakForLinker())) ||
+ (Subtarget.isTargetELF() && !isPPC64 &&
+ !G->getGlobal()->hasLocalLinkage() &&
+ DAG.getTarget().getRelocationModel() == Reloc::PIC_)) {
// PC-relative references to external symbols should go through $stub,
// unless we're building with the leopard linker or later, which
// automatically synthesizes these stubs.
- OpFlags = PPCII::MO_DARWIN_STUB;
+ OpFlags = PPCII::MO_PLT_OR_STUB;
}
// If the callee is a GlobalAddress/ExternalSymbol node (quite common,
@@ -3238,13 +3578,15 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
unsigned char OpFlags = 0;
- if (DAG.getTarget().getRelocationModel() != Reloc::Static &&
- (PPCSubTarget.getTargetTriple().isMacOSX() &&
- PPCSubTarget.getTargetTriple().isMacOSXVersionLT(10, 5))) {
+ if ((DAG.getTarget().getRelocationModel() != Reloc::Static &&
+ (Subtarget.getTargetTriple().isMacOSX() &&
+ Subtarget.getTargetTriple().isMacOSXVersionLT(10, 5))) ||
+ (Subtarget.isTargetELF() && !isPPC64 &&
+ DAG.getTarget().getRelocationModel() == Reloc::PIC_) ) {
// PC-relative references to external symbols should go through $stub,
// unless we're building with the leopard linker or later, which
// automatically synthesizes these stubs.
- OpFlags = PPCII::MO_DARWIN_STUB;
+ OpFlags = PPCII::MO_PLT_OR_STUB;
}
Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType(),
@@ -3257,7 +3599,7 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
// to do the call, we can't use PPCISD::CALL.
SDValue MTCTROps[] = {Chain, Callee, InFlag};
- if (isSVR4ABI && isPPC64) {
+ if (isSVR4ABI && isPPC64 && !isELFv2ABI) {
// Function pointers in the 64-bit SVR4 ABI do not point to the function
// entry point, but to the function descriptor (the function entry point
// address is part of the function descriptor though).
@@ -3287,8 +3629,8 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
// Load the address of the function entry point from the function
// descriptor.
SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other, MVT::Glue);
- SDValue LoadFuncPtr = DAG.getNode(PPCISD::LOAD, dl, VTs, MTCTROps,
- InFlag.getNode() ? 3 : 2);
+ SDValue LoadFuncPtr = DAG.getNode(PPCISD::LOAD, dl, VTs,
+ makeArrayRef(MTCTROps, InFlag.getNode() ? 3 : 2));
Chain = LoadFuncPtr.getValue(1);
InFlag = LoadFuncPtr.getValue(2);
@@ -3314,8 +3656,10 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
// additional register being allocated and an unnecessary move instruction
// being generated.
VTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue TOCOff = DAG.getIntPtrConstant(8);
+ SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, TOCOff);
SDValue LoadTOCPtr = DAG.getNode(PPCISD::LOAD_TOC, dl, VTs, Chain,
- Callee, InFlag);
+ AddTOC, InFlag);
Chain = LoadTOCPtr.getValue(0);
InFlag = LoadTOCPtr.getValue(1);
@@ -3324,8 +3668,8 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
MTCTROps[2] = InFlag;
}
- Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys, MTCTROps,
- 2 + (InFlag.getNode() != 0));
+ Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys,
+ makeArrayRef(MTCTROps, InFlag.getNode() ? 3 : 2));
InFlag = Chain.getValue(1);
NodeTys.clear();
@@ -3333,9 +3677,9 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
NodeTys.push_back(MVT::Glue);
Ops.push_back(Chain);
CallOpc = PPCISD::BCTRL;
- Callee.setNode(0);
+ Callee.setNode(nullptr);
// Add use of X11 (holding environment pointer)
- if (isSVR4ABI && isPPC64)
+ if (isSVR4ABI && isPPC64 && !isELFv2ABI)
Ops.push_back(DAG.getRegister(PPC::X11, PtrVT));
// Add CTR register as callee so a bctr can be emitted later.
if (isTailCall)
@@ -3357,6 +3701,10 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
Ops.push_back(DAG.getRegister(RegsToPass[i].first,
RegsToPass[i].second.getValueType()));
+ // Direct calls in the ELFv2 ABI need the TOC register live into the call.
+ if (Callee.getNode() && isELFv2ABI)
+ Ops.push_back(DAG.getRegister(PPC::X2, PtrVT));
+
return CallOpc;
}
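The descriptor-based sequence above is guarded by !isELFv2ABI because it relies on the ELFv1 function-descriptor layout; a sketch of that layout, with the offsets the code assumes (e.g. the Callee + 8 TOC load):

    #include <cstdint>
    // An ELFv1 64-bit function "pointer" addresses a three-doubleword descriptor.
    struct FunctionDescriptor {
      uint64_t EntryPoint;   // offset 0:  loaded and moved into CTR
      uint64_t TOCPointer;   // offset 8:  loaded via LOAD_TOC from Callee + 8
      uint64_t Environment;  // offset 16: passed to the callee in X11
    };
    // ELFv2 drops descriptors, which is why the same call instead expects the
    // callee address in R12 and keeps X2 (the TOC pointer) live across the call.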
@@ -3426,14 +3774,16 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
int SPDiff, unsigned NumBytes,
const SmallVectorImpl<ISD::InputArg> &Ins,
SmallVectorImpl<SDValue> &InVals) const {
+
+ bool isELFv2ABI = Subtarget.isELFv2ABI();
std::vector<EVT> NodeTys;
SmallVector<SDValue, 8> Ops;
unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, dl, SPDiff,
isTailCall, RegsToPass, Ops, NodeTys,
- PPCSubTarget);
+ Subtarget);
// Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
- if (isVarArg && PPCSubTarget.isSVR4ABI() && !PPCSubTarget.isPPC64())
+ if (isVarArg && Subtarget.isSVR4ABI() && !Subtarget.isPPC64())
Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
// When performing tail call optimization the callee pops its arguments off
@@ -3461,7 +3811,7 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
isa<ConstantSDNode>(Callee)) &&
"Expecting a global address, external symbol, absolute value or register");
- return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, &Ops[0], Ops.size());
+ return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, Ops);
}
// Add a NOP immediately after the branch instruction when using the 64-bit
@@ -3474,7 +3824,7 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
// same TOC), the NOP will remain unchanged.
bool needsTOCRestore = false;
- if (!isTailCall && PPCSubTarget.isSVR4ABI()&& PPCSubTarget.isPPC64()) {
+ if (!isTailCall && Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
if (CallOpc == PPCISD::BCTRL) {
// This is a call through a function pointer.
// Restore the caller TOC from the save area into R2.
@@ -3494,12 +3844,17 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
}
}
- Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size());
+ Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
InFlag = Chain.getValue(1);
if (needsTOCRestore) {
SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
- Chain = DAG.getNode(PPCISD::TOC_RESTORE, dl, VTs, Chain, InFlag);
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ SDValue StackPtr = DAG.getRegister(PPC::X1, PtrVT);
+ unsigned TOCSaveOffset = PPCFrameLowering::getTOCSaveOffset(isELFv2ABI);
+ SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset);
+ SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr, TOCOff);
+ Chain = DAG.getNode(PPCISD::LOAD_TOC, dl, VTs, Chain, AddTOC, InFlag);
InFlag = Chain.getValue(1);
}
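The TOC save slot referenced above sits in the linkage area, at a different offset in each ABI. A brief sketch of the layout this code depends on (believed values, not spelled out in the patch itself):

    //   linkage-area slot       ELFv1 offset   ELFv2 offset
    //   back chain                    0              0
    //   CR save                       8              8
    //   LR save                      16             16
    //   reserved doublewords         24, 32          -
    //   TOC save                     40             24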
@@ -3531,8 +3886,12 @@ PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
Ins, DAG);
- if (PPCSubTarget.isSVR4ABI()) {
- if (PPCSubTarget.isPPC64())
+ if (!isTailCall && CLI.CS && CLI.CS->isMustTailCall())
+ report_fatal_error("failed to perform tail call elimination on a call "
+ "site marked musttail");
+
+ if (Subtarget.isSVR4ABI()) {
+ if (Subtarget.isPPC64())
return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg,
isTailCall, Outs, OutVals, Ins,
dl, DAG, InVals);
@@ -3585,7 +3944,8 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
getTargetMachine(), ArgLocs, *DAG.getContext());
// Reserve space for the linkage area on the stack.
- CCInfo.AllocateStack(PPCFrameLowering::getLinkageSize(false, false), PtrByteSize);
+ CCInfo.AllocateStack(PPCFrameLowering::getLinkageSize(false, false, false),
+ PtrByteSize);
if (isVarArg) {
// Handle fixed and variable vector arguments differently.
@@ -3611,7 +3971,7 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
errs() << "Call operand #" << i << " has unhandled type "
<< EVT(ArgVT).getEVTString() << "\n";
#endif
- llvm_unreachable(0);
+ llvm_unreachable(nullptr);
}
}
} else {
@@ -3705,6 +4065,9 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
}
if (VA.isRegLoc()) {
+ if (Arg.getValueType() == MVT::i1)
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Arg);
+
seenFloatArg |= VA.getLocVT().isFloatingPoint();
// Put argument in a physical register.
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
@@ -3729,8 +4092,7 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
}
if (!MemOpChains.empty())
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- &MemOpChains[0], MemOpChains.size());
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
// Build a sequence of copy-to-reg nodes chained together with token chain
// and flag operands which copy the outgoing args into the appropriate regs.
@@ -3748,7 +4110,7 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
SDValue Ops[] = { Chain, InFlag };
Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET,
- dl, VTs, Ops, InFlag.getNode() ? 2 : 1);
+ dl, VTs, makeArrayRef(Ops, InFlag.getNode() ? 2 : 1));
InFlag = Chain.getValue(1);
}
@@ -3792,6 +4154,8 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
SDLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
+ bool isELFv2ABI = Subtarget.isELFv2ABI();
+ bool isLittleEndian = Subtarget.isLittleEndian();
unsigned NumOps = Outs.size();
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
@@ -3808,16 +4172,44 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
CallConv == CallingConv::Fast)
MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
- unsigned nAltivecParamsAtEnd = 0;
-
// Count how many bytes are to be pushed on the stack, including the linkage
- // area, and parameter passing area. We start with at least 48 bytes, which
- // is reserved space for [SP][CR][LR][3 x unused].
- // NOTE: For PPC64, nAltivecParamsAtEnd always remains zero as a result
- // of this call.
- unsigned NumBytes =
- CalculateParameterAndLinkageAreaSize(DAG, true, isVarArg, CallConv,
- Outs, OutVals, nAltivecParamsAtEnd);
+ // area, and parameter passing area. On ELFv1, the linkage area is 48 bytes
+ // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
+ // area is 32 bytes reserved space for [SP][CR][LR][TOC].
+ unsigned LinkageSize = PPCFrameLowering::getLinkageSize(true, false,
+ isELFv2ABI);
+ unsigned NumBytes = LinkageSize;
+
+ // Add up all the space actually used.
+ for (unsigned i = 0; i != NumOps; ++i) {
+ ISD::ArgFlagsTy Flags = Outs[i].Flags;
+ EVT ArgVT = Outs[i].VT;
+ EVT OrigVT = Outs[i].ArgVT;
+
+ // Respect alignment of argument on the stack.
+ unsigned Align =
+ CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
+ NumBytes = ((NumBytes + Align - 1) / Align) * Align;
+
+ NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
+ if (Flags.isInConsecutiveRegsLast())
+ NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
+ }
+
+ unsigned NumBytesActuallyUsed = NumBytes;
+
+ // The prolog code of the callee may store up to 8 GPR argument registers to
+ // the stack, allowing va_start to index over them in memory if it's varargs.
+ // Because we cannot tell if this is needed on the caller side, we have to
+ // conservatively assume that it is needed. As such, make sure we have at
+ // least enough stack space for the caller to store the 8 GPRs.
+ // FIXME: On ELFv2, it may be unnecessary to allocate the parameter area.
+ NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
+
+ // Tail call needs the stack to be aligned.
+ if (getTargetMachine().Options.GuaranteedTailCallOpt &&
+ CallConv == CallingConv::Fast)
+ NumBytes = EnsureStackAlignment(MF.getTarget(), NumBytes);
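A worked example of the size computation above, assuming the ELFv2 32-byte linkage area and a hypothetical signature:

    // void callee(long a, long b, double c, struct S s)  // S = nine longs, byval
    //   linkage area                     32
    //   a, b, c              3 * 8  =    24
    //   s (byval, 72 bytes)              72
    //   NumBytes                        128
    //   floor: max(128, 32 + 8 * 8) =   128
    // With only the three scalars, NumBytes would be 56, and the conservative
    // floor of LinkageSize + 8 * PtrByteSize = 96 would apply instead.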
// Calculate by how many bytes the stack has to be adjusted in case of tail
// call optimization.
@@ -3849,19 +4241,24 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
// memory. Also, if this is a vararg function, floating point operations
// must be stored to our stack, and loaded into integer regs as well, if
// any integer regs are available for argument passing.
- unsigned ArgOffset = PPCFrameLowering::getLinkageSize(true, true);
- unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
+ unsigned ArgOffset = LinkageSize;
+ unsigned GPR_idx, FPR_idx = 0, VR_idx = 0;
- static const uint16_t GPR[] = {
+ static const MCPhysReg GPR[] = {
PPC::X3, PPC::X4, PPC::X5, PPC::X6,
PPC::X7, PPC::X8, PPC::X9, PPC::X10,
};
- static const uint16_t *FPR = GetFPR();
+ static const MCPhysReg *FPR = GetFPR();
- static const uint16_t VR[] = {
+ static const MCPhysReg VR[] = {
PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
};
+ static const MCPhysReg VSRH[] = {
+ PPC::VSH2, PPC::VSH3, PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7, PPC::VSH8,
+ PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13
+ };
+
const unsigned NumGPRs = array_lengthof(GPR);
const unsigned NumFPRs = 13;
const unsigned NumVRs = array_lengthof(VR);
@@ -3873,6 +4270,17 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
for (unsigned i = 0; i != NumOps; ++i) {
SDValue Arg = OutVals[i];
ISD::ArgFlagsTy Flags = Outs[i].Flags;
+ EVT ArgVT = Outs[i].VT;
+ EVT OrigVT = Outs[i].ArgVT;
+
+ // Respect alignment of argument on the stack.
+ unsigned Align =
+ CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
+ ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
+
+ // Compute GPR index associated with argument offset.
+ GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
+ GPR_idx = std::min(GPR_idx, NumGPRs);
// PtrOff will be used to store the current argument to the stack if a
// register cannot be found for it.
@@ -3883,7 +4291,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
// Promote integers to 64-bit values.
- if (Arg.getValueType() == MVT::i32) {
+ if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
// FIXME: Should this use ANY_EXTEND if neither sext nor zext?
unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
@@ -3905,15 +4313,6 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
if (Size == 0)
continue;
- unsigned BVAlign = Flags.getByValAlign();
- if (BVAlign > 8) {
- if (BVAlign % PtrByteSize != 0)
- llvm_unreachable(
- "ByVal alignment is not a multiple of the pointer size");
-
- ArgOffset = ((ArgOffset+BVAlign-1)/BVAlign)*BVAlign;
- }
-
// All aggregates smaller than 8 bytes must be passed right-justified.
if (Size==1 || Size==2 || Size==4) {
EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
@@ -3922,7 +4321,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
MachinePointerInfo(), VT,
false, false, 0);
MemOpChains.push_back(Load.getValue(1));
- RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx], Load));
ArgOffset += PtrByteSize;
continue;
@@ -3930,9 +4329,12 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
}
if (GPR_idx == NumGPRs && Size < 8) {
- SDValue Const = DAG.getConstant(PtrByteSize - Size,
- PtrOff.getValueType());
- SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
+ SDValue AddPtr = PtrOff;
+ if (!isLittleEndian) {
+ SDValue Const = DAG.getConstant(PtrByteSize - Size,
+ PtrOff.getValueType());
+ AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
+ }
Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
CallSeqStart,
Flags, DAG, dl);
@@ -3967,8 +4369,11 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
// small aggregates, particularly for packed ones.
// FIXME: It would be preferable to use the slot in the
// parameter save area instead of a new local variable.
- SDValue Const = DAG.getConstant(8 - Size, PtrOff.getValueType());
- SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
+ SDValue AddPtr = PtrOff;
+ if (!isLittleEndian) {
+ SDValue Const = DAG.getConstant(8 - Size, PtrOff.getValueType());
+ AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
+ }
Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
CallSeqStart,
Flags, DAG, dl);
@@ -3978,7 +4383,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
MachinePointerInfo(),
false, false, false, 0);
MemOpChains.push_back(Load.getValue(1));
- RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx], Load));
// Done with this argument.
ArgOffset += PtrByteSize;
@@ -4007,10 +4412,14 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
switch (Arg.getSimpleValueType().SimpleTy) {
default: llvm_unreachable("Unexpected ValueType for argument!");
+ case MVT::i1:
case MVT::i32:
case MVT::i64:
+ // These can be scalar arguments or elements of an integer array type
+ // passed directly. Clang may use those instead of "byval" aggregate
+ // types to avoid forcing arguments to memory unnecessarily.
if (GPR_idx != NumGPRs) {
- RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx], Arg));
} else {
LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
true, isTailCall, false, MemOpChains,
@@ -4019,40 +4428,70 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
ArgOffset += PtrByteSize;
break;
case MVT::f32:
- case MVT::f64:
- if (FPR_idx != NumFPRs) {
+ case MVT::f64: {
+ // These can be scalar arguments or elements of a float array type
+ // passed directly. The latter are used to implement ELFv2 homogeneous
+ // float aggregates.
+
+ // Named arguments go into FPRs first, and once they overflow, the
+ // remaining arguments go into GPRs and then the parameter save area.
+ // Unnamed arguments for vararg functions always go to GPRs and
+ // then the parameter save area. For now, put all arguments to vararg
+ // routines always in both locations (FPR *and* GPR or stack slot).
+ bool NeedGPROrStack = isVarArg || FPR_idx == NumFPRs;
+
+ // First load the argument into the next available FPR.
+ if (FPR_idx != NumFPRs)
RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
- if (isVarArg) {
- // A single float or an aggregate containing only a single float
- // must be passed right-justified in the stack doubleword, and
- // in the GPR, if one is available.
- SDValue StoreOff;
- if (Arg.getSimpleValueType().SimpleTy == MVT::f32) {
- SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType());
- StoreOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
- } else
- StoreOff = PtrOff;
-
- SDValue Store = DAG.getStore(Chain, dl, Arg, StoreOff,
- MachinePointerInfo(), false, false, 0);
- MemOpChains.push_back(Store);
-
- // Float varargs are always shadowed in available integer registers
- if (GPR_idx != NumGPRs) {
- SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff,
- MachinePointerInfo(), false, false,
- false, 0);
- MemOpChains.push_back(Load.getValue(1));
- RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
- }
- } else if (GPR_idx != NumGPRs)
- // If we have any FPRs remaining, we may also have GPRs remaining.
- ++GPR_idx;
+ // Next, load the argument into GPR or stack slot if needed.
+ if (!NeedGPROrStack)
+ ;
+ else if (GPR_idx != NumGPRs) {
+ // In the non-vararg case, this can only ever happen in the
+ // presence of f32 array types, since otherwise we never run
+ // out of FPRs before running out of GPRs.
+ SDValue ArgVal;
+
+ // Double values are always passed in a single GPR.
+ if (Arg.getValueType() != MVT::f32) {
+ ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
+
+ // Non-array float values are extended and passed in a GPR.
+ } else if (!Flags.isInConsecutiveRegs()) {
+ ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
+ ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
+
+ // If we have an array of floats, we collect every odd element
+ // together with its predecessor into one GPR.
+ } else if (ArgOffset % PtrByteSize != 0) {
+ SDValue Lo, Hi;
+ Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]);
+ Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
+ if (!isLittleEndian)
+ std::swap(Lo, Hi);
+ ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
+
+ // The final element, if even, goes into the first half of a GPR.
+ } else if (Flags.isInConsecutiveRegsLast()) {
+ ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
+ ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
+ if (!isLittleEndian)
+ ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,
+ DAG.getConstant(32, MVT::i32));
+
+ // Non-final even elements are skipped; they will be handled
+ // together with the subsequent argument on the next go-around.
+ } else
+ ArgVal = SDValue();
+
+ if (ArgVal.getNode())
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx], ArgVal));
} else {
// Single-precision floating-point values are mapped to the
// second (rightmost) word of the stack doubleword.
- if (Arg.getValueType() == MVT::f32) {
+ if (Arg.getValueType() == MVT::f32 &&
+ !isLittleEndian && !Flags.isInConsecutiveRegs()) {
SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType());
PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
}
@@ -4061,27 +4500,32 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
true, isTailCall, false, MemOpChains,
TailCallArguments, dl);
}
- ArgOffset += 8;
+ // When passing an array of floats, the array occupies consecutive
+ // space in the argument area; only round up to the next doubleword
+ // at the end of the array. Otherwise, each float takes 8 bytes.
+ ArgOffset += (Arg.getValueType() == MVT::f32 &&
+ Flags.isInConsecutiveRegs()) ? 4 : 8;
+ if (Flags.isInConsecutiveRegsLast())
+ ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
break;
+ }
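The odd/even pairing above packs two consecutive f32 array elements into one GPR-sized value; a standalone scalar sketch of the same packing (not the BUILD_PAIR form):

    #include <cstdint>
    #include <cstring>
    #include <utility>
    // Combine the element at the even offset with its successor at the odd one.
    uint64_t packF32Pair(float EvenElt, float OddElt, bool IsLittleEndian) {
      uint32_t Lo, Hi;
      std::memcpy(&Lo, &EvenElt, sizeof(Lo));
      std::memcpy(&Hi, &OddElt, sizeof(Hi));
      if (!IsLittleEndian)
        std::swap(Lo, Hi);               // big-endian: earlier element goes high
      return (uint64_t(Hi) << 32) | Lo;  // Hi occupies the high half of the GPR
    }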
case MVT::v4f32:
case MVT::v4i32:
case MVT::v8i16:
case MVT::v16i8:
+ case MVT::v2f64:
+ case MVT::v2i64:
+ // These can be scalar arguments or elements of a vector array type
+ // passed directly. The latter are used to implement ELFv2 homogeneous
+ // vector aggregates.
+
+ // For a varargs call, named arguments go into VRs or on the stack as
+ // usual; unnamed arguments always go to the stack or the corresponding
+ // GPRs when within range. For now, we always put the value in both
+ // locations (or even all three).
if (isVarArg) {
- // These go aligned on the stack, or in the corresponding R registers
- // when within range. The Darwin PPC ABI doc claims they also go in
- // V registers; in fact gcc does this only for arguments that are
- // prototyped, not for those that match the ... We do it for all
- // arguments, seems to work.
- while (ArgOffset % 16 !=0) {
- ArgOffset += PtrByteSize;
- if (GPR_idx != NumGPRs)
- GPR_idx++;
- }
// We could elide this store in the case where the object fits
// entirely in R registers. Maybe later.
- PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
- DAG.getConstant(ArgOffset, PtrVT));
SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
MachinePointerInfo(), false, false, 0);
MemOpChains.push_back(Store);
@@ -4090,7 +4534,13 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
MachinePointerInfo(),
false, false, false, 0);
MemOpChains.push_back(Load.getValue(1));
- RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
+
+ unsigned VReg = (Arg.getSimpleValueType() == MVT::v2f64 ||
+ Arg.getSimpleValueType() == MVT::v2i64) ?
+ VSRH[VR_idx] : VR[VR_idx];
+ ++VR_idx;
+
+ RegsToPass.push_back(std::make_pair(VReg, Load));
}
ArgOffset += 16;
for (unsigned i=0; i<16; i+=PtrByteSize) {
@@ -4106,43 +4556,49 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
break;
}
- // Non-varargs Altivec params generally go in registers, but have
- // stack space allocated at the end.
+ // Non-varargs Altivec params go into VRs or on the stack.
if (VR_idx != NumVRs) {
- // Doesn't have GPR space allocated.
- RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
+ unsigned VReg = (Arg.getSimpleValueType() == MVT::v2f64 ||
+ Arg.getSimpleValueType() == MVT::v2i64) ?
+ VSRH[VR_idx] : VR[VR_idx];
+ ++VR_idx;
+
+ RegsToPass.push_back(std::make_pair(VReg, Arg));
} else {
LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
true, isTailCall, true, MemOpChains,
TailCallArguments, dl);
- ArgOffset += 16;
}
+ ArgOffset += 16;
break;
}
}
+ assert(NumBytesActuallyUsed == ArgOffset);
+ (void)NumBytesActuallyUsed;
+
if (!MemOpChains.empty())
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- &MemOpChains[0], MemOpChains.size());
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
// Check if this is an indirect call (MTCTR/BCTRL).
// See PrepareCall() for more information about calls through function
// pointers in the 64-bit SVR4 ABI.
if (!isTailCall &&
!dyn_cast<GlobalAddressSDNode>(Callee) &&
- !dyn_cast<ExternalSymbolSDNode>(Callee) &&
- !isBLACompatibleAddress(Callee, DAG)) {
+ !dyn_cast<ExternalSymbolSDNode>(Callee)) {
// Load r2 into a virtual register and store it to the TOC save area.
SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
// TOC save area offset.
- SDValue PtrOff = DAG.getIntPtrConstant(40);
+ unsigned TOCSaveOffset = PPCFrameLowering::getTOCSaveOffset(isELFv2ABI);
+ SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset);
SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr, MachinePointerInfo(),
false, false, 0);
- // R12 must contain the address of an indirect callee. This does not
- // mean the MTCTR instruction must use R12; it's easier to model this
- // as an extra parameter, so do that.
- RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
+ // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
+ // This does not mean the MTCTR instruction must use R12; it's easier
+ // to model this as an extra parameter, so do that.
+ if (isELFv2ABI)
+ RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
}
// Build a sequence of copy-to-reg nodes chained together with token chain
@@ -4190,15 +4646,56 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
CallConv == CallingConv::Fast)
MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
- unsigned nAltivecParamsAtEnd = 0;
-
// Count how many bytes are to be pushed on the stack, including the linkage
// area, and parameter passing area. We start with 24/48 bytes, which is
// prereserved space for [SP][CR][LR][3 x unused].
- unsigned NumBytes =
- CalculateParameterAndLinkageAreaSize(DAG, isPPC64, isVarArg, CallConv,
- Outs, OutVals,
- nAltivecParamsAtEnd);
+ unsigned LinkageSize = PPCFrameLowering::getLinkageSize(isPPC64, true,
+ false);
+ unsigned NumBytes = LinkageSize;
+
+ // Add up all the space actually used.
+ // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually
+ // they all go in registers, but we must reserve stack space for them for
+ // possible use by the caller. In varargs or 64-bit calls, parameters are
+ // assigned stack space in order, with padding so Altivec parameters are
+ // 16-byte aligned.
+ unsigned nAltivecParamsAtEnd = 0;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ ISD::ArgFlagsTy Flags = Outs[i].Flags;
+ EVT ArgVT = Outs[i].VT;
+ // Varargs Altivec parameters are padded to a 16 byte boundary.
+ if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
+ ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
+ ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64) {
+ if (!isVarArg && !isPPC64) {
+ // Non-varargs Altivec parameters go after all the non-Altivec
+ // parameters; handle those later so we know how much padding we need.
+ nAltivecParamsAtEnd++;
+ continue;
+ }
+ // Varargs and 64-bit Altivec parameters are padded to 16 byte boundary.
+ NumBytes = ((NumBytes+15)/16)*16;
+ }
+ NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
+ }
+
+ // Allow for Altivec parameters at the end, if needed.
+ if (nAltivecParamsAtEnd) {
+ NumBytes = ((NumBytes+15)/16)*16;
+ NumBytes += 16*nAltivecParamsAtEnd;
+ }
+
+ // The prolog code of the callee may store up to 8 GPR argument registers to
+ // the stack, allowing va_start to index over them in memory if it's varargs.
+ // Because we cannot tell if this is needed on the caller side, we have to
+ // conservatively assume that it is needed. As such, make sure we have at
+ // least enough stack space for the caller to store the 8 GPRs.
+ NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
+
+ // Tail call needs the stack to be aligned.
+ if (getTargetMachine().Options.GuaranteedTailCallOpt &&
+ CallConv == CallingConv::Fast)
+ NumBytes = EnsureStackAlignment(MF.getTarget(), NumBytes);
// Calculate by how many bytes the stack has to be adjusted in case of tail
// call optimization.
@@ -4234,20 +4731,20 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
// memory. Also, if this is a vararg function, floating point operations
// must be stored to our stack, and loaded into integer regs as well, if
// any integer regs are available for argument passing.
- unsigned ArgOffset = PPCFrameLowering::getLinkageSize(isPPC64, true);
+ unsigned ArgOffset = LinkageSize;
unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
- static const uint16_t GPR_32[] = { // 32-bit registers.
+ static const MCPhysReg GPR_32[] = { // 32-bit registers.
PPC::R3, PPC::R4, PPC::R5, PPC::R6,
PPC::R7, PPC::R8, PPC::R9, PPC::R10,
};
- static const uint16_t GPR_64[] = { // 64-bit registers.
+ static const MCPhysReg GPR_64[] = { // 64-bit registers.
PPC::X3, PPC::X4, PPC::X5, PPC::X6,
PPC::X7, PPC::X8, PPC::X9, PPC::X10,
};
- static const uint16_t *FPR = GetFPR();
+ static const MCPhysReg *FPR = GetFPR();
- static const uint16_t VR[] = {
+ static const MCPhysReg VR[] = {
PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
};
@@ -4255,7 +4752,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
const unsigned NumFPRs = 13;
const unsigned NumVRs = array_lengthof(VR);
- const uint16_t *GPR = isPPC64 ? GPR_64 : GPR_32;
+ const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;
SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
@@ -4338,9 +4835,13 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
switch (Arg.getSimpleValueType().SimpleTy) {
default: llvm_unreachable("Unexpected ValueType for argument!");
+ case MVT::i1:
case MVT::i32:
case MVT::i64:
if (GPR_idx != NumGPRs) {
+ if (Arg.getValueType() == MVT::i1)
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, PtrVT, Arg);
+
RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
} else {
LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
@@ -4481,8 +4982,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
}
if (!MemOpChains.empty())
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- &MemOpChains[0], MemOpChains.size());
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
// On Darwin, R12 must contain the address of an indirect callee. This does
// not mean the MTCTR instruction must use R12; it's easier to model this as
@@ -4570,8 +5070,7 @@ PPCTargetLowering::LowerReturn(SDValue Chain,
if (Flag.getNode())
RetOps.push_back(Flag);
- return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other,
- &RetOps[0], RetOps.size());
+ return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, RetOps);
}
SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG,
@@ -4609,8 +5108,8 @@ SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG,
SDValue
PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
- bool isPPC64 = PPCSubTarget.isPPC64();
- bool isDarwinABI = PPCSubTarget.isDarwinABI();
+ bool isPPC64 = Subtarget.isPPC64();
+ bool isDarwinABI = Subtarget.isDarwinABI();
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
// Get current frame pointer save index. The users of this index will be
@@ -4633,8 +5132,8 @@ PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const {
SDValue
PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
- bool isPPC64 = PPCSubTarget.isPPC64();
- bool isDarwinABI = PPCSubTarget.isDarwinABI();
+ bool isPPC64 = Subtarget.isPPC64();
+ bool isDarwinABI = Subtarget.isDarwinABI();
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
// Get current frame pointer save index. The users of this index will be
@@ -4674,7 +5173,7 @@ SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
// Build a DYNALLOC node.
SDValue Ops[3] = { Chain, NegSize, FPSIdx };
SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
- return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops, 3);
+ return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops);
}
SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
@@ -4692,6 +5191,55 @@ SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
Op.getOperand(0), Op.getOperand(1));
}
+SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
+ assert(Op.getValueType() == MVT::i1 &&
+ "Custom lowering only for i1 loads");
+
+ // First, load 8 bits into 32 bits, then truncate to 1 bit.
+
+ SDLoc dl(Op);
+ LoadSDNode *LD = cast<LoadSDNode>(Op);
+
+ SDValue Chain = LD->getChain();
+ SDValue BasePtr = LD->getBasePtr();
+ MachineMemOperand *MMO = LD->getMemOperand();
+
+ SDValue NewLD = DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(), Chain,
+ BasePtr, MVT::i8, MMO);
+ SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);
+
+ SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
+ return DAG.getMergeValues(Ops, dl);
+}
+
+SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
+ assert(Op.getOperand(1).getValueType() == MVT::i1 &&
+ "Custom lowering only for i1 stores");
+
+ // First, zero extend to 32 bits, then use a truncating store to 8 bits.
+
+ SDLoc dl(Op);
+ StoreSDNode *ST = cast<StoreSDNode>(Op);
+
+ SDValue Chain = ST->getChain();
+ SDValue BasePtr = ST->getBasePtr();
+ SDValue Value = ST->getValue();
+ MachineMemOperand *MMO = ST->getMemOperand();
+
+ Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(), Value);
+ return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
+}
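Taken together, the two routines above make an i1 behave like a byte in memory; a minimal sketch of the equivalent scalar semantics:

    #include <cstdint>
    // The i1 load widens to an 8-bit load and keeps only bit 0 (the TRUNCATE);
    // the i1 store zero-extends the bit and writes it back as a full byte.
    bool loadI1(const uint8_t *Addr)      { return (*Addr & 1) != 0; }
    void storeI1(uint8_t *Addr, bool Val) { *Addr = Val ? 1 : 0; }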
+
+// FIXME: Remove this once the ANDI glue bug is fixed:
+SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
+ assert(Op.getValueType() == MVT::i1 &&
+ "Custom lowering only for i1 results");
+
+ SDLoc DL(Op);
+ return DAG.getNode(PPCISD::ANDIo_1_GT_BIT, DL, MVT::i1,
+ Op.getOperand(0));
+}
+
/// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when
/// possible.
SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
@@ -4805,12 +5353,12 @@ SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
case MVT::i32:
Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIWZ :
- (PPCSubTarget.hasFPCVT() ? PPCISD::FCTIWUZ :
+ (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ :
PPCISD::FCTIDZ),
dl, MVT::f64, Src);
break;
case MVT::i64:
- assert((Op.getOpcode() == ISD::FP_TO_SINT || PPCSubTarget.hasFPCVT()) &&
+ assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) &&
"i64 FP_TO_UINT is supported only with FPCVT");
Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
PPCISD::FCTIDUZ,
@@ -4819,8 +5367,8 @@ SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
}
// Convert the FP value to an int value through memory.
- bool i32Stack = Op.getValueType() == MVT::i32 && PPCSubTarget.hasSTFIWX() &&
- (Op.getOpcode() == ISD::FP_TO_SINT || PPCSubTarget.hasFPCVT());
+ bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
+ (Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT());
SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(FI);
@@ -4833,8 +5381,7 @@ SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, 4);
SDValue Ops[] = { DAG.getEntryNode(), Tmp, FIPtr };
Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
- DAG.getVTList(MVT::Other), Ops, array_lengthof(Ops),
- MVT::i32, MMO);
+ DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);
} else
Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr,
MPI, false, false, 0);
@@ -4858,17 +5405,22 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
return SDValue();
- assert((Op.getOpcode() == ISD::SINT_TO_FP || PPCSubTarget.hasFPCVT()) &&
+ if (Op.getOperand(0).getValueType() == MVT::i1)
+ return DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Op.getOperand(0),
+ DAG.getConstantFP(1.0, Op.getValueType()),
+ DAG.getConstantFP(0.0, Op.getValueType()));
+
+ assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
"UINT_TO_FP is supported only with FPCVT");
// If we have FCFIDS, then use it when converting to single-precision.
// Otherwise, convert to double-precision and then round.
- unsigned FCFOp = (PPCSubTarget.hasFPCVT() && Op.getValueType() == MVT::f32) ?
+ unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) ?
(Op.getOpcode() == ISD::UINT_TO_FP ?
PPCISD::FCFIDUS : PPCISD::FCFIDS) :
(Op.getOpcode() == ISD::UINT_TO_FP ?
PPCISD::FCFIDU : PPCISD::FCFID);
- MVT FCFTy = (PPCSubTarget.hasFPCVT() && Op.getValueType() == MVT::f32) ?
+ MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) ?
MVT::f32 : MVT::f64;
if (Op.getOperand(0).getValueType() == MVT::i64) {
@@ -4884,7 +5436,7 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
// However, if -enable-unsafe-fp-math is in effect, accept double
// rounding to avoid the extra overhead.
if (Op.getValueType() == MVT::f32 &&
- !PPCSubTarget.hasFPCVT() &&
+ !Subtarget.hasFPCVT() &&
!DAG.getTarget().Options.UnsafeFPMath) {
// Twiddle input to make sure the low 11 bits are zero. (If this
@@ -4922,7 +5474,7 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
SDValue Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Bits);
- if (Op.getValueType() == MVT::f32 && !PPCSubTarget.hasFPCVT())
+ if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT())
FP = DAG.getNode(ISD::FP_ROUND, dl,
MVT::f32, FP, DAG.getIntPtrConstant(0));
return FP;
@@ -4939,7 +5491,7 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
SDValue Ld;
- if (PPCSubTarget.hasLFIWAX() || PPCSubTarget.hasFPCVT()) {
+ if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
int FrameIdx = FrameInfo->CreateStackObject(4, 4, false);
SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
@@ -4956,9 +5508,9 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
Ld = DAG.getMemIntrinsicNode(Op.getOpcode() == ISD::UINT_TO_FP ?
PPCISD::LFIWZX : PPCISD::LFIWAX,
dl, DAG.getVTList(MVT::f64, MVT::Other),
- Ops, 2, MVT::i32, MMO);
+ Ops, MVT::i32, MMO);
} else {
- assert(PPCSubTarget.isPPC64() &&
+ assert(Subtarget.isPPC64() &&
"i32->FP without LFIWAX supported only on PPC64");
int FrameIdx = FrameInfo->CreateStackObject(8, 8, false);
@@ -4980,7 +5532,7 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
// FCFID it and return it.
SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Ld);
- if (Op.getValueType() == MVT::f32 && !PPCSubTarget.hasFPCVT())
+ if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT())
FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP, DAG.getIntPtrConstant(0));
return FP;
}
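The i1 special case added to LowerINT_TO_FP above needs no conversion instruction at all; in scalar terms it is a bare select between the only two possible results. A sketch (i1ToFP is an invented name):

// Illustration only: an i1 input selects between two floating-point
// constants, mirroring the ISD::SELECT node built above.
static double i1ToFP(bool B) {
  return B ? 1.0 : 0.0;   // select(i1 B, 1.0, 0.0)
}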
@@ -5010,14 +5562,13 @@ SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
MachineFunction &MF = DAG.getMachineFunction();
EVT VT = Op.getValueType();
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
- SDValue MFFSreg, InFlag;
// Save FP Control Word to register
EVT NodeTys[] = {
MVT::f64, // return register
MVT::Glue // unused in this context
};
- SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, &InFlag, 0);
+ SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, None);
// Save FP register to stack slot
int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8, false);
@@ -5076,7 +5627,7 @@ SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
SDValue OutOps[] = { OutLo, OutHi };
- return DAG.getMergeValues(OutOps, 2, dl);
+ return DAG.getMergeValues(OutOps, dl);
}
SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
@@ -5105,7 +5656,7 @@ SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
SDValue OutOps[] = { OutLo, OutHi };
- return DAG.getMergeValues(OutOps, 2, dl);
+ return DAG.getMergeValues(OutOps, dl);
}
SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
@@ -5134,7 +5685,7 @@ SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, AmtVT),
Tmp4, Tmp6, ISD::SETLE);
SDValue OutOps[] = { OutLo, OutHi };
- return DAG.getMergeValues(OutOps, 2, dl);
+ return DAG.getMergeValues(OutOps, dl);
}
//===----------------------------------------------------------------------===//
@@ -5163,8 +5714,7 @@ static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT,
SDValue Elt = DAG.getConstant(Val, MVT::i32);
SmallVector<SDValue, 8> Ops;
Ops.assign(CanonicalVT.getVectorNumElements(), Elt);
- SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, dl, CanonicalVT,
- &Ops[0], Ops.size());
+ SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, dl, CanonicalVT, Ops);
return DAG.getNode(ISD::BITCAST, dl, ReqVT, Res);
}
@@ -5223,7 +5773,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
- assert(BVN != 0 && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
+ assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
// Check if this is a splat of a constant value.
APInt APSplatBits, APSplatUndef;
@@ -5271,10 +5821,14 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
// we convert to a pseudo that will be expanded later into one of
// the above forms.
SDValue Elt = DAG.getConstant(SextVal, MVT::i32);
- EVT VT = Op.getValueType();
- int Size = VT == MVT::v16i8 ? 1 : (VT == MVT::v8i16 ? 2 : 4);
- SDValue EltSize = DAG.getConstant(Size, MVT::i32);
- return DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
+ EVT VT = (SplatSize == 1 ? MVT::v16i8 :
+ (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
+ SDValue EltSize = DAG.getConstant(SplatSize, MVT::i32);
+ SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
+ if (VT == Op.getValueType())
+ return RetVal;
+ else
+ return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal);
}
// If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is
@@ -5293,6 +5847,22 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
}
+ // The remaining cases assume either big endian element order or
+ // a splat-size that equates to the element size of the vector
+ // to be built. An example that doesn't work for little endian is
+ // {0, -1, 0, -1, 0, -1, 0, -1} which has a splat size of 32 bits
+ // and a vector element size of 16 bits. The code below will
+ // produce the vector in big endian element order, which for little
+ // endian is {-1, 0, -1, 0, -1, 0, -1, 0}.
+
+ // For now, just avoid these optimizations in that case.
+ // FIXME: Develop correct optimizations for LE with mismatched
+ // splat and element sizes.
+
+ if (Subtarget.isLittleEndian() &&
+ SplatSize != Op.getValueType().getVectorElementType().getSizeInBits())
+ return SDValue();
+
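A worked illustration of the mismatch described in the comment above, assuming the demo runs on a little-endian host: a 32-bit splat of 0x0000FFFF, laid down in big-endian element order, reads back as {-1, 0, -1, 0, ...} when viewed as 16-bit elements instead of the intended {0, -1, 0, -1, ...}.

#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  // The v8i16 constant {0,-1,0,-1,0,-1,0,-1} is a 32-bit splat of 0x0000FFFF.
  uint32_t Splat[4] = {0x0000FFFFu, 0x0000FFFFu, 0x0000FFFFu, 0x0000FFFFu};
  int16_t Elems[8];
  std::memcpy(Elems, Splat, sizeof(Elems));
  for (int i = 0; i != 8; ++i)
    std::printf("%d ", Elems[i]);   // on a little-endian host: -1 0 -1 0 ...
  std::printf("\n");
  return 0;
}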
// Check to see if this is a wide variety of vsplti*, binop self cases.
static const signed char SplatCsts[] = {
-1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
@@ -5461,6 +6031,7 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
SDValue V2 = Op.getOperand(1);
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
EVT VT = Op.getValueType();
+ bool isLittleEndian = Subtarget.isLittleEndian();
// Cases that are handled by instructions that take permute immediates
// (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
@@ -5469,15 +6040,15 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
if (PPC::isSplatShuffleMask(SVOp, 1) ||
PPC::isSplatShuffleMask(SVOp, 2) ||
PPC::isSplatShuffleMask(SVOp, 4) ||
- PPC::isVPKUWUMShuffleMask(SVOp, true) ||
- PPC::isVPKUHUMShuffleMask(SVOp, true) ||
- PPC::isVSLDOIShuffleMask(SVOp, true) != -1 ||
- PPC::isVMRGLShuffleMask(SVOp, 1, true) ||
- PPC::isVMRGLShuffleMask(SVOp, 2, true) ||
- PPC::isVMRGLShuffleMask(SVOp, 4, true) ||
- PPC::isVMRGHShuffleMask(SVOp, 1, true) ||
- PPC::isVMRGHShuffleMask(SVOp, 2, true) ||
- PPC::isVMRGHShuffleMask(SVOp, 4, true)) {
+ PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) ||
+ PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) ||
+ PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 ||
+ PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||
+ PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||
+ PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||
+ PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
+ PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
+ PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG)) {
return Op;
}
}
@@ -5485,15 +6056,16 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
// Altivec has a variety of "shuffle immediates" that take two vector inputs
// and produce a fixed permutation. If any of these match, do not lower to
// VPERM.
- if (PPC::isVPKUWUMShuffleMask(SVOp, false) ||
- PPC::isVPKUHUMShuffleMask(SVOp, false) ||
- PPC::isVSLDOIShuffleMask(SVOp, false) != -1 ||
- PPC::isVMRGLShuffleMask(SVOp, 1, false) ||
- PPC::isVMRGLShuffleMask(SVOp, 2, false) ||
- PPC::isVMRGLShuffleMask(SVOp, 4, false) ||
- PPC::isVMRGHShuffleMask(SVOp, 1, false) ||
- PPC::isVMRGHShuffleMask(SVOp, 2, false) ||
- PPC::isVMRGHShuffleMask(SVOp, 4, false))
+ unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
+ if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) ||
+ PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) ||
+ PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 ||
+ PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
+ PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
+ PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
+ PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
+ PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
+ PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG))
return Op;
// Check to see if this is a shuffle of 4-byte values. If so, we can use our
@@ -5527,7 +6099,9 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
// If this shuffle can be expressed as a shuffle of 4-byte elements, use the
// perfect shuffle vector to determine if it is cost effective to do this as
// discrete instructions, or whether we should use a vperm.
- if (isFourElementShuffle) {
+ // For now, we skip this for little endian until such time as we have a
+ // little-endian perfect shuffle table.
+ if (isFourElementShuffle && !isLittleEndian) {
// Compute the index in the perfect shuffle table.
unsigned PFTableIndex =
PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
@@ -5556,6 +6130,11 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
// The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
// that it is in input element units, not in bytes. Convert now.
+
+ // For little endian, the order of the input vectors is reversed, and
+ // the permutation mask is complemented with respect to 31. This is
+ // necessary to produce proper semantics with the big-endian-biased vperm
+ // instruction.
EVT EltVT = V1.getValueType().getVectorElementType();
unsigned BytesPerElement = EltVT.getSizeInBits()/8;
@@ -5564,13 +6143,22 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
for (unsigned j = 0; j != BytesPerElement; ++j)
- ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
- MVT::i32));
+ if (isLittleEndian)
+ ResultMask.push_back(DAG.getConstant(31 - (SrcElt*BytesPerElement+j),
+ MVT::i32));
+ else
+ ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
+ MVT::i32));
}
SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8,
- &ResultMask[0], ResultMask.size());
- return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(), V1, V2, VPermMask);
+ ResultMask);
+ if (isLittleEndian)
+ return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
+ V2, V1, VPermMask);
+ else
+ return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
+ V1, V2, VPermMask);
}
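The little-endian handling just added amounts to a transformation of the permute control vector; the sketch below mirrors the byte-index computation (buildVPermMask is an invented helper), with the additional rule that the two source operands are swapped when the vperm itself is emitted.

#include <cstdint>
#include <vector>

// Illustration only.  PermMask indexes elements across both 16-byte source
// vectors, so SrcElt*BytesPerElt+j is always in [0, 31]; little endian
// complements each byte index with respect to 31.
std::vector<uint8_t> buildVPermMask(const int *PermMask, unsigned NumElts,
                                    unsigned BytesPerElt, bool LittleEndian) {
  std::vector<uint8_t> Mask;
  for (unsigned i = 0; i != NumElts; ++i) {
    unsigned SrcElt = PermMask[i] < 0 ? 0 : static_cast<unsigned>(PermMask[i]);
    for (unsigned j = 0; j != BytesPerElt; ++j) {
      unsigned Byte = SrcElt * BytesPerElt + j;
      Mask.push_back(static_cast<uint8_t>(LittleEndian ? 31 - Byte : Byte));
    }
  }
  return Mask;
}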
/// getAltivecCompareInfo - Given an intrinsic, return false if it is not an
@@ -5644,7 +6232,7 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
DAG.getConstant(CompareOpc, MVT::i32)
};
EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
- SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops, 3);
+ SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops);
// Now that we have the comparison, emit a copy from the CR to a GPR.
// This is flagged to the above dot comparison.
@@ -5685,6 +6273,30 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return Flags;
}
+SDValue PPCTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc dl(Op);
+ // For v2i64 (VSX), we can pattern match the v2i32 case (using fp <-> int
+ // instructions), but for smaller types, we need to first extend up to v2i32
+ // before going any further.
+ if (Op.getValueType() == MVT::v2i64) {
+ EVT ExtVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+ if (ExtVT != MVT::v2i32) {
+ Op = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0));
+ Op = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v4i32, Op,
+ DAG.getValueType(EVT::getVectorVT(*DAG.getContext(),
+ ExtVT.getVectorElementType(), 4)));
+ Op = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, Op);
+ Op = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v2i64, Op,
+ DAG.getValueType(MVT::v2i32));
+ }
+
+ return Op;
+ }
+
+ return SDValue();
+}
+
SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
@@ -5739,6 +6351,7 @@ SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
LHS, RHS, Zero, DAG, dl);
} else if (Op.getValueType() == MVT::v16i8) {
SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
+ bool isLittleEndian = Subtarget.isLittleEndian();
// Multiply the even 8-bit parts, producing 16-bit sums.
SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
@@ -5750,13 +6363,24 @@ SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
LHS, RHS, DAG, dl, MVT::v8i16);
OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);
- // Merge the results together.
+ // Merge the results together. Because vmuleub and vmuloub are
+ // instructions with a big-endian bias, we must reverse the
+ // element numbering and reverse the meaning of "odd" and "even"
+ // when generating little endian code.
int Ops[16];
for (unsigned i = 0; i != 8; ++i) {
- Ops[i*2 ] = 2*i+1;
- Ops[i*2+1] = 2*i+1+16;
+ if (isLittleEndian) {
+ Ops[i*2 ] = 2*i;
+ Ops[i*2+1] = 2*i+16;
+ } else {
+ Ops[i*2 ] = 2*i+1;
+ Ops[i*2+1] = 2*i+1+16;
+ }
}
- return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
+ if (isLittleEndian)
+ return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops);
+ else
+ return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
} else {
llvm_unreachable("Unknown mul to lower!");
}
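A scalar picture of why the even/odd merge above flips for little endian (the demo assumes it runs on a little-endian host): the merged v16i8 result wants the low-order byte of each 16-bit product, and that byte lives at offset 2*i+1 in big-endian byte numbering but at 2*i in little-endian numbering.

#include <cstdint>
#include <cstdio>

int main() {
  uint16_t Products[8] = {0x0102, 0x0304, 0x0506, 0x0708,
                          0x090A, 0x0B0C, 0x0D0E, 0x0F10};
  const uint8_t *Bytes = reinterpret_cast<const uint8_t *>(Products);
  const bool LittleEndian = true;          // set to match the host byte order
  for (unsigned i = 0; i != 8; ++i) {
    unsigned Idx = LittleEndian ? 2 * i : 2 * i + 1;
    std::printf("%02x ", Bytes[Idx]);      // the low byte of each product
  }
  std::printf("\n");                       // prints: 02 04 06 08 0a 0c 0e 10
  return 0;
}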
@@ -5776,21 +6400,24 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
case ISD::VASTART:
- return LowerVASTART(Op, DAG, PPCSubTarget);
+ return LowerVASTART(Op, DAG, Subtarget);
case ISD::VAARG:
- return LowerVAARG(Op, DAG, PPCSubTarget);
+ return LowerVAARG(Op, DAG, Subtarget);
case ISD::VACOPY:
- return LowerVACOPY(Op, DAG, PPCSubTarget);
+ return LowerVACOPY(Op, DAG, Subtarget);
- case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG, PPCSubTarget);
+ case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG, Subtarget);
case ISD::DYNAMIC_STACKALLOC:
- return LowerDYNAMIC_STACKALLOC(Op, DAG, PPCSubTarget);
+ return LowerDYNAMIC_STACKALLOC(Op, DAG, Subtarget);
case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG);
case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG);
+ case ISD::LOAD: return LowerLOAD(Op, DAG);
+ case ISD::STORE: return LowerSTORE(Op, DAG);
+ case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG);
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
case ISD::FP_TO_UINT:
case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG,
@@ -5809,6 +6436,7 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
+ case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG);
case ISD::MUL: return LowerMUL(Op, DAG);
// For counter-based loop handling.
@@ -5852,7 +6480,7 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
EVT VT = N->getValueType(0);
if (VT == MVT::i64) {
- SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG, PPCSubTarget);
+ SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG, Subtarget);
Results.push_back(NewNode);
Results.push_back(NewNode.getValue(1));
@@ -5914,8 +6542,7 @@ PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
F->insert(It, loopMBB);
F->insert(It, exitMBB);
exitMBB->splice(exitMBB->begin(), BB,
- llvm::next(MachineBasicBlock::iterator(MI)),
- BB->end());
+ std::next(MachineBasicBlock::iterator(MI)), BB->end());
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
MachineRegisterInfo &RegInfo = F->getRegInfo();
@@ -5964,7 +6591,7 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI,
// lwarx/stwcx are 32 bits. With the 32-bit atomics we can use address
// registers without caring whether they're 32 or 64, but here we're
// doing actual arithmetic on the addresses.
- bool is64bit = PPCSubTarget.isPPC64();
+ bool is64bit = Subtarget.isPPC64();
unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
const BasicBlock *LLVM_BB = BB->getBasicBlock();
@@ -5983,8 +6610,7 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI,
F->insert(It, loopMBB);
F->insert(It, exitMBB);
exitMBB->splice(exitMBB->begin(), BB,
- llvm::next(MachineBasicBlock::iterator(MI)),
- BB->end());
+ std::next(MachineBasicBlock::iterator(MI)), BB->end());
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
MachineRegisterInfo &RegInfo = F->getRegInfo();
@@ -6136,7 +6762,7 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
// Transfer the remainder of BB and its successor edges to sinkMBB.
sinkMBB->splice(sinkMBB->begin(), MBB,
- llvm::next(MachineBasicBlock::iterator(MI)), MBB->end());
+ std::next(MachineBasicBlock::iterator(MI)), MBB->end());
sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
// Note that the structure of the jmp_buf used here is not compatible
@@ -6160,7 +6786,7 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
unsigned LabelReg = MRI.createVirtualRegister(PtrRC);
unsigned BufReg = MI->getOperand(1).getReg();
- if (PPCSubTarget.isPPC64() && PPCSubTarget.isSVR4ABI()) {
+ if (Subtarget.isPPC64() && Subtarget.isSVR4ABI()) {
MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
.addReg(PPC::X2)
.addImm(TOCOffset)
@@ -6173,12 +6799,12 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
unsigned BaseReg;
if (MF->getFunction()->getAttributes().hasAttribute(
AttributeSet::FunctionIndex, Attribute::Naked))
- BaseReg = PPCSubTarget.isPPC64() ? PPC::X1 : PPC::R1;
+ BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
else
- BaseReg = PPCSubTarget.isPPC64() ? PPC::BP8 : PPC::BP;
+ BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;
MIB = BuildMI(*thisMBB, MI, DL,
- TII->get(PPCSubTarget.isPPC64() ? PPC::STD : PPC::STW))
+ TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
.addReg(BaseReg)
.addImm(BPOffset)
.addReg(BufReg);
@@ -6202,10 +6828,10 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
// mainMBB:
// mainDstReg = 0
MIB = BuildMI(mainMBB, DL,
- TII->get(PPCSubTarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
+ TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
// Store IP
- if (PPCSubTarget.isPPC64()) {
+ if (Subtarget.isPPC64()) {
MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
.addReg(LabelReg)
.addImm(LabelOffset)
@@ -6255,7 +6881,10 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
// Since FP is only updated here but NOT referenced, it's treated as GPR.
unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
- unsigned BP = (PVT == MVT::i64) ? PPC::X30 : PPC::R30;
+ unsigned BP = (PVT == MVT::i64) ? PPC::X30 :
+ (Subtarget.isSVR4ABI() &&
+ MF->getTarget().getRelocationModel() == Reloc::PIC_ ?
+ PPC::R29 : PPC::R30);
MachineInstrBuilder MIB;
@@ -6317,7 +6946,7 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
MIB.setMemRefs(MMOBegin, MMOEnd);
// Reload TOC
- if (PVT == MVT::i64 && PPCSubTarget.isSVR4ABI()) {
+ if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
.addImm(TOCOffset)
.addReg(BufReg);
@@ -6355,10 +6984,16 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineFunction *F = BB->getParent();
- if (PPCSubTarget.hasISEL() && (MI->getOpcode() == PPC::SELECT_CC_I4 ||
- MI->getOpcode() == PPC::SELECT_CC_I8)) {
+ if (Subtarget.hasISEL() && (MI->getOpcode() == PPC::SELECT_CC_I4 ||
+ MI->getOpcode() == PPC::SELECT_CC_I8 ||
+ MI->getOpcode() == PPC::SELECT_I4 ||
+ MI->getOpcode() == PPC::SELECT_I8)) {
SmallVector<MachineOperand, 2> Cond;
- Cond.push_back(MI->getOperand(4));
+ if (MI->getOpcode() == PPC::SELECT_CC_I4 ||
+ MI->getOpcode() == PPC::SELECT_CC_I8)
+ Cond.push_back(MI->getOperand(4));
+ else
+ Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));
Cond.push_back(MI->getOperand(1));
DebugLoc dl = MI->getDebugLoc();
@@ -6370,9 +7005,12 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MI->getOpcode() == PPC::SELECT_CC_I8 ||
MI->getOpcode() == PPC::SELECT_CC_F4 ||
MI->getOpcode() == PPC::SELECT_CC_F8 ||
- MI->getOpcode() == PPC::SELECT_CC_VRRC) {
-
-
+ MI->getOpcode() == PPC::SELECT_CC_VRRC ||
+ MI->getOpcode() == PPC::SELECT_I4 ||
+ MI->getOpcode() == PPC::SELECT_I8 ||
+ MI->getOpcode() == PPC::SELECT_F4 ||
+ MI->getOpcode() == PPC::SELECT_F8 ||
+ MI->getOpcode() == PPC::SELECT_VRRC) {
// The incoming instruction knows the destination vreg to set, the
// condition code register to branch on, the true/false values to
// select between, and a branch opcode to use.
@@ -6386,23 +7024,31 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *thisMBB = BB;
MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
- unsigned SelectPred = MI->getOperand(4).getImm();
DebugLoc dl = MI->getDebugLoc();
F->insert(It, copy0MBB);
F->insert(It, sinkMBB);
// Transfer the remainder of BB and its successor edges to sinkMBB.
sinkMBB->splice(sinkMBB->begin(), BB,
- llvm::next(MachineBasicBlock::iterator(MI)),
- BB->end());
+ std::next(MachineBasicBlock::iterator(MI)), BB->end());
sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
// Next, add the true and fallthrough blocks as its successors.
BB->addSuccessor(copy0MBB);
BB->addSuccessor(sinkMBB);
- BuildMI(BB, dl, TII->get(PPC::BCC))
- .addImm(SelectPred).addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);
+ if (MI->getOpcode() == PPC::SELECT_I4 ||
+ MI->getOpcode() == PPC::SELECT_I8 ||
+ MI->getOpcode() == PPC::SELECT_F4 ||
+ MI->getOpcode() == PPC::SELECT_F8 ||
+ MI->getOpcode() == PPC::SELECT_VRRC) {
+ BuildMI(BB, dl, TII->get(PPC::BC))
+ .addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);
+ } else {
+ unsigned SelectPred = MI->getOperand(4).getImm();
+ BuildMI(BB, dl, TII->get(PPC::BCC))
+ .addImm(SelectPred).addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);
+ }
// copy0MBB:
// %FalseValue = ...
@@ -6458,13 +7104,13 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
BB = EmitAtomicBinary(MI, BB, true, PPC::XOR8);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
- BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ANDC);
+ BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
- BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ANDC);
+ BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
- BB = EmitAtomicBinary(MI, BB, false, PPC::ANDC);
+ BB = EmitAtomicBinary(MI, BB, false, PPC::NAND);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
- BB = EmitAtomicBinary(MI, BB, true, PPC::ANDC8);
+ BB = EmitAtomicBinary(MI, BB, true, PPC::NAND8);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
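For reference, the NAND opcode substitution a few lines above matters because nand and andc compute different functions. A scalar sketch (helper names invented):

#include <cstdint>

// Illustration only: atomic nand must produce ~(Old & Val); andc would
// instead produce Old & ~Val.
static uint32_t nandOp(uint32_t Old, uint32_t Val) { return ~(Old & Val); }
static uint32_t andcOp(uint32_t Old, uint32_t Val) { return Old & ~Val; }
// Example: Old = 0b1100, Val = 0b1010 -> nandOp = 0xFFFFFFF7, andcOp = 0b0100.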
@@ -6504,8 +7150,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
F->insert(It, midMBB);
F->insert(It, exitMBB);
exitMBB->splice(exitMBB->begin(), BB,
- llvm::next(MachineBasicBlock::iterator(MI)),
- BB->end());
+ std::next(MachineBasicBlock::iterator(MI)), BB->end());
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
// thisMBB:
@@ -6556,7 +7201,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
// We must use 64-bit registers for addresses when targeting 64-bit,
// since we're actually doing arithmetic on them. Other registers
// can be 32-bit.
- bool is64bit = PPCSubTarget.isPPC64();
+ bool is64bit = Subtarget.isPPC64();
bool is8bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
unsigned dest = MI->getOperand(0).getReg();
@@ -6575,8 +7220,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
F->insert(It, midMBB);
F->insert(It, exitMBB);
exitMBB->splice(exitMBB->begin(), BB,
- llvm::next(MachineBasicBlock::iterator(MI)),
- BB->end());
+ std::next(MachineBasicBlock::iterator(MI)), BB->end());
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
MachineRegisterInfo &RegInfo = F->getRegInfo();
@@ -6725,6 +7369,27 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
// Restore FPSCR value.
BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF)).addImm(1).addReg(MFFSReg);
+ } else if (MI->getOpcode() == PPC::ANDIo_1_EQ_BIT ||
+ MI->getOpcode() == PPC::ANDIo_1_GT_BIT ||
+ MI->getOpcode() == PPC::ANDIo_1_EQ_BIT8 ||
+ MI->getOpcode() == PPC::ANDIo_1_GT_BIT8) {
+ unsigned Opcode = (MI->getOpcode() == PPC::ANDIo_1_EQ_BIT8 ||
+ MI->getOpcode() == PPC::ANDIo_1_GT_BIT8) ?
+ PPC::ANDIo8 : PPC::ANDIo;
+ bool isEQ = (MI->getOpcode() == PPC::ANDIo_1_EQ_BIT ||
+ MI->getOpcode() == PPC::ANDIo_1_EQ_BIT8);
+
+ MachineRegisterInfo &RegInfo = F->getRegInfo();
+ unsigned Dest = RegInfo.createVirtualRegister(Opcode == PPC::ANDIo ?
+ &PPC::GPRCRegClass :
+ &PPC::G8RCRegClass);
+
+ DebugLoc dl = MI->getDebugLoc();
+ BuildMI(*BB, MI, dl, TII->get(Opcode), Dest)
+ .addReg(MI->getOperand(1).getReg()).addImm(1);
+ BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY),
+ MI->getOperand(0).getReg())
+ .addReg(isEQ ? PPC::CR0EQ : PPC::CR0GT);
} else {
llvm_unreachable("Unexpected instr type to insert");
}
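In scalar terms, the ANDIo_1_*_BIT expansion just inserted evaluates one of the predicates below (a sketch; andi. sets CR0 from a signed comparison of the masked value with zero, and the trailing COPY picks out the EQ or GT bit):

#include <cstdint>

// Illustration only:
//   CR0.GT after "andi. Dest, Src, 1"  <=>  (Src & 1) != 0   (ANDIo_1_GT_BIT)
//   CR0.EQ after "andi. Dest, Src, 1"  <=>  (Src & 1) == 0   (ANDIo_1_EQ_BIT)
static bool andio1GTBit(uint64_t Src) { return (Src & 1) != 0; }
static bool andio1EQBit(uint64_t Src) { return (Src & 1) == 0; }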
@@ -6744,9 +7409,10 @@ SDValue PPCTargetLowering::DAGCombineFastRecip(SDValue Op,
EVT VT = Op.getValueType();
- if ((VT == MVT::f32 && PPCSubTarget.hasFRES()) ||
- (VT == MVT::f64 && PPCSubTarget.hasFRE()) ||
- (VT == MVT::v4f32 && PPCSubTarget.hasAltivec())) {
+ if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
+ (VT == MVT::f64 && Subtarget.hasFRE()) ||
+ (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
+ (VT == MVT::v2f64 && Subtarget.hasVSX())) {
// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
// For the reciprocal, we need to find the zero of the function:
@@ -6759,7 +7425,7 @@ SDValue PPCTargetLowering::DAGCombineFastRecip(SDValue Op,
// correct after every iteration. The minimum architected relative
// accuracy is 2^-5. When hasRecipPrec(), this is 2^-14. IEEE float has
// 23 digits and double has 52 digits.
- int Iterations = PPCSubTarget.hasRecipPrec() ? 1 : 3;
+ int Iterations = Subtarget.hasRecipPrec() ? 1 : 3;
if (VT.getScalarType() == MVT::f64)
++Iterations;
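A scalar sketch of the refinement loop described above (refineRecip is an invented name; the hard-coded starting estimate stands in for what fre/fres would return):

#include <cstdio>

// Newton-Raphson for 1/D: with E an estimate of 1/D, each update
// E' = E * (2 - D * E) roughly doubles the number of correct bits, which is
// why a ~2^-5 estimate gets 3 iterations (plus one more for f64) while a
// ~2^-14 estimate (hasRecipPrec) gets only 1.
static double refineRecip(double D, double Est, int Iterations) {
  for (int i = 0; i < Iterations; ++i)
    Est = Est * (2.0 - D * Est);
  return Est;
}

int main() {
  std::printf("%.17g\n", refineRecip(3.0, 0.34, 4));  // ~0.33333333333333331
  return 0;
}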
@@ -6806,9 +7472,10 @@ SDValue PPCTargetLowering::DAGCombineFastRecipFSQRT(SDValue Op,
EVT VT = Op.getValueType();
- if ((VT == MVT::f32 && PPCSubTarget.hasFRSQRTES()) ||
- (VT == MVT::f64 && PPCSubTarget.hasFRSQRTE()) ||
- (VT == MVT::v4f32 && PPCSubTarget.hasAltivec())) {
+ if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
+ (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
+ (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
+ (VT == MVT::v2f64 && Subtarget.hasVSX())) {
// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
// For the reciprocal sqrt, we need to find the zero of the function:
@@ -6821,7 +7488,7 @@ SDValue PPCTargetLowering::DAGCombineFastRecipFSQRT(SDValue Op,
// correct after every iteration. The minimum architected relative
// accuracy is 2^-5. When hasRecipPrec(), this is 2^-14. IEEE float has
// 23 digits and double has 52 digits.
- int Iterations = PPCSubTarget.hasRecipPrec() ? 1 : 3;
+ int Iterations = Subtarget.hasRecipPrec() ? 1 : 3;
if (VT.getScalarType() == MVT::f64)
++Iterations;
@@ -6899,8 +7566,8 @@ static bool isConsecutiveLS(LSBaseSDNode *LS, LSBaseSDNode *Base,
return true;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- const GlobalValue *GV1 = NULL;
- const GlobalValue *GV2 = NULL;
+ const GlobalValue *GV1 = nullptr;
+ const GlobalValue *GV2 = nullptr;
int64_t Offset1 = 0;
int64_t Offset2 = 0;
bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
@@ -6938,10 +7605,9 @@ static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
if (!Visited.count(ChainLD->getChain().getNode()))
Queue.push_back(ChainLD->getChain().getNode());
} else if (ChainNext->getOpcode() == ISD::TokenFactor) {
- for (SDNode::op_iterator O = ChainNext->op_begin(),
- OE = ChainNext->op_end(); O != OE; ++O)
- if (!Visited.count(O->getNode()))
- Queue.push_back(O->getNode());
+ for (const SDUse &O : ChainNext->ops())
+ if (!Visited.count(O.getNode()))
+ Queue.push_back(O.getNode());
} else
LoadRoots.insert(ChainNext);
}
@@ -6979,6 +7645,534 @@ static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
return false;
}
+SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+ SDLoc dl(N);
+
+ assert(Subtarget.useCRBits() &&
+ "Expecting to be tracking CR bits");
+ // If we're tracking CR bits, we need to be careful that we don't have:
+ // trunc(binary-ops(zext(x), zext(y)))
+ // or
+ // trunc(binary-ops(binary-ops(zext(x), zext(y)), ...))
+ // such that we're unnecessarily moving things into GPRs when it would be
+ // better to keep them in CR bits.
+
+ // Note that trunc here can be an actual i1 trunc, or can be the effective
+ // truncation that comes from a setcc or select_cc.
+ if (N->getOpcode() == ISD::TRUNCATE &&
+ N->getValueType(0) != MVT::i1)
+ return SDValue();
+
+ if (N->getOperand(0).getValueType() != MVT::i32 &&
+ N->getOperand(0).getValueType() != MVT::i64)
+ return SDValue();
+
+ if (N->getOpcode() == ISD::SETCC ||
+ N->getOpcode() == ISD::SELECT_CC) {
+ // If we're looking at a comparison, then we need to make sure that the
+ // high bits (all except for the first) don't affect the result.
+ ISD::CondCode CC =
+ cast<CondCodeSDNode>(N->getOperand(
+ N->getOpcode() == ISD::SETCC ? 2 : 4))->get();
+ unsigned OpBits = N->getOperand(0).getValueSizeInBits();
+
+ if (ISD::isSignedIntSetCC(CC)) {
+ if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits ||
+ DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits)
+ return SDValue();
+ } else if (ISD::isUnsignedIntSetCC(CC)) {
+ if (!DAG.MaskedValueIsZero(N->getOperand(0),
+ APInt::getHighBitsSet(OpBits, OpBits-1)) ||
+ !DAG.MaskedValueIsZero(N->getOperand(1),
+ APInt::getHighBitsSet(OpBits, OpBits-1)))
+ return SDValue();
+ } else {
+ // This is neither a signed nor an unsigned comparison, just make sure
+ // that the high bits are equal.
+ APInt Op1Zero, Op1One;
+ APInt Op2Zero, Op2One;
+ DAG.computeKnownBits(N->getOperand(0), Op1Zero, Op1One);
+ DAG.computeKnownBits(N->getOperand(1), Op2Zero, Op2One);
+
+ // We don't really care about what is known about the first bit (if
+ // anything), so clear it in all masks prior to comparing them.
+ Op1Zero.clearBit(0); Op1One.clearBit(0);
+ Op2Zero.clearBit(0); Op2One.clearBit(0);
+
+ if (Op1Zero != Op2Zero || Op1One != Op2One)
+ return SDValue();
+ }
+ }
+
+ // We now know that the higher-order bits are irrelevant, we just need to
+ // make sure that all of the intermediate operations are bit operations, and
+ // all inputs are extensions.
+ if (N->getOperand(0).getOpcode() != ISD::AND &&
+ N->getOperand(0).getOpcode() != ISD::OR &&
+ N->getOperand(0).getOpcode() != ISD::XOR &&
+ N->getOperand(0).getOpcode() != ISD::SELECT &&
+ N->getOperand(0).getOpcode() != ISD::SELECT_CC &&
+ N->getOperand(0).getOpcode() != ISD::TRUNCATE &&
+ N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND &&
+ N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
+ N->getOperand(0).getOpcode() != ISD::ANY_EXTEND)
+ return SDValue();
+
+ if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) &&
+ N->getOperand(1).getOpcode() != ISD::AND &&
+ N->getOperand(1).getOpcode() != ISD::OR &&
+ N->getOperand(1).getOpcode() != ISD::XOR &&
+ N->getOperand(1).getOpcode() != ISD::SELECT &&
+ N->getOperand(1).getOpcode() != ISD::SELECT_CC &&
+ N->getOperand(1).getOpcode() != ISD::TRUNCATE &&
+ N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND &&
+ N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
+ N->getOperand(1).getOpcode() != ISD::ANY_EXTEND)
+ return SDValue();
+
+ SmallVector<SDValue, 4> Inputs;
+ SmallVector<SDValue, 8> BinOps, PromOps;
+ SmallPtrSet<SDNode *, 16> Visited;
+
+ for (unsigned i = 0; i < 2; ++i) {
+ if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
+ N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
+ N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
+ N->getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
+ isa<ConstantSDNode>(N->getOperand(i)))
+ Inputs.push_back(N->getOperand(i));
+ else
+ BinOps.push_back(N->getOperand(i));
+
+ if (N->getOpcode() == ISD::TRUNCATE)
+ break;
+ }
+
+ // Visit all inputs, collect all binary operations (and, or, xor and
+ // select) that are all fed by extensions.
+ while (!BinOps.empty()) {
+ SDValue BinOp = BinOps.back();
+ BinOps.pop_back();
+
+ if (!Visited.insert(BinOp.getNode()))
+ continue;
+
+ PromOps.push_back(BinOp);
+
+ for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
+ // The condition of the select is not promoted.
+ if (BinOp.getOpcode() == ISD::SELECT && i == 0)
+ continue;
+ if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
+ continue;
+
+ if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
+ BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
+ BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
+ BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
+ isa<ConstantSDNode>(BinOp.getOperand(i))) {
+ Inputs.push_back(BinOp.getOperand(i));
+ } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
+ BinOp.getOperand(i).getOpcode() == ISD::OR ||
+ BinOp.getOperand(i).getOpcode() == ISD::XOR ||
+ BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
+ BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC ||
+ BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
+ BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
+ BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
+ BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) {
+ BinOps.push_back(BinOp.getOperand(i));
+ } else {
+ // We have an input that is not an extension or another binary
+ // operation; we'll abort this transformation.
+ return SDValue();
+ }
+ }
+ }
+
+ // Make sure that this is a self-contained cluster of operations (which
+ // is not quite the same thing as saying that everything has only one
+ // use).
+ for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
+ if (isa<ConstantSDNode>(Inputs[i]))
+ continue;
+
+ for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
+ UE = Inputs[i].getNode()->use_end();
+ UI != UE; ++UI) {
+ SDNode *User = *UI;
+ if (User != N && !Visited.count(User))
+ return SDValue();
+
+ // Make sure that we're not going to promote the non-output-value
+ // operand(s) of SELECT or SELECT_CC.
+ // FIXME: Although we could sometimes handle this, and it does occur in
+ // practice that one of the condition inputs to the select is also one of
+ // the outputs, we currently can't deal with this.
+ if (User->getOpcode() == ISD::SELECT) {
+ if (User->getOperand(0) == Inputs[i])
+ return SDValue();
+ } else if (User->getOpcode() == ISD::SELECT_CC) {
+ if (User->getOperand(0) == Inputs[i] ||
+ User->getOperand(1) == Inputs[i])
+ return SDValue();
+ }
+ }
+ }
+
+ for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
+ for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
+ UE = PromOps[i].getNode()->use_end();
+ UI != UE; ++UI) {
+ SDNode *User = *UI;
+ if (User != N && !Visited.count(User))
+ return SDValue();
+
+ // Make sure that we're not going to promote the non-output-value
+ // operand(s) of SELECT or SELECT_CC.
+ // FIXME: Although we could sometimes handle this, and it does occur in
+ // practice that one of the condition inputs to the select is also one of
+ // the outputs, we currently can't deal with this.
+ if (User->getOpcode() == ISD::SELECT) {
+ if (User->getOperand(0) == PromOps[i])
+ return SDValue();
+ } else if (User->getOpcode() == ISD::SELECT_CC) {
+ if (User->getOperand(0) == PromOps[i] ||
+ User->getOperand(1) == PromOps[i])
+ return SDValue();
+ }
+ }
+ }
+
+ // Replace all inputs with the extension operand.
+ for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
+ // Constants may have users outside the cluster of to-be-promoted nodes,
+ // and so we need to replace those as we do the promotions.
+ if (isa<ConstantSDNode>(Inputs[i]))
+ continue;
+ else
+ DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0));
+ }
+
+ // Replace all operations (these are all the same, but have a different
+ // (i1) return type). DAG.getNode will validate that the types of
+ // a binary operator match, so go through the list in reverse so that
+ // we've likely promoted both operands first. Any intermediate truncations or
+ // extensions disappear.
+ while (!PromOps.empty()) {
+ SDValue PromOp = PromOps.back();
+ PromOps.pop_back();
+
+ if (PromOp.getOpcode() == ISD::TRUNCATE ||
+ PromOp.getOpcode() == ISD::SIGN_EXTEND ||
+ PromOp.getOpcode() == ISD::ZERO_EXTEND ||
+ PromOp.getOpcode() == ISD::ANY_EXTEND) {
+ if (!isa<ConstantSDNode>(PromOp.getOperand(0)) &&
+ PromOp.getOperand(0).getValueType() != MVT::i1) {
+ // The operand is not yet ready (see comment below).
+ PromOps.insert(PromOps.begin(), PromOp);
+ continue;
+ }
+
+ SDValue RepValue = PromOp.getOperand(0);
+ if (isa<ConstantSDNode>(RepValue))
+ RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue);
+
+ DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue);
+ continue;
+ }
+
+ unsigned C;
+ switch (PromOp.getOpcode()) {
+ default: C = 0; break;
+ case ISD::SELECT: C = 1; break;
+ case ISD::SELECT_CC: C = 2; break;
+ }
+
+ if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
+ PromOp.getOperand(C).getValueType() != MVT::i1) ||
+ (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
+ PromOp.getOperand(C+1).getValueType() != MVT::i1)) {
+ // The to-be-promoted operands of this node have not yet been
+ // promoted (this should be rare because we're going through the
+ // list backward, but if one of the operands has several users in
+ // this cluster of to-be-promoted nodes, it is possible).
+ PromOps.insert(PromOps.begin(), PromOp);
+ continue;
+ }
+
+ SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
+ PromOp.getNode()->op_end());
+
+ // If there are any constant inputs, make sure they're replaced now.
+ for (unsigned i = 0; i < 2; ++i)
+ if (isa<ConstantSDNode>(Ops[C+i]))
+ Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]);
+
+ DAG.ReplaceAllUsesOfValueWith(PromOp,
+ DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops));
+ }
+
+ // Now we're left with the initial truncation itself.
+ if (N->getOpcode() == ISD::TRUNCATE)
+ return N->getOperand(0);
+
+ // Otherwise, this is a comparison. The operands to be compared have just
+ // changed type (to i1), but everything else is the same.
+ return SDValue(N, 0);
+}
+
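For orientation, here is a C-level shape that produces the pattern DAGCombineTruncBoolExt looks for (combineFlags is invented purely for illustration):

// Two i1 conditions are zero-extended to i32, combined with a bitwise AND,
// and consumed as an i1 again -- effectively trunc(and(zext(x), zext(y))).
// Without the combine above, the values would round-trip through GPRs even
// though the whole computation can stay in CR bits.
static bool combineFlags(int a, int b, int c, int d) {
  int x = (a == b);       // zext(setcc)
  int y = (c == d);       // zext(setcc)
  return (x & y) != 0;    // the "effective truncation" via setcc
}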
+SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+ SDLoc dl(N);
+
+ // If we're tracking CR bits, we need to be careful that we don't have:
+ // zext(binary-ops(trunc(x), trunc(y)))
+ // or
+ // zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...))
+ // such that we're unnecessarily moving things into CR bits that can more
+ // efficiently stay in GPRs. Note that if we're not certain that the high
+ // bits are set as required by the final extension, we still may need to do
+ // some masking to get the proper behavior.
+
+ // This same functionality is important on PPC64 when dealing with
+ // 32-to-64-bit extensions; these occur often when 32-bit values are used as
+ // the return values of functions. Because it is so similar, it is handled
+ // here as well.
+
+ if (N->getValueType(0) != MVT::i32 &&
+ N->getValueType(0) != MVT::i64)
+ return SDValue();
+
+ if (!((N->getOperand(0).getValueType() == MVT::i1 &&
+ Subtarget.useCRBits()) ||
+ (N->getOperand(0).getValueType() == MVT::i32 &&
+ Subtarget.isPPC64())))
+ return SDValue();
+
+ if (N->getOperand(0).getOpcode() != ISD::AND &&
+ N->getOperand(0).getOpcode() != ISD::OR &&
+ N->getOperand(0).getOpcode() != ISD::XOR &&
+ N->getOperand(0).getOpcode() != ISD::SELECT &&
+ N->getOperand(0).getOpcode() != ISD::SELECT_CC)
+ return SDValue();
+
+ SmallVector<SDValue, 4> Inputs;
+ SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps;
+ SmallPtrSet<SDNode *, 16> Visited;
+
+ // Visit all inputs, collect all binary operations (and, or, xor and
+ // select) that are all fed by truncations.
+ while (!BinOps.empty()) {
+ SDValue BinOp = BinOps.back();
+ BinOps.pop_back();
+
+ if (!Visited.insert(BinOp.getNode()))
+ continue;
+
+ PromOps.push_back(BinOp);
+
+ for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
+ // The condition of the select is not promoted.
+ if (BinOp.getOpcode() == ISD::SELECT && i == 0)
+ continue;
+ if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
+ continue;
+
+ if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
+ isa<ConstantSDNode>(BinOp.getOperand(i))) {
+ Inputs.push_back(BinOp.getOperand(i));
+ } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
+ BinOp.getOperand(i).getOpcode() == ISD::OR ||
+ BinOp.getOperand(i).getOpcode() == ISD::XOR ||
+ BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
+ BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) {
+ BinOps.push_back(BinOp.getOperand(i));
+ } else {
+ // We have an input that is not a truncation or another binary
+ // operation; we'll abort this transformation.
+ return SDValue();
+ }
+ }
+ }
+
+ // Make sure that this is a self-contained cluster of operations (which
+ // is not quite the same thing as saying that everything has only one
+ // use).
+ for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
+ if (isa<ConstantSDNode>(Inputs[i]))
+ continue;
+
+ for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
+ UE = Inputs[i].getNode()->use_end();
+ UI != UE; ++UI) {
+ SDNode *User = *UI;
+ if (User != N && !Visited.count(User))
+ return SDValue();
+
+ // Make sure that we're not going to promote the non-output-value
+ // operand(s) of SELECT or SELECT_CC.
+ // FIXME: Although we could sometimes handle this, and it does occur in
+ // practice that one of the condition inputs to the select is also one of
+ // the outputs, we currently can't deal with this.
+ if (User->getOpcode() == ISD::SELECT) {
+ if (User->getOperand(0) == Inputs[i])
+ return SDValue();
+ } else if (User->getOpcode() == ISD::SELECT_CC) {
+ if (User->getOperand(0) == Inputs[i] ||
+ User->getOperand(1) == Inputs[i])
+ return SDValue();
+ }
+ }
+ }
+
+ for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
+ for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
+ UE = PromOps[i].getNode()->use_end();
+ UI != UE; ++UI) {
+ SDNode *User = *UI;
+ if (User != N && !Visited.count(User))
+ return SDValue();
+
+ // Make sure that we're not going to promote the non-output-value
+ // operand(s) of SELECT or SELECT_CC.
+ // FIXME: Although we could sometimes handle this, and it does occur in
+ // practice that one of the condition inputs to the select is also one of
+ // the outputs, we currently can't deal with this.
+ if (User->getOpcode() == ISD::SELECT) {
+ if (User->getOperand(0) == PromOps[i])
+ return SDValue();
+ } else if (User->getOpcode() == ISD::SELECT_CC) {
+ if (User->getOperand(0) == PromOps[i] ||
+ User->getOperand(1) == PromOps[i])
+ return SDValue();
+ }
+ }
+ }
+
+ unsigned PromBits = N->getOperand(0).getValueSizeInBits();
+ bool ReallyNeedsExt = false;
+ if (N->getOpcode() != ISD::ANY_EXTEND) {
+ // If the inputs are not all already sign/zero extended, then we'll
+ // still need to do that at the end.
+ for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
+ if (isa<ConstantSDNode>(Inputs[i]))
+ continue;
+
+ unsigned OpBits =
+ Inputs[i].getOperand(0).getValueSizeInBits();
+ assert(PromBits < OpBits && "Truncation not to a smaller bit count?");
+
+ if ((N->getOpcode() == ISD::ZERO_EXTEND &&
+ !DAG.MaskedValueIsZero(Inputs[i].getOperand(0),
+ APInt::getHighBitsSet(OpBits,
+ OpBits-PromBits))) ||
+ (N->getOpcode() == ISD::SIGN_EXTEND &&
+ DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) <
+ (OpBits-(PromBits-1)))) {
+ ReallyNeedsExt = true;
+ break;
+ }
+ }
+ }
+
+ // Replace all inputs, either with the truncation operand, or a
+ // truncation or extension to the final output type.
+ for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
+ // Constant inputs need to be replaced with the to-be-promoted nodes that
+ // use them because they might have users outside of the cluster of
+ // promoted nodes.
+ if (isa<ConstantSDNode>(Inputs[i]))
+ continue;
+
+ SDValue InSrc = Inputs[i].getOperand(0);
+ if (Inputs[i].getValueType() == N->getValueType(0))
+ DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc);
+ else if (N->getOpcode() == ISD::SIGN_EXTEND)
+ DAG.ReplaceAllUsesOfValueWith(Inputs[i],
+ DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0)));
+ else if (N->getOpcode() == ISD::ZERO_EXTEND)
+ DAG.ReplaceAllUsesOfValueWith(Inputs[i],
+ DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0)));
+ else
+ DAG.ReplaceAllUsesOfValueWith(Inputs[i],
+ DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0)));
+ }
+
+ // Replace all operations (these are all the same, but have a different
+ // (promoted) return type). DAG.getNode will validate that the types of
+ // a binary operator match, so go through the list in reverse so that
+ // we've likely promoted both operands first.
+ while (!PromOps.empty()) {
+ SDValue PromOp = PromOps.back();
+ PromOps.pop_back();
+
+ unsigned C;
+ switch (PromOp.getOpcode()) {
+ default: C = 0; break;
+ case ISD::SELECT: C = 1; break;
+ case ISD::SELECT_CC: C = 2; break;
+ }
+
+ if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
+ PromOp.getOperand(C).getValueType() != N->getValueType(0)) ||
+ (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
+ PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) {
+ // The to-be-promoted operands of this node have not yet been
+ // promoted (this should be rare because we're going through the
+ // list backward, but if one of the operands has several users in
+ // this cluster of to-be-promoted nodes, it is possible).
+ PromOps.insert(PromOps.begin(), PromOp);
+ continue;
+ }
+
+ SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
+ PromOp.getNode()->op_end());
+
+ // If this node has constant inputs, then they'll need to be promoted here.
+ for (unsigned i = 0; i < 2; ++i) {
+ if (!isa<ConstantSDNode>(Ops[C+i]))
+ continue;
+ if (Ops[C+i].getValueType() == N->getValueType(0))
+ continue;
+
+ if (N->getOpcode() == ISD::SIGN_EXTEND)
+ Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
+ else if (N->getOpcode() == ISD::ZERO_EXTEND)
+ Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
+ else
+ Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
+ }
+
+ DAG.ReplaceAllUsesOfValueWith(PromOp,
+ DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops));
+ }
+
+ // Now we're left with the initial extension itself.
+ if (!ReallyNeedsExt)
+ return N->getOperand(0);
+
+ // To zero extend, just mask off everything except for the first bit (in the
+ // i1 case).
+ if (N->getOpcode() == ISD::ZERO_EXTEND)
+ return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0),
+ DAG.getConstant(APInt::getLowBitsSet(
+ N->getValueSizeInBits(0), PromBits),
+ N->getValueType(0)));
+
+ assert(N->getOpcode() == ISD::SIGN_EXTEND &&
+ "Invalid extension type");
+ EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0));
+ SDValue ShiftCst =
+ DAG.getConstant(N->getValueSizeInBits(0)-PromBits, ShiftAmountTy);
+ return DAG.getNode(ISD::SRA, dl, N->getValueType(0),
+ DAG.getNode(ISD::SHL, dl, N->getValueType(0),
+ N->getOperand(0), ShiftCst), ShiftCst);
+}
+
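The tail of DAGCombineExtBoolTrunc re-creates the extension only when the inputs do not already guarantee it; below is a scalar sketch of the two fix-ups (helper names invented; the arithmetic right shift on a signed type is assumed to preserve the sign, as it does on mainstream compilers):

#include <cassert>
#include <cstdint>

// Zero extension: mask off everything above the low PromBits bits.
static uint64_t zextLow(uint64_t V, unsigned PromBits) {
  return PromBits >= 64 ? V : (V & ((1ULL << PromBits) - 1));
}

// Sign extension: shift the significant bits to the top, then shift back
// arithmetically -- the SHL/SRA pair emitted above.
static int64_t sextLow(uint64_t V, unsigned PromBits) {
  unsigned Shift = 64 - PromBits;
  return static_cast<int64_t>(V << Shift) >> Shift;
}

int main() {
  assert(zextLow(0xFFFFFFFFFFFFFFFFULL, 1) == 1);
  assert(sextLow(1, 1) == -1);
  assert(sextLow(0x7FFFFFFFULL, 32) == 0x7FFFFFFF);
  return 0;
}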
SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
const TargetMachine &TM = getTargetMachine();
@@ -7005,6 +8199,14 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
return N->getOperand(0);
}
break;
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND:
+ return DAGCombineExtBoolTrunc(N, DCI);
+ case ISD::TRUNCATE:
+ case ISD::SETCC:
+ case ISD::SELECT_CC:
+ return DAGCombineTruncBoolExt(N, DCI);
case ISD::FDIV: {
assert(TM.Options.UnsafeFPMath &&
"Reciprocal estimates require UnsafeFPMath");
@@ -7012,7 +8214,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
if (N->getOperand(1).getOpcode() == ISD::FSQRT) {
SDValue RV =
DAGCombineFastRecipFSQRT(N->getOperand(1).getOperand(0), DCI);
- if (RV.getNode() != 0) {
+ if (RV.getNode()) {
DCI.AddToWorklist(RV.getNode());
return DAG.getNode(ISD::FMUL, dl, N->getValueType(0),
N->getOperand(0), RV);
@@ -7022,7 +8224,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
SDValue RV =
DAGCombineFastRecipFSQRT(N->getOperand(1).getOperand(0).getOperand(0),
DCI);
- if (RV.getNode() != 0) {
+ if (RV.getNode()) {
DCI.AddToWorklist(RV.getNode());
RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N->getOperand(1)),
N->getValueType(0), RV);
@@ -7035,7 +8237,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
SDValue RV =
DAGCombineFastRecipFSQRT(N->getOperand(1).getOperand(0).getOperand(0),
DCI);
- if (RV.getNode() != 0) {
+ if (RV.getNode()) {
DCI.AddToWorklist(RV.getNode());
RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N->getOperand(1)),
N->getValueType(0), RV,
@@ -7047,7 +8249,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
}
SDValue RV = DAGCombineFastRecip(N->getOperand(1), DCI);
- if (RV.getNode() != 0) {
+ if (RV.getNode()) {
DCI.AddToWorklist(RV.getNode());
return DAG.getNode(ISD::FMUL, dl, N->getValueType(0),
N->getOperand(0), RV);
@@ -7062,12 +8264,12 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
// Compute this as 1/(1/sqrt(X)), which is the reciprocal of the
// reciprocal sqrt.
SDValue RV = DAGCombineFastRecipFSQRT(N->getOperand(0), DCI);
- if (RV.getNode() != 0) {
+ if (RV.getNode()) {
DCI.AddToWorklist(RV.getNode());
RV = DAGCombineFastRecip(RV, DCI);
- if (RV.getNode() != 0) {
- // Unfortunately, RV is now NaN if the input was exactly 0. Select out
- // this case and force the answer to 0.
+ if (RV.getNode()) {
+ // Unfortunately, RV is now NaN if the input was exactly 0. Select out
+ // this case and force the answer to 0.
EVT VT = RV.getValueType();
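A small demonstration of the zero-input hazard the comment above refers to, assuming IEEE arithmetic: computing sqrt(X) as 1/(1/sqrt(X)) makes the intermediate value +inf for X == 0, and the reciprocal refinement then multiplies +inf by an estimate of 0.

#include <cmath>
#include <cstdio>

int main() {
  double X = 0.0;
  double RSqrt = 1.0 / std::sqrt(X);                    // +inf
  double RecipEst = 0.0;                                // estimate of 1/+inf
  double Refined = RecipEst * (2.0 - RSqrt * RecipEst); // inf * 0 -> NaN
  std::printf("%f\n", Refined);       // prints nan on typical IEEE hosts
  return 0;
}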
@@ -7143,7 +8345,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
};
Val = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
- DAG.getVTList(MVT::Other), Ops, array_lengthof(Ops),
+ DAG.getVTList(MVT::Other), Ops,
cast<StoreSDNode>(N)->getMemoryVT(),
cast<StoreSDNode>(N)->getMemOperand());
DCI.AddToWorklist(Val.getNode());
@@ -7170,8 +8372,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
};
return
DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
- Ops, array_lengthof(Ops),
- cast<StoreSDNode>(N)->getMemoryVT(),
+ Ops, cast<StoreSDNode>(N)->getMemoryVT(),
cast<StoreSDNode>(N)->getMemOperand());
}
break;
@@ -7188,6 +8389,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
// This is a type-legal unaligned Altivec load.
SDValue Chain = LD->getChain();
SDValue Ptr = LD->getBasePtr();
+ bool isLittleEndian = Subtarget.isLittleEndian();
// This implements the loading of unaligned vectors as described in
// the venerable Apple Velocity Engine overview. Specifically:
@@ -7195,25 +8397,28 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
// https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
//
// The general idea is to expand a sequence of one or more unaligned
- // loads into a alignment-based permutation-control instruction (lvsl),
- // a series of regular vector loads (which always truncate their
- // input address to an aligned address), and a series of permutations.
- // The results of these permutations are the requested loaded values.
- // The trick is that the last "extra" load is not taken from the address
- // you might suspect (sizeof(vector) bytes after the last requested
- // load), but rather sizeof(vector) - 1 bytes after the last
- // requested vector. The point of this is to avoid a page fault if the
- // base address happend to be aligned. This works because if the base
- // address is aligned, then adding less than a full vector length will
- // cause the last vector in the sequence to be (re)loaded. Otherwise,
- // the next vector will be fetched as you might suspect was necessary.
+ // loads into an alignment-based permutation-control instruction (lvsl
+ // or lvsr), a series of regular vector loads (which always truncate
+ // their input address to an aligned address), and a series of
+ // permutations. The results of these permutations are the requested
+ // loaded values. The trick is that the last "extra" load is not taken
+ // from the address you might suspect (sizeof(vector) bytes after the
+ // last requested load), but rather sizeof(vector) - 1 bytes after the
+ // last requested vector. The point of this is to avoid a page fault if
+ // the base address happened to be aligned. This works because if the
+ // base address is aligned, then adding less than a full vector length
+ // will cause the last vector in the sequence to be (re)loaded.
+ // Otherwise, the next vector will be fetched as you might suspect was
+ // necessary.
// We might be able to reuse the permutation generation from
// a different base address offset from this one by an aligned amount.
// The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
// optimization later.
- SDValue PermCntl = BuildIntrinsicOp(Intrinsic::ppc_altivec_lvsl, Ptr,
- DAG, dl, MVT::v16i8);
+ Intrinsic::ID Intr = (isLittleEndian ?
+ Intrinsic::ppc_altivec_lvsr :
+ Intrinsic::ppc_altivec_lvsl);
+ SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, MVT::v16i8);
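// A source-level sketch of the expansion described in the comment above,
// using the C/C++ AltiVec intrinsics (big-endian lvsl form; the helper
// name is made up for illustration and assumes a target built with
// -maltivec):
#include <altivec.h>
static __vector unsigned char loadUnaligned(const unsigned char *P) {
  __vector unsigned char Cntl = vec_lvsl(0, P);  // permute control from P & 0xF
  __vector unsigned char Lo   = vec_ld(0, P);    // aligned load covering P
  __vector unsigned char Hi   = vec_ld(15, P);   // "extra" load at +15, not +16,
                                                 // so an aligned P just reloads Lo
  return vec_perm(Lo, Hi, Cntl);                 // splice out the 16 requested bytes
}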
// Refine the alignment of the original load (a "new" load created here
// which was identical to the first except for the alignment would be
@@ -7262,8 +8467,18 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
if (ExtraLoad.getValueType() != MVT::v4i32)
ExtraLoad = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, ExtraLoad);
- SDValue Perm = BuildIntrinsicOp(Intrinsic::ppc_altivec_vperm,
- BaseLoad, ExtraLoad, PermCntl, DAG, dl);
+ // Because vperm has a big-endian bias, we must reverse the order
+ // of the input vectors and complement the permute control vector
+ // when generating little endian code. We have already handled the
+ // latter by using lvsr instead of lvsl, so just reverse BaseLoad
+ // and ExtraLoad here.
+ SDValue Perm;
+ if (isLittleEndian)
+ Perm = BuildIntrinsicOp(Intrinsic::ppc_altivec_vperm,
+ ExtraLoad, BaseLoad, PermCntl, DAG, dl);
+ else
+ Perm = BuildIntrinsicOp(Intrinsic::ppc_altivec_vperm,
+ BaseLoad, ExtraLoad, PermCntl, DAG, dl);
if (VT != MVT::v4i32)
Perm = DAG.getNode(ISD::BITCAST, dl, VT, Perm);
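// A scalar model of the vperm selection rule (sketch): result byte i is
// taken from the 32-byte concatenation of the two source vectors, indexed
// with the fixed big-endian register-byte numbering (the "bias" the
// comment above refers to). Because that numbering does not flip in
// little-endian mode, the LE path compensates by generating lvsr for the
// control vector and by swapping the two vperm inputs.
static void vpermModel(const unsigned char VA[16], const unsigned char VB[16],
                       const unsigned char Cntl[16], unsigned char Out[16]) {
  for (unsigned i = 0; i != 16; ++i) {
    unsigned Idx = Cntl[i] & 31;                 // only the low 5 bits select
    Out[i] = Idx < 16 ? VA[Idx] : VB[Idx - 16];
  }
}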
@@ -7288,24 +8503,26 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
++UI;
SmallVector<SDValue, 8> Ops;
- for (SDNode::op_iterator O = User->op_begin(),
- OE = User->op_end(); O != OE; ++O) {
- if (*O == Use)
+ for (const SDUse &O : User->ops()) {
+ if (O == Use)
Ops.push_back(To);
else
- Ops.push_back(*O);
+ Ops.push_back(O);
}
- DAG.UpdateNodeOperands(User, Ops.data(), Ops.size());
+ DAG.UpdateNodeOperands(User, Ops);
}
return SDValue(N, 0);
}
}
break;
- case ISD::INTRINSIC_WO_CHAIN:
- if (cast<ConstantSDNode>(N->getOperand(0))->getZExtValue() ==
- Intrinsic::ppc_altivec_lvsl &&
+ case ISD::INTRINSIC_WO_CHAIN: {
+ bool isLittleEndian = Subtarget.isLittleEndian();
+ Intrinsic::ID Intr = (isLittleEndian ?
+ Intrinsic::ppc_altivec_lvsr :
+ Intrinsic::ppc_altivec_lvsl);
+ if (cast<ConstantSDNode>(N->getOperand(0))->getZExtValue() == Intr &&
N->getOperand(1)->getOpcode() == ISD::ADD) {
SDValue Add = N->getOperand(1);
@@ -7317,8 +8534,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
UE = BasePtr->use_end(); UI != UE; ++UI) {
if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() ==
- Intrinsic::ppc_altivec_lvsl) {
- // We've found another LVSL, and this address if an aligned
+ Intr) {
+ // We've found another LVSL/LVSR, and this address is an aligned
// multiple of that one. The results will be the same, so use the
// one we've just found instead.
@@ -7327,6 +8544,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
}
}
}
+ }
break;
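// Why the reuse in the INTRINSIC_WO_CHAIN combine above is safe (a small
// sketch with a hypothetical helper): lvsl and lvsr derive the permute
// control purely from the low four bits of the effective address, so an
// offset that is a multiple of 16 can never change the control vector.
static bool samePermCntl(unsigned long long Base, unsigned long long Off) {
  return (Off & 0xF) == 0 &&                      // aligned offset...
         ((Base + Off) & 0xF) == (Base & 0xF);    // ...leaves the low bits intact
}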
case ISD::BSWAP:
@@ -7349,7 +8567,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
DAG.getVTList(N->getValueType(0) == MVT::i64 ?
MVT::i64 : MVT::i32, MVT::Other),
- Ops, 3, LD->getMemoryVT(), LD->getMemOperand());
+ Ops, LD->getMemoryVT(), LD->getMemOperand());
// If this is an i16 load, insert the truncate.
SDValue ResVal = BSLoad;
@@ -7379,7 +8597,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
!N->getOperand(2).hasOneUse()) {
// Scan all of the users of the LHS, looking for VCMPo's that match.
- SDNode *VCMPoNode = 0;
+ SDNode *VCMPoNode = nullptr;
SDNode *LHSN = N->getOperand(0).getNode();
for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
@@ -7400,9 +8618,9 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
// Look at the (necessarily single) use of the flag value. If it has a
// chain, this transformation is more complex. Note that multiple things
// could use the value result, which we should ignore.
- SDNode *FlagUser = 0;
+ SDNode *FlagUser = nullptr;
for (SDNode::use_iterator UI = VCMPoNode->use_begin();
- FlagUser == 0; ++UI) {
+ FlagUser == nullptr; ++UI) {
assert(UI != VCMPoNode->use_end() && "Didn't find user!");
SDNode *User = *UI;
for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
@@ -7420,6 +8638,25 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
}
break;
}
+ case ISD::BRCOND: {
+ SDValue Cond = N->getOperand(1);
+ SDValue Target = N->getOperand(2);
+
+ if (Cond.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
+ cast<ConstantSDNode>(Cond.getOperand(1))->getZExtValue() ==
+ Intrinsic::ppc_is_decremented_ctr_nonzero) {
+
+ // We now need to make the intrinsic dead (it cannot be instruction
+ // selected).
+ DAG.ReplaceAllUsesOfValueWith(Cond.getValue(1), Cond.getOperand(0));
+ assert(Cond.getNode()->hasOneUse() &&
+ "Counter decrement has more than one use");
+
+ return DAG.getNode(PPCISD::BDNZ, dl, MVT::Other,
+ N->getOperand(0), Target);
+ }
+ }
+ break;
case ISD::BR_CC: {
// If this is a branch on an altivec predicate comparison, lower this so
// that we don't have to do a MFOCRF: instead, branch directly on CR6. This
@@ -7488,7 +8725,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
DAG.getConstant(CompareOpc, MVT::i32)
};
EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
- SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops, 3);
+ SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops);
// Unpack the result based on how the target uses it.
PPC::Predicate CompOpc;
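// The kind of source this BR_CC combine targets (sketch, assuming a target
// built with -maltivec): branching on an AltiVec any/all predicate. The
// record-form compare (e.g. vcmpequw.) already leaves its summary in CR6,
// so the branch can test CR6 directly instead of copying it out with MFOCRF.
#include <altivec.h>
static int countMatches(__vector int A, __vector int B, int N) {
  if (vec_any_eq(A, B))      // vcmpequw. sets CR6; the branch tests CR6
    return N + 1;
  return N;
}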
@@ -7524,11 +8761,11 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
// Inline Assembly Support
//===----------------------------------------------------------------------===//
-void PPCTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
- APInt &KnownZero,
- APInt &KnownOne,
- const SelectionDAG &DAG,
- unsigned Depth) const {
+void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth) const {
KnownZero = KnownOne = APInt(KnownZero.getBitWidth(), 0);
switch (Op.getOpcode()) {
default: break;
@@ -7584,6 +8821,11 @@ PPCTargetLowering::getConstraintType(const std::string &Constraint) const {
// suboptimal.
return C_Memory;
}
+ } else if (Constraint == "wc") { // individual CR bits.
+ return C_RegisterClass;
+ } else if (Constraint == "wa" || Constraint == "wd" ||
+ Constraint == "wf" || Constraint == "ws") {
+ return C_RegisterClass; // VSX registers.
}
return TargetLowering::getConstraintType(Constraint);
}
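// How the new "w" constraints surface in user code (an illustrative
// sketch; assumes a VSX-capable target, and uses the %x operand modifier
// to print the full 64-entry VSX register number):
static double vsxAdd(double A, double B) {
  double R;
  // "ws" requests a VSX scalar-double register (VSFRC); "wa"/"wd"/"wf"
  // similarly request full VSX vector registers, and "wc" an individual
  // CR bit.
  __asm__("xsadddp %x0, %x1, %x2" : "=ws"(R) : "ws"(A), "ws"(B));
  return R;
}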
@@ -7598,10 +8840,21 @@ PPCTargetLowering::getSingleConstraintMatchWeight(
Value *CallOperandVal = info.CallOperandVal;
// If we don't have a value, we can't do a match,
// but allow it at the lowest weight.
- if (CallOperandVal == NULL)
+ if (!CallOperandVal)
return CW_Default;
Type *type = CallOperandVal->getType();
+
// Look at the constraint type.
+ if (StringRef(constraint) == "wc" && type->isIntegerTy(1))
+ return CW_Register; // an individual CR bit.
+ else if ((StringRef(constraint) == "wa" ||
+ StringRef(constraint) == "wd" ||
+ StringRef(constraint) == "wf") &&
+ type->isVectorTy())
+ return CW_Register;
+ else if (StringRef(constraint) == "ws" && type->isDoubleTy())
+ return CW_Register;
+
switch (*constraint) {
default:
weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
@@ -7639,11 +8892,11 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
// GCC RS6000 Constraint Letters
switch (Constraint[0]) {
case 'b': // R1-R31
- if (VT == MVT::i64 && PPCSubTarget.isPPC64())
+ if (VT == MVT::i64 && Subtarget.isPPC64())
return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
case 'r': // R0-R31
- if (VT == MVT::i64 && PPCSubTarget.isPPC64())
+ if (VT == MVT::i64 && Subtarget.isPPC64())
return std::make_pair(0U, &PPC::G8RCRegClass);
return std::make_pair(0U, &PPC::GPRCRegClass);
case 'f':
@@ -7657,6 +8910,13 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
case 'y': // crrc
return std::make_pair(0U, &PPC::CRRCRegClass);
}
+ } else if (Constraint == "wc") { // an individual CR bit.
+ return std::make_pair(0U, &PPC::CRBITRCRegClass);
+ } else if (Constraint == "wa" || Constraint == "wd" ||
+ Constraint == "wf") {
+ return std::make_pair(0U, &PPC::VSRCRegClass);
+ } else if (Constraint == "ws") {
+ return std::make_pair(0U, &PPC::VSFRCRegClass);
}
std::pair<unsigned, const TargetRegisterClass*> R =
@@ -7668,7 +8928,7 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
// register.
// FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
// the AsmName field from *RegisterInfo.td, then this would not be necessary.
- if (R.first && VT == MVT::i64 && PPCSubTarget.isPPC64() &&
+ if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
PPC::GPRCRegClass.contains(R.first)) {
const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
return std::make_pair(TRI->getMatchingSuperReg(R.first,
@@ -7686,7 +8946,7 @@ void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
std::string &Constraint,
std::vector<SDValue>&Ops,
SelectionDAG &DAG) const {
- SDValue Result(0,0);
+ SDValue Result;
// Only support length 1 constraints.
if (Constraint.length() > 1) return;
@@ -7792,6 +9052,9 @@ SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
MachineFrameInfo *MFI = MF.getFrameInfo();
MFI->setReturnAddressIsTaken(true);
+ if (verifyReturnAddressArgumentIsConstant(Op, DAG))
+ return SDValue();
+
SDLoc dl(Op);
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
@@ -7799,8 +9062,8 @@ SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
// the stack.
PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
FuncInfo->setLRStoreRequired();
- bool isPPC64 = PPCSubTarget.isPPC64();
- bool isDarwinABI = PPCSubTarget.isDarwinABI();
+ bool isPPC64 = Subtarget.isPPC64();
+ bool isDarwinABI = Subtarget.isDarwinABI();
if (Depth > 0) {
SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
@@ -7850,6 +9113,30 @@ SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
return FrameAddr;
}
+// FIXME? Maybe this could be a TableGen attribute on some registers and
+// this table could be generated automatically from RegInfo.
+unsigned PPCTargetLowering::getRegisterByName(const char* RegName,
+ EVT VT) const {
+ bool isPPC64 = Subtarget.isPPC64();
+ bool isDarwinABI = Subtarget.isDarwinABI();
+
+ if ((isPPC64 && VT != MVT::i64 && VT != MVT::i32) ||
+ (!isPPC64 && VT != MVT::i32))
+ report_fatal_error("Invalid register global variable type");
+
+ bool is64Bit = isPPC64 && VT == MVT::i64;
+ unsigned Reg = StringSwitch<unsigned>(RegName)
+ .Case("r1", is64Bit ? PPC::X1 : PPC::R1)
+ .Case("r2", isDarwinABI ? 0 : (is64Bit ? PPC::X2 : PPC::R2))
+ .Case("r13", (!isPPC64 && isDarwinABI) ? 0 :
+ (is64Bit ? PPC::X13 : PPC::R13))
+ .Default(0);
+
+ if (Reg)
+ return Reg;
+ report_fatal_error("Invalid register name global variable");
+}
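// What this hook ultimately serves (sketch): named global register
// variables, which the frontend lowers to llvm.read_register /
// llvm.write_register and which this routine validates. Only the
// registers accepted above can be named this way.
register unsigned long StackPtr asm("r1");   // r1 is the stack pointer on
                                             // both the SVR4 and Darwin ABIs
static unsigned long currentStackPointer(void) { return StackPtr; }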
+
bool
PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
// The PowerPC target isn't yet aware of offsets.
@@ -7872,14 +9159,51 @@ EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
bool IsMemset, bool ZeroMemset,
bool MemcpyStrSrc,
MachineFunction &MF) const {
- if (this->PPCSubTarget.isPPC64()) {
+ if (Subtarget.isPPC64()) {
return MVT::i64;
} else {
return MVT::i32;
}
}
+/// \brief Returns true if it is beneficial to convert a load of a constant
+/// to just the constant itself.
+bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
+ Type *Ty) const {
+ assert(Ty->isIntegerTy());
+
+ unsigned BitSize = Ty->getPrimitiveSizeInBits();
+ if (BitSize == 0 || BitSize > 64)
+ return false;
+ return true;
+}
+
+bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
+ if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
+ return false;
+ unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
+ unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
+ return NumBits1 == 64 && NumBits2 == 32;
+}
+
+bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
+ if (!VT1.isInteger() || !VT2.isInteger())
+ return false;
+ unsigned NumBits1 = VT1.getSizeInBits();
+ unsigned NumBits2 = VT2.getSizeInBits();
+ return NumBits1 == 64 && NumBits2 == 32;
+}
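// Why the 64-to-32-bit truncation above is free (sketch): both widths live
// in the same GPR on PPC64, so the narrow value is just the low word of
// the wide one and no instruction needs to be emitted.
static unsigned int truncIsFree(unsigned long long X) {
  return (unsigned int)X;   // callers simply use the low 32 bits of the
                            // same 64-bit register
}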
+
+bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
+ return isInt<16>(Imm) || isUInt<16>(Imm);
+}
+
+bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const {
+ return isInt<16>(Imm) || isUInt<16>(Imm);
+}
+
bool PPCTargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
+ unsigned,
bool *Fast) const {
if (DisablePPCUnaligned)
return false;
@@ -7893,8 +9217,14 @@ bool PPCTargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
if (!VT.isSimple())
return false;
- if (VT.getSimpleVT().isVector())
- return false;
+ if (VT.getSimpleVT().isVector()) {
+ if (Subtarget.hasVSX()) {
+ if (VT != MVT::v2f64 && VT != MVT::v2i64)
+ return false;
+ } else {
+ return false;
+ }
+ }
if (VT == MVT::ppcf128)
return false;
@@ -7922,8 +9252,17 @@ bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
return false;
}
+bool
+PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
+ EVT VT , unsigned DefinedValues) const {
+ if (VT == MVT::v2i64)
+ return false;
+
+ return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
+}
+
Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
- if (DisableILPPref || PPCSubTarget.enableMachineScheduler())
+ if (DisableILPPref || Subtarget.enableMachineScheduler())
return TargetLowering::getSchedulingPreference(N);
return Sched::ILP;