summaryrefslogtreecommitdiff
path: root/lib/Target/PowerPC
diff options
context:
space:
mode:
author Dimitry Andric <dim@FreeBSD.org> 2017-05-16 19:46:52 +0000
committer Dimitry Andric <dim@FreeBSD.org> 2017-05-16 19:46:52 +0000
commit 6b3f41ed88e8e440e11a4fbf20b6600529f80049 (patch)
tree 928b056f24a634d628c80238dbbf10d41b1a71d5 /lib/Target/PowerPC
parent c46e6a5940c50058e00c0c5f9123fd82e338d29a (diff)
Notes
Diffstat (limited to 'lib/Target/PowerPC')
-rw-r--r-- lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp 17
-rw-r--r-- lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp 3
-rw-r--r-- lib/Target/PowerPC/PPCFastISel.cpp 3
-rw-r--r-- lib/Target/PowerPC/PPCISelDAGToDAG.cpp 255
-rw-r--r-- lib/Target/PowerPC/PPCISelLowering.cpp 98
-rw-r--r-- lib/Target/PowerPC/PPCISelLowering.h 29
-rw-r--r-- lib/Target/PowerPC/PPCInstr64Bit.td 28
-rw-r--r-- lib/Target/PowerPC/PPCInstrAltivec.td 40
-rw-r--r-- lib/Target/PowerPC/PPCInstrInfo.td 13
-rw-r--r-- lib/Target/PowerPC/PPCInstrVSX.td 2
-rw-r--r-- lib/Target/PowerPC/PPCTLSDynamicCall.cpp 3
11 files changed, 439 insertions, 52 deletions
diff --git a/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
index 12ffbfdeacc12..11d22377611bf 100644
--- a/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
+++ b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
@@ -204,6 +204,17 @@ static const unsigned G8Regs[] = {
PPC::X28, PPC::X29, PPC::X30, PPC::X31
};
+static const unsigned G80Regs[] = {
+ PPC::ZERO8, PPC::X1, PPC::X2, PPC::X3,
+ PPC::X4, PPC::X5, PPC::X6, PPC::X7,
+ PPC::X8, PPC::X9, PPC::X10, PPC::X11,
+ PPC::X12, PPC::X13, PPC::X14, PPC::X15,
+ PPC::X16, PPC::X17, PPC::X18, PPC::X19,
+ PPC::X20, PPC::X21, PPC::X22, PPC::X23,
+ PPC::X24, PPC::X25, PPC::X26, PPC::X27,
+ PPC::X28, PPC::X29, PPC::X30, PPC::X31
+};
+
static const unsigned QFRegs[] = {
PPC::QF0, PPC::QF1, PPC::QF2, PPC::QF3,
PPC::QF4, PPC::QF5, PPC::QF6, PPC::QF7,
@@ -301,6 +312,12 @@ static DecodeStatus DecodeG8RCRegisterClass(MCInst &Inst, uint64_t RegNo,
return decodeRegisterClass(Inst, RegNo, G8Regs);
}
+static DecodeStatus DecodeG8RC_NOX0RegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeRegisterClass(Inst, RegNo, G80Regs);
+}
+
#define DecodePointerLikeRegClass0 DecodeGPRCRegisterClass
#define DecodePointerLikeRegClass1 DecodeGPRC_NOR0RegisterClass
diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
index 609d959c6d08f..84bb9ec568009 100644
--- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
+++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
@@ -95,7 +95,8 @@ void PPCInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
return;
}
- if (MI->getOpcode() == PPC::RLDICR) {
+ if (MI->getOpcode() == PPC::RLDICR ||
+ MI->getOpcode() == PPC::RLDICR_32) {
unsigned char SH = MI->getOperand(2).getImm();
unsigned char ME = MI->getOperand(3).getImm();
// rldicr RA, RS, SH, 63-SH == sldi RA, RS, SH
diff --git a/lib/Target/PowerPC/PPCFastISel.cpp b/lib/Target/PowerPC/PPCFastISel.cpp
index 9b91b9ab8f827..2fc8654deeab9 100644
--- a/lib/Target/PowerPC/PPCFastISel.cpp
+++ b/lib/Target/PowerPC/PPCFastISel.cpp
@@ -1330,7 +1330,7 @@ bool PPCFastISel::processCallArgs(SmallVectorImpl<Value*> &Args,
// Issue CALLSEQ_START.
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TII.getCallFrameSetupOpcode()))
- .addImm(NumBytes);
+ .addImm(NumBytes).addImm(0);
// Prepare to assign register arguments. Every argument uses up a
// GPR protocol register even if it's passed in a floating-point
@@ -2246,6 +2246,7 @@ bool PPCFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
}
case PPC::EXTSW:
+ case PPC::EXTSW_32:
case PPC::EXTSW_32_64: {
if (VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8)
return false;
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 1b0402bf003d9..5fa7b2c6bfb1b 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -54,6 +54,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/Statistic.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -68,6 +69,14 @@ using namespace llvm;
#define DEBUG_TYPE "ppc-codegen"
+STATISTIC(NumSextSetcc,
+ "Number of (sext(setcc)) nodes expanded into GPR sequence.");
+STATISTIC(NumZextSetcc,
+ "Number of (zext(setcc)) nodes expanded into GPR sequence.");
+STATISTIC(SignExtensionsAdded,
+ "Number of sign extensions for compare inputs added.");
+STATISTIC(ZeroExtensionsAdded,
+ "Number of zero extensions for compare inputs added.");
// FIXME: Remove this once the bug has been fixed!
cl::opt<bool> ANDIGlueBug("expose-ppc-andi-glue-bug",
cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden);
@@ -252,7 +261,28 @@ namespace {
#include "PPCGenDAGISel.inc"
private:
+ // Conversion type for interpreting results of a 32-bit instruction as
+ // a 64-bit value or vice versa.
+ enum ExtOrTruncConversion { Ext, Trunc };
+
+ // Modifiers to guide how an ISD::SETCC node's result is to be computed
+ // in a GPR.
+ // ZExtOrig - use the original condition code, zero-extend value
+ // ZExtInvert - invert the condition code, zero-extend value
+ // SExtOrig - use the original condition code, sign-extend value
+ // SExtInvert - invert the condition code, sign-extend value
+ enum SetccInGPROpts { ZExtOrig, ZExtInvert, SExtOrig, SExtInvert };
+
bool trySETCC(SDNode *N);
+ bool tryEXTEND(SDNode *N);
+ SDValue signExtendInputIfNeeded(SDValue Input);
+ SDValue zeroExtendInputIfNeeded(SDValue Input);
+ SDValue addExtOrTrunc(SDValue NatWidthRes, ExtOrTruncConversion Conv);
+ SDValue get32BitZExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
+ int64_t RHSValue, SDLoc dl);
+ SDValue get32BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
+ int64_t RHSValue, SDLoc dl);
+ SDValue getSETCCInGPR(SDValue Compare, SetccInGPROpts ConvOpts);
void PeepholePPC64();
void PeepholePPC64ZExt();
@@ -2471,6 +2501,225 @@ bool PPCDAGToDAGISel::trySETCC(SDNode *N) {
return true;
}
+/// If this node is a sign/zero extension of an integer comparison,
+/// it can usually be computed in GPRs rather than using comparison
+/// instructions and ISEL. We only do this on 64-bit targets for now
+/// as the code is specialized for 64-bit (it uses 64-bit instructions
+/// and assumes 64-bit registers).
+bool PPCDAGToDAGISel::tryEXTEND(SDNode *N) {
+ if (TM.getOptLevel() == CodeGenOpt::None || !TM.isPPC64())
+ return false;
+ assert((N->getOpcode() == ISD::ZERO_EXTEND ||
+ N->getOpcode() == ISD::SIGN_EXTEND) &&
+ "Expecting a zero/sign extend node!");
+
+ if (N->getOperand(0).getOpcode() != ISD::SETCC)
+ return false;
+
+ SDValue WideRes =
+ getSETCCInGPR(N->getOperand(0),
+ N->getOpcode() == ISD::SIGN_EXTEND ?
+ SetccInGPROpts::SExtOrig : SetccInGPROpts::ZExtOrig);
+
+ if (!WideRes)
+ return false;
+
+ SDLoc dl(N);
+ bool Inputs32Bit = N->getOperand(0).getOperand(0).getValueType() == MVT::i32;
+ bool Output32Bit = N->getValueType(0) == MVT::i32;
+
+ NumSextSetcc += N->getOpcode() == ISD::SIGN_EXTEND ? 1 : 0;
+ NumZextSetcc += N->getOpcode() == ISD::SIGN_EXTEND ? 0 : 1;
+
+ SDValue ConvOp = WideRes;
+ if (Inputs32Bit != Output32Bit)
+ ConvOp = addExtOrTrunc(WideRes, Inputs32Bit ? ExtOrTruncConversion::Ext :
+ ExtOrTruncConversion::Trunc);
+ ReplaceNode(N, ConvOp.getNode());
+
+ return true;
+}
+
+/// If the value isn't guaranteed to be sign-extended to 64-bits, extend it.
+/// Useful when emitting comparison code for 32-bit values without using
+/// the compare instruction (which only considers the lower 32-bits).
+SDValue PPCDAGToDAGISel::signExtendInputIfNeeded(SDValue Input) {
+ assert(Input.getValueType() == MVT::i32 &&
+ "Can only sign-extend 32-bit values here.");
+ unsigned Opc = Input.getOpcode();
+
+ // The value was sign extended and then truncated to 32-bits. No need to
+ // sign extend it again.
+ if (Opc == ISD::TRUNCATE &&
+ (Input.getOperand(0).getOpcode() == ISD::AssertSext ||
+ Input.getOperand(0).getOpcode() == ISD::SIGN_EXTEND))
+ return Input;
+
+ LoadSDNode *InputLoad = dyn_cast<LoadSDNode>(Input);
+ // The input is a sign-extending load. No reason to sign-extend.
+ if (InputLoad && InputLoad->getExtensionType() == ISD::SEXTLOAD)
+ return Input;
+
+ ConstantSDNode *InputConst = dyn_cast<ConstantSDNode>(Input);
+ // We don't sign-extend constants and already sign-extended values.
+ if (InputConst || Opc == ISD::AssertSext || Opc == ISD::SIGN_EXTEND_INREG ||
+ Opc == ISD::SIGN_EXTEND)
+ return Input;
+
+ SDLoc dl(Input);
+ SignExtensionsAdded++;
+ return SDValue(CurDAG->getMachineNode(PPC::EXTSW_32, dl, MVT::i32, Input), 0);
+}
+
+/// If the value isn't guaranteed to be zero-extended to 64-bits, extend it.
+/// Useful when emitting comparison code for 32-bit values without using
+/// the compare instruction (which only considers the lower 32-bits).
+SDValue PPCDAGToDAGISel::zeroExtendInputIfNeeded(SDValue Input) {
+ assert(Input.getValueType() == MVT::i32 &&
+ "Can only zero-extend 32-bit values here.");
+ LoadSDNode *InputLoad = dyn_cast<LoadSDNode>(Input);
+ unsigned Opc = Input.getOpcode();
+
+ // No need to zero-extend loaded values (unless they're loaded with
+ // a sign-extending load).
+ if (InputLoad && InputLoad->getExtensionType() != ISD::SEXTLOAD)
+ return Input;
+
+ ConstantSDNode *InputConst = dyn_cast<ConstantSDNode>(Input);
+ bool InputZExtConst = InputConst && InputConst->getSExtValue() >= 0;
+ // An ISD::TRUNCATE will be lowered to an EXTRACT_SUBREG so we have
+ // to conservatively actually clear the high bits. We also don't need to
+ // zero-extend constants or values that are already zero-extended.
+ if (InputZExtConst || Opc == ISD::AssertZext || Opc == ISD::ZERO_EXTEND)
+ return Input;
+
+ SDLoc dl(Input);
+ ZeroExtensionsAdded++;
+ return SDValue(CurDAG->getMachineNode(PPC::RLDICL_32, dl, MVT::i32, Input,
+ getI64Imm(0, dl), getI64Imm(32, dl)),
+ 0);
+}
+
+// Handle a 32-bit value in a 64-bit register and vice-versa. These are of
+// course not actual zero/sign extensions that will generate machine code,
+// they're just a way to reinterpret a 32 bit value in a register as a
+// 64 bit value and vice-versa.
+SDValue PPCDAGToDAGISel::addExtOrTrunc(SDValue NatWidthRes,
+ ExtOrTruncConversion Conv) {
+ SDLoc dl(NatWidthRes);
+
+ // For reinterpreting 32-bit values as 64 bit values, we generate
+ // INSERT_SUBREG IMPLICIT_DEF:i64, <input>, TargetConstant:i32<1>
+ if (Conv == ExtOrTruncConversion::Ext) {
+ SDValue ImDef(CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl, MVT::i64), 0);
+ SDValue SubRegIdx =
+ CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
+ return SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl, MVT::i64,
+ ImDef, NatWidthRes, SubRegIdx), 0);
+ }
+
+ assert(Conv == ExtOrTruncConversion::Trunc &&
+ "Unknown convertion between 32 and 64 bit values.");
+ // For reinterpreting 64-bit values as 32-bit values, we just need to
+ // EXTRACT_SUBREG (i.e. extract the low word).
+ SDValue SubRegIdx =
+ CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
+ return SDValue(CurDAG->getMachineNode(PPC::EXTRACT_SUBREG, dl, MVT::i32,
+ NatWidthRes, SubRegIdx), 0);
+}
+
+/// Produces a zero-extended result of comparing two 32-bit values according to
+/// the passed condition code.
+SDValue PPCDAGToDAGISel::get32BitZExtCompare(SDValue LHS, SDValue RHS,
+ ISD::CondCode CC,
+ int64_t RHSValue, SDLoc dl) {
+ bool IsRHSZero = RHSValue == 0;
+ switch (CC) {
+ default: return SDValue();
+ case ISD::SETEQ: {
+ // (zext (setcc %a, %b, seteq)) -> (lshr (cntlzw (xor %a, %b)), 5)
+ // (zext (setcc %a, 0, seteq)) -> (lshr (cntlzw %a), 5)
+ SDValue Xor = IsRHSZero ? LHS :
+ SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
+ SDValue Clz =
+ SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0);
+ SDValue ShiftOps[] = { Clz, getI32Imm(27, dl), getI32Imm(5, dl),
+ getI32Imm(31, dl) };
+ return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
+ ShiftOps), 0);
+ }
+ }
+}
+
+/// Produces a sign-extended result of comparing two 32-bit values according to
+/// the passed condition code.
+SDValue PPCDAGToDAGISel::get32BitSExtCompare(SDValue LHS, SDValue RHS,
+ ISD::CondCode CC,
+ int64_t RHSValue, SDLoc dl) {
+ bool IsRHSZero = RHSValue == 0;
+ switch (CC) {
+ default: return SDValue();
+ case ISD::SETEQ: {
+ // (sext (setcc %a, %b, seteq)) ->
+ // (ashr (shl (ctlz (xor %a, %b)), 58), 63)
+ // (sext (setcc %a, 0, seteq)) ->
+ // (ashr (shl (ctlz %a), 58), 63)
+ SDValue CountInput = IsRHSZero ? LHS :
+ SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
+ SDValue Cntlzw =
+ SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, CountInput), 0);
+ SDValue SHLOps[] = { Cntlzw, getI32Imm(58, dl), getI32Imm(0, dl) };
+ SDValue Sldi =
+ SDValue(CurDAG->getMachineNode(PPC::RLDICR_32, dl, MVT::i32, SHLOps), 0);
+ return SDValue(CurDAG->getMachineNode(PPC::SRADI_32, dl, MVT::i32, Sldi,
+ getI32Imm(63, dl)), 0);
+ }
+ }
+}
+
+/// Returns an equivalent of a SETCC node but with the result the same width as
+/// the inputs. This can also be used for SELECT_CC if either the true or false
+/// value is a power of two while the other is zero.
+SDValue PPCDAGToDAGISel::getSETCCInGPR(SDValue Compare,
+ SetccInGPROpts ConvOpts) {
+ assert((Compare.getOpcode() == ISD::SETCC ||
+ Compare.getOpcode() == ISD::SELECT_CC) &&
+ "An ISD::SETCC node required here.");
+
+ SDValue LHS = Compare.getOperand(0);
+ SDValue RHS = Compare.getOperand(1);
+
+ // The condition code is operand 2 for SETCC and operand 4 for SELECT_CC.
+ int CCOpNum = Compare.getOpcode() == ISD::SELECT_CC ? 4 : 2;
+ ISD::CondCode CC =
+ cast<CondCodeSDNode>(Compare.getOperand(CCOpNum))->get();
+ EVT InputVT = LHS.getValueType();
+ if (InputVT != MVT::i32)
+ return SDValue();
+
+ SDLoc dl(Compare);
+ ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
+ int64_t RHSValue = RHSConst ? RHSConst->getSExtValue() : INT64_MAX;
+
+ if (ConvOpts == SetccInGPROpts::ZExtInvert ||
+ ConvOpts == SetccInGPROpts::SExtInvert)
+ CC = ISD::getSetCCInverse(CC, true);
+
+ if (ISD::isSignedIntSetCC(CC)) {
+ LHS = signExtendInputIfNeeded(LHS);
+ RHS = signExtendInputIfNeeded(RHS);
+ } else if (ISD::isUnsignedIntSetCC(CC)) {
+ LHS = zeroExtendInputIfNeeded(LHS);
+ RHS = zeroExtendInputIfNeeded(RHS);
+ }
+
+ bool IsSext = ConvOpts == SetccInGPROpts::SExtOrig ||
+ ConvOpts == SetccInGPROpts::SExtInvert;
+ if (IsSext)
+ return get32BitSExtCompare(LHS, RHS, CC, RHSValue, dl);
+ return get32BitZExtCompare(LHS, RHS, CC, RHSValue, dl);
+}
+
void PPCDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
// Transfer memoperands.
MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
@@ -2508,6 +2757,12 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
}
break;
+ case ISD::ZERO_EXTEND:
+ case ISD::SIGN_EXTEND:
+ if (tryEXTEND(N))
+ return;
+ break;
+
case ISD::SETCC:
if (trySETCC(N))
return;
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 685f24cb502e3..17bdd595da109 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -923,6 +923,9 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
// We have target-specific dag combine patterns for the following nodes:
+ setTargetDAGCombine(ISD::SHL);
+ setTargetDAGCombine(ISD::SRA);
+ setTargetDAGCombine(ISD::SRL);
setTargetDAGCombine(ISD::SINT_TO_FP);
setTargetDAGCombine(ISD::BUILD_VECTOR);
if (Subtarget.hasFPCVT())
@@ -4949,8 +4952,7 @@ SDValue PPCTargetLowering::LowerCall_32SVR4(
// Adjust the stack pointer for the new arguments...
// These operations are automatically eliminated by the prolog/epilog pass
- Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
- dl);
+ Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
SDValue CallSeqStart = Chain;
// Load the return address and frame pointer so it can be moved somewhere else
@@ -5000,9 +5002,8 @@ SDValue PPCTargetLowering::LowerCall_32SVR4(
Flags, DAG, dl);
// This must go outside the CALLSEQ_START..END.
- SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
- CallSeqStart.getNode()->getOperand(1),
- SDLoc(MemcpyCall));
+ SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, NumBytes, 0,
+ SDLoc(MemcpyCall));
DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
NewCallSeqStart.getNode());
Chain = CallSeqStart = NewCallSeqStart;
@@ -5083,9 +5084,9 @@ SDValue PPCTargetLowering::createMemcpyOutsideCallSeq(
CallSeqStart.getNode()->getOperand(0),
Flags, DAG, dl);
// The MEMCPY must go outside the CALLSEQ_START..END.
- SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
- CallSeqStart.getNode()->getOperand(1),
- SDLoc(MemcpyCall));
+ int64_t FrameSize = CallSeqStart.getConstantOperandVal(1);
+ SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, FrameSize, 0,
+ SDLoc(MemcpyCall));
DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
NewCallSeqStart.getNode());
return NewCallSeqStart;
@@ -5268,8 +5269,7 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
// Adjust the stack pointer for the new arguments...
// These operations are automatically eliminated by the prolog/epilog pass
if (!IsSibCall)
- Chain = DAG.getCALLSEQ_START(Chain,
- DAG.getIntPtrConstant(NumBytes, dl, true), dl);
+ Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
SDValue CallSeqStart = Chain;
// Load the return address and frame pointer so it can be move somewhere else
@@ -5828,8 +5828,7 @@ SDValue PPCTargetLowering::LowerCall_Darwin(
// Adjust the stack pointer for the new arguments...
// These operations are automatically eliminated by the prolog/epilog pass
- Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
- dl);
+ Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
SDValue CallSeqStart = Chain;
// Load the return address and frame pointer so it can be move somewhere else
@@ -8741,9 +8740,9 @@ static Instruction* callIntrinsic(IRBuilder<> &Builder, Intrinsic::ID Id) {
// The mappings for emitLeading/TrailingFence is taken from
// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
-Instruction* PPCTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
- AtomicOrdering Ord, bool IsStore,
- bool IsLoad) const {
+Instruction *PPCTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
+ Instruction *Inst,
+ AtomicOrdering Ord) const {
if (Ord == AtomicOrdering::SequentiallyConsistent)
return callIntrinsic(Builder, Intrinsic::ppc_sync);
if (isReleaseOrStronger(Ord))
@@ -8751,10 +8750,10 @@ Instruction* PPCTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
return nullptr;
}
-Instruction* PPCTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
- AtomicOrdering Ord, bool IsStore,
- bool IsLoad) const {
- if (IsLoad && isAcquireOrStronger(Ord))
+Instruction *PPCTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
+ Instruction *Inst,
+ AtomicOrdering Ord) const {
+ if (Inst->hasAtomicLoad() && isAcquireOrStronger(Ord))
return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
// FIXME: this is too conservative, a dependent branch + isync is enough.
// See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
@@ -11316,6 +11315,12 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
SDLoc dl(N);
switch (N->getOpcode()) {
default: break;
+ case ISD::SHL:
+ return combineSHL(N, DCI);
+ case ISD::SRA:
+ return combineSRA(N, DCI);
+ case ISD::SRL:
+ return combineSRL(N, DCI);
case PPCISD::SHL:
if (isNullConstant(N->getOperand(0))) // 0 << V -> 0.
return N->getOperand(0);
@@ -12948,3 +12953,58 @@ bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
return Imm.isPosZero();
}
}
+
+// For a vector shift operation op whose elements are N bits wide, fold
+// (op x, (and y, N - 1)) -> (target op x, y)
+static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N,
+ SelectionDAG &DAG) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N0.getValueType();
+ unsigned OpSizeInBits = VT.getScalarSizeInBits();
+ unsigned Opcode = N->getOpcode();
+ unsigned TargetOpcode;
+
+ switch (Opcode) {
+ default:
+ llvm_unreachable("Unexpected shift operation");
+ case ISD::SHL:
+ TargetOpcode = PPCISD::SHL;
+ break;
+ case ISD::SRL:
+ TargetOpcode = PPCISD::SRL;
+ break;
+ case ISD::SRA:
+ TargetOpcode = PPCISD::SRA;
+ break;
+ }
+
+ if (VT.isVector() && TLI.isOperationLegal(Opcode, VT) &&
+ N1->getOpcode() == ISD::AND)
+ if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1)))
+ if (Mask->getZExtValue() == OpSizeInBits - 1)
+ return DAG.getNode(TargetOpcode, SDLoc(N), VT, N0, N1->getOperand(0));
+
+ return SDValue();
+}
+
+SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const {
+ if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
+ return Value;
+
+ return SDValue();
+}
+
+SDValue PPCTargetLowering::combineSRA(SDNode *N, DAGCombinerInfo &DCI) const {
+ if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
+ return Value;
+
+ return SDValue();
+}
+
+SDValue PPCTargetLowering::combineSRL(SDNode *N, DAGCombinerInfo &DCI) const {
+ if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
+ return Value;
+
+ return SDValue();
+}
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index 32661099b79d3..4fc7442572628 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -117,9 +117,13 @@ namespace llvm {
/// at function entry, used for PIC code.
GlobalBaseReg,
- /// These nodes represent the 32-bit PPC shifts that operate on 6-bit
- /// shift amounts. These nodes are generated by the multi-precision shift
- /// code.
+ /// These nodes represent PPC shifts.
+ ///
+ /// For scalar types, only the last `n + 1` bits of the shift amounts
+ /// are used, where n is log2(sizeof(element) * 8). See sld/slw, etc.
+ /// for exact behaviors.
+ ///
+ /// For vector types, only the last n bits are used. See vsld.
SRL, SRA, SHL,
/// The combination of sra[wd]i and addze used to implemented signed
@@ -617,10 +621,10 @@ namespace llvm {
return true;
}
- Instruction* emitLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord,
- bool IsStore, bool IsLoad) const override;
- Instruction* emitTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord,
- bool IsStore, bool IsLoad) const override;
+ Instruction *emitLeadingFence(IRBuilder<> &Builder, Instruction *Inst,
+ AtomicOrdering Ord) const override;
+ Instruction *emitTrailingFence(IRBuilder<> &Builder, Instruction *Inst,
+ AtomicOrdering Ord) const override;
MachineBasicBlock *
EmitInstrWithCustomInserter(MachineInstr &MI,
@@ -999,6 +1003,9 @@ namespace llvm {
SDValue DAGCombineBuildVector(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue DAGCombineTruncBoolExt(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineFPToIntToFP(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue combineSHL(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue combineSRA(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue combineSRL(SDNode *N, DAGCombinerInfo &DCI) const;
/// ConvertSETCCToSubtract - looks at SETCC that compares ints. It replaces
/// SETCC with integer subtraction when (1) there is a legal way of doing it
@@ -1017,14 +1024,6 @@ namespace llvm {
SDValue
combineElementTruncationToVectorTruncation(SDNode *N,
DAGCombinerInfo &DCI) const;
-
- bool supportsModuloShift(ISD::NodeType Inst,
- EVT ReturnType) const override {
- assert((Inst == ISD::SHL || Inst == ISD::SRA || Inst == ISD::SRL) &&
- "Expect a shift instruction");
- assert(isOperationLegal(Inst, ReturnType));
- return ReturnType.isVector();
- }
};
namespace PPC {
diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td
index 997b96ca6ec8b..a8433919f0f3a 100644
--- a/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -634,10 +634,19 @@ let Interpretation64Bit = 1, isCodeGenOnly = 1 in
defm EXTSW_32_64 : XForm_11r<31, 986, (outs g8rc:$rA), (ins gprc:$rS),
"extsw", "$rA, $rS", IIC_IntSimple,
[(set i64:$rA, (sext i32:$rS))]>, isPPC64;
+let isCodeGenOnly = 1 in
+def EXTSW_32 : XForm_11<31, 986, (outs gprc:$rA), (ins gprc:$rS),
+ "extsw $rA, $rS", IIC_IntSimple,
+ []>, isPPC64;
defm SRADI : XSForm_1rc<31, 413, (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH),
"sradi", "$rA, $rS, $SH", IIC_IntRotateDI,
[(set i64:$rA, (sra i64:$rS, (i32 imm:$SH)))]>, isPPC64;
+// For fast-isel:
+let isCodeGenOnly = 1 in
+def SRADI_32 : XSForm_1<31, 413, (outs gprc:$rA), (ins gprc:$rS, u6imm:$SH),
+ "sradi $rA, $rS, $SH", IIC_IntRotateDI, []>, isPPC64;
+
defm CNTLZD : XForm_11r<31, 58, (outs g8rc:$rA), (ins g8rc:$rS),
"cntlzd", "$rA, $rS", IIC_IntGeneral,
[(set i64:$rA, (ctlz i64:$rS))]>;
@@ -721,15 +730,26 @@ defm RLDICL : MDForm_1r<30, 0,
// For fast-isel:
let isCodeGenOnly = 1 in
def RLDICL_32_64 : MDForm_1<30, 0,
- (outs g8rc:$rA),
- (ins gprc:$rS, u6imm:$SH, u6imm:$MBE),
- "rldicl $rA, $rS, $SH, $MBE", IIC_IntRotateDI,
- []>, isPPC64;
+ (outs g8rc:$rA),
+ (ins gprc:$rS, u6imm:$SH, u6imm:$MBE),
+ "rldicl $rA, $rS, $SH, $MBE", IIC_IntRotateDI,
+ []>, isPPC64;
// End fast-isel.
+let isCodeGenOnly = 1 in
+def RLDICL_32 : MDForm_1<30, 0,
+ (outs gprc:$rA),
+ (ins gprc:$rS, u6imm:$SH, u6imm:$MBE),
+ "rldicl $rA, $rS, $SH, $MBE", IIC_IntRotateDI,
+ []>, isPPC64;
defm RLDICR : MDForm_1r<30, 1,
(outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH, u6imm:$MBE),
"rldicr", "$rA, $rS, $SH, $MBE", IIC_IntRotateDI,
[]>, isPPC64;
+let isCodeGenOnly = 1 in
+def RLDICR_32 : MDForm_1<30, 1,
+ (outs gprc:$rA), (ins gprc:$rS, u6imm:$SH, u6imm:$MBE),
+ "rldicr $rA, $rS, $SH, $MBE", IIC_IntRotateDI,
+ []>, isPPC64;
defm RLDIC : MDForm_1r<30, 2,
(outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH, u6imm:$MBE),
"rldic", "$rA, $rS, $SH, $MBE", IIC_IntRotateDI,
diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td
index c380766e9f5c4..e14d18fd54331 100644
--- a/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -987,6 +987,12 @@ def : Pat<(v8i16 (shl v8i16:$vA, v8i16:$vB)),
(v8i16 (VSLH $vA, $vB))>;
def : Pat<(v4i32 (shl v4i32:$vA, v4i32:$vB)),
(v4i32 (VSLW $vA, $vB))>;
+def : Pat<(v16i8 (PPCshl v16i8:$vA, v16i8:$vB)),
+ (v16i8 (VSLB $vA, $vB))>;
+def : Pat<(v8i16 (PPCshl v8i16:$vA, v8i16:$vB)),
+ (v8i16 (VSLH $vA, $vB))>;
+def : Pat<(v4i32 (PPCshl v4i32:$vA, v4i32:$vB)),
+ (v4i32 (VSLW $vA, $vB))>;
def : Pat<(v16i8 (srl v16i8:$vA, v16i8:$vB)),
(v16i8 (VSRB $vA, $vB))>;
@@ -994,6 +1000,12 @@ def : Pat<(v8i16 (srl v8i16:$vA, v8i16:$vB)),
(v8i16 (VSRH $vA, $vB))>;
def : Pat<(v4i32 (srl v4i32:$vA, v4i32:$vB)),
(v4i32 (VSRW $vA, $vB))>;
+def : Pat<(v16i8 (PPCsrl v16i8:$vA, v16i8:$vB)),
+ (v16i8 (VSRB $vA, $vB))>;
+def : Pat<(v8i16 (PPCsrl v8i16:$vA, v8i16:$vB)),
+ (v8i16 (VSRH $vA, $vB))>;
+def : Pat<(v4i32 (PPCsrl v4i32:$vA, v4i32:$vB)),
+ (v4i32 (VSRW $vA, $vB))>;
def : Pat<(v16i8 (sra v16i8:$vA, v16i8:$vB)),
(v16i8 (VSRAB $vA, $vB))>;
@@ -1001,6 +1013,12 @@ def : Pat<(v8i16 (sra v8i16:$vA, v8i16:$vB)),
(v8i16 (VSRAH $vA, $vB))>;
def : Pat<(v4i32 (sra v4i32:$vA, v4i32:$vB)),
(v4i32 (VSRAW $vA, $vB))>;
+def : Pat<(v16i8 (PPCsra v16i8:$vA, v16i8:$vB)),
+ (v16i8 (VSRAB $vA, $vB))>;
+def : Pat<(v8i16 (PPCsra v8i16:$vA, v8i16:$vB)),
+ (v8i16 (VSRAH $vA, $vB))>;
+def : Pat<(v4i32 (PPCsra v4i32:$vA, v4i32:$vB)),
+ (v4i32 (VSRAW $vA, $vB))>;
// Float to integer and integer to float conversions
def : Pat<(v4i32 (fp_to_sint v4f32:$vA)),
@@ -1072,14 +1090,24 @@ def:Pat<(vmrgow_swapped_shuffle v16i8:$vA, v16i8:$vB),
// Vector shifts
def VRLD : VX1_Int_Ty<196, "vrld", int_ppc_altivec_vrld, v2i64>;
def VSLD : VXForm_1<1476, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vsld $vD, $vA, $vB", IIC_VecGeneral,
- [(set v2i64:$vD, (shl v2i64:$vA, v2i64:$vB))]>;
+ "vsld $vD, $vA, $vB", IIC_VecGeneral, []>;
def VSRD : VXForm_1<1732, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vsrd $vD, $vA, $vB", IIC_VecGeneral,
- [(set v2i64:$vD, (srl v2i64:$vA, v2i64:$vB))]>;
+ "vsrd $vD, $vA, $vB", IIC_VecGeneral, []>;
def VSRAD : VXForm_1<964, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vsrad $vD, $vA, $vB", IIC_VecGeneral,
- [(set v2i64:$vD, (sra v2i64:$vA, v2i64:$vB))]>;
+ "vsrad $vD, $vA, $vB", IIC_VecGeneral, []>;
+
+def : Pat<(v2i64 (shl v2i64:$vA, v2i64:$vB)),
+ (v2i64 (VSLD $vA, $vB))>;
+def : Pat<(v2i64 (PPCshl v2i64:$vA, v2i64:$vB)),
+ (v2i64 (VSLD $vA, $vB))>;
+def : Pat<(v2i64 (srl v2i64:$vA, v2i64:$vB)),
+ (v2i64 (VSRD $vA, $vB))>;
+def : Pat<(v2i64 (PPCsrl v2i64:$vA, v2i64:$vB)),
+ (v2i64 (VSRD $vA, $vB))>;
+def : Pat<(v2i64 (sra v2i64:$vA, v2i64:$vB)),
+ (v2i64 (VSRAD $vA, $vB))>;
+def : Pat<(v2i64 (PPCsra v2i64:$vA, v2i64:$vB)),
+ (v2i64 (VSRAD $vA, $vB))>;
// Vector Integer Arithmetic Instructions
let isCommutable = 1 in {
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index f004ce49cac0d..1af5e7f28342f 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -33,7 +33,8 @@ def SDT_PPCVexts : SDTypeProfile<1, 2, [
SDTCisVT<0, f64>, SDTCisVT<1, f64>, SDTCisPtrTy<2>
]>;
-def SDT_PPCCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
+def SDT_PPCCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32>,
+ SDTCisVT<1, i32> ]>;
def SDT_PPCCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>,
SDTCisVT<1, i32> ]>;
def SDT_PPCvperm : SDTypeProfile<1, 3, [
@@ -1099,9 +1100,11 @@ multiclass AForm_3r<bits<6> opcode, bits<5> xo, dag OOL, dag IOL,
let hasCtrlDep = 1 in {
let Defs = [R1], Uses = [R1] in {
-def ADJCALLSTACKDOWN : Pseudo<(outs), (ins u16imm:$amt), "#ADJCALLSTACKDOWN $amt",
- [(callseq_start timm:$amt)]>;
-def ADJCALLSTACKUP : Pseudo<(outs), (ins u16imm:$amt1, u16imm:$amt2), "#ADJCALLSTACKUP $amt1 $amt2",
+def ADJCALLSTACKDOWN : Pseudo<(outs), (ins u16imm:$amt1, u16imm:$amt2),
+ "#ADJCALLSTACKDOWN $amt1 $amt2",
+ [(callseq_start timm:$amt1, timm:$amt2)]>;
+def ADJCALLSTACKUP : Pseudo<(outs), (ins u16imm:$amt1, u16imm:$amt2),
+ "#ADJCALLSTACKUP $amt1 $amt2",
[(callseq_end timm:$amt1, timm:$amt2)]>;
}
@@ -4163,6 +4166,8 @@ def : InstAlias<"rotldi. $rA, $rS, $n", (RLDICLo g8rc:$rA, g8rc:$rS, u6imm:$n, 0
def : InstAlias<"rotld $rA, $rS, $rB", (RLDCL g8rc:$rA, g8rc:$rS, gprc:$rB, 0)>;
def : InstAlias<"rotld. $rA, $rS, $rB", (RLDCLo g8rc:$rA, g8rc:$rS, gprc:$rB, 0)>;
def : InstAlias<"clrldi $rA, $rS, $n", (RLDICL g8rc:$rA, g8rc:$rS, 0, u6imm:$n)>;
+def : InstAlias<"clrldi $rA, $rS, $n",
+ (RLDICL_32 gprc:$rA, gprc:$rS, 0, u6imm:$n)>;
def : InstAlias<"clrldi. $rA, $rS, $n", (RLDICLo g8rc:$rA, g8rc:$rS, 0, u6imm:$n)>;
def RLWINMbm : PPCAsmPseudo<"rlwinm $rA, $rS, $n, $b",
diff --git a/lib/Target/PowerPC/PPCInstrVSX.td b/lib/Target/PowerPC/PPCInstrVSX.td
index 967557452f249..b98140fedfc04 100644
--- a/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/lib/Target/PowerPC/PPCInstrVSX.td
@@ -1436,7 +1436,7 @@ let Predicates = [IsISA3_0, HasDirectMove] in {
def MTVSRWS: XX1_RS6_RD5_XO<31, 403, (outs vsrc:$XT), (ins gprc:$rA),
"mtvsrws $XT, $rA", IIC_VecGeneral, []>;
- def MTVSRDD: XX1Form<31, 435, (outs vsrc:$XT), (ins g8rc:$rA, g8rc:$rB),
+ def MTVSRDD: XX1Form<31, 435, (outs vsrc:$XT), (ins g8rc_nox0:$rA, g8rc:$rB),
"mtvsrdd $XT, $rA, $rB", IIC_VecGeneral,
[]>, Requires<[In64BitMode]>;
diff --git a/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
index 0c1260a2965b7..c7aa4cb78b7a4 100644
--- a/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
+++ b/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
@@ -99,7 +99,8 @@ protected:
// Don't really need to save data to the stack - the clobbered
// registers are already saved when the SDNode (e.g. PPCaddiTlsgdLAddr)
// gets translated to the pseudo instruction (e.g. ADDItlsgdLADDR).
- BuildMI(MBB, I, DL, TII->get(PPC::ADJCALLSTACKDOWN)).addImm(0);
+ BuildMI(MBB, I, DL, TII->get(PPC::ADJCALLSTACKDOWN)).addImm(0)
+ .addImm(0);
// Expand into two ops built prior to the existing instruction.
MachineInstr *Addi = BuildMI(MBB, I, DL, TII->get(Opc1), GPR3)