summaryrefslogtreecommitdiff
path: root/lib/Target
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2011-02-26 22:03:50 +0000
committerDimitry Andric <dim@FreeBSD.org>2011-02-26 22:03:50 +0000
commitd0e4e96dc17a6c1c6de3340842c80f0e187ba349 (patch)
treeddf53b8bd9235bcb0b8aae16c5e22310dcdad665 /lib/Target
parentcf099d11218cb6f6c5cce947d6738e347f07fb12 (diff)
downloadsrc-test-d0e4e96dc17a6c1c6de3340842c80f0e187ba349.tar.gz
src-test-d0e4e96dc17a6c1c6de3340842c80f0e187ba349.zip
Notes
Diffstat (limited to 'lib/Target')
-rw-r--r--lib/Target/ARM/ARMBaseInstrInfo.h3
-rw-r--r--lib/Target/ARM/ARMFastISel.cpp38
-rw-r--r--lib/Target/ARM/ARMFrameLowering.cpp16
-rw-r--r--lib/Target/ARM/ARMHazardRecognizer.cpp17
-rw-r--r--lib/Target/ARM/ARMISelDAGToDAG.cpp15
-rw-r--r--lib/Target/ARM/ARMISelLowering.cpp102
-rw-r--r--lib/Target/ARM/ARMInstrFormats.td7
-rw-r--r--lib/Target/ARM/ARMInstrInfo.td4
-rw-r--r--lib/Target/ARM/ARMInstrNEON.td31
-rw-r--r--lib/Target/ARM/ARMInstrVFP.td162
-rw-r--r--lib/Target/ARM/ARMSubtarget.cpp4
-rw-r--r--lib/Target/ARM/MLxExpansionPass.cpp20
-rw-r--r--lib/Target/ARM/NEONMoveFix.cpp9
-rw-r--r--lib/Target/ARM/Thumb2InstrInfo.cpp6
-rw-r--r--lib/Target/Alpha/AlphaISelLowering.cpp1
-rw-r--r--lib/Target/Alpha/AlphaISelLowering.h14
-rw-r--r--lib/Target/Blackfin/BlackfinISelLowering.cpp1
-rw-r--r--lib/Target/Blackfin/BlackfinISelLowering.h1
-rw-r--r--lib/Target/CellSPU/SPUISelLowering.cpp7
-rw-r--r--lib/Target/CellSPU/SPUISelLowering.h6
-rw-r--r--lib/Target/MBlaze/MBlazeISelLowering.cpp4
-rw-r--r--lib/Target/MSP430/MSP430ISelLowering.cpp6
-rw-r--r--lib/Target/MSP430/MSP430ISelLowering.h2
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.cpp25
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.h94
-rw-r--r--lib/Target/README.txt24
-rw-r--r--lib/Target/Sparc/DelaySlotFiller.cpp31
-rw-r--r--lib/Target/Sparc/SparcISelLowering.cpp43
-rw-r--r--lib/Target/Sparc/SparcISelLowering.h2
-rw-r--r--lib/Target/Sparc/SparcInstrInfo.td18
-rw-r--r--lib/Target/SystemZ/SystemZISelLowering.cpp3
-rw-r--r--lib/Target/SystemZ/SystemZISelLowering.h2
-rw-r--r--lib/Target/X86/AsmParser/X86AsmParser.cpp13
-rw-r--r--lib/Target/X86/Disassembler/X86Disassembler.cpp8
-rw-r--r--lib/Target/X86/Disassembler/X86DisassemblerDecoder.h2
-rw-r--r--lib/Target/X86/README.txt82
-rw-r--r--lib/Target/X86/X86FastISel.cpp8
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp50
-rw-r--r--lib/Target/X86/X86ISelLowering.h18
-rw-r--r--lib/Target/X86/X86InstrFormats.td2
-rw-r--r--lib/Target/X86/X86InstrInfo.cpp4
-rw-r--r--lib/Target/X86/X86InstrInfo.h4
-rw-r--r--lib/Target/X86/X86InstrInfo.td3
-rw-r--r--lib/Target/X86/X86InstrSystem.td5
-rw-r--r--lib/Target/X86/X86MCCodeEmitter.cpp8
-rw-r--r--lib/Target/X86/X86Subtarget.cpp7
-rw-r--r--lib/Target/X86/X86Subtarget.h2
-rw-r--r--lib/Target/XCore/XCoreISelLowering.cpp125
-rw-r--r--lib/Target/XCore/XCoreISelLowering.h23
-rw-r--r--lib/Target/XCore/XCoreInstrInfo.td55
50 files changed, 732 insertions, 405 deletions
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index 1fb88726d0dec..7e2183d7cd5e0 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -155,10 +155,11 @@ namespace ARMII {
//===------------------------------------------------------------------===//
// Code domain.
DomainShift = 18,
- DomainMask = 3 << DomainShift,
+ DomainMask = 7 << DomainShift,
DomainGeneral = 0 << DomainShift,
DomainVFP = 1 << DomainShift,
DomainNEON = 2 << DomainShift,
+ DomainNEONA8 = 4 << DomainShift,
//===------------------------------------------------------------------===//
// Field shifts - such shifts are used to set field while generating
diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
index 9f295302db0e4..26f48b3083168 100644
--- a/lib/Target/ARM/ARMFastISel.cpp
+++ b/lib/Target/ARM/ARMFastISel.cpp
@@ -172,6 +172,7 @@ class ARMFastISel : public FastISel {
unsigned ARMMaterializeGV(const GlobalValue *GV, EVT VT);
unsigned ARMMoveToFPReg(EVT VT, unsigned SrcReg);
unsigned ARMMoveToIntReg(EVT VT, unsigned SrcReg);
+ unsigned ARMSelectCallOp(const GlobalValue *GV);
// Call handling routines.
private:
@@ -1633,6 +1634,25 @@ bool ARMFastISel::SelectRet(const Instruction *I) {
return true;
}
+unsigned ARMFastISel::ARMSelectCallOp(const GlobalValue *GV) {
+
+ // Depend our opcode for thumb on whether or not we're targeting an
+ // externally callable function. For libcalls we'll just pass a NULL GV
+ // in here.
+ bool isExternal = false;
+ if (!GV || GV->hasExternalLinkage()) isExternal = true;
+
+ // Darwin needs the r9 versions of the opcodes.
+ bool isDarwin = Subtarget->isTargetDarwin();
+ if (isThumb && isExternal) {
+ return isDarwin ? ARM::tBLXi_r9 : ARM::tBLXi;
+ } else if (isThumb) {
+ return isDarwin ? ARM::tBLr9 : ARM::tBL;
+ } else {
+ return isDarwin ? ARM::BLr9 : ARM::BL;
+ }
+}
+
// A quick function that will emit a call for a named libcall in F with the
// vector of passed arguments for the Instruction in I. We can assume that we
// can emit a call for any libcall we can produce. This is an abridged version
@@ -1694,20 +1714,17 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
// Issue the call, BLXr9 for darwin, BLX otherwise. This uses V5 ops.
// TODO: Turn this into the table of arm call ops.
MachineInstrBuilder MIB;
- unsigned CallOpc;
- if(isThumb) {
- CallOpc = Subtarget->isTargetDarwin() ? ARM::tBLXi_r9 : ARM::tBLXi;
+ unsigned CallOpc = ARMSelectCallOp(NULL);
+ if(isThumb)
// Explicitly adding the predicate here.
MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(CallOpc)))
.addExternalSymbol(TLI.getLibcallName(Call));
- } else {
- CallOpc = Subtarget->isTargetDarwin() ? ARM::BLr9 : ARM::BL;
+ else
// Explicitly adding the predicate here.
MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(CallOpc))
.addExternalSymbol(TLI.getLibcallName(Call)));
- }
// Add implicit physical register uses to the call.
for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
@@ -1813,21 +1830,18 @@ bool ARMFastISel::SelectCall(const Instruction *I) {
// Issue the call, BLXr9 for darwin, BLX otherwise. This uses V5 ops.
// TODO: Turn this into the table of arm call ops.
MachineInstrBuilder MIB;
- unsigned CallOpc;
+ unsigned CallOpc = ARMSelectCallOp(GV);
// Explicitly adding the predicate here.
- if(isThumb) {
- CallOpc = Subtarget->isTargetDarwin() ? ARM::tBLXi_r9 : ARM::tBLXi;
+ if(isThumb)
// Explicitly adding the predicate here.
MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(CallOpc)))
.addGlobalAddress(GV, 0, 0);
- } else {
- CallOpc = Subtarget->isTargetDarwin() ? ARM::BLr9 : ARM::BL;
+ else
// Explicitly adding the predicate here.
MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(CallOpc))
.addGlobalAddress(GV, 0, 0));
- }
// Add implicit physical register uses to the call.
for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp
index f42c6db84fd3e..68c33f098ec9e 100644
--- a/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/lib/Target/ARM/ARMFrameLowering.cpp
@@ -215,7 +215,13 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
// Move past area 3.
- if (DPRCSSize > 0) MBBI++;
+ if (DPRCSSize > 0) {
+ MBBI++;
+ // Since vpush register list cannot have gaps, there may be multiple vpush
+ // instructions in the prologue.
+ while (MBBI->getOpcode() == ARM::VSTMDDB_UPD)
+ MBBI++;
+ }
NumBytes = DPRCSOffset;
if (NumBytes) {
@@ -370,7 +376,13 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes);
// Increment past our save areas.
- if (AFI->getDPRCalleeSavedAreaSize()) MBBI++;
+ if (AFI->getDPRCalleeSavedAreaSize()) {
+ MBBI++;
+ // Since vpop register list cannot have gaps, there may be multiple vpop
+ // instructions in the epilogue.
+ while (MBBI->getOpcode() == ARM::VLDMDIA_UPD)
+ MBBI++;
+ }
if (AFI->getGPRCalleeSavedArea2Size()) MBBI++;
if (AFI->getGPRCalleeSavedArea1Size()) MBBI++;
}
diff --git a/lib/Target/ARM/ARMHazardRecognizer.cpp b/lib/Target/ARM/ARMHazardRecognizer.cpp
index 676b01e91c53a..e97ce50bc4294 100644
--- a/lib/Target/ARM/ARMHazardRecognizer.cpp
+++ b/lib/Target/ARM/ARMHazardRecognizer.cpp
@@ -21,17 +21,14 @@ static bool hasRAWHazard(MachineInstr *DefMI, MachineInstr *MI,
// FIXME: Detect integer instructions properly.
const TargetInstrDesc &TID = MI->getDesc();
unsigned Domain = TID.TSFlags & ARMII::DomainMask;
- if (Domain == ARMII::DomainVFP) {
- unsigned Opcode = MI->getOpcode();
- if (Opcode == ARM::VSTRS || Opcode == ARM::VSTRD ||
- Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
- return false;
- } else if (Domain == ARMII::DomainNEON) {
- if (MI->getDesc().mayStore() || MI->getDesc().mayLoad())
- return false;
- } else
+ if (TID.mayStore())
return false;
- return MI->readsRegister(DefMI->getOperand(0).getReg(), &TRI);
+ unsigned Opcode = TID.getOpcode();
+ if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
+ return false;
+ if ((Domain & ARMII::DomainVFP) || (Domain & ARMII::DomainNEON))
+ return MI->readsRegister(DefMI->getOperand(0).getReg(), &TRI);
+ return false;
}
ScheduleHazardRecognizer::HazardType
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index a506cffdba348..f0d5a7d7c2e7b 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -126,6 +126,7 @@ public:
bool SelectAddrMode5(SDValue N, SDValue &Base,
SDValue &Offset);
bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align);
+ bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);
bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);
@@ -886,6 +887,20 @@ bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
return true;
}
+bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
+ SDValue &Offset) {
+ LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op);
+ ISD::MemIndexedMode AM = LdSt->getAddressingMode();
+ if (AM != ISD::POST_INC)
+ return false;
+ Offset = N;
+ if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) {
+ if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
+ Offset = CurDAG->getRegister(0, MVT::i32);
+ }
+ return true;
+}
+
bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
SDValue &Offset, SDValue &Label) {
if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 1835ec0f00542..ab9f9e1571e34 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -2236,7 +2236,7 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
RC = ARM::GPRRegisterClass;
// Transform the arguments stored in physical registers into virtual ones.
- unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC, dl);
+ unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
SDValue ArgValue2;
@@ -2250,7 +2250,7 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
MachinePointerInfo::getFixedStack(FI),
false, false, 0);
} else {
- Reg = MF.addLiveIn(NextVA.getLocReg(), RC, dl);
+ Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
}
@@ -2331,7 +2331,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
// Transform the arguments in physical registers into virtual ones.
- unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC, dl);
+ unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
}
@@ -2408,7 +2408,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
else
RC = ARM::GPRRegisterClass;
- unsigned VReg = MF.addLiveIn(GPRArgRegs[NumGPRs], RC, dl);
+ unsigned VReg = MF.addLiveIn(GPRArgRegs[NumGPRs], RC);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
SDValue Store =
DAG.getStore(Val.getValue(1), dl, Val, FIN,
@@ -2838,8 +2838,51 @@ SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
EVT VT = Op.getValueType();
EVT SrcVT = Tmp1.getValueType();
- bool F2IisFast = Subtarget->isCortexA9() ||
- Tmp0.getOpcode() == ISD::BITCAST || Tmp0.getOpcode() == ARMISD::VMOVDRR;
+ bool InGPR = Tmp0.getOpcode() == ISD::BITCAST ||
+ Tmp0.getOpcode() == ARMISD::VMOVDRR;
+ bool UseNEON = !InGPR && Subtarget->hasNEON();
+
+ if (UseNEON) {
+ // Use VBSL to copy the sign bit.
+ unsigned EncodedVal = ARM_AM::createNEONModImm(0x6, 0x80);
+ SDValue Mask = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v2i32,
+ DAG.getTargetConstant(EncodedVal, MVT::i32));
+ EVT OpVT = (VT == MVT::f32) ? MVT::v2i32 : MVT::v1i64;
+ if (VT == MVT::f64)
+ Mask = DAG.getNode(ARMISD::VSHL, dl, OpVT,
+ DAG.getNode(ISD::BITCAST, dl, OpVT, Mask),
+ DAG.getConstant(32, MVT::i32));
+ else /*if (VT == MVT::f32)*/
+ Tmp0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp0);
+ if (SrcVT == MVT::f32) {
+ Tmp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp1);
+ if (VT == MVT::f64)
+ Tmp1 = DAG.getNode(ARMISD::VSHL, dl, OpVT,
+ DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1),
+ DAG.getConstant(32, MVT::i32));
+ }
+ Tmp0 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp0);
+ Tmp1 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1);
+
+ SDValue AllOnes = DAG.getTargetConstant(ARM_AM::createNEONModImm(0xe, 0xff),
+ MVT::i32);
+ AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v8i8, AllOnes);
+ SDValue MaskNot = DAG.getNode(ISD::XOR, dl, OpVT, Mask,
+ DAG.getNode(ISD::BITCAST, dl, OpVT, AllOnes));
+
+ SDValue Res = DAG.getNode(ISD::OR, dl, OpVT,
+ DAG.getNode(ISD::AND, dl, OpVT, Tmp1, Mask),
+ DAG.getNode(ISD::AND, dl, OpVT, Tmp0, MaskNot));
+ if (SrcVT == MVT::f32) {
+ Res = DAG.getNode(ISD::BITCAST, dl, MVT::v2f32, Res);
+ Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res,
+ DAG.getConstant(0, MVT::i32));
+ } else {
+ Res = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Res);
+ }
+
+ return Res;
+ }
// Bitcast operand 1 to i32.
if (SrcVT == MVT::f64)
@@ -2847,37 +2890,24 @@ SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
&Tmp1, 1).getValue(1);
Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1);
- // If float to int conversion isn't going to be super expensive, then simply
- // or in the signbit.
- if (F2IisFast) {
- SDValue Mask1 = DAG.getConstant(0x80000000, MVT::i32);
- SDValue Mask2 = DAG.getConstant(0x7fffffff, MVT::i32);
- Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1);
- if (VT == MVT::f32) {
- Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32,
- DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2);
- return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
- DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1));
- }
-
- // f64: Or the high part with signbit and then combine two parts.
- Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
- &Tmp0, 1);
- SDValue Lo = Tmp0.getValue(0);
- SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2);
- Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1);
- return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
+ // Or in the signbit with integer operations.
+ SDValue Mask1 = DAG.getConstant(0x80000000, MVT::i32);
+ SDValue Mask2 = DAG.getConstant(0x7fffffff, MVT::i32);
+ Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1);
+ if (VT == MVT::f32) {
+ Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32,
+ DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2);
+ return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
+ DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1));
}
- // Remove the signbit of operand 0.
- Tmp0 = DAG.getNode(ISD::FABS, dl, VT, Tmp0);
-
- // If operand 1 signbit is one, then negate operand 0.
- SDValue ARMcc;
- SDValue Cmp = getARMCmp(Tmp1, DAG.getConstant(0, MVT::i32),
- ISD::SETLT, ARMcc, DAG, dl);
- SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
- return DAG.getNode(ARMISD::CNEG, dl, VT, Tmp0, Tmp0, ARMcc, CCR, Cmp);
+ // f64: Or the high part with signbit and then combine two parts.
+ Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
+ &Tmp0, 1);
+ SDValue Lo = Tmp0.getValue(0);
+ SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2);
+ Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1);
+ return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
}
SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
@@ -2897,7 +2927,7 @@ SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
}
// Return LR, which contains the return address. Mark it an implicit live-in.
- unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32), dl);
+ unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
}
diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td
index 765cba42d0bd9..359ac45cee1dc 100644
--- a/lib/Target/ARM/ARMInstrFormats.td
+++ b/lib/Target/ARM/ARMInstrFormats.td
@@ -127,13 +127,14 @@ def IndexModePost : IndexMode<2>;
def IndexModeUpd : IndexMode<3>;
// Instruction execution domain.
-class Domain<bits<2> val> {
- bits<2> Value = val;
+class Domain<bits<3> val> {
+ bits<3> Value = val;
}
def GenericDomain : Domain<0>;
def VFPDomain : Domain<1>; // Instructions in VFP domain only
def NeonDomain : Domain<2>; // Instructions in Neon domain only
def VFPNeonDomain : Domain<3>; // Instructions in both VFP & Neon domains
+def VFPNeonA8Domain : Domain<5>; // Instructions in VFP & Neon under A8
//===----------------------------------------------------------------------===//
// ARM special operands.
@@ -249,7 +250,7 @@ class InstTemplate<AddrMode am, SizeFlagVal sz, IndexMode im,
let TSFlags{15-10} = Form;
let TSFlags{16} = isUnaryDataProc;
let TSFlags{17} = canXformTo16Bit;
- let TSFlags{19-18} = D.Value;
+ let TSFlags{20-18} = D.Value;
let Constraints = cstr;
let Itinerary = itin;
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index c827ce3da97cb..6e3fe2e039f57 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -561,7 +561,9 @@ def addrmode6 : Operand<i32>,
let EncoderMethod = "getAddrMode6AddressOpValue";
}
-def am6offset : Operand<i32> {
+def am6offset : Operand<i32>,
+ ComplexPattern<i32, 1, "SelectAddrMode6Offset",
+ [], [SDNPWantRoot]> {
let PrintMethod = "printAddrMode6OffsetOperand";
let MIOperandInfo = (ops GPR);
let EncoderMethod = "getAddrMode6OffsetOpValue";
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index 1e2e5504e6629..dc3d63e26ef5e 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -1402,31 +1402,42 @@ def : Pat<(store (extractelt (v2f32 DPR:$src), imm:$lane), addrmode6:$addr),
def : Pat<(store (extractelt (v4f32 QPR:$src), imm:$lane), addrmode6:$addr),
(VST1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
-let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
-
// ...with address register writeback:
-class VST1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+class VST1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
+ PatFrag StoreOp, SDNode ExtractOp>
: NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
(ins addrmode6:$Rn, am6offset:$Rm,
DPR:$Vd, nohash_imm:$lane), IIC_VST1lnu, "vst1", Dt,
"\\{$Vd[$lane]\\}, $Rn$Rm",
- "$Rn.addr = $wb", []>;
+ "$Rn.addr = $wb",
+ [(set GPR:$wb, (StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane),
+ addrmode6:$Rn, am6offset:$Rm))]>;
+class VST1QLNWBPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
+ : VSTQLNWBPseudo<IIC_VST1lnu> {
+ let Pattern = [(set GPR:$wb, (StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
+ addrmode6:$addr, am6offset:$offset))];
+}
-def VST1LNd8_UPD : VST1LNWB<0b0000, {?,?,?,0}, "8"> {
+def VST1LNd8_UPD : VST1LNWB<0b0000, {?,?,?,0}, "8", v8i8, post_truncsti8,
+ NEONvgetlaneu> {
let Inst{7-5} = lane{2-0};
}
-def VST1LNd16_UPD : VST1LNWB<0b0100, {?,?,0,?}, "16"> {
+def VST1LNd16_UPD : VST1LNWB<0b0100, {?,?,0,?}, "16", v4i16, post_truncsti16,
+ NEONvgetlaneu> {
let Inst{7-6} = lane{1-0};
let Inst{4} = Rn{5};
}
-def VST1LNd32_UPD : VST1LNWB<0b1000, {?,0,?,?}, "32"> {
+def VST1LNd32_UPD : VST1LNWB<0b1000, {?,0,?,?}, "32", v2i32, post_store,
+ extractelt> {
let Inst{7} = lane{0};
let Inst{5-4} = Rn{5-4};
}
-def VST1LNq8Pseudo_UPD : VSTQLNWBPseudo<IIC_VST1lnu>;
-def VST1LNq16Pseudo_UPD : VSTQLNWBPseudo<IIC_VST1lnu>;
-def VST1LNq32Pseudo_UPD : VSTQLNWBPseudo<IIC_VST1lnu>;
+def VST1LNq8Pseudo_UPD : VST1QLNWBPseudo<v16i8, post_truncsti8, NEONvgetlaneu>;
+def VST1LNq16Pseudo_UPD : VST1QLNWBPseudo<v8i16, post_truncsti16,NEONvgetlaneu>;
+def VST1LNq32Pseudo_UPD : VST1QLNWBPseudo<v4i32, post_store, extractelt>;
+
+let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
// VST2LN : Vector Store (single 2-element structure from one lane)
class VST2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
index 920c5c98002af..29902833f2bb2 100644
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -197,9 +197,9 @@ def VADDS : ASbIn<0b11100, 0b11, 0, 0,
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
IIC_fpALU32, "vadd", ".f32\t$Sd, $Sn, $Sm",
[(set SPR:$Sd, (fadd SPR:$Sn, SPR:$Sm))]> {
- // Some single precision VFP instructions may be executed on both NEON and VFP
- // pipelines.
- let D = VFPNeonDomain;
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
}
def VSUBD : ADbI<0b11100, 0b11, 1, 0,
@@ -211,9 +211,9 @@ def VSUBS : ASbIn<0b11100, 0b11, 1, 0,
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
IIC_fpALU32, "vsub", ".f32\t$Sd, $Sn, $Sm",
[(set SPR:$Sd, (fsub SPR:$Sn, SPR:$Sm))]> {
- // Some single precision VFP instructions may be executed on both NEON and VFP
- // pipelines.
- let D = VFPNeonDomain;
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
}
def VDIVD : ADbI<0b11101, 0b00, 0, 0,
@@ -235,9 +235,9 @@ def VMULS : ASbIn<0b11100, 0b10, 0, 0,
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
IIC_fpMUL32, "vmul", ".f32\t$Sd, $Sn, $Sm",
[(set SPR:$Sd, (fmul SPR:$Sn, SPR:$Sm))]> {
- // Some single precision VFP instructions may be executed on both NEON and VFP
- // pipelines.
- let D = VFPNeonDomain;
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
}
def VNMULD : ADbI<0b11100, 0b10, 1, 0,
@@ -249,9 +249,9 @@ def VNMULS : ASbI<0b11100, 0b10, 1, 0,
(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
IIC_fpMUL32, "vnmul", ".f32\t$Sd, $Sn, $Sm",
[(set SPR:$Sd, (fneg (fmul SPR:$Sn, SPR:$Sm)))]> {
- // Some single precision VFP instructions may be executed on both NEON and VFP
- // pipelines.
- let D = VFPNeonDomain;
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
}
// Match reassociated forms only if not sign dependent rounding.
@@ -271,9 +271,9 @@ def VCMPES : ASuI<0b11101, 0b11, 0b0100, 0b11, 0,
(outs), (ins SPR:$Sd, SPR:$Sm),
IIC_fpCMP32, "vcmpe", ".f32\t$Sd, $Sm",
[(arm_cmpfp SPR:$Sd, SPR:$Sm)]> {
- // Some single precision VFP instructions may be executed on both NEON and VFP
- // pipelines.
- let D = VFPNeonDomain;
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
}
// FIXME: Verify encoding after integrated assembler is working.
@@ -286,9 +286,9 @@ def VCMPS : ASuI<0b11101, 0b11, 0b0100, 0b01, 0,
(outs), (ins SPR:$Sd, SPR:$Sm),
IIC_fpCMP32, "vcmp", ".f32\t$Sd, $Sm",
[/* For disassembly only; pattern left blank */]> {
- // Some single precision VFP instructions may be executed on both NEON and VFP
- // pipelines.
- let D = VFPNeonDomain;
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
}
} // Defs = [FPSCR]
@@ -305,9 +305,9 @@ def VABSS : ASuIn<0b11101, 0b11, 0b0000, 0b11, 0,
(outs SPR:$Sd), (ins SPR:$Sm),
IIC_fpUNA32, "vabs", ".f32\t$Sd, $Sm",
[(set SPR:$Sd, (fabs SPR:$Sm))]> {
- // Some single precision VFP instructions may be executed on both NEON and VFP
- // pipelines.
- let D = VFPNeonDomain;
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
}
let Defs = [FPSCR] in {
@@ -326,9 +326,9 @@ def VCMPEZS : ASuI<0b11101, 0b11, 0b0101, 0b11, 0,
let Inst{3-0} = 0b0000;
let Inst{5} = 0;
- // Some single precision VFP instructions may be executed on both NEON and VFP
- // pipelines.
- let D = VFPNeonDomain;
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
}
// FIXME: Verify encoding after integrated assembler is working.
@@ -347,9 +347,9 @@ def VCMPZS : ASuI<0b11101, 0b11, 0b0101, 0b01, 0,
let Inst{3-0} = 0b0000;
let Inst{5} = 0;
- // Some single precision VFP instructions may be executed on both NEON and VFP
- // pipelines.
- let D = VFPNeonDomain;
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
}
} // Defs = [FPSCR]
@@ -423,9 +423,9 @@ def VNEGS : ASuIn<0b11101, 0b11, 0b0001, 0b01, 0,
(outs SPR:$Sd), (ins SPR:$Sm),
IIC_fpUNA32, "vneg", ".f32\t$Sd, $Sm",
[(set SPR:$Sd, (fneg SPR:$Sm))]> {
- // Some single precision VFP instructions may be executed on both NEON and VFP
- // pipelines.
- let D = VFPNeonDomain;
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
}
def VSQRTD : ADuI<0b11101, 0b11, 0b0001, 0b11, 0,
@@ -598,9 +598,9 @@ def VSITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010,
[(set SPR:$Sd, (arm_sitof SPR:$Sm))]> {
let Inst{7} = 1; // s32
- // Some single precision VFP instructions may be executed on both NEON and VFP
- // pipelines.
- let D = VFPNeonDomain;
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
}
def VUITOD : AVConv1IDs_Encode<0b11101, 0b11, 0b1000, 0b1011,
@@ -616,9 +616,9 @@ def VUITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010,
[(set SPR:$Sd, (arm_uitof SPR:$Sm))]> {
let Inst{7} = 0; // u32
- // Some single precision VFP instructions may be executed on both NEON and VFP
- // pipelines.
- let D = VFPNeonDomain;
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
}
// FP -> Int:
@@ -671,9 +671,9 @@ def VTOSIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1101, 0b1010,
[(set SPR:$Sd, (arm_ftosi SPR:$Sm))]> {
let Inst{7} = 1; // Z bit
- // Some single precision VFP instructions may be executed on both NEON and VFP
- // pipelines.
- let D = VFPNeonDomain;
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
}
def VTOUIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1100, 0b1011,
@@ -689,9 +689,9 @@ def VTOUIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010,
[(set SPR:$Sd, (arm_ftoui SPR:$Sm))]> {
let Inst{7} = 1; // Z bit
- // Some single precision VFP instructions may be executed on both NEON and VFP
- // pipelines.
- let D = VFPNeonDomain;
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
}
// And the Z bit '0' variants, i.e. use the rounding mode specified by FPSCR.
@@ -743,36 +743,36 @@ def VTOSHS : AVConv1XI<0b11101, 0b11, 0b1110, 0b1010, 0,
(outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
IIC_fpCVTSI, "vcvt", ".s16.f32\t$dst, $a, $fbits",
[/* For disassembly only; pattern left blank */]> {
- // Some single precision VFP instructions may be executed on both NEON and VFP
- // pipelines.
- let D = VFPNeonDomain;
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
}
def VTOUHS : AVConv1XI<0b11101, 0b11, 0b1111, 0b1010, 0,
(outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
IIC_fpCVTSI, "vcvt", ".u16.f32\t$dst, $a, $fbits",
[/* For disassembly only; pattern left blank */]> {
- // Some single precision VFP instructions may be executed on both NEON and VFP
- // pipelines.
- let D = VFPNeonDomain;
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
}
def VTOSLS : AVConv1XI<0b11101, 0b11, 0b1110, 0b1010, 1,
(outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
IIC_fpCVTSI, "vcvt", ".s32.f32\t$dst, $a, $fbits",
[/* For disassembly only; pattern left blank */]> {
- // Some single precision VFP instructions may be executed on both NEON and VFP
- // pipelines.
- let D = VFPNeonDomain;
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
}
def VTOULS : AVConv1XI<0b11101, 0b11, 0b1111, 0b1010, 1,
(outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
IIC_fpCVTSI, "vcvt", ".u32.f32\t$dst, $a, $fbits",
[/* For disassembly only; pattern left blank */]> {
- // Some single precision VFP instructions may be executed on both NEON and VFP
- // pipelines.
- let D = VFPNeonDomain;
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
}
def VTOSHD : AVConv1XI<0b11101, 0b11, 0b1110, 0b1011, 0,
@@ -801,36 +801,36 @@ def VSHTOS : AVConv1XI<0b11101, 0b11, 0b1010, 0b1010, 0,
(outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
IIC_fpCVTIS, "vcvt", ".f32.s16\t$dst, $a, $fbits",
[/* For disassembly only; pattern left blank */]> {
- // Some single precision VFP instructions may be executed on both NEON and VFP
- // pipelines.
- let D = VFPNeonDomain;
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
}
def VUHTOS : AVConv1XI<0b11101, 0b11, 0b1011, 0b1010, 0,
(outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
IIC_fpCVTIS, "vcvt", ".f32.u16\t$dst, $a, $fbits",
[/* For disassembly only; pattern left blank */]> {
- // Some single precision VFP instructions may be executed on both NEON and VFP
- // pipelines.
- let D = VFPNeonDomain;
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
}
def VSLTOS : AVConv1XI<0b11101, 0b11, 0b1010, 0b1010, 1,
(outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
IIC_fpCVTIS, "vcvt", ".f32.s32\t$dst, $a, $fbits",
[/* For disassembly only; pattern left blank */]> {
- // Some single precision VFP instructions may be executed on both NEON and VFP
- // pipelines.
- let D = VFPNeonDomain;
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
}
def VULTOS : AVConv1XI<0b11101, 0b11, 0b1011, 0b1010, 1,
(outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
IIC_fpCVTIS, "vcvt", ".f32.u32\t$dst, $a, $fbits",
[/* For disassembly only; pattern left blank */]> {
- // Some single precision VFP instructions may be executed on both NEON and VFP
- // pipelines.
- let D = VFPNeonDomain;
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
}
def VSHTOD : AVConv1XI<0b11101, 0b11, 0b1010, 0b1011, 0,
@@ -874,9 +874,9 @@ def VMLAS : ASbIn<0b11100, 0b00, 0, 0,
SPR:$Sdin))]>,
RegConstraint<"$Sdin = $Sd">,
Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> {
- // Some single precision VFP instructions may be executed on both NEON and VFP
- // pipelines.
- let D = VFPNeonDomain;
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
}
def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
@@ -901,9 +901,9 @@ def VMLSS : ASbIn<0b11100, 0b00, 1, 0,
SPR:$Sdin))]>,
RegConstraint<"$Sdin = $Sd">,
Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> {
- // Some single precision VFP instructions may be executed on both NEON and VFP
- // pipelines.
- let D = VFPNeonDomain;
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
}
def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
@@ -928,9 +928,9 @@ def VNMLAS : ASbI<0b11100, 0b01, 1, 0,
SPR:$Sdin))]>,
RegConstraint<"$Sdin = $Sd">,
Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> {
- // Some single precision VFP instructions may be executed on both NEON and VFP
- // pipelines.
- let D = VFPNeonDomain;
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
}
def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin),
@@ -954,9 +954,9 @@ def VNMLSS : ASbI<0b11100, 0b01, 0, 0,
[(set SPR:$Sd, (fsub_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>,
RegConstraint<"$Sdin = $Sd">,
Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> {
- // Some single precision VFP instructions may be executed on both NEON and VFP
- // pipelines.
- let D = VFPNeonDomain;
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
}
def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin),
@@ -995,9 +995,9 @@ def VNEGScc : ASuI<0b11101, 0b11, 0b0001, 0b01, 0,
IIC_fpUNA32, "vneg", ".f32\t$Sd, $Sm",
[/*(set SPR:$Sd, (ARMcneg SPR:$Sn, SPR:$Sm, imm:$cc))*/]>,
RegConstraint<"$Sn = $Sd"> {
- // Some single precision VFP instructions may be executed on both NEON and VFP
- // pipelines.
- let D = VFPNeonDomain;
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines on A8.
+ let D = VFPNeonA8Domain;
}
} // neverHasSideEffects
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index 0bd740cfb28c0..1465984899c6f 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -171,7 +171,9 @@ ARMSubtarget::GVIsIndirectSymbol(const GlobalValue *GV,
// Materializable GVs (in JIT lazy compilation mode) do not require an extra
// load from stub.
- bool isDecl = GV->isDeclaration() && !GV->isMaterializable();
+ bool isDecl = GV->hasAvailableExternallyLinkage();
+ if (GV->isDeclaration() && !GV->isMaterializable())
+ isDecl = true;
if (!isTargetDarwin()) {
// Extra load is needed for all externally visible.
diff --git a/lib/Target/ARM/MLxExpansionPass.cpp b/lib/Target/ARM/MLxExpansionPass.cpp
index f9e86eb36e04e..9a27e2f470648 100644
--- a/lib/Target/ARM/MLxExpansionPass.cpp
+++ b/lib/Target/ARM/MLxExpansionPass.cpp
@@ -132,22 +132,16 @@ unsigned MLxExpansion::getDefReg(MachineInstr *MI) const {
}
bool MLxExpansion::hasRAWHazard(unsigned Reg, MachineInstr *MI) const {
- const TargetInstrDesc &TID = MI->getDesc();
// FIXME: Detect integer instructions properly.
+ const TargetInstrDesc &TID = MI->getDesc();
unsigned Domain = TID.TSFlags & ARMII::DomainMask;
- if (Domain == ARMII::DomainVFP) {
- unsigned Opcode = TID.getOpcode();
- if (Opcode == ARM::VSTRS || Opcode == ARM::VSTRD ||
- Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
- return false;
- } else if (Domain == ARMII::DomainNEON) {
- if (TID.mayStore() || TID.mayLoad())
- return false;
- } else {
+ if (TID.mayStore())
return false;
- }
-
- return MI->readsRegister(Reg, TRI);
+ unsigned Opcode = TID.getOpcode();
+ if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
+ return false;
+ if ((Domain & ARMII::DomainVFP) || (Domain & ARMII::DomainNEON))
+ return MI->readsRegister(Reg, TRI);
return false;
}
diff --git a/lib/Target/ARM/NEONMoveFix.cpp b/lib/Target/ARM/NEONMoveFix.cpp
index 97e54bfaed9e7..965665c2821ac 100644
--- a/lib/Target/ARM/NEONMoveFix.cpp
+++ b/lib/Target/ARM/NEONMoveFix.cpp
@@ -35,6 +35,7 @@ namespace {
private:
const TargetRegisterInfo *TRI;
const ARMBaseInstrInfo *TII;
+ bool isA8;
typedef DenseMap<unsigned, const MachineInstr*> RegMap;
@@ -43,6 +44,11 @@ namespace {
char NEONMoveFixPass::ID = 0;
}
+static bool inNEONDomain(unsigned Domain, bool isA8) {
+ return (Domain & ARMII::DomainNEON) ||
+ (isA8 && (Domain & ARMII::DomainNEONA8));
+}
+
bool NEONMoveFixPass::InsertMoves(MachineBasicBlock &MBB) {
RegMap Defs;
bool Modified = false;
@@ -70,7 +76,7 @@ bool NEONMoveFixPass::InsertMoves(MachineBasicBlock &MBB) {
Domain = ARMII::DomainNEON;
}
- if (Domain & ARMII::DomainNEON) {
+ if (inNEONDomain(Domain, isA8)) {
// Convert VMOVD to VMOVDneon
unsigned DestReg = MI->getOperand(0).getReg();
@@ -123,6 +129,7 @@ bool NEONMoveFixPass::runOnMachineFunction(MachineFunction &Fn) {
TRI = TM.getRegisterInfo();
TII = static_cast<const ARMBaseInstrInfo*>(TM.getInstrInfo());
+ isA8 = TM.getSubtarget<ARMSubtarget>().isCortexA8();
bool Modified = false;
for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp
index 2f67257f8fa11..9b1073be3c8e4 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -95,6 +95,12 @@ Thumb2InstrInfo::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
bool
Thumb2InstrInfo::isLegalToSplitMBBAt(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI) const {
+ while (MBBI->isDebugValue()) {
+ ++MBBI;
+ if (MBBI == MBB.end())
+ return false;
+ }
+
unsigned PredReg = 0;
return llvm::getITInstrPredicate(MBBI, PredReg) == ARMCC::AL;
}
diff --git a/lib/Target/Alpha/AlphaISelLowering.cpp b/lib/Target/Alpha/AlphaISelLowering.cpp
index 9137d654edba4..c4f43ab9e4e77 100644
--- a/lib/Target/Alpha/AlphaISelLowering.cpp
+++ b/lib/Target/Alpha/AlphaISelLowering.cpp
@@ -48,7 +48,6 @@ AlphaTargetLowering::AlphaTargetLowering(TargetMachine &TM)
: TargetLowering(TM, new TargetLoweringObjectFileELF()) {
// Set up the TargetLowering object.
//I am having problems with shr n i8 1
- setShiftAmountType(MVT::i64);
setBooleanContents(ZeroOrOneBooleanContent);
addRegisterClass(MVT::i64, Alpha::GPRCRegisterClass);
diff --git a/lib/Target/Alpha/AlphaISelLowering.h b/lib/Target/Alpha/AlphaISelLowering.h
index b429e9fc13902..cb98f921dd688 100644
--- a/lib/Target/Alpha/AlphaISelLowering.h
+++ b/lib/Target/Alpha/AlphaISelLowering.h
@@ -31,25 +31,25 @@ namespace llvm {
/// GPRelHi/GPRelLo - These represent the high and low 16-bit
/// parts of a global address respectively.
- GPRelHi, GPRelLo,
+ GPRelHi, GPRelLo,
/// RetLit - Literal Relocation of a Global
RelLit,
/// GlobalRetAddr - used to restore the return address
GlobalRetAddr,
-
+
/// CALL - Normal call.
CALL,
/// DIVCALL - used for special library calls for div and rem
DivCall,
-
+
/// return flag operand
RET_FLAG,
/// CHAIN = COND_BRANCH CHAIN, OPC, (G|F)PRC, DESTBB [, INFLAG] - This
- /// corresponds to the COND_BRANCH pseudo instruction.
+ /// corresponds to the COND_BRANCH pseudo instruction.
/// *PRC is the input register to compare to zero,
/// OPC is the branch opcode to use (e.g. Alpha::BEQ),
/// DESTBB is the destination block to branch to, and INFLAG is
@@ -62,7 +62,9 @@ namespace llvm {
class AlphaTargetLowering : public TargetLowering {
public:
explicit AlphaTargetLowering(TargetMachine &TM);
-
+
+ virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i64; }
+
/// getSetCCResultType - Get the SETCC result ValueType
virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const;
@@ -92,7 +94,7 @@ namespace llvm {
ConstraintWeight getSingleConstraintMatchWeight(
AsmOperandInfo &info, const char *constraint) const;
- std::vector<unsigned>
+ std::vector<unsigned>
getRegClassForInlineAsmConstraint(const std::string &Constraint,
EVT VT) const;
diff --git a/lib/Target/Blackfin/BlackfinISelLowering.cpp b/lib/Target/Blackfin/BlackfinISelLowering.cpp
index dd27d0a0ff366..7c80eec3ba630 100644
--- a/lib/Target/Blackfin/BlackfinISelLowering.cpp
+++ b/lib/Target/Blackfin/BlackfinISelLowering.cpp
@@ -41,7 +41,6 @@ using namespace llvm;
BlackfinTargetLowering::BlackfinTargetLowering(TargetMachine &TM)
: TargetLowering(TM, new TargetLoweringObjectFileELF()) {
- setShiftAmountType(MVT::i16);
setBooleanContents(ZeroOrOneBooleanContent);
setStackPointerRegisterToSaveRestore(BF::SP);
setIntDivIsCheap(false);
diff --git a/lib/Target/Blackfin/BlackfinISelLowering.h b/lib/Target/Blackfin/BlackfinISelLowering.h
index 15a745fa87240..102c830688e26 100644
--- a/lib/Target/Blackfin/BlackfinISelLowering.h
+++ b/lib/Target/Blackfin/BlackfinISelLowering.h
@@ -32,6 +32,7 @@ namespace llvm {
class BlackfinTargetLowering : public TargetLowering {
public:
BlackfinTargetLowering(TargetMachine &TM);
+ virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i16; }
virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const;
virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
virtual void ReplaceNodeResults(SDNode *N,
diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp
index e6511d008c2b7..743a4d7a0f78c 100644
--- a/lib/Target/CellSPU/SPUISelLowering.cpp
+++ b/lib/Target/CellSPU/SPUISelLowering.cpp
@@ -435,7 +435,6 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
- setShiftAmountType(MVT::i32);
setBooleanContents(ZeroOrNegativeOneBooleanContent);
setStackPointerRegisterToSaveRestore(SPU::R1);
@@ -1219,7 +1218,7 @@ SPUTargetLowering::LowerFormalArguments(SDValue Chain,
FuncInfo->setVarArgsFrameIndex(
MFI->CreateFixedObject(StackSlotSize, ArgOffset, true));
SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
- unsigned VReg = MF.addLiveIn(ArgRegs[ArgRegIdx], &SPU::R32CRegClass, dl);
+ unsigned VReg = MF.addLiveIn(ArgRegs[ArgRegIdx], &SPU::R32CRegClass);
SDValue ArgVal = DAG.getRegister(VReg, MVT::v16i8);
SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, MachinePointerInfo(),
false, false, 0);
@@ -2190,7 +2189,7 @@ static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
{
SDValue N0 = Op.getOperand(0); // Everything has at least one operand
DebugLoc dl = Op.getDebugLoc();
- EVT ShiftVT = TLI.getShiftAmountTy();
+ EVT ShiftVT = TLI.getShiftAmountTy(N0.getValueType());
assert(Op.getValueType() == MVT::i8);
switch (Opc) {
@@ -3112,7 +3111,7 @@ SPUTargetLowering::getSingleConstraintMatchWeight(
switch (*constraint) {
default:
weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
- break;
+ break;
//FIXME: Seems like the supported constraint letters were just copied
// from PPC, as the following doesn't correspond to the GCC docs.
// I'm leaving it so until someone adds the corresponding lowering support.
diff --git a/lib/Target/CellSPU/SPUISelLowering.h b/lib/Target/CellSPU/SPUISelLowering.h
index 95d44afe37c8f..dd48d7bafaef9 100644
--- a/lib/Target/CellSPU/SPUISelLowering.h
+++ b/lib/Target/CellSPU/SPUISelLowering.h
@@ -109,6 +109,8 @@ namespace llvm {
/// getSetCCResultType - Return the ValueType for ISD::SETCC
virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const;
+ virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i32; }
+
//! Custom lowering hooks
virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
@@ -179,9 +181,9 @@ namespace llvm {
virtual bool isLegalICmpImmediate(int64_t Imm) const;
- virtual bool isLegalAddressingMode(const AddrMode &AM,
+ virtual bool isLegalAddressingMode(const AddrMode &AM,
const Type *Ty) const;
-
+
/// After allocating this many registers, the allocator should feel
/// register pressure. The value is a somewhat random guess, based on the
/// number of non callee saved registers in the C calling convention.
diff --git a/lib/Target/MBlaze/MBlazeISelLowering.cpp b/lib/Target/MBlaze/MBlazeISelLowering.cpp
index 2f40bfc896016..f39826b1cf17e 100644
--- a/lib/Target/MBlaze/MBlazeISelLowering.cpp
+++ b/lib/Target/MBlaze/MBlazeISelLowering.cpp
@@ -907,7 +907,7 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
// Transform the arguments stored on
// physical registers into virtual ones
- unsigned Reg = MF.addLiveIn(ArgRegEnd, RC, dl);
+ unsigned Reg = MF.addLiveIn(ArgRegEnd, RC);
SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
// If this is an 8 or 16-bit value, it has been passed promoted
@@ -973,7 +973,7 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
for (; Start <= End; ++Start, ++StackLoc) {
unsigned Reg = MBlazeRegisterInfo::getRegisterFromNumbering(Start);
- unsigned LiveReg = MF.addLiveIn(Reg, RC, dl);
+ unsigned LiveReg = MF.addLiveIn(Reg, RC);
SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, LiveReg, MVT::i32);
int FI = MFI->CreateFixedObject(4, 0, true);
diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp
index 30ef4f5da08eb..a95d59c0576c8 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.cpp
+++ b/lib/Target/MSP430/MSP430ISelLowering.cpp
@@ -77,10 +77,6 @@ MSP430TargetLowering::MSP430TargetLowering(MSP430TargetMachine &tm) :
// Division is expensive
setIntDivIsCheap(false);
- // Even if we have only 1 bit shift here, we can perform
- // shifts of the whole bitwidth 1 bit per step.
- setShiftAmountType(MVT::i8);
-
setStackPointerRegisterToSaveRestore(MSP430::SPW);
setBooleanContents(ZeroOrOneBooleanContent);
setSchedulingPreference(Sched::Latency);
@@ -330,7 +326,7 @@ MSP430TargetLowering::LowerCCCArguments(SDValue Chain,
// Arguments passed in registers
EVT RegVT = VA.getLocVT();
switch (RegVT.getSimpleVT().SimpleTy) {
- default:
+ default:
{
#ifndef NDEBUG
errs() << "LowerFormalArguments Unhandled argument type: "
diff --git a/lib/Target/MSP430/MSP430ISelLowering.h b/lib/Target/MSP430/MSP430ISelLowering.h
index 673c5433b96e6..19c9eac589f00 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.h
+++ b/lib/Target/MSP430/MSP430ISelLowering.h
@@ -73,6 +73,8 @@ namespace llvm {
public:
explicit MSP430TargetLowering(MSP430TargetMachine &TM);
+ virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i8; }
+
/// LowerOperation - Provide custom lowering hooks for some operations.
virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 8f623b859b554..70d00e4b5cc50 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -362,7 +362,6 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
}
- setShiftAmountType(MVT::i32);
setBooleanContents(ZeroOrOneBooleanContent);
if (TM.getSubtarget<PPCSubtarget>().isPPC64()) {
@@ -1597,7 +1596,7 @@ PPCTargetLowering::LowerFormalArguments_SVR4(
}
// Transform the arguments stored in physical registers into virtual ones.
- unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC, dl);
+ unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, ValVT);
InVals.push_back(ArgValue);
@@ -1689,7 +1688,7 @@ PPCTargetLowering::LowerFormalArguments_SVR4(
// Get an existing live-in vreg, or add a new one.
unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
if (!VReg)
- VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass, dl);
+ VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
@@ -1708,7 +1707,7 @@ PPCTargetLowering::LowerFormalArguments_SVR4(
// Get an existing live-in vreg, or add a new one.
unsigned VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
if (!VReg)
- VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass, dl);
+ VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
@@ -1872,7 +1871,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
InVals.push_back(FIN);
if (ObjSize==1 || ObjSize==2) {
if (GPR_idx != Num_GPR_Regs) {
- unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass, dl);
+ unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
MachinePointerInfo(),
@@ -1891,7 +1890,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
// to memory. ArgVal will be address of the beginning of
// the object.
if (GPR_idx != Num_GPR_Regs) {
- unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass, dl);
+ unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
@@ -1914,7 +1913,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
case MVT::i32:
if (!isPPC64) {
if (GPR_idx != Num_GPR_Regs) {
- unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass, dl);
+ unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
++GPR_idx;
} else {
@@ -1928,7 +1927,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
// FALLTHROUGH
case MVT::i64: // PPC64
if (GPR_idx != Num_GPR_Regs) {
- unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass, dl);
+ unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
if (ObjectVT == MVT::i32) {
@@ -1966,9 +1965,9 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
unsigned VReg;
if (ObjectVT == MVT::f32)
- VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass, dl);
+ VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
else
- VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass, dl);
+ VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass);
ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
++FPR_idx;
@@ -1986,7 +1985,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
// Note that vector arguments in registers don't reserve stack space,
// except in varargs functions.
if (VR_idx != Num_VR_Regs) {
- unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass, dl);
+ unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
if (isVarArg) {
while ((ArgOffset % 16) != 0) {
@@ -2064,9 +2063,9 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
unsigned VReg;
if (isPPC64)
- VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass, dl);
+ VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
else
- VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass, dl);
+ VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index 80cab75b960ac..33daae9b5445c 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -29,36 +29,36 @@ namespace llvm {
/// FSEL - Traditional three-operand fsel node.
///
FSEL,
-
+
/// FCFID - The FCFID instruction, taking an f64 operand and producing
/// and f64 value containing the FP representation of the integer that
/// was temporarily in the f64 operand.
FCFID,
-
- /// FCTI[D,W]Z - The FCTIDZ and FCTIWZ instructions, taking an f32 or f64
+
+ /// FCTI[D,W]Z - The FCTIDZ and FCTIWZ instructions, taking an f32 or f64
/// operand, producing an f64 value containing the integer representation
/// of that FP value.
FCTIDZ, FCTIWZ,
-
+
/// STFIWX - The STFIWX instruction. The first operand is an input token
/// chain, then an f64 value to store, then an address to store it to.
STFIWX,
-
+
// VMADDFP, VNMSUBFP - The VMADDFP and VNMSUBFP instructions, taking
// three v4f32 operands and producing a v4f32 result.
VMADDFP, VNMSUBFP,
-
+
/// VPERM - The PPC VPERM Instruction.
///
VPERM,
-
+
/// Hi/Lo - These represent the high and low 16-bit parts of a global
/// address respectively. These nodes have two operands, the first of
/// which must be a TargetGlobalAddress, and the second of which must be a
/// Constant. Selected naively, these turn into 'lis G+C' and 'li G+C',
/// though these are usually folded into other nodes.
Hi, Lo,
-
+
TOC_ENTRY,
/// The following three target-specific nodes are used for calls through
@@ -80,37 +80,37 @@ namespace llvm {
/// This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to
/// compute an allocation on the stack.
DYNALLOC,
-
+
/// GlobalBaseReg - On Darwin, this node represents the result of the mflr
/// at function entry, used for PIC code.
GlobalBaseReg,
-
+
/// These nodes represent the 32-bit PPC shifts that operate on 6-bit
/// shift amounts. These nodes are generated by the multi-precision shift
/// code.
SRL, SRA, SHL,
-
+
/// EXTSW_32 - This is the EXTSW instruction for use with "32-bit"
/// registers.
EXTSW_32,
/// CALL - A direct function call.
CALL_Darwin, CALL_SVR4,
-
+
/// NOP - Special NOP which follows 64-bit SVR4 calls.
NOP,
/// CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a
/// MTCTR instruction.
MTCTR,
-
+
/// CHAIN,FLAG = BCTRL(CHAIN, INFLAG) - Directly corresponds to a
/// BCTRL instruction.
BCTRL_Darwin, BCTRL_SVR4,
-
+
/// Return with a flag operand, matched by 'blr'
RET_FLAG,
-
+
/// R32 = MFCR(CRREG, INFLAG) - Represents the MFCRpseud/MFOCRF
/// instructions. This copies the bits corresponding to the specified
/// CRREG into the resultant GPR. Bits corresponding to other CR regs
@@ -122,20 +122,20 @@ namespace llvm {
/// encoding for the OPC field to identify the compare. For example, 838
/// is VCMPGTSH.
VCMP,
-
+
/// RESVEC, OUTFLAG = VCMPo(LHS, RHS, OPC) - Represents one of the
- /// altivec VCMP*o instructions. For lack of better number, we use the
+ /// altivec VCMP*o instructions. For lack of better number, we use the
/// opcode number encoding for the OPC field to identify the compare. For
/// example, 838 is VCMPGTSH.
VCMPo,
-
+
/// CHAIN = COND_BRANCH CHAIN, CRRC, OPC, DESTBB [, INFLAG] - This
/// corresponds to the COND_BRANCH pseudo instruction. CRRC is the
/// condition register to branch on, OPC is the branch opcode to use (e.g.
/// PPC::BLE), DESTBB is the destination block to branch to, and INFLAG is
/// an optional input flag argument.
COND_BRANCH,
-
+
// The following 5 instructions are used only as part of the
// long double-to-int conversion sequence.
@@ -150,7 +150,7 @@ namespace llvm {
MTFSB1,
/// F8RC, OUTFLAG = FADDRTZ F8RC, F8RC, INFLAG - This is an FADD done with
- /// rounding towards zero. It has flags added so it won't move past the
+ /// rounding towards zero. It has flags added so it won't move past the
/// FPSCR-setting instructions.
FADDRTZ,
@@ -174,14 +174,14 @@ namespace llvm {
/// STD_32 - This is the STD instruction for use with "32-bit" registers.
STD_32 = ISD::FIRST_TARGET_MEMORY_OPCODE,
-
- /// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a
+
+ /// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a
/// byte-swapping store instruction. It byte-swaps the low "Type" bits of
/// the GPRC input, then stores it through Ptr. Type can be either i16 or
/// i32.
- STBRX,
-
- /// GPRC, CHAIN = LBRX CHAIN, Ptr, Type - This is a
+ STBRX,
+
+ /// GPRC, CHAIN = LBRX CHAIN, Ptr, Type - This is a
/// byte-swapping load instruction. It loads "Type" bits, byte swaps it,
/// then puts it in the bottom bits of the GPRC. TYPE can be either i16
/// or i32.
@@ -194,7 +194,7 @@ namespace llvm {
/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUHUM instruction.
bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary);
-
+
/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUWUM instruction.
bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary);
@@ -208,16 +208,16 @@ namespace llvm {
/// a VRGH* instruction with the specified unit size (1,2 or 4 bytes).
bool isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
bool isUnary);
-
+
/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
/// amount, otherwise return -1.
int isVSLDOIShuffleMask(SDNode *N, bool isUnary);
-
+
/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a splat of a single element that is suitable for input to
/// VSPLTB/VSPLTH/VSPLTW.
bool isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize);
-
+
/// isAllNegativeZeroVector - Returns true if all elements of build_vector
/// are -0.0.
bool isAllNegativeZeroVector(SDNode *N);
@@ -225,24 +225,26 @@ namespace llvm {
/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
/// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
unsigned getVSPLTImmediate(SDNode *N, unsigned EltSize);
-
+
/// get_VSPLTI_elt - If this is a build_vector of constants which can be
/// formed by using a vspltis[bhw] instruction of the specified element
/// size, return the constant being splatted. The ByteSize field indicates
/// the number of bytes of each element [124] -> [bhw].
SDValue get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG);
}
-
+
class PPCTargetLowering : public TargetLowering {
const PPCSubtarget &PPCSubTarget;
public:
explicit PPCTargetLowering(PPCTargetMachine &TM);
-
+
/// getTargetNodeName() - This method returns the name of a target specific
/// DAG node.
virtual const char *getTargetNodeName(unsigned Opcode) const;
+ virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i32; }
+
/// getSetCCResultType - Return the ISD::SETCC ValueType
virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const;
@@ -253,19 +255,19 @@ namespace llvm {
SDValue &Offset,
ISD::MemIndexedMode &AM,
SelectionDAG &DAG) const;
-
+
/// SelectAddressRegReg - Given the specified addressed, check to see if it
/// can be represented as an indexed [r+r] operation. Returns false if it
/// can be more efficiently represented with [r+imm].
bool SelectAddressRegReg(SDValue N, SDValue &Base, SDValue &Index,
SelectionDAG &DAG) const;
-
+
/// SelectAddressRegImm - Returns true if the address N can be represented
/// by a base register plus a signed 16-bit displacement [r+imm], and if it
/// is not better represented as reg+reg.
bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base,
SelectionDAG &DAG) const;
-
+
/// SelectAddressRegRegOnly - Given the specified addressed, force it to be
/// represented as an indexed [r+r] operation.
bool SelectAddressRegRegOnly(SDValue N, SDValue &Base, SDValue &Index,
@@ -277,7 +279,7 @@ namespace llvm {
bool SelectAddressRegImmShift(SDValue N, SDValue &Disp, SDValue &Base,
SelectionDAG &DAG) const;
-
+
/// LowerOperation - Provide custom lowering hooks for some operations.
///
virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
@@ -289,10 +291,10 @@ namespace llvm {
SelectionDAG &DAG) const;
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
-
+
virtual void computeMaskedBitsForTargetNode(const SDValue Op,
const APInt &Mask,
- APInt &KnownZero,
+ APInt &KnownZero,
APInt &KnownOne,
const SelectionDAG &DAG,
unsigned Depth = 0) const;
@@ -300,13 +302,13 @@ namespace llvm {
virtual MachineBasicBlock *
EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *MBB) const;
- MachineBasicBlock *EmitAtomicBinary(MachineInstr *MI,
+ MachineBasicBlock *EmitAtomicBinary(MachineInstr *MI,
MachineBasicBlock *MBB, bool is64Bit,
unsigned BinOpcode) const;
- MachineBasicBlock *EmitPartwordAtomicBinary(MachineInstr *MI,
- MachineBasicBlock *MBB,
+ MachineBasicBlock *EmitPartwordAtomicBinary(MachineInstr *MI,
+ MachineBasicBlock *MBB,
bool is8bit, unsigned Opcode) const;
-
+
ConstraintType getConstraintType(const std::string &Constraint) const;
/// Examine constraint string and operand type and determine a weight value.
@@ -314,7 +316,7 @@ namespace llvm {
ConstraintWeight getSingleConstraintMatchWeight(
AsmOperandInfo &info, const char *constraint) const;
- std::pair<unsigned, const TargetRegisterClass*>
+ std::pair<unsigned, const TargetRegisterClass*>
getRegForInlineAsmConstraint(const std::string &Constraint,
EVT VT) const;
@@ -329,11 +331,11 @@ namespace llvm {
char ConstraintLetter,
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const;
-
+
/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
virtual bool isLegalAddressingMode(const AddrMode &AM, const Type *Ty)const;
-
+
/// isLegalAddressImmediate - Return true if the integer value can be used
/// as the offset of the target addressing mode for load / store of the
/// given type.
@@ -344,7 +346,7 @@ namespace llvm {
virtual bool isLegalAddressImmediate(GlobalValue *GV) const;
virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
-
+
/// getOptimalMemOpType - Returns the target specific optimal type for load
/// and store operations as a result of memset, memcpy, and memmove
/// lowering. If DstAlign is zero that means it's safe to destination
diff --git a/lib/Target/README.txt b/lib/Target/README.txt
index 4e14fbbb09ba9..f85914b61d9d8 100644
--- a/lib/Target/README.txt
+++ b/lib/Target/README.txt
@@ -254,6 +254,20 @@ unsigned long reverse(unsigned v) {
//===---------------------------------------------------------------------===//
+[LOOP DELETION]
+
+We don't delete this output free loop, because trip count analysis doesn't
+realize that it is finite (if it were infinite, it would be undefined). Not
+having this blocks Loop Idiom from matching strlen and friends.
+
+void foo(char *C) {
+ int x = 0;
+ while (*C)
+ ++x,++C;
+}
+
+//===---------------------------------------------------------------------===//
+
[LOOP RECOGNITION]
These idioms should be recognized as popcount (see PR1488):
@@ -287,6 +301,16 @@ unsigned int popcount(unsigned int input) {
return count;
}
+This should be recognized as CLZ: rdar://8459039
+
+unsigned clz_a(unsigned a) {
+ int i;
+ for (i=0;i<32;i++)
+ if (a & (1<<(31-i)))
+ return i;
+ return 32;
+}
+
This sort of thing should be added to the loop idiom pass.
//===---------------------------------------------------------------------===//
diff --git a/lib/Target/Sparc/DelaySlotFiller.cpp b/lib/Target/Sparc/DelaySlotFiller.cpp
index ee292758d1861..4b12852ef8730 100644
--- a/lib/Target/Sparc/DelaySlotFiller.cpp
+++ b/lib/Target/Sparc/DelaySlotFiller.cpp
@@ -79,6 +79,7 @@ namespace {
MachineBasicBlock::iterator
findDelayInstr(MachineBasicBlock &MBB, MachineBasicBlock::iterator slot);
+ bool needsUnimp(MachineBasicBlock::iterator I, unsigned &StructSize);
};
char Filler::ID = 0;
@@ -91,6 +92,7 @@ FunctionPass *llvm::createSparcDelaySlotFillerPass(TargetMachine &tm) {
return new Filler(tm);
}
+
/// runOnMachineBasicBlock - Fill in delay slots for the given basic block.
/// We assume there is only one delay slot per delayed instruction.
///
@@ -112,6 +114,13 @@ bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
BuildMI(MBB, ++J, I->getDebugLoc(), TII->get(SP::NOP));
else
MBB.splice(++J, &MBB, D);
+ unsigned structSize = 0;
+ if (needsUnimp(I, structSize)) {
+ MachineBasicBlock::iterator J = I;
+ ++J; //skip the delay filler.
+ BuildMI(MBB, ++J, I->getDebugLoc(),
+ TII->get(SP::UNIMP)).addImm(structSize);
+ }
}
return Changed;
}
@@ -287,6 +296,28 @@ bool Filler::isDelayFiller(MachineBasicBlock &MBB,
{
if (candidate == MBB.begin())
return false;
+ if (candidate->getOpcode() == SP::UNIMP)
+ return true;
const TargetInstrDesc &prevdesc = (--candidate)->getDesc();
return prevdesc.hasDelaySlot();
}
+
+bool Filler::needsUnimp(MachineBasicBlock::iterator I, unsigned &StructSize)
+{
+ if (!I->getDesc().isCall())
+ return false;
+
+ unsigned structSizeOpNum = 0;
+ switch (I->getOpcode()) {
+ default: llvm_unreachable("Unknown call opcode.");
+ case SP::CALL: structSizeOpNum = 1; break;
+ case SP::JMPLrr:
+ case SP::JMPLri: structSizeOpNum = 2; break;
+ }
+
+ const MachineOperand &MO = I->getOperand(structSizeOpNum);
+ if (!MO.isImm())
+ return false;
+ StructSize = MO.getImm();
+ return true;
+}
diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp
index 196b87dd58d0e..70574c370f359 100644
--- a/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/lib/Target/Sparc/SparcISelLowering.cpp
@@ -16,7 +16,9 @@
#include "SparcISelLowering.h"
#include "SparcTargetMachine.h"
#include "SparcMachineFunctionInfo.h"
+#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
+#include "llvm/Module.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -116,6 +118,8 @@ SparcTargetLowering::LowerReturn(SDValue Chain,
// Guarantee that all emitted copies are stuck together with flags.
Flag = Chain.getValue(1);
}
+
+ unsigned RetAddrOffset = 8; //Call Inst + Delay Slot
// If the function returns a struct, copy the SRetReturnReg to I0
if (MF.getFunction()->hasStructRetAttr()) {
SparcMachineFunctionInfo *SFI = MF.getInfo<SparcMachineFunctionInfo>();
@@ -127,11 +131,16 @@ SparcTargetLowering::LowerReturn(SDValue Chain,
Flag = Chain.getValue(1);
if (MF.getRegInfo().liveout_empty())
MF.getRegInfo().addLiveOut(SP::I0);
+ RetAddrOffset = 12; // CallInst + Delay Slot + Unimp
}
+ SDValue RetAddrOffsetNode = DAG.getConstant(RetAddrOffset, MVT::i32);
+
if (Flag.getNode())
- return DAG.getNode(SPISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
- return DAG.getNode(SPISD::RET_FLAG, dl, MVT::Other, Chain);
+ return DAG.getNode(SPISD::RET_FLAG, dl, MVT::Other, Chain,
+ RetAddrOffsetNode, Flag);
+ return DAG.getNode(SPISD::RET_FLAG, dl, MVT::Other, Chain,
+ RetAddrOffsetNode);
}
/// LowerFormalArguments - V8 uses a very simple ABI, where all values are
@@ -194,7 +203,7 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain,
false, false, 0);
} else {
unsigned loReg = MF.addLiveIn(NextVA.getLocReg(),
- &SP::IntRegsRegClass, dl);
+ &SP::IntRegsRegClass);
LoVal = DAG.getCopyFromReg(Chain, dl, loReg, MVT::i32);
}
SDValue WholeValue =
@@ -393,6 +402,7 @@ SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
SmallVector<SDValue, 8> MemOpChains;
const unsigned StackOffset = 92;
+ bool hasStructRetAttr = false;
// Walk the register/memloc assignments, inserting copies/loads.
for (unsigned i = 0, realArgIdx = 0, byvalArgIdx = 0, e = ArgLocs.size();
i != e;
@@ -433,6 +443,7 @@ SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
MachinePointerInfo(),
false, false, 0));
+ hasStructRetAttr = true;
continue;
}
@@ -546,6 +557,8 @@ SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
InFlag = Chain.getValue(1);
}
+ unsigned SRetArgSize = (hasStructRetAttr)? getSRetArgSize(DAG, Callee):0;
+
// If the callee is a GlobalAddress node (quite common, every direct call is)
// turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
// Likewise ExternalSymbol -> TargetExternalSymbol.
@@ -559,6 +572,8 @@ SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
SmallVector<SDValue, 8> Ops;
Ops.push_back(Chain);
Ops.push_back(Callee);
+ if (hasStructRetAttr)
+ Ops.push_back(DAG.getTargetConstant(SRetArgSize, MVT::i32));
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
unsigned Reg = RegsToPass[i].first;
if (Reg >= SP::I0 && Reg <= SP::I7)
@@ -600,7 +615,29 @@ SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
return Chain;
}
+unsigned
+SparcTargetLowering::getSRetArgSize(SelectionDAG &DAG, SDValue Callee) const
+{
+ const Function *CalleeFn = 0;
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ CalleeFn = dyn_cast<Function>(G->getGlobal());
+ } else if (ExternalSymbolSDNode *E =
+ dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ const Function *Fn = DAG.getMachineFunction().getFunction();
+ const Module *M = Fn->getParent();
+ CalleeFn = M->getFunction(E->getSymbol());
+ }
+
+ if (!CalleeFn)
+ return 0;
+ assert(CalleeFn->hasStructRetAttr() &&
+ "Callee does not have the StructRet attribute.");
+
+ const PointerType *Ty = cast<PointerType>(CalleeFn->arg_begin()->getType());
+ const Type *ElementTy = Ty->getElementType();
+ return getTargetData()->getTypeAllocSize(ElementTy);
+}
//===----------------------------------------------------------------------===//
// TargetLowering Implementation
diff --git a/lib/Target/Sparc/SparcISelLowering.h b/lib/Target/Sparc/SparcISelLowering.h
index 849e4010af6bc..7d02df8adcca1 100644
--- a/lib/Target/Sparc/SparcISelLowering.h
+++ b/lib/Target/Sparc/SparcISelLowering.h
@@ -101,6 +101,8 @@ namespace llvm {
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
+
+ unsigned getSRetArgSize(SelectionDAG &DAG, SDValue Callee) const;
};
} // end namespace llvm
diff --git a/lib/Target/Sparc/SparcInstrInfo.td b/lib/Target/Sparc/SparcInstrInfo.td
index 107232357b3b7..cf5c48fd18d9d 100644
--- a/lib/Target/Sparc/SparcInstrInfo.td
+++ b/lib/Target/Sparc/SparcInstrInfo.td
@@ -124,7 +124,8 @@ def call : SDNode<"SPISD::CALL", SDT_SPCall,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
SDNPVariadic]>;
-def retflag : SDNode<"SPISD::RET_FLAG", SDTNone,
+def SDT_SPRet : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
+def retflag : SDNode<"SPISD::RET_FLAG", SDT_SPRet,
[SDNPHasChain, SDNPOptInGlue]>;
def flushw : SDNode<"SPISD::FLUSHW", SDTNone,
@@ -132,7 +133,7 @@ def flushw : SDNode<"SPISD::FLUSHW", SDTNone,
def getPCX : Operand<i32> {
let PrintMethod = "printGetPCX";
-}
+}
//===----------------------------------------------------------------------===//
// SPARC Flag Conditions
@@ -232,6 +233,9 @@ let hasSideEffects = 1, mayStore = 1 in {
[(flushw)]>;
}
+def UNIMP : F2_1<0b000, (outs), (ins i32imm:$val),
+ "unimp $val", []>;
+
// FpMOVD/FpNEGD/FpABSD - These are lowered to single-precision ops by the
// fpmover pass.
let Predicates = [HasNoV9] in { // Only emit these in V8 mode.
@@ -292,11 +296,13 @@ let usesCustomInserter = 1, Uses = [FCC] in {
// Section A.3 - Synthetic Instructions, p. 85
// special cases of JMPL:
let isReturn = 1, isTerminator = 1, hasDelaySlot = 1, isBarrier = 1 in {
- let rd = O7.Num, rs1 = G0.Num, simm13 = 8 in
- def RETL: F3_2<2, 0b111000, (outs), (ins), "retl", [(retflag)]>;
+ let rd = O7.Num, rs1 = G0.Num in
+ def RETL: F3_2<2, 0b111000, (outs), (ins i32imm:$val),
+ "jmp %o7+$val", [(retflag simm13:$val)]>;
- let rd = I7.Num, rs1 = G0.Num, simm13 = 8 in
- def RET: F3_2<2, 0b111000, (outs), (ins), "ret", []>;
+ let rd = I7.Num, rs1 = G0.Num in
+ def RET: F3_2<2, 0b111000, (outs), (ins i32imm:$val),
+ "jmp %i7+$val", []>;
}
// Section B.1 - Load Integer Instructions, p. 90
diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp
index d694f2e67edc9..90939c312065b 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -59,9 +59,6 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm) :
// Compute derived properties from the register classes
computeRegisterProperties();
- // Set shifts properties
- setShiftAmountType(MVT::i64);
-
// Provide all sorts of operation actions
setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h
index 51d2df3a30088..30192420dcb60 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/lib/Target/SystemZ/SystemZISelLowering.h
@@ -57,6 +57,8 @@ namespace llvm {
public:
explicit SystemZTargetLowering(SystemZTargetMachine &TM);
+ virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i64; }
+
/// LowerOperation - Provide custom lowering hooks for some operations.
virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index 1cac07a0e10a6..8fe549ba3126b 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -775,6 +775,19 @@ ParseInstruction(StringRef Name, SMLoc NameLoc,
delete &Op;
}
}
+ // Same hack for "in[bwl]? (%dx), %al" -> "inb %dx, %al".
+ if ((Name == "inb" || Name == "inw" || Name == "inl" || Name == "in") &&
+ Operands.size() == 3) {
+ X86Operand &Op = *(X86Operand*)Operands.begin()[1];
+ if (Op.isMem() && Op.Mem.SegReg == 0 &&
+ isa<MCConstantExpr>(Op.Mem.Disp) &&
+ cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
+ Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
+ SMLoc Loc = Op.getEndLoc();
+ Operands.begin()[1] = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
+ delete &Op;
+ }
+ }
// FIXME: Hack to handle recognize s{hr,ar,hl} $1, <op>. Canonicalize to
// "shift <op>".
diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp
index 691e2d7204ab9..f7777561b6a74 100644
--- a/lib/Target/X86/Disassembler/X86Disassembler.cpp
+++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -168,16 +168,16 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate,
switch (insn.displacementSize) {
default:
break;
- case 8:
+ case 1:
type = TYPE_MOFFS8;
break;
- case 16:
+ case 2:
type = TYPE_MOFFS16;
break;
- case 32:
+ case 4:
type = TYPE_MOFFS32;
break;
- case 64:
+ case 8:
type = TYPE_MOFFS64;
break;
}
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
index 4f4fbcdd394cf..d0dc8b56aea50 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
@@ -399,7 +399,7 @@ struct InternalInstruction {
/* The segment override type */
SegmentOverride segmentOverride;
- /* Sizes of various critical pieces of data */
+ /* Sizes of various critical pieces of data, in bytes */
uint8_t registerSize;
uint8_t addressSize;
uint8_t displacementSize;
diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt
index c10e1709f6677..abd1515cf5d70 100644
--- a/lib/Target/X86/README.txt
+++ b/lib/Target/X86/README.txt
@@ -1879,39 +1879,71 @@ _add32carry:
//===---------------------------------------------------------------------===//
-This:
-char t(char c) {
- return c/3;
+The hot loop of 256.bzip2 contains code that looks a bit like this:
+
+int foo(char *P, char *Q, int x, int y) {
+ if (P[0] != Q[0])
+ return P[0] < Q[0];
+ if (P[1] != Q[1])
+ return P[1] < Q[1];
+ if (P[2] != Q[2])
+ return P[2] < Q[2];
+ return P[3] < Q[3];
}
-Compiles to: $clang t.c -S -o - -O3 -mkernel -fomit-frame-pointer
+In the real code, we get a lot more wrong than this. However, even in this
+code we generate:
-_t: ## @t
- movslq %edi, %rax
- imulq $-1431655765, %rax, %rcx ## imm = 0xFFFFFFFFAAAAAAAB
- shrq $32, %rcx
- addl %ecx, %eax
- movl %eax, %ecx
- shrl $31, %ecx
- shrl %eax
- addl %ecx, %eax
- movsbl %al, %eax
+_foo: ## @foo
+## BB#0: ## %entry
+ movb (%rsi), %al
+ movb (%rdi), %cl
+ cmpb %al, %cl
+ je LBB0_2
+LBB0_1: ## %if.then
+ cmpb %al, %cl
+ jmp LBB0_5
+LBB0_2: ## %if.end
+ movb 1(%rsi), %al
+ movb 1(%rdi), %cl
+ cmpb %al, %cl
+ jne LBB0_1
+## BB#3: ## %if.end38
+ movb 2(%rsi), %al
+ movb 2(%rdi), %cl
+ cmpb %al, %cl
+ jne LBB0_1
+## BB#4: ## %if.end60
+ movb 3(%rdi), %al
+ cmpb 3(%rsi), %al
+LBB0_5: ## %if.end60
+ setl %al
+ movzbl %al, %eax
ret
-GCC gets:
+Note that we generate jumps to LBB0_1 which does a redundant compare. The
+redundant compare also forces the register values to be live, which prevents
+folding one of the loads into the compare. In contrast, GCC 4.2 produces:
-_t:
- movl $86, %eax
- imulb %dil
- shrw $8, %ax
- sarb $7, %dil
- subb %dil, %al
- movsbl %al,%eax
+_foo:
+ movzbl (%rsi), %eax
+ cmpb %al, (%rdi)
+ jne L10
+L12:
+ movzbl 1(%rsi), %eax
+ cmpb %al, 1(%rdi)
+ jne L10
+ movzbl 2(%rsi), %eax
+ cmpb %al, 2(%rdi)
+ jne L10
+ movzbl 3(%rdi), %eax
+ cmpb 3(%rsi), %al
+L10:
+ setl %al
+ movzbl %al, %eax
ret
-which is nicer. This also happens for int, not just char.
+which is "perfect".
//===---------------------------------------------------------------------===//
-
-
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index 9d42ac2e470c2..6fa928462b28a 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -597,9 +597,13 @@ bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) {
(AM.Base.Reg != 0 || AM.IndexReg != 0))
return false;
- // Can't handle TLS or DLLImport.
+ // Can't handle DLLImport.
+ if (GV->hasDLLImportLinkage())
+ return false;
+
+ // Can't handle TLS.
if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
- if (GVar->isThreadLocal() || GVar->hasDLLImportLinkage())
+ if (GVar->isThreadLocal())
return false;
// Okay, we've committed to selecting this global. Set up the basic address.
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 27024b4e9e5a9..2f49dbcebf3c6 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -45,7 +45,6 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/VectorExtras.h"
-#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/ErrorHandling.h"
@@ -56,10 +55,6 @@ using namespace dwarf;
STATISTIC(NumTailCalls, "Number of tail calls");
-static cl::opt<bool>
-Disable256Bit("disable-256bit", cl::Hidden,
- cl::desc("Disable use of 256-bit vectors"));
-
// Forward declarations.
static SDValue getMOVL(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1,
SDValue V2);
@@ -225,7 +220,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
static MVT IntVTs[] = { MVT::i8, MVT::i16, MVT::i32, MVT::i64 };
// X86 is weird, it always uses i8 for shift amounts and setcc results.
- setShiftAmountType(MVT::i8);
setBooleanContents(ZeroOrOneBooleanContent);
setSchedulingPreference(Sched::RegPressure);
setStackPointerRegisterToSaveRestore(X86StackPtr);
@@ -1713,7 +1707,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
else
llvm_unreachable("Unknown argument type!");
- unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC, dl);
+ unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
// If this is an 8 or 16-bit value, it is really passed promoted to 32
@@ -1845,7 +1839,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), RSFIN,
DAG.getIntPtrConstant(Offset));
unsigned VReg = MF.addLiveIn(GPR64ArgRegs[NumIntRegs],
- X86::GR64RegisterClass, dl);
+ X86::GR64RegisterClass);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
SDValue Store =
DAG.getStore(Val.getValue(1), dl, Val, FIN,
@@ -1861,7 +1855,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
SmallVector<SDValue, 11> SaveXMMOps;
SaveXMMOps.push_back(Chain);
- unsigned AL = MF.addLiveIn(X86::AL, X86::GR8RegisterClass, dl);
+ unsigned AL = MF.addLiveIn(X86::AL, X86::GR8RegisterClass);
SDValue ALVal = DAG.getCopyFromReg(DAG.getEntryNode(), dl, AL, MVT::i8);
SaveXMMOps.push_back(ALVal);
@@ -1872,7 +1866,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
for (; NumXMMRegs != TotalNumXMMRegs; ++NumXMMRegs) {
unsigned VReg = MF.addLiveIn(XMMArgRegs64Bit[NumXMMRegs],
- X86::VR128RegisterClass, dl);
+ X86::VR128RegisterClass);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::v4f32);
SaveXMMOps.push_back(Val);
}
@@ -2693,6 +2687,10 @@ static bool isTargetShuffle(unsigned Opcode) {
case X86ISD::MOVSD:
case X86ISD::UNPCKLPS:
case X86ISD::UNPCKLPD:
+ case X86ISD::VUNPCKLPS:
+ case X86ISD::VUNPCKLPD:
+ case X86ISD::VUNPCKLPSY:
+ case X86ISD::VUNPCKLPDY:
case X86ISD::PUNPCKLWD:
case X86ISD::PUNPCKLBW:
case X86ISD::PUNPCKLDQ:
@@ -2760,6 +2758,10 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
case X86ISD::MOVSD:
case X86ISD::UNPCKLPS:
case X86ISD::UNPCKLPD:
+ case X86ISD::VUNPCKLPS:
+ case X86ISD::VUNPCKLPD:
+ case X86ISD::VUNPCKLPSY:
+ case X86ISD::VUNPCKLPDY:
case X86ISD::PUNPCKLWD:
case X86ISD::PUNPCKLBW:
case X86ISD::PUNPCKLDQ:
@@ -4178,7 +4180,8 @@ static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp,
SrcOp = DAG.getNode(ISD::BITCAST, dl, ShVT, SrcOp);
return DAG.getNode(ISD::BITCAST, dl, VT,
DAG.getNode(Opc, dl, ShVT, SrcOp,
- DAG.getConstant(NumBits, TLI.getShiftAmountTy())));
+ DAG.getConstant(NumBits,
+ TLI.getShiftAmountTy(SrcOp.getValueType()))));
}
SDValue
@@ -4327,16 +4330,15 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
// For AVX-length vectors, build the individual 128-bit pieces and
// use shuffles to put them in place.
- if (VT.getSizeInBits() > 256 &&
- Subtarget->hasAVX() &&
- !Disable256Bit &&
+ if (VT.getSizeInBits() > 256 &&
+ Subtarget->hasAVX() &&
!ISD::isBuildVectorAllZeros(Op.getNode())) {
SmallVector<SDValue, 8> V;
V.resize(NumElems);
for (unsigned i = 0; i < NumElems; ++i) {
V[i] = Op.getOperand(i);
}
-
+
EVT HVT = EVT::getVectorVT(*DAG.getContext(), ExtVT, NumElems/2);
// Build the lower subvector.
@@ -5044,7 +5046,8 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp,
DAG.getIntPtrConstant(Elt1 / 2));
if ((Elt1 & 1) == 0)
InsElt = DAG.getNode(ISD::SHL, dl, MVT::i16, InsElt,
- DAG.getConstant(8, TLI.getShiftAmountTy()));
+ DAG.getConstant(8,
+ TLI.getShiftAmountTy(InsElt.getValueType())));
else if (Elt0 >= 0)
InsElt = DAG.getNode(ISD::AND, dl, MVT::i16, InsElt,
DAG.getConstant(0xFF00, MVT::i16));
@@ -5058,7 +5061,8 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp,
Elt0Src, DAG.getIntPtrConstant(Elt0 / 2));
if ((Elt0 & 1) != 0)
InsElt0 = DAG.getNode(ISD::SRL, dl, MVT::i16, InsElt0,
- DAG.getConstant(8, TLI.getShiftAmountTy()));
+ DAG.getConstant(8,
+ TLI.getShiftAmountTy(InsElt0.getValueType())));
else if (Elt1 >= 0)
InsElt0 = DAG.getNode(ISD::AND, dl, MVT::i16, InsElt0,
DAG.getConstant(0x00FF, MVT::i16));
@@ -5475,7 +5479,7 @@ SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasSSE2) {
// Both of them can't be memory operations though.
if (MayFoldVectorLoad(V1) && MayFoldVectorLoad(V2))
CanFoldLoad = false;
-
+
if (CanFoldLoad) {
if (HasSSE2 && NumElems == 2)
return getTargetShuffleNode(X86ISD::MOVLPD, dl, VT, V1, V2, DAG);
@@ -6088,7 +6092,7 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const {
SDValue ScaledN2 = N2;
if (Upper)
ScaledN2 = DAG.getNode(ISD::SUB, dl, N2.getValueType(), N2,
- DAG.getConstant(NumElems /
+ DAG.getConstant(NumElems /
(VT.getSizeInBits() / 128),
N2.getValueType()));
Op = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, SubN0.getValueType(), SubN0,
@@ -9327,6 +9331,10 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::MOVSS: return "X86ISD::MOVSS";
case X86ISD::UNPCKLPS: return "X86ISD::UNPCKLPS";
case X86ISD::UNPCKLPD: return "X86ISD::UNPCKLPD";
+ case X86ISD::VUNPCKLPS: return "X86ISD::VUNPCKLPS";
+ case X86ISD::VUNPCKLPD: return "X86ISD::VUNPCKLPD";
+ case X86ISD::VUNPCKLPSY: return "X86ISD::VUNPCKLPSY";
+ case X86ISD::VUNPCKLPDY: return "X86ISD::VUNPCKLPDY";
case X86ISD::UNPCKHPS: return "X86ISD::UNPCKHPS";
case X86ISD::UNPCKHPD: return "X86ISD::UNPCKHPD";
case X86ISD::PUNPCKLBW: return "X86ISD::PUNPCKLBW";
@@ -11984,6 +11992,10 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::PUNPCKLQDQ:
case X86ISD::UNPCKLPS:
case X86ISD::UNPCKLPD:
+ case X86ISD::VUNPCKLPS:
+ case X86ISD::VUNPCKLPD:
+ case X86ISD::VUNPCKLPSY:
+ case X86ISD::VUNPCKLPDY:
case X86ISD::MOVHLPS:
case X86ISD::MOVLHPS:
case X86ISD::PSHUFD:
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 419da3742cf81..6ec4a7de75580 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -159,16 +159,16 @@ namespace llvm {
/// PSHUFB - Shuffle 16 8-bit values within a vector.
PSHUFB,
-
+
/// PANDN - and with not'd value.
PANDN,
-
+
/// PSIGNB/W/D - Copy integer sign.
- PSIGNB, PSIGNW, PSIGND,
-
+ PSIGNB, PSIGNW, PSIGND,
+
/// PBLENDVB - Variable blend
PBLENDVB,
-
+
/// FMAX, FMIN - Floating point max and min.
///
FMAX, FMIN,
@@ -212,7 +212,7 @@ namespace llvm {
// ADD, SUB, SMUL, etc. - Arithmetic operations with FLAGS results.
ADD, SUB, ADC, SBB, SMUL,
INC, DEC, OR, XOR, AND,
-
+
UMUL, // LOW, HI, FLAGS = umul LHS, RHS
// MUL_IMM - X86 specific multiply by immediate.
@@ -248,6 +248,10 @@ namespace llvm {
MOVSS,
UNPCKLPS,
UNPCKLPD,
+ VUNPCKLPS,
+ VUNPCKLPD,
+ VUNPCKLPSY,
+ VUNPCKLPDY,
UNPCKHPS,
UNPCKHPD,
PUNPCKLBW,
@@ -463,6 +467,8 @@ namespace llvm {
virtual unsigned getJumpTableEncoding() const;
+ virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i8; }
+
virtual const MCExpr *
LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
const MachineBasicBlock *MBB, unsigned uid,
diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td
index 344c14c112a0e..0660072589e4c 100644
--- a/lib/Target/X86/X86InstrFormats.td
+++ b/lib/Target/X86/X86InstrFormats.td
@@ -41,6 +41,8 @@ def MRM_F8 : Format<41>;
def MRM_F9 : Format<42>;
def RawFrmImm8 : Format<43>;
def RawFrmImm16 : Format<44>;
+def MRM_D0 : Format<45>;
+def MRM_D1 : Format<46>;
// ImmType - This specifies the immediate type used by an instruction. This is
// part of the ad-hoc solution used to emit machine instruction encodings by our
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index ceb1b65398263..76a9b12b8aad8 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -369,8 +369,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::IMUL32rri8, X86::IMUL32rmi8, 0 },
{ X86::IMUL64rri32, X86::IMUL64rmi32, 0 },
{ X86::IMUL64rri8, X86::IMUL64rmi8, 0 },
- { X86::Int_CMPSDrr, X86::Int_CMPSDrm, 0 },
- { X86::Int_CMPSSrr, X86::Int_CMPSSrm, 0 },
{ X86::Int_COMISDrr, X86::Int_COMISDrm, 0 },
{ X86::Int_COMISSrr, X86::Int_COMISSrm, 0 },
{ X86::Int_CVTDQ2PDrr, X86::Int_CVTDQ2PDrm, 16 },
@@ -568,6 +566,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::IMUL16rr, X86::IMUL16rm, 0 },
{ X86::IMUL32rr, X86::IMUL32rm, 0 },
{ X86::IMUL64rr, X86::IMUL64rm, 0 },
+ { X86::Int_CMPSDrr, X86::Int_CMPSDrm, 0 },
+ { X86::Int_CMPSSrr, X86::Int_CMPSSrm, 0 },
{ X86::MAXPDrr, X86::MAXPDrm, 16 },
{ X86::MAXPDrr_Int, X86::MAXPDrm_Int, 16 },
{ X86::MAXPSrr, X86::MAXPSrm, 16 },
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index 1d44207872732..fcb5a25104ac4 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -311,6 +311,8 @@ namespace X86II {
MRM_F0 = 40,
MRM_F8 = 41,
MRM_F9 = 42,
+ MRM_D0 = 45,
+ MRM_D1 = 46,
/// RawFrmImm8 - This is used for the ENTER instruction, which has two
/// immediates, the first of which is a 16-bit immediate (specified by
@@ -577,6 +579,8 @@ namespace X86II {
case X86II::MRM_F0:
case X86II::MRM_F8:
case X86II::MRM_F9:
+ case X86II::MRM_D0:
+ case X86II::MRM_D1:
return -1;
}
}
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 87dc4bece7420..f832a7c85a8a5 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -1296,6 +1296,9 @@ def : MnemonicAlias<"lret", "lretl">;
def : MnemonicAlias<"leavel", "leave">, Requires<[In32BitMode]>;
def : MnemonicAlias<"leaveq", "leave">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"loopz", "loope">;
+def : MnemonicAlias<"loopnz", "loopne">;
+
def : MnemonicAlias<"pop", "popl">, Requires<[In32BitMode]>;
def : MnemonicAlias<"pop", "popq">, Requires<[In64BitMode]>;
def : MnemonicAlias<"popf", "popfl">, Requires<[In32BitMode]>;
diff --git a/lib/Target/X86/X86InstrSystem.td b/lib/Target/X86/X86InstrSystem.td
index 1a58ba0f96ef7..6a24d145c6967 100644
--- a/lib/Target/X86/X86InstrSystem.td
+++ b/lib/Target/X86/X86InstrSystem.td
@@ -388,3 +388,8 @@ def CPUID : I<0xA2, RawFrm, (outs), (ins), "cpuid", []>, TB;
def INVD : I<0x08, RawFrm, (outs), (ins), "invd", []>, TB;
def WBINVD : I<0x09, RawFrm, (outs), (ins), "wbinvd", []>, TB;
+let Defs = [RDX, RAX], Uses = [RCX] in
+ def XGETBV : I<0x01, MRM_D0, (outs), (ins), "xgetbv", []>, TB;
+
+let Uses = [RDX, RAX, RCX] in
+ def XSETBV : I<0x01, MRM_D1, (outs), (ins), "xsetbv", []>, TB;
diff --git a/lib/Target/X86/X86MCCodeEmitter.cpp b/lib/Target/X86/X86MCCodeEmitter.cpp
index e6dc74e65d795..0e3b5711f2b5e 100644
--- a/lib/Target/X86/X86MCCodeEmitter.cpp
+++ b/lib/Target/X86/X86MCCodeEmitter.cpp
@@ -979,6 +979,14 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
EmitByte(BaseOpcode, CurByte, OS);
EmitByte(0xF9, CurByte, OS);
break;
+ case X86II::MRM_D0:
+ EmitByte(BaseOpcode, CurByte, OS);
+ EmitByte(0xD0, CurByte, OS);
+ break;
+ case X86II::MRM_D1:
+ EmitByte(BaseOpcode, CurByte, OS);
+ EmitByte(0xD1, CurByte, OS);
+ break;
}
// If there is a remaining operand, it must be a trailing immediate. Emit it
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index de768561f111c..1ee73123bbc61 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -342,9 +342,10 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS,
assert((!Is64Bit || HasX86_64) &&
"64-bit code requested on a subtarget that doesn't support it!");
- // Stack alignment is 16 bytes on Darwin and Linux (both 32 and 64 bit) and
- // for all 64-bit targets.
- if (isTargetDarwin() || isTargetLinux() || Is64Bit)
+ // Stack alignment is 16 bytes on Darwin, FreeBSD, Linux and Solaris (both
+ // 32 and 64 bit) and for all 64-bit targets.
+ if (isTargetDarwin() || isTargetFreeBSD() || isTargetLinux() ||
+ isTargetSolaris() || Is64Bit)
stackAlignment = 16;
if (StackAlignment)
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index 8a119b43cd91e..0a62a029554c7 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -166,6 +166,8 @@ public:
bool hasVectorUAMem() const { return HasVectorUAMem; }
bool isTargetDarwin() const { return TargetTriple.getOS() == Triple::Darwin; }
+ bool isTargetFreeBSD() const { return TargetTriple.getOS() == Triple::FreeBSD; }
+ bool isTargetSolaris() const { return TargetTriple.getOS() == Triple::Solaris; }
// ELF is a reasonably sane default and the only other X86 targets we
// support are Darwin and Windows. Just use "not those".
diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp
index 828d6f92caf40..4817787d75159 100644
--- a/lib/Target/XCore/XCoreISelLowering.cpp
+++ b/lib/Target/XCore/XCoreISelLowering.cpp
@@ -42,9 +42,9 @@
using namespace llvm;
const char *XCoreTargetLowering::
-getTargetNodeName(unsigned Opcode) const
+getTargetNodeName(unsigned Opcode) const
{
- switch (Opcode)
+ switch (Opcode)
{
case XCoreISD::BL : return "XCoreISD::BL";
case XCoreISD::PCRelativeWrapper : return "XCoreISD::PCRelativeWrapper";
@@ -77,7 +77,6 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM)
// Division is expensive
setIntDivIsCheap(false);
- setShiftAmountType(MVT::i32);
setStackPointerRegisterToSaveRestore(XCore::SP);
setSchedulingPreference(Sched::RegPressure);
@@ -95,7 +94,7 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM)
// Stop the combiner recombining select and set_cc
setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
-
+
// 64bit
setOperationAction(ISD::ADD, MVT::i64, Custom);
setOperationAction(ISD::SUB, MVT::i64, Custom);
@@ -106,14 +105,14 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM)
setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);
setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);
setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand);
-
+
// Bit Manipulation
setOperationAction(ISD::CTPOP, MVT::i32, Expand);
setOperationAction(ISD::ROTL , MVT::i32, Expand);
setOperationAction(ISD::ROTR , MVT::i32, Expand);
-
+
setOperationAction(ISD::TRAP, MVT::Other, Legal);
-
+
// Jump tables.
setOperationAction(ISD::BR_JT, MVT::Other, Custom);
@@ -122,7 +121,7 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM)
// Thread Local Storage
setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
-
+
// Conversion of i64 -> double produces constantpool nodes
setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
@@ -143,7 +142,7 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM)
setOperationAction(ISD::VACOPY, MVT::Other, Expand);
setOperationAction(ISD::VAARG, MVT::Other, Custom);
setOperationAction(ISD::VASTART, MVT::Other, Custom);
-
+
// Dynamic stack
setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
@@ -163,7 +162,7 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM)
SDValue XCoreTargetLowering::
LowerOperation(SDValue Op, SelectionDAG &DAG) const {
- switch (Op.getOpcode())
+ switch (Op.getOpcode())
{
case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
@@ -414,7 +413,7 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
SDValue Chain = LD->getChain();
SDValue BasePtr = LD->getBasePtr();
DebugLoc DL = Op.getDebugLoc();
-
+
SDValue Base;
int64_t Offset;
if (!LD->isVolatile() &&
@@ -437,10 +436,10 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
SDValue HighOffset = DAG.getConstant((Offset & ~0x3) + 4, MVT::i32);
SDValue LowShift = DAG.getConstant((Offset & 0x3) * 8, MVT::i32);
SDValue HighShift = DAG.getConstant(32 - (Offset & 0x3) * 8, MVT::i32);
-
+
SDValue LowAddr = DAG.getNode(ISD::ADD, DL, MVT::i32, Base, LowOffset);
SDValue HighAddr = DAG.getNode(ISD::ADD, DL, MVT::i32, Base, HighOffset);
-
+
SDValue Low = DAG.getLoad(getPointerTy(), DL, Chain,
LowAddr, MachinePointerInfo(), false, false, 0);
SDValue High = DAG.getLoad(getPointerTy(), DL, Chain,
@@ -453,7 +452,7 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
SDValue Ops[] = { Result, Chain };
return DAG.getMergeValues(Ops, 2, DL);
}
-
+
if (LD->getAlignment() == 2) {
SDValue Low = DAG.getExtLoad(ISD::ZEXTLOAD, DL, MVT::i32, Chain,
BasePtr, LD->getPointerInfo(), MVT::i16,
@@ -473,16 +472,16 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
SDValue Ops[] = { Result, Chain };
return DAG.getMergeValues(Ops, 2, DL);
}
-
+
// Lower to a call to __misaligned_load(BasePtr).
const Type *IntPtrTy = getTargetData()->getIntPtrType(*DAG.getContext());
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
-
+
Entry.Ty = IntPtrTy;
Entry.Node = BasePtr;
Args.push_back(Entry);
-
+
std::pair<SDValue, SDValue> CallResult =
LowerCallTo(Chain, IntPtrTy, false, false,
false, false, 0, CallingConv::C, false,
@@ -515,7 +514,7 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG) const
SDValue BasePtr = ST->getBasePtr();
SDValue Value = ST->getValue();
DebugLoc dl = Op.getDebugLoc();
-
+
if (ST->getAlignment() == 2) {
SDValue Low = Value;
SDValue High = DAG.getNode(ISD::SRL, dl, MVT::i32, Value,
@@ -532,19 +531,19 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG) const
ST->isNonTemporal(), 2);
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, StoreLow, StoreHigh);
}
-
+
// Lower to a call to __misaligned_store(BasePtr, Value).
const Type *IntPtrTy = getTargetData()->getIntPtrType(*DAG.getContext());
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
-
+
Entry.Ty = IntPtrTy;
Entry.Node = BasePtr;
Args.push_back(Entry);
-
+
Entry.Node = Value;
Args.push_back(Entry);
-
+
std::pair<SDValue, SDValue> CallResult =
LowerCallTo(Chain, Type::getVoidTy(*DAG.getContext()), false, false,
false, false, 0, CallingConv::C, false,
@@ -722,7 +721,7 @@ ExpandADDSUB(SDNode *N, SelectionDAG &DAG) const
}
DebugLoc dl = N->getDebugLoc();
-
+
// Extract components
SDValue LHSL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
N->getOperand(0), DAG.getConstant(0, MVT::i32));
@@ -732,7 +731,7 @@ ExpandADDSUB(SDNode *N, SelectionDAG &DAG) const
N->getOperand(1), DAG.getConstant(0, MVT::i32));
SDValue RHSH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
N->getOperand(1), DAG.getConstant(1, MVT::i32));
-
+
// Expand
unsigned Opcode = (N->getOpcode() == ISD::ADD) ? XCoreISD::LADD :
XCoreISD::LSUB;
@@ -740,7 +739,7 @@ ExpandADDSUB(SDNode *N, SelectionDAG &DAG) const
SDValue Carry = DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, MVT::i32),
LHSL, RHSL, Zero);
SDValue Lo(Carry.getNode(), 1);
-
+
SDValue Ignored = DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, MVT::i32),
LHSH, RHSH, Carry);
SDValue Hi(Ignored.getNode(), 1);
@@ -761,8 +760,8 @@ LowerVAARG(SDValue Op, SelectionDAG &DAG) const
Node->getOperand(1), MachinePointerInfo(V),
false, false, 0);
// Increment the pointer, VAList, to the next vararg
- SDValue Tmp3 = DAG.getNode(ISD::ADD, dl, getPointerTy(), VAList,
- DAG.getConstant(VT.getSizeInBits(),
+ SDValue Tmp3 = DAG.getNode(ISD::ADD, dl, getPointerTy(), VAList,
+ DAG.getConstant(VT.getSizeInBits(),
getPointerTy()));
// Store the incremented VAList to the legalized pointer
Tmp3 = DAG.getStore(VAList.getValue(1), dl, Tmp3, Node->getOperand(1),
@@ -781,20 +780,20 @@ LowerVASTART(SDValue Op, SelectionDAG &DAG) const
MachineFunction &MF = DAG.getMachineFunction();
XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>();
SDValue Addr = DAG.getFrameIndex(XFI->getVarArgsFrameIndex(), MVT::i32);
- return DAG.getStore(Op.getOperand(0), dl, Addr, Op.getOperand(1),
+ return DAG.getStore(Op.getOperand(0), dl, Addr, Op.getOperand(1),
MachinePointerInfo(), false, false, 0);
}
SDValue XCoreTargetLowering::LowerFRAMEADDR(SDValue Op,
SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
- // Depths > 0 not supported yet!
+ // Depths > 0 not supported yet!
if (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() > 0)
return SDValue();
-
+
MachineFunction &MF = DAG.getMachineFunction();
const TargetRegisterInfo *RegInfo = getTargetMachine().getRegisterInfo();
- return DAG.getCopyFromReg(DAG.getEntryNode(), dl,
+ return DAG.getCopyFromReg(DAG.getEntryNode(), dl,
RegInfo->getFrameRegister(MF), MVT::i32);
}
@@ -919,7 +918,7 @@ XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
// Get a count of how many bytes are to be pushed on the stack.
unsigned NumBytes = CCInfo.getNextStackOffset();
- Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes,
+ Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes,
getPointerTy(), true));
SmallVector<std::pair<unsigned, SDValue>, 4> RegsToPass;
@@ -944,8 +943,8 @@ XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
break;
}
-
- // Arguments that can be passed on register must be kept at
+
+ // Arguments that can be passed on register must be kept at
// RegsToPass vector
if (VA.isRegLoc()) {
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
@@ -954,7 +953,7 @@ XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
int Offset = VA.getLocMemOffset();
- MemOpChains.push_back(DAG.getNode(XCoreISD::STWSP, dl, MVT::Other,
+ MemOpChains.push_back(DAG.getNode(XCoreISD::STWSP, dl, MVT::Other,
Chain, Arg,
DAG.getConstant(Offset/4, MVT::i32)));
}
@@ -963,16 +962,16 @@ XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
// Transform all store nodes into one single node because
// all store nodes are independent of each other.
if (!MemOpChains.empty())
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
&MemOpChains[0], MemOpChains.size());
- // Build a sequence of copy-to-reg nodes chained together with token
+ // Build a sequence of copy-to-reg nodes chained together with token
// chain and flag operands which copy the outgoing args into registers.
// The InFlag in necessary since all emited instructions must be
// stuck together.
SDValue InFlag;
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
- Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
RegsToPass[i].second, InFlag);
InFlag = Chain.getValue(1);
}
@@ -986,7 +985,7 @@ XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
Callee = DAG.getTargetExternalSymbol(E->getSymbol(), MVT::i32);
// XCoreBranchLink = #chain, #target_address, #opt_in_flags...
- // = Chain, Callee, Reg#1, Reg#2, ...
+ // = Chain, Callee, Reg#1, Reg#2, ...
//
// Returns a chain & a flag for retval copy to use.
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
@@ -994,7 +993,7 @@ XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
Ops.push_back(Chain);
Ops.push_back(Callee);
- // Add argument registers to the end of the list so that they are
+ // Add argument registers to the end of the list so that they are
// known live into the call.
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
Ops.push_back(DAG.getRegister(RegsToPass[i].first,
@@ -1098,11 +1097,11 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain,
unsigned StackSlotSize = XCoreFrameLowering::stackSlotSize();
unsigned LRSaveSize = StackSlotSize;
-
+
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
-
+
if (VA.isRegLoc()) {
// Arguments passed in registers
EVT RegVT = VA.getLocVT();
@@ -1139,12 +1138,12 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain,
// Create the SelectionDAG nodes corresponding to a load
//from this parameter
SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
- InVals.push_back(DAG.getLoad(VA.getLocVT(), dl, Chain, FIN,
+ InVals.push_back(DAG.getLoad(VA.getLocVT(), dl, Chain, FIN,
MachinePointerInfo::getFixedStack(FI),
false, false, 0));
}
}
-
+
if (isVarArg) {
/* Argument registers */
static const unsigned ArgRegs[] = {
@@ -1186,7 +1185,7 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain,
true));
}
}
-
+
return Chain;
}
@@ -1222,7 +1221,7 @@ XCoreTargetLowering::LowerReturn(SDValue Chain,
// Analize return values.
CCInfo.AnalyzeReturn(Outs, RetCC_XCore);
- // If this is the first return lowered for this function, add
+ // If this is the first return lowered for this function, add
// the regs to the liveout set for the function.
if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
for (unsigned i = 0; i != RVLocs.size(); ++i)
@@ -1237,7 +1236,7 @@ XCoreTargetLowering::LowerReturn(SDValue Chain,
CCValAssign &VA = RVLocs[i];
assert(VA.isRegLoc() && "Can only return in registers!");
- Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
OutVals[i], Flag);
// guarantee that all emitted copies are
@@ -1265,7 +1264,7 @@ XCoreTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
DebugLoc dl = MI->getDebugLoc();
assert((MI->getOpcode() == XCore::SELECT_CC) &&
"Unexpected instr type to insert");
-
+
// To "insert" a SELECT_CC instruction, we actually have to insert the diamond
// control-flow pattern. The incoming instruction knows the destination vreg
// to set, the condition code register to branch on, the true/false values to
@@ -1273,7 +1272,7 @@ XCoreTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
const BasicBlock *LLVM_BB = BB->getBasicBlock();
MachineFunction::iterator It = BB;
++It;
-
+
// thisMBB:
// ...
// TrueVal = ...
@@ -1296,7 +1295,7 @@ XCoreTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
// Next, add the true and fallthrough blocks as its successors.
BB->addSuccessor(copy0MBB);
BB->addSuccessor(sinkMBB);
-
+
BuildMI(BB, dl, TII.get(XCore::BRFT_lru6))
.addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);
@@ -1304,10 +1303,10 @@ XCoreTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
// %FalseValue = ...
// # fallthrough to sinkMBB
BB = copy0MBB;
-
+
// Update machine-CFG edges
BB->addSuccessor(sinkMBB);
-
+
// sinkMBB:
// %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
// ...
@@ -1316,7 +1315,7 @@ XCoreTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
TII.get(XCore::PHI), MI->getOperand(0).getReg())
.addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB)
.addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
-
+
MI->eraseFromParent(); // The pseudo instruction is gone now.
return BB;
}
@@ -1354,7 +1353,7 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
// fold (ladd x, 0, y) -> 0, add x, y iff carry is unused and y has only the
// low bit set
- if (N1C && N1C->isNullValue() && N->hasNUsesOfValue(0, 0)) {
+ if (N1C && N1C->isNullValue() && N->hasNUsesOfValue(0, 0)) {
APInt KnownZero, KnownOne;
APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
VT.getSizeInBits() - 1);
@@ -1377,7 +1376,7 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
EVT VT = N0.getValueType();
// fold (lsub 0, 0, x) -> x, -x iff x has only the low bit set
- if (N0C && N0C->isNullValue() && N1C && N1C->isNullValue()) {
+ if (N0C && N0C->isNullValue() && N1C && N1C->isNullValue()) {
APInt KnownZero, KnownOne;
APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
VT.getSizeInBits() - 1);
@@ -1393,7 +1392,7 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
// fold (lsub x, 0, y) -> 0, sub x, y iff borrow is unused and y has only the
// low bit set
- if (N1C && N1C->isNullValue() && N->hasNUsesOfValue(0, 0)) {
+ if (N1C && N1C->isNullValue() && N->hasNUsesOfValue(0, 0)) {
APInt KnownZero, KnownOne;
APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
VT.getSizeInBits() - 1);
@@ -1557,7 +1556,7 @@ static inline bool isImmUs4(int64_t val)
/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
bool
-XCoreTargetLowering::isLegalAddressingMode(const AddrMode &AM,
+XCoreTargetLowering::isLegalAddressingMode(const AddrMode &AM,
const Type *Ty) const {
if (Ty->getTypeID() == Type::VoidTyID)
return AM.Scale == 0 && isImmUs(AM.BaseOffs) && isImmUs4(AM.BaseOffs);
@@ -1568,7 +1567,7 @@ XCoreTargetLowering::isLegalAddressingMode(const AddrMode &AM,
return Size >= 4 && !AM.HasBaseReg && AM.Scale == 0 &&
AM.BaseOffs%4 == 0;
}
-
+
switch (Size) {
case 1:
// reg + imm
@@ -1593,7 +1592,7 @@ XCoreTargetLowering::isLegalAddressingMode(const AddrMode &AM,
// reg + reg<<2
return AM.Scale == 4 && AM.BaseOffs == 0;
}
-
+
return false;
}
@@ -1603,7 +1602,7 @@ XCoreTargetLowering::isLegalAddressingMode(const AddrMode &AM,
std::vector<unsigned> XCoreTargetLowering::
getRegClassForInlineAsmConstraint(const std::string &Constraint,
- EVT VT) const
+ EVT VT) const
{
if (Constraint.size() != 1)
return std::vector<unsigned>();
@@ -1611,9 +1610,9 @@ getRegClassForInlineAsmConstraint(const std::string &Constraint,
switch (Constraint[0]) {
default : break;
case 'r':
- return make_vector<unsigned>(XCore::R0, XCore::R1, XCore::R2,
- XCore::R3, XCore::R4, XCore::R5,
- XCore::R6, XCore::R7, XCore::R8,
+ return make_vector<unsigned>(XCore::R0, XCore::R1, XCore::R2,
+ XCore::R3, XCore::R4, XCore::R5,
+ XCore::R6, XCore::R7, XCore::R8,
XCore::R9, XCore::R10, XCore::R11, 0);
break;
}
diff --git a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h
index 7e5dd2e8e512e..bb3f2cc038e7f 100644
--- a/lib/Target/XCore/XCoreISelLowering.h
+++ b/lib/Target/XCore/XCoreISelLowering.h
@@ -20,11 +20,11 @@
#include "XCore.h"
namespace llvm {
-
+
// Forward delcarations
class XCoreSubtarget;
class XCoreTargetMachine;
-
+
namespace XCoreISD {
enum NodeType {
// Start the numbering where the builtin ops and target ops leave off.
@@ -38,16 +38,16 @@ namespace llvm {
// dp relative address
DPRelativeWrapper,
-
+
// cp relative address
CPRelativeWrapper,
-
+
// Store word to stack
STWSP,
// Corresponds to retsp instruction
RETSP,
-
+
// Corresponds to LADD instruction
LADD,
@@ -74,13 +74,14 @@ namespace llvm {
//===--------------------------------------------------------------------===//
// TargetLowering Implementation
//===--------------------------------------------------------------------===//
- class XCoreTargetLowering : public TargetLowering
+ class XCoreTargetLowering : public TargetLowering
{
public:
explicit XCoreTargetLowering(XCoreTargetMachine &TM);
virtual unsigned getJumpTableEncoding() const;
+ virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i32; }
/// LowerOperation - Provide custom lowering hooks for some operations.
virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
@@ -91,10 +92,10 @@ namespace llvm {
virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
SelectionDAG &DAG) const;
- /// getTargetNodeName - This method returns the name of a target specific
+ /// getTargetNodeName - This method returns the name of a target specific
// DAG node.
virtual const char *getTargetNodeName(unsigned Opcode) const;
-
+
virtual MachineBasicBlock *
EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *MBB) const;
@@ -108,7 +109,7 @@ namespace llvm {
private:
const XCoreTargetMachine &TM;
const XCoreSubtarget &Subtarget;
-
+
// Lower Operand helpers
SDValue LowerCCCArguments(SDValue Chain,
CallingConv::ID CallConv,
@@ -148,12 +149,12 @@ namespace llvm {
SDValue LowerSMUL_LOHI(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
-
+
// Inline asm support
std::vector<unsigned>
getRegClassForInlineAsmConstraint(const std::string &Constraint,
EVT VT) const;
-
+
// Expand specifics
SDValue TryExpandADDWithMul(SDNode *Op, SelectionDAG &DAG) const;
SDValue ExpandADDSUB(SDNode *Op, SelectionDAG &DAG) const;
diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td
index 38cc734ce7c3e..ecdd4cb630006 100644
--- a/lib/Target/XCore/XCoreInstrInfo.td
+++ b/lib/Target/XCore/XCoreInstrInfo.td
@@ -727,7 +727,7 @@ def NEG : _F2R<(outs GRRegs:$dst), (ins GRRegs:$b),
"neg $dst, $b",
[(set GRRegs:$dst, (ineg GRRegs:$b))]>;
-// TODO setd, eet, eef, getts, setpt, outshr, inshr, testwct, tinitpc, tinitdp,
+// TODO setd, eet, eef, testwct, tinitpc, tinitdp,
// tinitsp, tinitcp, tsetmr, sext (reg), zext (reg)
let Constraints = "$src1 = $dst" in {
let neverHasSideEffects = 1 in
@@ -758,6 +758,14 @@ def GETR_rus : _FRUS<(outs GRRegs:$dst), (ins i32imm:$type),
"getr $dst, $type",
[(set GRRegs:$dst, (int_xcore_getr immUs:$type))]>;
+def GETTS_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r),
+ "getts $dst, res[$r]",
+ [(set GRRegs:$dst, (int_xcore_getts GRRegs:$r))]>;
+
+def SETPT_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val),
+ "setpt res[$r], $val",
+ [(int_xcore_setpt GRRegs:$r, GRRegs:$val)]>;
+
def OUTCT_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val),
"outct res[$r], $val",
[(int_xcore_outct GRRegs:$r, GRRegs:$val)]>;
@@ -774,6 +782,11 @@ def OUT_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val),
"out res[$r], $val",
[(int_xcore_out GRRegs:$r, GRRegs:$val)]>;
+let Constraints = "$src = $dst" in
+def OUTSHR_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r, GRRegs:$src),
+ "outshr res[$r], $src",
+ [(set GRRegs:$dst, (int_xcore_outshr GRRegs:$r, GRRegs:$src))]>;
+
def INCT_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r),
"inct $dst, res[$r]",
[(set GRRegs:$dst, (int_xcore_inct GRRegs:$r))]>;
@@ -786,6 +799,11 @@ def IN_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r),
"in $dst, res[$r]",
[(set GRRegs:$dst, (int_xcore_in GRRegs:$r))]>;
+let Constraints = "$src = $dst" in
+def INSHR_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r, GRRegs:$src),
+ "inshr $dst, res[$r]",
+ [(set GRRegs:$dst, (int_xcore_inshr GRRegs:$r, GRRegs:$src))]>;
+
def CHKCT_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val),
"chkct res[$r], $val",
[(int_xcore_chkct GRRegs:$r, GRRegs:$val)]>;
@@ -799,7 +817,7 @@ def SETD_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val),
[(int_xcore_setd GRRegs:$r, GRRegs:$val)]>;
// Two operand long
-// TODO settw, setclk, setrdy, setpsc, endin, peek,
+// TODO setclk, setrdy, setpsc, endin, peek,
// getd, testlcl, tinitlr, getps, setps
def BITREV_l2r : _FL2R<(outs GRRegs:$dst), (ins GRRegs:$src),
"bitrev $dst, $src",
@@ -813,13 +831,17 @@ def CLZ_l2r : _FL2R<(outs GRRegs:$dst), (ins GRRegs:$src),
"clz $dst, $src",
[(set GRRegs:$dst, (ctlz GRRegs:$src))]>;
-def SETC_l2r : _FRU6<(outs), (ins GRRegs:$r, GRRegs:$val),
+def SETC_l2r : _FL2R<(outs), (ins GRRegs:$r, GRRegs:$val),
"setc res[$r], $val",
[(int_xcore_setc GRRegs:$r, GRRegs:$val)]>;
+def SETTW_l2r : _FL2R<(outs), (ins GRRegs:$r, GRRegs:$val),
+ "settw res[$r], $val",
+ [(int_xcore_settw GRRegs:$r, GRRegs:$val)]>;
+
// One operand short
-// TODO edu, eeu, waitet, waitef, tstart, msync, mjoin, syncr, clrtp
-// setdp, setcp, setv, setev, kcall
+// TODO edu, eeu, waitet, waitef, tstart, msync, mjoin, clrtp
+// setdp, setcp, setev, kcall
// dgetreg
let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1 in
def BAU_1r : _F1R<(outs), (ins GRRegs:$addr),
@@ -859,20 +881,41 @@ def BLA_1r : _F1R<(outs), (ins GRRegs:$addr, variable_ops),
[(XCoreBranchLink GRRegs:$addr)]>;
}
+def SYNCR_1r : _F1R<(outs), (ins GRRegs:$r),
+ "syncr res[$r]",
+ [(int_xcore_syncr GRRegs:$r)]>;
+
def FREER_1r : _F1R<(outs), (ins GRRegs:$r),
"freer res[$r]",
[(int_xcore_freer GRRegs:$r)]>;
+let Uses=[R11] in
+def SETV_1r : _F1R<(outs), (ins GRRegs:$r),
+ "setv res[$r], r11",
+ [(int_xcore_setv GRRegs:$r, R11)]>;
+
+def EEU_1r : _F1R<(outs), (ins GRRegs:$r),
+ "eeu res[$r]",
+ [(int_xcore_eeu GRRegs:$r)]>;
+
// Zero operand short
-// TODO waiteu, clre, ssync, freet, ldspc, stspc, ldssr, stssr, ldsed, stsed,
+// TODO ssync, freet, ldspc, stspc, ldssr, stssr, ldsed, stsed,
// stet, geted, getet, getkep, getksp, setkep, getid, kret, dcall, dret,
// dentsp, drestsp
+def CLRE_0R : _F0R<(outs), (ins), "clre", [(int_xcore_clre)]>;
+
let Defs = [R11] in
def GETID_0R : _F0R<(outs), (ins),
"get r11, id",
[(set R11, (int_xcore_getid))]>;
+let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1,
+ hasSideEffects = 1 in
+def WAITEU_0R : _F0R<(outs), (ins),
+ "waiteu",
+ [(brind (int_xcore_waitevent))]>;
+
//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
//===----------------------------------------------------------------------===//