diff options
Diffstat (limited to 'lib/Target')
234 files changed, 5888 insertions, 3481 deletions
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index 8d9c62253cdd1..b4dec0cfd126f 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -124,7 +124,8 @@ def : Processor<"arm1156t2f-s", ARMV6Itineraries, def : Processor<"cortex-a8", CortexA8Itineraries, [ArchV7A, FeatureThumb2, FeatureNEON, FeatureHasSlowVMLx, FeatureNEONForFP]>; -def : ProcNoItin<"cortex-a9", [ArchV7A, FeatureThumb2, FeatureNEON]>; +def : Processor<"cortex-a9", CortexA9Itineraries, + [ArchV7A, FeatureThumb2, FeatureNEON]>; //===----------------------------------------------------------------------===// // Register File Description diff --git a/lib/Target/ARM/ARMAddressingModes.h b/lib/Target/ARM/ARMAddressingModes.h index ea62c3330e643..e68354a42f8da 100644 --- a/lib/Target/ARM/ARMAddressingModes.h +++ b/lib/Target/ARM/ARMAddressingModes.h @@ -151,22 +151,13 @@ namespace ARM_AM { if ((rotr32(Imm, RotAmt) & ~255U) == 0) return (32-RotAmt)&31; // HW rotates right, not left. - // For values like 0xF000000F, we should skip the first run of ones, then + // For values like 0xF000000F, we should ignore the low 6 bits, then // retry the hunt. - if (Imm & 1) { - unsigned TrailingOnes = CountTrailingZeros_32(~Imm); - if (TrailingOnes != 32) { // Avoid overflow on 0xFFFFFFFF - // Restart the search for a high-order bit after the initial seconds of - // ones. - unsigned TZ2 = CountTrailingZeros_32(Imm & ~((1 << TrailingOnes)-1)); - - // Rotate amount must be even. - unsigned RotAmt2 = TZ2 & ~1; - - // If this fits, use it. - if (RotAmt2 != 32 && (rotr32(Imm, RotAmt2) & ~255U) == 0) - return (32-RotAmt2)&31; // HW rotates right, not left. - } + if (Imm & 63U) { + unsigned TZ2 = CountTrailingZeros_32(Imm & ~63U); + unsigned RotAmt2 = TZ2 & ~1; + if ((rotr32(Imm, RotAmt2) & ~255U) == 0) + return (32-RotAmt2)&31; // HW rotates right, not left. } // Otherwise, we have no way to cover this span of bits with a single diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 1995f79fa3739..a1938582ae054 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -467,6 +467,7 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { case TargetOpcode::KILL: case TargetOpcode::DBG_LABEL: case TargetOpcode::EH_LABEL: + case TargetOpcode::DBG_VALUE: return 0; } break; @@ -481,10 +482,11 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { // operand #2. return MI->getOperand(2).getImm(); case ARM::Int_eh_sjlj_setjmp: + case ARM::Int_eh_sjlj_setjmp_nofp: return 24; case ARM::tInt_eh_sjlj_setjmp: - return 14; case ARM::t2Int_eh_sjlj_setjmp: + case ARM::t2Int_eh_sjlj_setjmp_nofp: return 14; case ARM::BR_JTr: case ARM::BR_JTm: @@ -815,6 +817,16 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, } } +MachineInstr* +ARMBaseInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, + int FrameIx, uint64_t Offset, + const MDNode *MDPtr, + DebugLoc DL) const { + MachineInstrBuilder MIB = BuildMI(MF, DL, get(ARM::DBG_VALUE)) + .addFrameIndex(FrameIx).addImm(0).addImm(Offset).addMetadata(MDPtr); + return &*MIB; +} + MachineInstr *ARMBaseInstrInfo:: foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, const SmallVectorImpl<unsigned> &Ops, int FI) const { diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h index 292c4984dd63a..7a5630ea37c5f 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/lib/Target/ARM/ARMBaseInstrInfo.h @@ -269,6 +269,12 @@ public: unsigned DestReg, int FrameIndex, const TargetRegisterClass *RC) const; + virtual MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF, + int FrameIx, + uint64_t Offset, + const MDNode *MDPtr, + DebugLoc DL) const; + virtual bool canFoldMemoryOperand(const MachineInstr *MI, const SmallVectorImpl<unsigned> &Ops) const; diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index f1625469085ea..bc12187436841 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -38,11 +38,14 @@ #include "llvm/ADT/BitVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/CommandLine.h" -using namespace llvm; +namespace llvm { cl::opt<bool> ReuseFrameIndexVals("arm-reuse-frame-index-vals", cl::Hidden, cl::init(true), cl::desc("Reuse repeated frame index values")); +} + +using namespace llvm; unsigned ARMBaseRegisterInfo::getRegisterNumbering(unsigned RegEnum, bool *isSPVFP) { @@ -478,7 +481,7 @@ ARMBaseRegisterInfo::UpdateRegAllocHint(unsigned Reg, unsigned NewReg, /// bool ARMBaseRegisterInfo::hasFP(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); - return ((NoFramePointerElim && MFI->hasCalls())|| + return ((DisableFramePointerElim(MF) && MFI->hasCalls())|| needsStackRealignment(MF) || MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken()); @@ -506,7 +509,7 @@ needsStackRealignment(const MachineFunction &MF) const { bool ARMBaseRegisterInfo:: cannotEliminateFrame(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); - if (NoFramePointerElim && MFI->hasCalls()) + if (DisableFramePointerElim(MF) && MFI->hasCalls()) return true; return MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken() || needsStackRealignment(MF); @@ -1050,7 +1053,7 @@ emitLoadConstPool(MachineBasicBlock &MBB, unsigned PredReg) const { MachineFunction &MF = *MBB.getParent(); MachineConstantPool *ConstantPool = MF.getConstantPool(); - Constant *C = + const Constant *C = ConstantInt::get(Type::getInt32Ty(MF.getFunction()->getContext()), Val); unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4); @@ -1180,6 +1183,13 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, SPAdj = 0; Offset += SPAdj; + // Special handling of dbg_value instructions. + if (MI.isDebugValue()) { + MI.getOperand(i). ChangeToRegister(FrameReg, false /*isDef*/); + MI.getOperand(i+1).ChangeToImmediate(Offset); + return 0; + } + // Modify MI as necessary to handle as much of 'Offset' as possible bool Done = false; if (!AFI->isThumbFunction()) diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp index e7aa0c86d40ee..f84f85a86141b 100644 --- a/lib/Target/ARM/ARMCodeEmitter.cpp +++ b/lib/Target/ARM/ARMCodeEmitter.cpp @@ -64,7 +64,8 @@ namespace { static char ID; public: ARMCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce) - : MachineFunctionPass(&ID), JTI(0), II((ARMInstrInfo*)tm.getInstrInfo()), + : MachineFunctionPass(&ID), JTI(0), + II((const ARMInstrInfo *)tm.getInstrInfo()), TD(tm.getTargetData()), TM(tm), MCE(mce), MCPEs(0), MJTEs(0), IsPIC(TM.getRelocationModel() == Reloc::PIC_) {} @@ -150,7 +151,7 @@ namespace { /// Routines that handle operands which add machine relocations which are /// fixed up by the relocation stage. - void emitGlobalAddress(GlobalValue *GV, unsigned Reloc, + void emitGlobalAddress(const GlobalValue *GV, unsigned Reloc, bool MayNeedFarStub, bool Indirect, intptr_t ACPV = 0); void emitExternalSymbolAddress(const char *ES, unsigned Reloc); @@ -174,9 +175,9 @@ bool ARMCodeEmitter::runOnMachineFunction(MachineFunction &MF) { assert((MF.getTarget().getRelocationModel() != Reloc::Default || MF.getTarget().getRelocationModel() != Reloc::Static) && "JIT relocation model must be set to static or default!"); - JTI = ((ARMTargetMachine&)MF.getTarget()).getJITInfo(); - II = ((ARMTargetMachine&)MF.getTarget()).getInstrInfo(); - TD = ((ARMTargetMachine&)MF.getTarget()).getTargetData(); + JTI = ((ARMTargetMachine &)MF.getTarget()).getJITInfo(); + II = ((const ARMTargetMachine &)MF.getTarget()).getInstrInfo(); + TD = ((const ARMTargetMachine &)MF.getTarget()).getTargetData(); Subtarget = &TM.getSubtarget<ARMSubtarget>(); MCPEs = &MF.getConstantPool()->getConstants(); MJTEs = 0; @@ -249,14 +250,16 @@ unsigned ARMCodeEmitter::getMachineOpValue(const MachineInstr &MI, /// emitGlobalAddress - Emit the specified address to the code stream. /// -void ARMCodeEmitter::emitGlobalAddress(GlobalValue *GV, unsigned Reloc, +void ARMCodeEmitter::emitGlobalAddress(const GlobalValue *GV, unsigned Reloc, bool MayNeedFarStub, bool Indirect, intptr_t ACPV) { MachineRelocation MR = Indirect ? MachineRelocation::getIndirectSymbol(MCE.getCurrentPCOffset(), Reloc, - GV, ACPV, MayNeedFarStub) + const_cast<GlobalValue *>(GV), + ACPV, MayNeedFarStub) : MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc, - GV, ACPV, MayNeedFarStub); + const_cast<GlobalValue *>(GV), ACPV, + MayNeedFarStub); MCE.addRelocation(MR); } @@ -391,7 +394,7 @@ void ARMCodeEmitter::emitConstPoolInstruction(const MachineInstr &MI) { << (void*)MCE.getCurrentPCValue() << " " << *ACPV << '\n'); assert(ACPV->isGlobalValue() && "unsupported constant pool value"); - GlobalValue *GV = ACPV->getGV(); + const GlobalValue *GV = ACPV->getGV(); if (GV) { Reloc::Model RelocM = TM.getRelocationModel(); emitGlobalAddress(GV, ARM::reloc_arm_machine_cp_entry, @@ -403,7 +406,7 @@ void ARMCodeEmitter::emitConstPoolInstruction(const MachineInstr &MI) { } emitWordLE(0); } else { - Constant *CV = MCPE.Val.ConstVal; + const Constant *CV = MCPE.Val.ConstVal; DEBUG({ errs() << " ** Constant pool #" << CPI << " @ " @@ -415,7 +418,7 @@ void ARMCodeEmitter::emitConstPoolInstruction(const MachineInstr &MI) { errs() << '\n'; }); - if (GlobalValue *GV = dyn_cast<GlobalValue>(CV)) { + if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) { emitGlobalAddress(GV, ARM::reloc_arm_absolute, isa<Function>(GV), false); emitWordLE(0); } else if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) { @@ -559,7 +562,7 @@ void ARMCodeEmitter::emitPseudoInstruction(const MachineInstr &MI) { // We allow inline assembler nodes with empty bodies - they can // implicitly define registers, which is ok for JIT. if (MI.getOperand(0).getSymbolName()[0]) { - llvm_report_error("JIT does not support inline asm!"); + report_fatal_error("JIT does not support inline asm!"); } break; } @@ -704,7 +707,7 @@ void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI, const TargetInstrDesc &TID = MI.getDesc(); if (TID.Opcode == ARM::BFC) { - llvm_report_error("ARMv6t2 JIT is not yet supported."); + report_fatal_error("ARMv6t2 JIT is not yet supported."); } // Part of binary is determined by TableGn. diff --git a/lib/Target/ARM/ARMConstantPoolValue.cpp b/lib/Target/ARM/ARMConstantPoolValue.cpp index 90dd0c7fd9622..f13ccc6384484 100644 --- a/lib/Target/ARM/ARMConstantPoolValue.cpp +++ b/lib/Target/ARM/ARMConstantPoolValue.cpp @@ -21,7 +21,7 @@ #include <cstdlib> using namespace llvm; -ARMConstantPoolValue::ARMConstantPoolValue(Constant *cval, unsigned id, +ARMConstantPoolValue::ARMConstantPoolValue(const Constant *cval, unsigned id, ARMCP::ARMCPKind K, unsigned char PCAdj, const char *Modif, @@ -39,16 +39,17 @@ ARMConstantPoolValue::ARMConstantPoolValue(LLVMContext &C, CVal(NULL), S(strdup(s)), LabelId(id), Kind(ARMCP::CPExtSymbol), PCAdjust(PCAdj), Modifier(Modif), AddCurrentAddress(AddCA) {} -ARMConstantPoolValue::ARMConstantPoolValue(GlobalValue *gv, const char *Modif) +ARMConstantPoolValue::ARMConstantPoolValue(const GlobalValue *gv, + const char *Modif) : MachineConstantPoolValue((const Type*)Type::getInt32Ty(gv->getContext())), CVal(gv), S(NULL), LabelId(0), Kind(ARMCP::CPValue), PCAdjust(0), Modifier(Modif) {} -GlobalValue *ARMConstantPoolValue::getGV() const { +const GlobalValue *ARMConstantPoolValue::getGV() const { return dyn_cast_or_null<GlobalValue>(CVal); } -BlockAddress *ARMConstantPoolValue::getBlockAddress() const { +const BlockAddress *ARMConstantPoolValue::getBlockAddress() const { return dyn_cast_or_null<BlockAddress>(CVal); } diff --git a/lib/Target/ARM/ARMConstantPoolValue.h b/lib/Target/ARM/ARMConstantPoolValue.h index 741acde27b1da..6f4eddf7361d9 100644 --- a/lib/Target/ARM/ARMConstantPoolValue.h +++ b/lib/Target/ARM/ARMConstantPoolValue.h @@ -36,7 +36,7 @@ namespace ARMCP { /// represent PC-relative displacement between the address of the load /// instruction and the constant being loaded, i.e. (&GV-(LPIC+8)). class ARMConstantPoolValue : public MachineConstantPoolValue { - Constant *CVal; // Constant being loaded. + const Constant *CVal; // Constant being loaded. const char *S; // ExtSymbol being loaded. unsigned LabelId; // Label id of the load. ARMCP::ARMCPKind Kind; // Kind of constant. @@ -46,20 +46,20 @@ class ARMConstantPoolValue : public MachineConstantPoolValue { bool AddCurrentAddress; public: - ARMConstantPoolValue(Constant *cval, unsigned id, + ARMConstantPoolValue(const Constant *cval, unsigned id, ARMCP::ARMCPKind Kind = ARMCP::CPValue, unsigned char PCAdj = 0, const char *Modifier = NULL, bool AddCurrentAddress = false); ARMConstantPoolValue(LLVMContext &C, const char *s, unsigned id, unsigned char PCAdj = 0, const char *Modifier = NULL, bool AddCurrentAddress = false); - ARMConstantPoolValue(GlobalValue *GV, const char *Modifier); + ARMConstantPoolValue(const GlobalValue *GV, const char *Modifier); ARMConstantPoolValue(); ~ARMConstantPoolValue(); - GlobalValue *getGV() const; + const GlobalValue *getGV() const; const char *getSymbol() const { return S; } - BlockAddress *getBlockAddress() const; + const BlockAddress *getBlockAddress() const; const char *getModifier() const { return Modifier; } bool hasModifier() const { return Modifier != NULL; } bool mustAddCurrentAddress() const { return AddCurrentAddress; } diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index 1b8727d9848d8..845d088f56c3e 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -91,7 +91,7 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { LO16 = LO16.addImm(Lo16); HI16 = HI16.addImm(Hi16); } else { - GlobalValue *GV = MO.getGlobal(); + const GlobalValue *GV = MO.getGlobal(); unsigned TF = MO.getTargetFlags(); LO16 = LO16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_LO16); HI16 = HI16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_HI16); diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index 7d486631897a2..36a1827ce6e8b 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -13,7 +13,6 @@ #include "ARM.h" #include "ARMAddressingModes.h" -#include "ARMISelLowering.h" #include "ARMTargetMachine.h" #include "llvm/CallingConv.h" #include "llvm/Constants.h" @@ -121,9 +120,6 @@ private: SDNode *SelectARMIndexedLoad(SDNode *N); SDNode *SelectT2IndexedLoad(SDNode *N); - /// SelectDYN_ALLOC - Select dynamic alloc for Thumb. - SDNode *SelectDYN_ALLOC(SDNode *N); - /// SelectVLD - Select NEON load intrinsics. NumVecs should be /// 1, 2, 3 or 4. The opcode arrays specify the instructions used for /// loads of D registers and even subregs and odd subregs of Q registers. @@ -146,7 +142,7 @@ private: unsigned *QOpcodes1); /// SelectV6T2BitfieldExtractOp - Select SBFX/UBFX instructions for ARM. - SDNode *SelectV6T2BitfieldExtractOp(SDNode *N, unsigned Opc); + SDNode *SelectV6T2BitfieldExtractOp(SDNode *N, bool isSigned); /// SelectCMOVOp - Select CMOV instructions for ARM. SDNode *SelectCMOVOp(SDNode *N); @@ -939,59 +935,6 @@ SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDNode *N) { return NULL; } -SDNode *ARMDAGToDAGISel::SelectDYN_ALLOC(SDNode *N) { - DebugLoc dl = N->getDebugLoc(); - EVT VT = N->getValueType(0); - SDValue Chain = N->getOperand(0); - SDValue Size = N->getOperand(1); - SDValue Align = N->getOperand(2); - SDValue SP = CurDAG->getRegister(ARM::SP, MVT::i32); - int32_t AlignVal = cast<ConstantSDNode>(Align)->getSExtValue(); - if (AlignVal < 0) - // We need to align the stack. Use Thumb1 tAND which is the only thumb - // instruction that can read and write SP. This matches to a pseudo - // instruction that has a chain to ensure the result is written back to - // the stack pointer. - SP = SDValue(CurDAG->getMachineNode(ARM::tANDsp, dl, VT, SP, Align), 0); - - bool isC = isa<ConstantSDNode>(Size); - uint32_t C = isC ? cast<ConstantSDNode>(Size)->getZExtValue() : ~0UL; - // Handle the most common case for both Thumb1 and Thumb2: - // tSUBspi - immediate is between 0 ... 508 inclusive. - if (C <= 508 && ((C & 3) == 0)) - // FIXME: tSUBspi encode scale 4 implicitly. - return CurDAG->SelectNodeTo(N, ARM::tSUBspi_, VT, MVT::Other, SP, - CurDAG->getTargetConstant(C/4, MVT::i32), - Chain); - - if (Subtarget->isThumb1Only()) { - // Use tADDspr since Thumb1 does not have a sub r, sp, r. ARMISelLowering - // should have negated the size operand already. FIXME: We can't insert - // new target independent node at this stage so we are forced to negate - // it earlier. Is there a better solution? - return CurDAG->SelectNodeTo(N, ARM::tADDspr_, VT, MVT::Other, SP, Size, - Chain); - } else if (Subtarget->isThumb2()) { - if (isC && Predicate_t2_so_imm(Size.getNode())) { - // t2SUBrSPi - SDValue Ops[] = { SP, CurDAG->getTargetConstant(C, MVT::i32), Chain }; - return CurDAG->SelectNodeTo(N, ARM::t2SUBrSPi_, VT, MVT::Other, Ops, 3); - } else if (isC && Predicate_imm0_4095(Size.getNode())) { - // t2SUBrSPi12 - SDValue Ops[] = { SP, CurDAG->getTargetConstant(C, MVT::i32), Chain }; - return CurDAG->SelectNodeTo(N, ARM::t2SUBrSPi12_, VT, MVT::Other, Ops, 3); - } else { - // t2SUBrSPs - SDValue Ops[] = { SP, Size, - getI32Imm(ARM_AM::getSORegOpc(ARM_AM::lsl,0)), Chain }; - return CurDAG->SelectNodeTo(N, ARM::t2SUBrSPs_, VT, MVT::Other, Ops, 4); - } - } - - // FIXME: Add ADD / SUB sp instructions for ARM. - return 0; -} - /// PairDRegs - Insert a pair of double registers into an implicit def to /// form a quad register. SDNode *ARMDAGToDAGISel::PairDRegs(EVT VT, SDValue V0, SDValue V1) { @@ -1052,7 +995,7 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs, break; } - SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32); + SDValue Pred = getAL(CurDAG); SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); if (is64BitVector) { unsigned Opc = DOpcodes[OpcodeIndex]; @@ -1142,7 +1085,7 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs, break; } - SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32); + SDValue Pred = getAL(CurDAG); SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); SmallVector<SDValue, 10> Ops; @@ -1249,7 +1192,7 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, case MVT::v4i32: OpcodeIndex = 1; break; } - SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32); + SDValue Pred = getAL(CurDAG); SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); SmallVector<SDValue, 10> Ops; @@ -1305,10 +1248,42 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, } SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N, - unsigned Opc) { + bool isSigned) { if (!Subtarget->hasV6T2Ops()) return NULL; + unsigned Opc = isSigned ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX) + : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX); + + + // For unsigned extracts, check for a shift right and mask + unsigned And_imm = 0; + if (N->getOpcode() == ISD::AND) { + if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) { + + // The immediate is a mask of the low bits iff imm & (imm+1) == 0 + if (And_imm & (And_imm + 1)) + return NULL; + + unsigned Srl_imm = 0; + if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, + Srl_imm)) { + assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!"); + + unsigned Width = CountTrailingOnes_32(And_imm); + unsigned LSB = Srl_imm; + SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); + SDValue Ops[] = { N->getOperand(0).getOperand(0), + CurDAG->getTargetConstant(LSB, MVT::i32), + CurDAG->getTargetConstant(Width, MVT::i32), + getAL(CurDAG), Reg0 }; + return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5); + } + } + return NULL; + } + + // Otherwise, we're looking for a shift of a shift unsigned Shl_imm = 0; if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) { assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!"); @@ -1531,7 +1506,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { SDNode *ResNode; if (Subtarget->isThumb1Only()) { - SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32); + SDValue Pred = getAL(CurDAG); SDValue PredReg = CurDAG->getRegister(0, MVT::i32); SDValue Ops[] = { CPIdx, Pred, PredReg, CurDAG->getEntryNode() }; ResNode = CurDAG->getMachineNode(ARM::tLDRcp, dl, MVT::i32, MVT::Other, @@ -1571,16 +1546,12 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5); } } - case ARMISD::DYN_ALLOC: - return SelectDYN_ALLOC(N); case ISD::SRL: - if (SDNode *I = SelectV6T2BitfieldExtractOp(N, - Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX)) + if (SDNode *I = SelectV6T2BitfieldExtractOp(N, false)) return I; break; case ISD::SRA: - if (SDNode *I = SelectV6T2BitfieldExtractOp(N, - Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)) + if (SDNode *I = SelectV6T2BitfieldExtractOp(N, true)) return I; break; case ISD::MUL: @@ -1624,6 +1595,10 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { } break; case ISD::AND: { + // Check for unsigned bitfield extract + if (SDNode *I = SelectV6T2BitfieldExtractOp(N, false)) + return I; + // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits // of c1 are 0xffff, and lower 16-bit of c2 are 0. That is, the top 16-bits // are entirely contributed by c2 and lower 16-bits are entirely contributed @@ -1708,7 +1683,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { SDValue Chain = N->getOperand(0); SDValue AM5Opc = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::ia, 4), MVT::i32); - SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32); + SDValue Pred = getAL(CurDAG); SDValue PredReg = CurDAG->getRegister(0, MVT::i32); SDValue Ops[] = { N->getOperand(1), AM5Opc, Pred, PredReg, Chain }; return CurDAG->getMachineNode(ARM::VLDMQ, dl, MVT::v2f64, MVT::Other, @@ -1724,7 +1699,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { SDValue Chain = N->getOperand(0); SDValue AM5Opc = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::ia, 4), MVT::i32); - SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32); + SDValue Pred = getAL(CurDAG); SDValue PredReg = CurDAG->getRegister(0, MVT::i32); SDValue Ops[] = { N->getOperand(1), N->getOperand(2), AM5Opc, Pred, PredReg, Chain }; @@ -1816,7 +1791,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { case MVT::v4f32: case MVT::v4i32: Opc = ARM::VZIPq32; break; } - SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32); + SDValue Pred = getAL(CurDAG); SDValue PredReg = CurDAG->getRegister(0, MVT::i32); SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg }; return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops, 4); @@ -1835,7 +1810,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { case MVT::v4f32: case MVT::v4i32: Opc = ARM::VUZPq32; break; } - SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32); + SDValue Pred = getAL(CurDAG); SDValue PredReg = CurDAG->getRegister(0, MVT::i32); SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg }; return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops, 4); @@ -1854,7 +1829,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { case MVT::v4f32: case MVT::v4i32: Opc = ARM::VTRNq32; break; } - SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32); + SDValue Pred = getAL(CurDAG); SDValue PredReg = CurDAG->getRegister(0, MVT::i32); SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg }; return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops, 4); diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 77fb0c3cdbd0a..d3842a69d833d 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -40,12 +40,18 @@ #include "llvm/MC/MCSectionMachO.h" #include "llvm/Target/TargetOptions.h" #include "llvm/ADT/VectorExtras.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include <sstream> using namespace llvm; +static cl::opt<bool> +EnableARMLongCalls("arm-long-calls", cl::Hidden, + cl::desc("Generate calls via indirect call instructions."), + cl::init(false)); + static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, @@ -90,6 +96,8 @@ void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT, setOperationAction(ISD::VECTOR_SHUFFLE, VT.getSimpleVT(), Custom); setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Custom); setOperationAction(ISD::EXTRACT_SUBVECTOR, VT.getSimpleVT(), Expand); + setOperationAction(ISD::SELECT, VT.getSimpleVT(), Expand); + setOperationAction(ISD::SELECT_CC, VT.getSimpleVT(), Expand); if (VT.isInteger()) { setOperationAction(ISD::SHL, VT.getSimpleVT(), Custom); setOperationAction(ISD::SRA, VT.getSimpleVT(), Custom); @@ -376,10 +384,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) // FIXME: Shouldn't need this, since no register is used, but the legalizer // doesn't yet know how to not do that for SjLj. setExceptionSelectorRegister(ARM::R0); - if (Subtarget->isThumb()) - setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom); - else - setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom); if (!Subtarget->hasV6Ops() && !Subtarget->isThumb2()) { @@ -783,7 +788,7 @@ ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { // Assign locations to each value returned by this call. SmallVector<CCValAssign, 16> RVLocs; @@ -871,7 +876,7 @@ ARMTargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg, DebugLoc dl, SelectionDAG &DAG, const CCValAssign &VA, - ISD::ArgFlagsTy Flags) { + ISD::ArgFlagsTy Flags) const { unsigned LocMemOffset = VA.getLocMemOffset(); SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset); PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff); @@ -889,7 +894,7 @@ void ARMTargetLowering::PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG, CCValAssign &VA, CCValAssign &NextVA, SDValue &StackPtr, SmallVector<SDValue, 8> &MemOpChains, - ISD::ArgFlagsTy Flags) { + ISD::ArgFlagsTy Flags) const { SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), Arg); @@ -918,7 +923,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { // ARM target does not yet support tail call optimization. isTailCall = false; @@ -1025,8 +1030,44 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, bool isLocalARMFunc = false; MachineFunction &MF = DAG.getMachineFunction(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { - GlobalValue *GV = G->getGlobal(); + + if (EnableARMLongCalls) { + assert (getTargetMachine().getRelocationModel() == Reloc::Static + && "long-calls with non-static relocation model!"); + // Handle a global address or an external symbol. If it's not one of + // those, the target's already in a register, so we don't need to do + // anything extra. + if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { + const GlobalValue *GV = G->getGlobal(); + // Create a constant pool entry for the callee address + unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId(); + ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, + ARMPCLabelIndex, + ARMCP::CPValue, 0); + // Get the address of the callee into a register + SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4); + CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); + Callee = DAG.getLoad(getPointerTy(), dl, + DAG.getEntryNode(), CPAddr, + PseudoSourceValue::getConstantPool(), 0, + false, false, 0); + } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) { + const char *Sym = S->getSymbol(); + + // Create a constant pool entry for the callee address + unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId(); + ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(), + Sym, ARMPCLabelIndex, 0); + // Get the address of the callee into a register + SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4); + CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); + Callee = DAG.getLoad(getPointerTy(), dl, + DAG.getEntryNode(), CPAddr, + PseudoSourceValue::getConstantPool(), 0, + false, false, 0); + } + } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { + const GlobalValue *GV = G->getGlobal(); isDirect = true; bool isExt = GV->isDeclaration() || GV->isWeakForLinker(); bool isStub = (isExt && Subtarget->isTargetDarwin()) && @@ -1049,7 +1090,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); Callee = DAG.getNode(ARMISD::PIC_ADD, dl, getPointerTy(), Callee, PICLabel); - } else + } else Callee = DAG.getTargetGlobalAddress(GV, getPointerTy()); } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { isDirect = true; @@ -1125,7 +1166,7 @@ SDValue ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, - DebugLoc dl, SelectionDAG &DAG) { + DebugLoc dl, SelectionDAG &DAG) const { // CCValAssign - represent the assignment of the return value to a location. SmallVector<CCValAssign, 16> RVLocs; @@ -1232,13 +1273,14 @@ static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) { return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res); } -SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) { +SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op, + SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); unsigned ARMPCLabelIndex = 0; DebugLoc DL = Op.getDebugLoc(); EVT PtrVT = getPointerTy(); - BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); + const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); Reloc::Model RelocM = getTargetMachine().getRelocationModel(); SDValue CPAddr; if (RelocM == Reloc::Static) { @@ -1264,7 +1306,7 @@ SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) { // Lower ISD::GlobalTLSAddress using the "general dynamic" model SDValue ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { DebugLoc dl = GA->getDebugLoc(); EVT PtrVT = getPointerTy(); unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8; @@ -1303,8 +1345,8 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, // "local exec" model. SDValue ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA, - SelectionDAG &DAG) { - GlobalValue *GV = GA->getGlobal(); + SelectionDAG &DAG) const { + const GlobalValue *GV = GA->getGlobal(); DebugLoc dl = GA->getDebugLoc(); SDValue Offset; SDValue Chain = DAG.getEntryNode(); @@ -1350,7 +1392,7 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA, } SDValue -ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) { +ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { // TODO: implement the "local dynamic" model assert(Subtarget->isTargetELF() && "TLS not implemented for non-ELF targets"); @@ -1364,10 +1406,10 @@ ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) { } SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { EVT PtrVT = getPointerTy(); DebugLoc dl = Op.getDebugLoc(); - GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); + const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); Reloc::Model RelocM = getTargetMachine().getRelocationModel(); if (RelocM == Reloc::PIC_) { bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility(); @@ -1404,13 +1446,13 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, } SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); unsigned ARMPCLabelIndex = 0; EVT PtrVT = getPointerTy(); DebugLoc dl = Op.getDebugLoc(); - GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); + const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); Reloc::Model RelocM = getTargetMachine().getRelocationModel(); SDValue CPAddr; if (RelocM == Reloc::Static) @@ -1443,7 +1485,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, } SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, - SelectionDAG &DAG){ + SelectionDAG &DAG) const { assert(Subtarget->isTargetELF() && "GLOBAL OFFSET TABLE not implemented for non-ELF targets"); MachineFunction &MF = DAG.getMachineFunction(); @@ -1466,7 +1508,8 @@ SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, SDValue ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, - const ARMSubtarget *Subtarget) { + const ARMSubtarget *Subtarget) + const { unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); DebugLoc dl = Op.getDebugLoc(); switch (IntNo) { @@ -1533,20 +1576,23 @@ static SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG, return Res; } -static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG, - unsigned VarArgsFrameIndex) { +static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) { + MachineFunction &MF = DAG.getMachineFunction(); + ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>(); + // vastart just stores the address of the VarArgsFrameIndex slot into the // memory location argument. DebugLoc dl = Op.getDebugLoc(); EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); - SDValue FR = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT); + SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0, false, false, 0); } SDValue -ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) { +ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, + SelectionDAG &DAG) const { SDNode *Node = Op.getNode(); DebugLoc dl = Node->getDebugLoc(); EVT VT = Node->getValueType(0); @@ -1595,7 +1641,7 @@ ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) { SDValue ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, SDValue &Root, SelectionDAG &DAG, - DebugLoc dl) { + DebugLoc dl) const { MachineFunction &MF = DAG.getMachineFunction(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); @@ -1611,10 +1657,8 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, SDValue ArgValue2; if (NextVA.isMemLoc()) { - unsigned ArgSize = NextVA.getLocVT().getSizeInBits()/8; MachineFrameInfo *MFI = MF.getFrameInfo(); - int FI = MFI->CreateFixedObject(ArgSize, NextVA.getLocMemOffset(), - true, false); + int FI = MFI->CreateFixedObject(4, NextVA.getLocMemOffset(), true, false); // Create load node to retrieve arguments from the stack. SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); @@ -1635,7 +1679,8 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) + const { MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); @@ -1663,14 +1708,22 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, if (VA.needsCustom()) { // f64 and vector types are split up into multiple registers or // combinations of registers and stack slots. - RegVT = MVT::i32; - if (VA.getLocVT() == MVT::v2f64) { SDValue ArgValue1 = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl); VA = ArgLocs[++i]; // skip ahead to next loc - SDValue ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i], - Chain, DAG, dl); + SDValue ArgValue2; + if (VA.isMemLoc()) { + int FI = MFI->CreateFixedObject(8, VA.getLocMemOffset(), + true, false); + SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); + ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN, + PseudoSourceValue::getFixedStack(FI), 0, + false, false, 0); + } else { + ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i], + Chain, DAG, dl); + } ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64); ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, ArgValue, ArgValue1, DAG.getIntPtrConstant(0)); @@ -1758,10 +1811,12 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, // to their spots on the stack so that they may be loaded by deferencing // the result of va_next. AFI->setVarArgsRegSaveSize(VARegSaveSize); - VarArgsFrameIndex = MFI->CreateFixedObject(VARegSaveSize, ArgOffset + - VARegSaveSize - VARegSize, - true, false); - SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy()); + AFI->setVarArgsFrameIndex( + MFI->CreateFixedObject(VARegSaveSize, + ArgOffset + VARegSaveSize - VARegSize, + true, false)); + SDValue FIN = DAG.getFrameIndex(AFI->getVarArgsFrameIndex(), + getPointerTy()); SmallVector<SDValue, 4> MemOps; for (; NumGPRs < 4; ++NumGPRs) { @@ -1773,9 +1828,10 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, unsigned VReg = MF.addLiveIn(GPRArgRegs[NumGPRs], RC); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); - SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, - PseudoSourceValue::getFixedStack(VarArgsFrameIndex), 0, - false, false, 0); + SDValue Store = + DAG.getStore(Val.getValue(1), dl, Val, FIN, + PseudoSourceValue::getFixedStack(AFI->getVarArgsFrameIndex()), 0, + false, false, 0); MemOps.push_back(Store); FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN, DAG.getConstant(4, getPointerTy())); @@ -1785,7 +1841,8 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, &MemOps[0], MemOps.size()); } else // This will point to the next argument passed via stack. - VarArgsFrameIndex = MFI->CreateFixedObject(4, ArgOffset, true, false); + AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(4, ArgOffset, + true, false)); } return Chain; @@ -1800,7 +1857,7 @@ static bool isFloatingPointZero(SDValue Op) { if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) { SDValue WrapperOp = Op.getOperand(1).getOperand(0); if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp)) - if (ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal())) + if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal())) return CFP->getValueAPF().isPosZero(); } } @@ -1811,7 +1868,8 @@ static bool isFloatingPointZero(SDValue Op) { /// the given operands. SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, - SDValue &ARMCC, SelectionDAG &DAG, DebugLoc dl) { + SDValue &ARMCC, SelectionDAG &DAG, + DebugLoc dl) const { if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) { unsigned C = RHSC->getZExtValue(); if (!isLegalICmpImmediate(C)) { @@ -1877,7 +1935,7 @@ static SDValue getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG, return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Flag, Cmp); } -SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) { +SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); SDValue LHS = Op.getOperand(0); SDValue RHS = Op.getOperand(1); @@ -1911,7 +1969,7 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) { return Result; } -SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) { +SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = Op.getOperand(0); ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get(); SDValue LHS = Op.getOperand(2); @@ -1945,7 +2003,7 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) { return Res; } -SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) { +SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = Op.getOperand(0); SDValue Table = Op.getOperand(1); SDValue Index = Op.getOperand(2); @@ -2034,7 +2092,7 @@ static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) { return DAG.getNode(ARMISD::CNEG, dl, VT, AbsVal, AbsVal, ARMCC, CCR, Cmp); } -SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) { +SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); MFI->setFrameAddressIsTaken(true); EVT VT = Op.getValueType(); @@ -2055,8 +2113,10 @@ ARMTargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline, - const Value *DstSV, uint64_t DstSVOff, - const Value *SrcSV, uint64_t SrcSVOff){ + const Value *DstSV, + uint64_t DstSVOff, + const Value *SrcSV, + uint64_t SrcSVOff) const { // Do repeated 4-byte loads and stores. To be improved. // This requires 4-byte alignment. if ((Align & 3) != 0) @@ -2157,11 +2217,25 @@ ARMTargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); } +/// ExpandBIT_CONVERT - If the target supports VFP, this function is called to +/// expand a bit convert where either the source or destination type is i64 to +/// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64 +/// operand type is illegal (e.g., v2f32 for a target that doesn't support +/// vectors), since the legalizer won't know what to do with that. static SDValue ExpandBIT_CONVERT(SDNode *N, SelectionDAG &DAG) { - SDValue Op = N->getOperand(0); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); DebugLoc dl = N->getDebugLoc(); - if (N->getValueType(0) == MVT::f64) { - // Turn i64->f64 into VMOVDRR. + SDValue Op = N->getOperand(0); + + // This function is only supposed to be called for i64 types, either as the + // source or destination of the bit convert. + EVT SrcVT = Op.getValueType(); + EVT DstVT = N->getValueType(0); + assert((SrcVT == MVT::i64 || DstVT == MVT::i64) && + "ExpandBIT_CONVERT called for non-i64 type"); + + // Turn i64->f64 into VMOVDRR. + if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) { SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op, DAG.getConstant(0, MVT::i32)); SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op, @@ -2170,11 +2244,14 @@ static SDValue ExpandBIT_CONVERT(SDNode *N, SelectionDAG &DAG) { } // Turn f64->i64 into VMOVRRD. - SDValue Cvt = DAG.getNode(ARMISD::VMOVRRD, dl, - DAG.getVTList(MVT::i32, MVT::i32), &Op, 1); + if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) { + SDValue Cvt = DAG.getNode(ARMISD::VMOVRRD, dl, + DAG.getVTList(MVT::i32, MVT::i32), &Op, 1); + // Merge the pieces into a single i64 value. + return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1)); + } - // Merge the pieces into a single i64 value. - return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1)); + return SDValue(); } /// getZeroVector - Returns a vector of specified type with all zero elements. @@ -2227,7 +2304,8 @@ static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) { /// LowerShiftRightParts - Lower SRA_PARTS, which returns two /// i32 values and take a 2 x i32 value to shift plus a shift amount. -SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) { +SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op, + SelectionDAG &DAG) const { assert(Op.getNumOperands() == 3 && "Not a double-shift!"); EVT VT = Op.getValueType(); unsigned VTBits = VT.getSizeInBits(); @@ -2262,7 +2340,8 @@ SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) { /// LowerShiftLeftParts - Lower SHL_PARTS, which returns two /// i32 values and take a 2 x i32 value to shift plus a shift amount. -SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) { +SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op, + SelectionDAG &DAG) const { assert(Op.getNumOperands() == 3 && "Not a double-shift!"); EVT VT = Op.getValueType(); unsigned VTBits = VT.getSizeInBits(); @@ -3059,7 +3138,7 @@ static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Val); } -SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { +SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { default: llvm_unreachable("Don't know how to custom lower this!"); case ISD::ConstantPool: return LowerConstantPool(Op, DAG); @@ -3072,7 +3151,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { case ISD::BR_CC: return LowerBR_CC(Op, DAG); case ISD::BR_JT: return LowerBR_JT(Op, DAG); case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG); - case ISD::VASTART: return LowerVASTART(Op, DAG, VarArgsFrameIndex); + case ISD::VASTART: return LowerVASTART(Op, DAG); case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG, Subtarget); case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG); @@ -3105,22 +3184,22 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { /// type with new values built out of custom code. void ARMTargetLowering::ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { + SDValue Res; switch (N->getOpcode()) { default: llvm_unreachable("Don't know how to custom expand this!"); - return; + break; case ISD::BIT_CONVERT: - Results.push_back(ExpandBIT_CONVERT(N, DAG)); - return; + Res = ExpandBIT_CONVERT(N, DAG); + break; case ISD::SRL: - case ISD::SRA: { - SDValue Res = LowerShift(N, DAG, Subtarget); - if (Res.getNode()) - Results.push_back(Res); - return; - } + case ISD::SRA: + Res = LowerShift(N, DAG, Subtarget); + break; } + if (Res.getNode()) + Results.push_back(Res); } //===----------------------------------------------------------------------===// @@ -3302,8 +3381,7 @@ ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, MachineBasicBlock * ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *BB, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const { + MachineBasicBlock *BB) const { const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); DebugLoc dl = MI->getDebugLoc(); bool isThumb2 = Subtarget->isThumb2(); @@ -3387,12 +3465,9 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, F->insert(It, sinkMBB); // Update machine-CFG edges by first adding all successors of the current // block to the new block which will contain the Phi node for the select. - // Also inform sdisel of the edge changes. for (MachineBasicBlock::succ_iterator I = BB->succ_begin(), - E = BB->succ_end(); I != E; ++I) { - EM->insert(std::make_pair(*I, sinkMBB)); + E = BB->succ_end(); I != E; ++I) sinkMBB->addSuccessor(*I); - } // Next, remove all successors of the current block, and add the true // and fallthrough blocks as its successors. while (!BB->succ_empty()) diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index fa33ad3075069..d8a230ff697c2 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -159,25 +159,24 @@ namespace llvm { // ARMTargetLowering - ARM Implementation of the TargetLowering interface class ARMTargetLowering : public TargetLowering { - int VarArgsFrameIndex; // FrameIndex for start of varargs area. public: explicit ARMTargetLowering(TargetMachine &TM); - virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG); + virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; /// ReplaceNodeResults - Replace the results of node with an illegal result /// type with new values built out of custom code. /// virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results, - SelectionDAG &DAG); + SelectionDAG &DAG) const; virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; virtual const char *getTargetNodeName(unsigned Opcode) const; - virtual MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *MBB, - DenseMap<MachineBasicBlock*, MachineBasicBlock*>*) const; + virtual MachineBasicBlock * + EmitInstrWithCustomInserter(MachineInstr *MI, + MachineBasicBlock *MBB) const; /// allowsUnalignedMemoryAccesses - Returns true if the target allows /// unaligned memory accesses. of the specified type. @@ -237,7 +236,7 @@ namespace llvm { std::vector<SDValue> &Ops, SelectionDAG &DAG) const; - virtual const ARMSubtarget* getSubtarget() { + virtual const ARMSubtarget* getSubtarget() const { return Subtarget; } @@ -272,54 +271,57 @@ namespace llvm { CCValAssign &VA, CCValAssign &NextVA, SDValue &StackPtr, SmallVector<SDValue, 8> &MemOpChains, - ISD::ArgFlagsTy Flags); + ISD::ArgFlagsTy Flags) const; SDValue GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, - SDValue &Root, SelectionDAG &DAG, DebugLoc dl); + SDValue &Root, SelectionDAG &DAG, + DebugLoc dl) const; CCAssignFn *CCAssignFnForNode(CallingConv::ID CC, bool Return, bool isVarArg) const; SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg, DebugLoc dl, SelectionDAG &DAG, const CCValAssign &VA, - ISD::ArgFlagsTy Flags); - SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG); + ISD::ArgFlagsTy Flags) const; + SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, - const ARMSubtarget *Subtarget); - SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG); - SDValue LowerGlobalAddressDarwin(SDValue Op, SelectionDAG &DAG); - SDValue LowerGlobalAddressELF(SDValue Op, SelectionDAG &DAG); - SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG); + const ARMSubtarget *Subtarget) const; + SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerGlobalAddressDarwin(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerGlobalAddressELF(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, - SelectionDAG &DAG); + SelectionDAG &DAG) const; SDValue LowerToTLSExecModels(GlobalAddressSDNode *GA, - SelectionDAG &DAG); - SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG); - SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG); - SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG); - SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG); - SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG); - SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG); - SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG); - SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG); + SelectionDAG &DAG) const; + SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const; SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline, - const Value *DstSV, uint64_t DstSVOff, - const Value *SrcSV, uint64_t SrcSVOff); + const Value *DstSV, + uint64_t DstSVOff, + const Value *SrcSV, + uint64_t SrcSVOff) const; SDValue LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerCall(SDValue Chain, SDValue Callee, @@ -328,16 +330,16 @@ namespace llvm { const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, - DebugLoc dl, SelectionDAG &DAG); + DebugLoc dl, SelectionDAG &DAG) const; SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, - SDValue &ARMCC, SelectionDAG &DAG, DebugLoc dl); + SDValue &ARMCC, SelectionDAG &DAG, DebugLoc dl) const; MachineBasicBlock *EmitAtomicCmpSwap(MachineInstr *MI, MachineBasicBlock *BB, diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index f2ab06f328f22..ce5f2f877efb3 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -124,6 +124,7 @@ def HasV6 : Predicate<"Subtarget->hasV6Ops()">; def HasV6T2 : Predicate<"Subtarget->hasV6T2Ops()">; def NoV6T2 : Predicate<"!Subtarget->hasV6T2Ops()">; def HasV7 : Predicate<"Subtarget->hasV7Ops()">; +def NoVFP : Predicate<"!Subtarget->hasVFP2()">; def HasVFP2 : Predicate<"Subtarget->hasVFP2()">; def HasVFP3 : Predicate<"Subtarget->hasVFP3()">; def HasNEON : Predicate<"Subtarget->hasNEON()">; @@ -1231,7 +1232,7 @@ def LDRBT : AI2ldbpo<(outs GPR:$dst, GPR:$base_wb), } def LDRSBT : AI3ldsbpo<(outs GPR:$dst, GPR:$base_wb), - (ins GPR:$base,am2offset:$offset), LdMiscFrm, IIC_iLoadru, + (ins GPR:$base,am3offset:$offset), LdMiscFrm, IIC_iLoadru, "ldrsbt", "\t$dst, [$base], $offset", "$base = $base_wb", []> { let Inst{21} = 1; // overwrite } @@ -2533,7 +2534,23 @@ let Defs = "mov\tr0, #0\n\t" "add\tpc, pc, #0\n\t" "mov\tr0, #1 @ eh_setjmp end", "", - [(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>; + [(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>, + Requires<[IsARM, HasVFP2]>; +} + +let Defs = + [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR ] in { + def Int_eh_sjlj_setjmp_nofp : XI<(outs), (ins GPR:$src, GPR:$val), + AddrModeNone, SizeSpecial, IndexModeNone, + Pseudo, NoItinerary, + "str\tsp, [$src, #+8] @ eh_setjmp begin\n\t" + "add\t$val, pc, #8\n\t" + "str\t$val, [$src, #+4]\n\t" + "mov\tr0, #0\n\t" + "add\tpc, pc, #0\n\t" + "mov\tr0, #1 @ eh_setjmp end", "", + [(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>, + Requires<[IsARM, NoVFP]>; } //===----------------------------------------------------------------------===// @@ -2747,7 +2764,7 @@ multiclass LdStCop<bits<4> op31_28, bit load, string opc> { def L_OFFSET : ACI<(outs), (ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), - opc, "l\tp$cop, cr$CRd, $addr"> { + !strconcat(opc, "l"), "\tp$cop, cr$CRd, $addr"> { let Inst{31-28} = op31_28; let Inst{24} = 1; // P = 1 let Inst{21} = 0; // W = 0 @@ -2757,7 +2774,7 @@ multiclass LdStCop<bits<4> op31_28, bit load, string opc> { def L_PRE : ACI<(outs), (ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), - opc, "l\tp$cop, cr$CRd, $addr!"> { + !strconcat(opc, "l"), "\tp$cop, cr$CRd, $addr!"> { let Inst{31-28} = op31_28; let Inst{24} = 1; // P = 1 let Inst{21} = 1; // W = 1 @@ -2767,7 +2784,7 @@ multiclass LdStCop<bits<4> op31_28, bit load, string opc> { def L_POST : ACI<(outs), (ins nohash_imm:$cop, nohash_imm:$CRd, GPR:$base, am2offset:$offset), - opc, "l\tp$cop, cr$CRd, [$base], $offset"> { + !strconcat(opc, "l"), "\tp$cop, cr$CRd, [$base], $offset"> { let Inst{31-28} = op31_28; let Inst{24} = 0; // P = 0 let Inst{21} = 1; // W = 1 @@ -2777,7 +2794,7 @@ multiclass LdStCop<bits<4> op31_28, bit load, string opc> { def L_OPTION : ACI<(outs), (ins nohash_imm:$cop, nohash_imm:$CRd, GPR:$base, nohash_imm:$option), - opc, "l\tp$cop, cr$CRd, [$base], $option"> { + !strconcat(opc, "l"), "\tp$cop, cr$CRd, [$base], $option"> { let Inst{31-28} = op31_28; let Inst{24} = 0; // P = 0 let Inst{23} = 1; // U = 1 diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index ed9d31d80f361..d5ce2b8468dff 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -1621,12 +1621,13 @@ multiclass N3VNInt_HSD<bit op24, bit op23, bits<4> op11_8, bit op4, // First with only element sizes of 16 and 32 bits: multiclass N3VLInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4, - InstrItinClass itin, string OpcodeStr, string Dt, + InstrItinClass itin16, InstrItinClass itin32, + string OpcodeStr, string Dt, Intrinsic IntOp, bit Commutable = 0> { - def v4i32 : N3VLInt<op24, op23, 0b01, op11_8, op4, itin, + def v4i32 : N3VLInt<op24, op23, 0b01, op11_8, op4, itin16, OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp, Commutable>; - def v2i64 : N3VLInt<op24, op23, 0b10, op11_8, op4, itin, + def v2i64 : N3VLInt<op24, op23, 0b10, op11_8, op4, itin32, OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp, Commutable>; } @@ -1642,11 +1643,12 @@ multiclass N3VLIntSL_HS<bit op24, bits<4> op11_8, // ....then also with element size of 8 bits: multiclass N3VLInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, - InstrItinClass itin, string OpcodeStr, string Dt, + InstrItinClass itin16, InstrItinClass itin32, + string OpcodeStr, string Dt, Intrinsic IntOp, bit Commutable = 0> - : N3VLInt_HS<op24, op23, op11_8, op4, itin, OpcodeStr, Dt, + : N3VLInt_HS<op24, op23, op11_8, op4, itin16, itin32, OpcodeStr, Dt, IntOp, Commutable> { - def v8i16 : N3VLInt<op24, op23, 0b00, op11_8, op4, itin, + def v8i16 : N3VLInt<op24, op23, 0b00, op11_8, op4, itin16, OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, IntOp, Commutable>; } @@ -1711,21 +1713,22 @@ multiclass N3VMulOpSL_HS<bits<4> op11_8, // Neon 3-argument intrinsics, // element sizes of 8, 16 and 32 bits: multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, + InstrItinClass itinD, InstrItinClass itinQ, string OpcodeStr, string Dt, Intrinsic IntOp> { // 64-bit vector types. - def v8i8 : N3VDInt3<op24, op23, 0b00, op11_8, op4, IIC_VMACi16D, + def v8i8 : N3VDInt3<op24, op23, 0b00, op11_8, op4, itinD, OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>; - def v4i16 : N3VDInt3<op24, op23, 0b01, op11_8, op4, IIC_VMACi16D, + def v4i16 : N3VDInt3<op24, op23, 0b01, op11_8, op4, itinD, OpcodeStr, !strconcat(Dt, "16"), v4i16, v4i16, IntOp>; - def v2i32 : N3VDInt3<op24, op23, 0b10, op11_8, op4, IIC_VMACi32D, + def v2i32 : N3VDInt3<op24, op23, 0b10, op11_8, op4, itinD, OpcodeStr, !strconcat(Dt, "32"), v2i32, v2i32, IntOp>; // 128-bit vector types. - def v16i8 : N3VQInt3<op24, op23, 0b00, op11_8, op4, IIC_VMACi16Q, + def v16i8 : N3VQInt3<op24, op23, 0b00, op11_8, op4, itinQ, OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8, IntOp>; - def v8i16 : N3VQInt3<op24, op23, 0b01, op11_8, op4, IIC_VMACi16Q, + def v8i16 : N3VQInt3<op24, op23, 0b01, op11_8, op4, itinQ, OpcodeStr, !strconcat(Dt, "16"), v8i16, v8i16, IntOp>; - def v4i32 : N3VQInt3<op24, op23, 0b10, op11_8, op4, IIC_VMACi32Q, + def v4i32 : N3VQInt3<op24, op23, 0b10, op11_8, op4, itinQ, OpcodeStr, !strconcat(Dt, "32"), v4i32, v4i32, IntOp>; } @@ -1734,10 +1737,11 @@ multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, // First with only element sizes of 16 and 32 bits: multiclass N3VLInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4, + InstrItinClass itin16, InstrItinClass itin32, string OpcodeStr, string Dt, Intrinsic IntOp> { - def v4i32 : N3VLInt3<op24, op23, 0b01, op11_8, op4, IIC_VMACi16D, + def v4i32 : N3VLInt3<op24, op23, 0b01, op11_8, op4, itin16, OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>; - def v2i64 : N3VLInt3<op24, op23, 0b10, op11_8, op4, IIC_VMACi16D, + def v2i64 : N3VLInt3<op24, op23, 0b10, op11_8, op4, itin32, OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>; } @@ -1751,9 +1755,10 @@ multiclass N3VLInt3SL_HS<bit op24, bits<4> op11_8, // ....then also with element size of 8 bits: multiclass N3VLInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, + InstrItinClass itin16, InstrItinClass itin32, string OpcodeStr, string Dt, Intrinsic IntOp> - : N3VLInt3_HS<op24, op23, op11_8, op4, OpcodeStr, Dt, IntOp> { - def v8i16 : N3VLInt3<op24, op23, 0b00, op11_8, op4, IIC_VMACi16D, + : N3VLInt3_HS<op24, op23, op11_8, op4, itin16, itin32, OpcodeStr, Dt, IntOp> { + def v8i16 : N3VLInt3<op24, op23, 0b00, op11_8, op4, itin16, OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, IntOp>; } @@ -2001,10 +2006,10 @@ def VADDfd : N3VD<0, 0, 0b00, 0b1101, 0, IIC_VBIND, "vadd", "f32", def VADDfq : N3VQ<0, 0, 0b00, 0b1101, 0, IIC_VBINQ, "vadd", "f32", v4f32, v4f32, fadd, 1>; // VADDL : Vector Add Long (Q = D + D) -defm VADDLs : N3VLInt_QHS<0,1,0b0000,0, IIC_VSHLiD, "vaddl", "s", - int_arm_neon_vaddls, 1>; -defm VADDLu : N3VLInt_QHS<1,1,0b0000,0, IIC_VSHLiD, "vaddl", "u", - int_arm_neon_vaddlu, 1>; +defm VADDLs : N3VLInt_QHS<0,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD, + "vaddl", "s", int_arm_neon_vaddls, 1>; +defm VADDLu : N3VLInt_QHS<1,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD, + "vaddl", "u", int_arm_neon_vaddlu, 1>; // VADDW : Vector Add Wide (Q = Q + D) defm VADDWs : N3VWInt_QHS<0,1,0b0001,0, "vaddw", "s", int_arm_neon_vaddws, 0>; defm VADDWu : N3VWInt_QHS<1,1,0b0001,0, "vaddw", "u", int_arm_neon_vaddwu, 0>; @@ -2118,10 +2123,10 @@ def : Pat<(v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src1), (SubReg_i32_lane imm:$lane)))>; // VMULL : Vector Multiply Long (integer and polynomial) (Q = D * D) -defm VMULLs : N3VLInt_QHS<0,1,0b1100,0, IIC_VMULi16D, "vmull", "s", - int_arm_neon_vmulls, 1>; -defm VMULLu : N3VLInt_QHS<1,1,0b1100,0, IIC_VMULi16D, "vmull", "u", - int_arm_neon_vmullu, 1>; +defm VMULLs : N3VLInt_QHS<0,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D, + "vmull", "s", int_arm_neon_vmulls, 1>; +defm VMULLu : N3VLInt_QHS<1,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D, + "vmull", "u", int_arm_neon_vmullu, 1>; def VMULLp : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull", "p8", v8i16, v8i8, int_arm_neon_vmullp, 1>; defm VMULLsls : N3VLIntSL_HS<0, 0b1010, IIC_VMULi16D, "vmull", "s", @@ -2130,10 +2135,10 @@ defm VMULLslu : N3VLIntSL_HS<1, 0b1010, IIC_VMULi16D, "vmull", "u", int_arm_neon_vmullu>; // VQDMULL : Vector Saturating Doubling Multiply Long (Q = D * D) -defm VQDMULL : N3VLInt_HS<0,1,0b1101,0, IIC_VMULi16D, "vqdmull", "s", - int_arm_neon_vqdmull, 1>; -defm VQDMULLsl: N3VLIntSL_HS<0, 0b1011, IIC_VMULi16D, "vqdmull", "s", - int_arm_neon_vqdmull>; +defm VQDMULL : N3VLInt_HS<0,1,0b1101,0, IIC_VMULi16D, IIC_VMULi32D, + "vqdmull", "s", int_arm_neon_vqdmull, 1>; +defm VQDMULLsl: N3VLIntSL_HS<0, 0b1011, IIC_VMULi16D, + "vqdmull", "s", int_arm_neon_vqdmull>; // Vector Multiply-Accumulate and Multiply-Subtract Operations. @@ -2177,15 +2182,17 @@ def : Pat<(v4f32 (fadd (v4f32 QPR:$src1), (SubReg_i32_lane imm:$lane)))>; // VMLAL : Vector Multiply Accumulate Long (Q += D * D) -defm VMLALs : N3VLInt3_QHS<0,1,0b1000,0, "vmlal", "s", int_arm_neon_vmlals>; -defm VMLALu : N3VLInt3_QHS<1,1,0b1000,0, "vmlal", "u", int_arm_neon_vmlalu>; +defm VMLALs : N3VLInt3_QHS<0,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D, + "vmlal", "s", int_arm_neon_vmlals>; +defm VMLALu : N3VLInt3_QHS<1,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D, + "vmlal", "u", int_arm_neon_vmlalu>; defm VMLALsls : N3VLInt3SL_HS<0, 0b0010, "vmlal", "s", int_arm_neon_vmlals>; defm VMLALslu : N3VLInt3SL_HS<1, 0b0010, "vmlal", "u", int_arm_neon_vmlalu>; // VQDMLAL : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D) -defm VQDMLAL : N3VLInt3_HS<0, 1, 0b1001, 0, "vqdmlal", "s", - int_arm_neon_vqdmlal>; +defm VQDMLAL : N3VLInt3_HS<0, 1, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D, + "vqdmlal", "s", int_arm_neon_vqdmlal>; defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal", "s", int_arm_neon_vqdmlal>; // VMLS : Vector Multiply Subtract (integer and floating-point) @@ -2227,15 +2234,17 @@ def : Pat<(v4f32 (fsub (v4f32 QPR:$src1), (SubReg_i32_lane imm:$lane)))>; // VMLSL : Vector Multiply Subtract Long (Q -= D * D) -defm VMLSLs : N3VLInt3_QHS<0,1,0b1010,0, "vmlsl", "s", int_arm_neon_vmlsls>; -defm VMLSLu : N3VLInt3_QHS<1,1,0b1010,0, "vmlsl", "u", int_arm_neon_vmlslu>; +defm VMLSLs : N3VLInt3_QHS<0,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D, + "vmlsl", "s", int_arm_neon_vmlsls>; +defm VMLSLu : N3VLInt3_QHS<1,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D, + "vmlsl", "u", int_arm_neon_vmlslu>; defm VMLSLsls : N3VLInt3SL_HS<0, 0b0110, "vmlsl", "s", int_arm_neon_vmlsls>; defm VMLSLslu : N3VLInt3SL_HS<1, 0b0110, "vmlsl", "u", int_arm_neon_vmlslu>; // VQDMLSL : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D) -defm VQDMLSL : N3VLInt3_HS<0, 1, 0b1011, 0, "vqdmlsl", "s", - int_arm_neon_vqdmlsl>; +defm VQDMLSL : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D, + "vqdmlsl", "s", int_arm_neon_vqdmlsl>; defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b111, "vqdmlsl", "s", int_arm_neon_vqdmlsl>; // Vector Subtract Operations. @@ -2248,26 +2257,26 @@ def VSUBfd : N3VD<0, 0, 0b10, 0b1101, 0, IIC_VBIND, "vsub", "f32", def VSUBfq : N3VQ<0, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vsub", "f32", v4f32, v4f32, fsub, 0>; // VSUBL : Vector Subtract Long (Q = D - D) -defm VSUBLs : N3VLInt_QHS<0,1,0b0010,0, IIC_VSHLiD, "vsubl", "s", - int_arm_neon_vsubls, 1>; -defm VSUBLu : N3VLInt_QHS<1,1,0b0010,0, IIC_VSHLiD, "vsubl", "u", - int_arm_neon_vsublu, 1>; +defm VSUBLs : N3VLInt_QHS<0,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD, + "vsubl", "s", int_arm_neon_vsubls, 1>; +defm VSUBLu : N3VLInt_QHS<1,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD, + "vsubl", "u", int_arm_neon_vsublu, 1>; // VSUBW : Vector Subtract Wide (Q = Q - D) defm VSUBWs : N3VWInt_QHS<0,1,0b0011,0, "vsubw", "s", int_arm_neon_vsubws, 0>; defm VSUBWu : N3VWInt_QHS<1,1,0b0011,0, "vsubw", "u", int_arm_neon_vsubwu, 0>; // VHSUB : Vector Halving Subtract defm VHSUBs : N3VInt_QHS<0, 0, 0b0010, 0, N3RegFrm, - IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, + IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, "vhsub", "s", int_arm_neon_vhsubs, 0>; defm VHSUBu : N3VInt_QHS<1, 0, 0b0010, 0, N3RegFrm, - IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, + IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, "vhsub", "u", int_arm_neon_vhsubu, 0>; // VQSUB : Vector Saturing Subtract defm VQSUBs : N3VInt_QHSD<0, 0, 0b0010, 1, N3RegFrm, - IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, + IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, "vqsub", "s", int_arm_neon_vqsubs, 0>; defm VQSUBu : N3VInt_QHSD<1, 0, 0b0010, 1, N3RegFrm, - IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, + IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, "vqsub", "u", int_arm_neon_vqsubu, 0>; // VSUBHN : Vector Subtract and Narrow Returning High Half (D = Q - Q) defm VSUBHN : N3VNInt_HSD<0,1,0b0110,0, "vsubhn", "i", @@ -2279,8 +2288,8 @@ defm VRSUBHN : N3VNInt_HSD<1,1,0b0110,0, "vrsubhn", "i", // Vector Comparisons. // VCEQ : Vector Compare Equal -defm VCEQ : N3V_QHS<1, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, - IIC_VBINi4Q, "vceq", "i", NEONvceq, 1>; +defm VCEQ : N3V_QHS<1, 0, 0b1000, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, + IIC_VSUBi4Q, "vceq", "i", NEONvceq, 1>; def VCEQfd : N3VD<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32, NEONvceq, 1>; def VCEQfq : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32, @@ -2290,10 +2299,10 @@ defm VCEQz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i", "$dst, $src, #0">; // VCGE : Vector Compare Greater Than or Equal -defm VCGEs : N3V_QHS<0, 0, 0b0011, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, - IIC_VBINi4Q, "vcge", "s", NEONvcge, 0>; -defm VCGEu : N3V_QHS<1, 0, 0b0011, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, - IIC_VBINi4Q, "vcge", "u", NEONvcgeu, 0>; +defm VCGEs : N3V_QHS<0, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, + IIC_VSUBi4Q, "vcge", "s", NEONvcge, 0>; +defm VCGEu : N3V_QHS<1, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, + IIC_VSUBi4Q, "vcge", "u", NEONvcgeu, 0>; def VCGEfd : N3VD<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", v2i32, v2f32, NEONvcge, 0>; def VCGEfq : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32, @@ -2306,10 +2315,10 @@ defm VCLEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00011, 0, "vcle", "s", "$dst, $src, #0">; // VCGT : Vector Compare Greater Than -defm VCGTs : N3V_QHS<0, 0, 0b0011, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, - IIC_VBINi4Q, "vcgt", "s", NEONvcgt, 0>; -defm VCGTu : N3V_QHS<1, 0, 0b0011, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, - IIC_VBINi4Q, "vcgt", "u", NEONvcgtu, 0>; +defm VCGTs : N3V_QHS<0, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, + IIC_VSUBi4Q, "vcgt", "s", NEONvcgt, 0>; +defm VCGTu : N3V_QHS<1, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, + IIC_VSUBi4Q, "vcgt", "u", NEONvcgtu, 0>; def VCGTfd : N3VD<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32, NEONvcgt, 0>; def VCGTfq : N3VQ<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32, @@ -2387,11 +2396,11 @@ def VORNq : N3VX<0, 0, 0b11, 0b0001, 1, 1, (outs QPR:$dst), // VMVN : Vector Bitwise NOT def VMVNd : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 0, 0, - (outs DPR:$dst), (ins DPR:$src), IIC_VSHLiD, + (outs DPR:$dst), (ins DPR:$src), IIC_VSUBiD, "vmvn", "$dst, $src", "", [(set DPR:$dst, (v2i32 (vnot8 DPR:$src)))]>; def VMVNq : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 1, 0, - (outs QPR:$dst), (ins QPR:$src), IIC_VSHLiD, + (outs QPR:$dst), (ins QPR:$src), IIC_VSUBiD, "vmvn", "$dst, $src", "", [(set QPR:$dst, (v4i32 (vnot16 QPR:$src)))]>; def : Pat<(v2i32 (vnot8 DPR:$src)), (VMVNd DPR:$src)>; @@ -2447,10 +2456,10 @@ def VBITq : N3VX<1, 0, 0b10, 0b0001, 1, 1, // VABD : Vector Absolute Difference defm VABDs : N3VInt_QHS<0, 0, 0b0111, 0, N3RegFrm, - IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, + IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, "vabd", "s", int_arm_neon_vabds, 0>; defm VABDu : N3VInt_QHS<1, 0, 0b0111, 0, N3RegFrm, - IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, + IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, "vabd", "u", int_arm_neon_vabdu, 0>; def VABDfd : N3VDInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBIND, "vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 0>; @@ -2458,56 +2467,68 @@ def VABDfq : N3VQInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBINQ, "vabd", "f32", v4f32, v4f32, int_arm_neon_vabds, 0>; // VABDL : Vector Absolute Difference Long (Q = | D - D |) -defm VABDLs : N3VLInt_QHS<0,1,0b0111,0, IIC_VBINi4Q, +defm VABDLs : N3VLInt_QHS<0,1,0b0111,0, IIC_VSUBi4Q, IIC_VSUBi4Q, "vabdl", "s", int_arm_neon_vabdls, 0>; -defm VABDLu : N3VLInt_QHS<1,1,0b0111,0, IIC_VBINi4Q, +defm VABDLu : N3VLInt_QHS<1,1,0b0111,0, IIC_VSUBi4Q, IIC_VSUBi4Q, "vabdl", "u", int_arm_neon_vabdlu, 0>; // VABA : Vector Absolute Difference and Accumulate -defm VABAs : N3VInt3_QHS<0,0,0b0111,1, "vaba", "s", int_arm_neon_vabas>; -defm VABAu : N3VInt3_QHS<1,0,0b0111,1, "vaba", "u", int_arm_neon_vabau>; +defm VABAs : N3VInt3_QHS<0,0,0b0111,1, IIC_VABAD, IIC_VABAQ, + "vaba", "s", int_arm_neon_vabas>; +defm VABAu : N3VInt3_QHS<1,0,0b0111,1, IIC_VABAD, IIC_VABAQ, + "vaba", "u", int_arm_neon_vabau>; // VABAL : Vector Absolute Difference and Accumulate Long (Q += | D - D |) -defm VABALs : N3VLInt3_QHS<0,1,0b0101,0, "vabal", "s", int_arm_neon_vabals>; -defm VABALu : N3VLInt3_QHS<1,1,0b0101,0, "vabal", "u", int_arm_neon_vabalu>; +defm VABALs : N3VLInt3_QHS<0,1,0b0101,0, IIC_VABAD, IIC_VABAD, + "vabal", "s", int_arm_neon_vabals>; +defm VABALu : N3VLInt3_QHS<1,1,0b0101,0, IIC_VABAD, IIC_VABAD, + "vabal", "u", int_arm_neon_vabalu>; // Vector Maximum and Minimum. // VMAX : Vector Maximum defm VMAXs : N3VInt_QHS<0, 0, 0b0110, 0, N3RegFrm, - IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, + IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, "vmax", "s", int_arm_neon_vmaxs, 1>; defm VMAXu : N3VInt_QHS<1, 0, 0b0110, 0, N3RegFrm, - IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, + IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, "vmax", "u", int_arm_neon_vmaxu, 1>; -def VMAXfd : N3VDInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBIND, "vmax", - "f32", v2f32, v2f32, int_arm_neon_vmaxs, 1>; -def VMAXfq : N3VQInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBINQ, "vmax", - "f32", v4f32, v4f32, int_arm_neon_vmaxs, 1>; +def VMAXfd : N3VDInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBIND, + "vmax", "f32", + v2f32, v2f32, int_arm_neon_vmaxs, 1>; +def VMAXfq : N3VQInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBINQ, + "vmax", "f32", + v4f32, v4f32, int_arm_neon_vmaxs, 1>; // VMIN : Vector Minimum defm VMINs : N3VInt_QHS<0, 0, 0b0110, 1, N3RegFrm, - IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, + IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, "vmin", "s", int_arm_neon_vmins, 1>; defm VMINu : N3VInt_QHS<1, 0, 0b0110, 1, N3RegFrm, - IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, + IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, "vmin", "u", int_arm_neon_vminu, 1>; -def VMINfd : N3VDInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBIND, "vmin", - "f32", v2f32, v2f32, int_arm_neon_vmins, 1>; -def VMINfq : N3VQInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBINQ, "vmin", - "f32", v4f32, v4f32, int_arm_neon_vmins, 1>; +def VMINfd : N3VDInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBIND, + "vmin", "f32", + v2f32, v2f32, int_arm_neon_vmins, 1>; +def VMINfq : N3VQInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBINQ, + "vmin", "f32", + v4f32, v4f32, int_arm_neon_vmins, 1>; // Vector Pairwise Operations. // VPADD : Vector Pairwise Add -def VPADDi8 : N3VDInt<0, 0, 0b00, 0b1011, 1, N3RegFrm, IIC_VBINiD, "vpadd", - "i8", v8i8, v8i8, int_arm_neon_vpadd, 0>; -def VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, N3RegFrm, IIC_VBINiD, "vpadd", - "i16", v4i16, v4i16, int_arm_neon_vpadd, 0>; -def VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, N3RegFrm, IIC_VBINiD, "vpadd", - "i32", v2i32, v2i32, int_arm_neon_vpadd, 0>; -def VPADDf : N3VDInt<1, 0, 0b00, 0b1101, 0, N3RegFrm, IIC_VBIND, "vpadd", - "f32", v2f32, v2f32, int_arm_neon_vpadd, 0>; +def VPADDi8 : N3VDInt<0, 0, 0b00, 0b1011, 1, N3RegFrm, IIC_VSHLiD, + "vpadd", "i8", + v8i8, v8i8, int_arm_neon_vpadd, 0>; +def VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, N3RegFrm, IIC_VSHLiD, + "vpadd", "i16", + v4i16, v4i16, int_arm_neon_vpadd, 0>; +def VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, N3RegFrm, IIC_VSHLiD, + "vpadd", "i32", + v2i32, v2i32, int_arm_neon_vpadd, 0>; +def VPADDf : N3VDInt<1, 0, 0b00, 0b1101, 0, N3RegFrm, + IIC_VBIND, "vpadd", "f32", + v2f32, v2f32, int_arm_neon_vpadd, 0>; // VPADDL : Vector Pairwise Add Long defm VPADDLs : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00100, 0, "vpaddl", "s", @@ -2522,35 +2543,35 @@ defm VPADALu : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01101, 0, "vpadal", "u", int_arm_neon_vpadalu>; // VPMAX : Vector Pairwise Maximum -def VPMAXs8 : N3VDInt<0, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VBINi4D, "vpmax", +def VPMAXs8 : N3VDInt<0, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax", "s8", v8i8, v8i8, int_arm_neon_vpmaxs, 0>; -def VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VBINi4D, "vpmax", +def VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax", "s16", v4i16, v4i16, int_arm_neon_vpmaxs, 0>; -def VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VBINi4D, "vpmax", +def VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax", "s32", v2i32, v2i32, int_arm_neon_vpmaxs, 0>; -def VPMAXu8 : N3VDInt<1, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VBINi4D, "vpmax", +def VPMAXu8 : N3VDInt<1, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax", "u8", v8i8, v8i8, int_arm_neon_vpmaxu, 0>; -def VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VBINi4D, "vpmax", +def VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax", "u16", v4i16, v4i16, int_arm_neon_vpmaxu, 0>; -def VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VBINi4D, "vpmax", +def VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax", "u32", v2i32, v2i32, int_arm_neon_vpmaxu, 0>; -def VPMAXf : N3VDInt<1, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBINi4D, "vpmax", +def VPMAXf : N3VDInt<1, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VSUBi4D, "vpmax", "f32", v2f32, v2f32, int_arm_neon_vpmaxs, 0>; // VPMIN : Vector Pairwise Minimum -def VPMINs8 : N3VDInt<0, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VBINi4D, "vpmin", +def VPMINs8 : N3VDInt<0, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin", "s8", v8i8, v8i8, int_arm_neon_vpmins, 0>; -def VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VBINi4D, "vpmin", +def VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin", "s16", v4i16, v4i16, int_arm_neon_vpmins, 0>; -def VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VBINi4D, "vpmin", +def VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin", "s32", v2i32, v2i32, int_arm_neon_vpmins, 0>; -def VPMINu8 : N3VDInt<1, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VBINi4D, "vpmin", +def VPMINu8 : N3VDInt<1, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin", "u8", v8i8, v8i8, int_arm_neon_vpminu, 0>; -def VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VBINi4D, "vpmin", +def VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin", "u16", v4i16, v4i16, int_arm_neon_vpminu, 0>; -def VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VBINi4D, "vpmin", +def VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin", "u32", v2i32, v2i32, int_arm_neon_vpminu, 0>; -def VPMINf : N3VDInt<1, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBINi4D, "vpmin", +def VPMINf : N3VDInt<1, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VSUBi4D, "vpmin", "f32", v2f32, v2f32, int_arm_neon_vpmins, 0>; // Vector Reciprocal and Reciprocal Square Root Estimate and Step. diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 262aae48913a2..742bd403cdde9 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -2386,9 +2386,25 @@ let Defs = "\tb\t1f\n" "\tmovs\tr0, #1\t@ end eh.setjmp\n" "1:", "", - [(set R0, (ARMeh_sjlj_setjmp GPR:$src, tGPR:$val))]>; + [(set R0, (ARMeh_sjlj_setjmp GPR:$src, tGPR:$val))]>, + Requires<[IsThumb2, HasVFP2]>; } +let Defs = + [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR ] in { + def t2Int_eh_sjlj_setjmp_nofp : Thumb2XI<(outs), (ins GPR:$src, tGPR:$val), + AddrModeNone, SizeSpecial, NoItinerary, + "str\t$val, [$src, #8]\t@ begin eh.setjmp\n" + "\tmov\t$val, pc\n" + "\tadds\t$val, #9\n" + "\tstr\t$val, [$src, #4]\n" + "\tmovs\tr0, #0\n" + "\tb\t1f\n" + "\tmovs\tr0, #1\t@ end eh.setjmp\n" + "1:", "", + [(set R0, (ARMeh_sjlj_setjmp GPR:$src, tGPR:$val))]>, + Requires<[IsThumb2, NoVFP]>; +} //===----------------------------------------------------------------------===// diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index 045838928660d..36fcaa13049da 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -256,25 +256,25 @@ def VCVTSD : VFPAI<(outs SPR:$dst), (ins DPR:$a), VFPUnaryFrm, // Between half-precision and single-precision. For disassembly only. def VCVTBSH : ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$dst), (ins SPR:$a), - /* FIXME */ IIC_fpCVTDS, "vcvtb", ".f32.f16\t$dst, $a", + /* FIXME */ IIC_fpCVTSH, "vcvtb", ".f32.f16\t$dst, $a", [/* For disassembly only; pattern left blank */]>; def : ARMPat<(f32_to_f16 SPR:$a), (i32 (COPY_TO_REGCLASS (VCVTBSH SPR:$a), GPR))>; def VCVTBHS : ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$dst), (ins SPR:$a), - /* FIXME */ IIC_fpCVTDS, "vcvtb", ".f16.f32\t$dst, $a", + /* FIXME */ IIC_fpCVTHS, "vcvtb", ".f16.f32\t$dst, $a", [/* For disassembly only; pattern left blank */]>; def : ARMPat<(f16_to_f32 GPR:$a), (VCVTBHS (COPY_TO_REGCLASS GPR:$a, SPR))>; def VCVTTSH : ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$dst), (ins SPR:$a), - /* FIXME */ IIC_fpCVTDS, "vcvtt", ".f32.f16\t$dst, $a", + /* FIXME */ IIC_fpCVTSH, "vcvtt", ".f32.f16\t$dst, $a", [/* For disassembly only; pattern left blank */]>; def VCVTTHS : ASuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$dst), (ins SPR:$a), - /* FIXME */ IIC_fpCVTDS, "vcvtt", ".f16.f32\t$dst, $a", + /* FIXME */ IIC_fpCVTHS, "vcvtt", ".f16.f32\t$dst, $a", [/* For disassembly only; pattern left blank */]>; let neverHasSideEffects = 1 in { @@ -306,23 +306,23 @@ def VSQRTS : ASuI<0b11101, 0b11, 0b0001, 0b11, 0, (outs SPR:$dst), (ins SPR:$a), // def VMOVRS : AVConv2I<0b11100001, 0b1010, (outs GPR:$dst), (ins SPR:$src), - IIC_VMOVSI, "vmov", "\t$dst, $src", + IIC_fpMOVSI, "vmov", "\t$dst, $src", [(set GPR:$dst, (bitconvert SPR:$src))]>; def VMOVSR : AVConv4I<0b11100000, 0b1010, (outs SPR:$dst), (ins GPR:$src), - IIC_VMOVIS, "vmov", "\t$dst, $src", + IIC_fpMOVIS, "vmov", "\t$dst, $src", [(set SPR:$dst, (bitconvert GPR:$src))]>; def VMOVRRD : AVConv3I<0b11000101, 0b1011, (outs GPR:$wb, GPR:$dst2), (ins DPR:$src), - IIC_VMOVDI, "vmov", "\t$wb, $dst2, $src", + IIC_fpMOVDI, "vmov", "\t$wb, $dst2, $src", [/* FIXME: Can't write pattern for multiple result instr*/]> { let Inst{7-6} = 0b00; } def VMOVRRS : AVConv3I<0b11000101, 0b1010, (outs GPR:$wb, GPR:$dst2), (ins SPR:$src1, SPR:$src2), - IIC_VMOVDI, "vmov", "\t$wb, $dst2, $src1, $src2", + IIC_fpMOVDI, "vmov", "\t$wb, $dst2, $src1, $src2", [/* For disassembly only; pattern left blank */]> { let Inst{7-6} = 0b00; } @@ -332,14 +332,14 @@ def VMOVRRS : AVConv3I<0b11000101, 0b1010, def VMOVDRR : AVConv5I<0b11000100, 0b1011, (outs DPR:$dst), (ins GPR:$src1, GPR:$src2), - IIC_VMOVID, "vmov", "\t$dst, $src1, $src2", + IIC_fpMOVID, "vmov", "\t$dst, $src1, $src2", [(set DPR:$dst, (arm_fmdrr GPR:$src1, GPR:$src2))]> { let Inst{7-6} = 0b00; } def VMOVSRR : AVConv5I<0b11000100, 0b1010, (outs SPR:$dst1, SPR:$dst2), (ins GPR:$src1, GPR:$src2), - IIC_VMOVID, "vmov", "\t$dst1, $dst2, $src1, $src2", + IIC_fpMOVID, "vmov", "\t$dst1, $dst2, $src1, $src2", [/* For disassembly only; pattern left blank */]> { let Inst{7-6} = 0b00; } @@ -678,7 +678,7 @@ def VMSR : VFPAI<(outs), (ins GPR:$src), VFPMiscFrm, IIC_fpSTAT, "vmsr", // Materialize FP immediates. VFP3 only. let isReMaterializable = 1 in { def FCONSTD : VFPAI<(outs DPR:$dst), (ins vfp_f64imm:$imm), - VFPMiscFrm, IIC_VMOVImm, + VFPMiscFrm, IIC_fpUNA64, "vmov", ".f64\t$dst, $imm", [(set DPR:$dst, vfp_f64imm:$imm)]>, Requires<[HasVFP3]> { let Inst{27-23} = 0b11101; @@ -689,7 +689,7 @@ def FCONSTD : VFPAI<(outs DPR:$dst), (ins vfp_f64imm:$imm), } def FCONSTS : VFPAI<(outs SPR:$dst), (ins vfp_f32imm:$imm), - VFPMiscFrm, IIC_VMOVImm, + VFPMiscFrm, IIC_fpUNA32, "vmov", ".f32\t$dst, $imm", [(set SPR:$dst, vfp_f32imm:$imm)]>, Requires<[HasVFP3]> { let Inst{27-23} = 0b11101; diff --git a/lib/Target/ARM/ARMJITInfo.cpp b/lib/Target/ARM/ARMJITInfo.cpp index 8c0b7206d22e1..b31a4fa343ba5 100644 --- a/lib/Target/ARM/ARMJITInfo.cpp +++ b/lib/Target/ARM/ARMJITInfo.cpp @@ -27,7 +27,7 @@ using namespace llvm; void ARMJITInfo::replaceMachineCodeForFunction(void *Old, void *New) { - llvm_report_error("ARMJITInfo::replaceMachineCodeForFunction"); + report_fatal_error("ARMJITInfo::replaceMachineCodeForFunction"); } /// JITCompilerFunction - This contains the address of the JIT function used to diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index cb762a4669fd7..8585c1e50105d 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -1358,7 +1358,7 @@ ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, return false; unsigned Align = (*Op0->memoperands_begin())->getAlignment(); - Function *Func = MF->getFunction(); + const Function *Func = MF->getFunction(); unsigned ReqAlign = STI->hasV6Ops() ? TD->getPrefTypeAlignment(Type::getInt64Ty(Func->getContext())) : 8; // Pre-v6 need 8-byte align diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h index c998edeb1fe46..013427635592b 100644 --- a/lib/Target/ARM/ARMMachineFunctionInfo.h +++ b/lib/Target/ARM/ARMMachineFunctionInfo.h @@ -85,6 +85,9 @@ class ARMFunctionInfo : public MachineFunctionInfo { unsigned ConstPoolEntryUId; + /// VarArgsFrameIndex - FrameIndex for start of varargs area. + int VarArgsFrameIndex; + public: ARMFunctionInfo() : isThumb(false), @@ -94,7 +97,7 @@ public: FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0), GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0), GPRCS1Frames(0), GPRCS2Frames(0), DPRCSFrames(0), - JumpTableUId(0), ConstPoolEntryUId(0) {} + JumpTableUId(0), ConstPoolEntryUId(0), VarArgsFrameIndex(0) {} explicit ARMFunctionInfo(MachineFunction &MF) : isThumb(MF.getTarget().getSubtarget<ARMSubtarget>().isThumb()), @@ -105,7 +108,7 @@ public: GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0), GPRCS1Frames(32), GPRCS2Frames(32), DPRCSFrames(32), SpilledCSRegs(MF.getTarget().getRegisterInfo()->getNumRegs()), - JumpTableUId(0), ConstPoolEntryUId(0) {} + JumpTableUId(0), ConstPoolEntryUId(0), VarArgsFrameIndex(0) {} bool isThumbFunction() const { return isThumb; } bool isThumb1OnlyFunction() const { return isThumb && !hasThumb2; } @@ -223,6 +226,9 @@ public: unsigned createConstPoolEntryUId() { return ConstPoolEntryUId++; } + + int getVarArgsFrameIndex() const { return VarArgsFrameIndex; } + void setVarArgsFrameIndex(int Index) { VarArgsFrameIndex = Index; } }; } // End llvm namespace diff --git a/lib/Target/ARM/ARMSchedule.td b/lib/Target/ARM/ARMSchedule.td index fc4c5f5830b00..b60ccca46867a 100644 --- a/lib/Target/ARM/ARMSchedule.td +++ b/lib/Target/ARM/ARMSchedule.td @@ -8,17 +8,6 @@ //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// -// Functional units across ARM processors -// -def FU_Issue : FuncUnit; // issue -def FU_Pipe0 : FuncUnit; // pipeline 0 -def FU_Pipe1 : FuncUnit; // pipeline 1 -def FU_LdSt0 : FuncUnit; // pipeline 0 load/store -def FU_LdSt1 : FuncUnit; // pipeline 1 load/store -def FU_NPipe : FuncUnit; // NEON ALU/MUL pipe -def FU_NLSPipe : FuncUnit; // NEON LS pipe - -//===----------------------------------------------------------------------===// // Instruction Itinerary classes used for ARM // def IIC_iALUx : InstrItinClass; @@ -69,10 +58,16 @@ def IIC_fpCMP32 : InstrItinClass; def IIC_fpCMP64 : InstrItinClass; def IIC_fpCVTSD : InstrItinClass; def IIC_fpCVTDS : InstrItinClass; +def IIC_fpCVTSH : InstrItinClass; +def IIC_fpCVTHS : InstrItinClass; def IIC_fpCVTIS : InstrItinClass; def IIC_fpCVTID : InstrItinClass; def IIC_fpCVTSI : InstrItinClass; def IIC_fpCVTDI : InstrItinClass; +def IIC_fpMOVIS : InstrItinClass; +def IIC_fpMOVID : InstrItinClass; +def IIC_fpMOVSI : InstrItinClass; +def IIC_fpMOVDI : InstrItinClass; def IIC_fpALU32 : InstrItinClass; def IIC_fpALU64 : InstrItinClass; def IIC_fpMUL32 : InstrItinClass; @@ -125,6 +120,10 @@ def IIC_VSUBiD : InstrItinClass; def IIC_VSUBiQ : InstrItinClass; def IIC_VBINi4D : InstrItinClass; def IIC_VBINi4Q : InstrItinClass; +def IIC_VSUBi4D : InstrItinClass; +def IIC_VSUBi4Q : InstrItinClass; +def IIC_VABAD : InstrItinClass; +def IIC_VABAQ : InstrItinClass; def IIC_VSHLiD : InstrItinClass; def IIC_VSHLiQ : InstrItinClass; def IIC_VSHLi4D : InstrItinClass; @@ -153,8 +152,8 @@ def IIC_VTBX4 : InstrItinClass; //===----------------------------------------------------------------------===// // Processor instruction itineraries. -def GenericItineraries : ProcessorItineraries<[]>; - +def GenericItineraries : ProcessorItineraries<[], []>; include "ARMScheduleV6.td" -include "ARMScheduleV7.td" +include "ARMScheduleA8.td" +include "ARMScheduleA9.td" diff --git a/lib/Target/ARM/ARMScheduleA8.td b/lib/Target/ARM/ARMScheduleA8.td new file mode 100644 index 0000000000000..bbfc0b2b47442 --- /dev/null +++ b/lib/Target/ARM/ARMScheduleA8.td @@ -0,0 +1,618 @@ +//=- ARMScheduleA8.td - ARM Cortex-A8 Scheduling Definitions -*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the itinerary class data for the ARM Cortex A8 processors. +// +//===----------------------------------------------------------------------===// + +// +// Scheduling information derived from "Cortex-A8 Technical Reference Manual". +// Functional Units. +def A8_Issue : FuncUnit; // issue +def A8_Pipe0 : FuncUnit; // pipeline 0 +def A8_Pipe1 : FuncUnit; // pipeline 1 +def A8_LdSt0 : FuncUnit; // pipeline 0 load/store +def A8_LdSt1 : FuncUnit; // pipeline 1 load/store +def A8_NPipe : FuncUnit; // NEON ALU/MUL pipe +def A8_NLSPipe : FuncUnit; // NEON LS pipe +// +// Dual issue pipeline represented by A8_Pipe0 | A8_Pipe1 +// +def CortexA8Itineraries : ProcessorItineraries< + [A8_Issue, A8_Pipe0, A8_Pipe1, A8_LdSt0, A8_LdSt1, A8_NPipe, A8_NLSPipe], [ + // Two fully-pipelined integer ALU pipelines + // + // No operand cycles + InstrItinData<IIC_iALUx , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>]>, + // + // Binary Instructions that produce a result + InstrItinData<IIC_iALUi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>, + InstrItinData<IIC_iALUr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 2]>, + InstrItinData<IIC_iALUsi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1]>, + InstrItinData<IIC_iALUsr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1, 1]>, + // + // Unary Instructions that produce a result + InstrItinData<IIC_iUNAr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>, + InstrItinData<IIC_iUNAsi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>, + InstrItinData<IIC_iUNAsr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>, + // + // Compare instructions + InstrItinData<IIC_iCMPi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>, + InstrItinData<IIC_iCMPr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>, + InstrItinData<IIC_iCMPsi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>, + InstrItinData<IIC_iCMPsr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>, + // + // Move instructions, unconditional + InstrItinData<IIC_iMOVi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1]>, + InstrItinData<IIC_iMOVr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>, + InstrItinData<IIC_iMOVsi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>, + InstrItinData<IIC_iMOVsr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1, 1]>, + // + // Move instructions, conditional + InstrItinData<IIC_iCMOVi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>, + InstrItinData<IIC_iCMOVr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>, + InstrItinData<IIC_iCMOVsi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>, + InstrItinData<IIC_iCMOVsr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>, + + // Integer multiply pipeline + // Result written in E5, but that is relative to the last cycle of multicycle, + // so we use 6 for those cases + // + InstrItinData<IIC_iMUL16 , [InstrStage<1, [A8_Pipe0]>], [5, 1, 1]>, + InstrItinData<IIC_iMAC16 , [InstrStage<1, [A8_Pipe1], 0>, + InstrStage<2, [A8_Pipe0]>], [6, 1, 1, 4]>, + InstrItinData<IIC_iMUL32 , [InstrStage<1, [A8_Pipe1], 0>, + InstrStage<2, [A8_Pipe0]>], [6, 1, 1]>, + InstrItinData<IIC_iMAC32 , [InstrStage<1, [A8_Pipe1], 0>, + InstrStage<2, [A8_Pipe0]>], [6, 1, 1, 4]>, + InstrItinData<IIC_iMUL64 , [InstrStage<2, [A8_Pipe1], 0>, + InstrStage<3, [A8_Pipe0]>], [6, 6, 1, 1]>, + InstrItinData<IIC_iMAC64 , [InstrStage<2, [A8_Pipe1], 0>, + InstrStage<3, [A8_Pipe0]>], [6, 6, 1, 1]>, + + // Integer load pipeline + // + // loads have an extra cycle of latency, but are fully pipelined + // use A8_Issue to enforce the 1 load/store per cycle limit + // + // Immediate offset + InstrItinData<IIC_iLoadi , [InstrStage<1, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [3, 1]>, + // + // Register offset + InstrItinData<IIC_iLoadr , [InstrStage<1, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [3, 1, 1]>, + // + // Scaled register offset, issues over 2 cycles + InstrItinData<IIC_iLoadsi , [InstrStage<2, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0], 0>, + InstrStage<1, [A8_Pipe1]>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [4, 1, 1]>, + // + // Immediate offset with update + InstrItinData<IIC_iLoadiu , [InstrStage<1, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [3, 2, 1]>, + // + // Register offset with update + InstrItinData<IIC_iLoadru , [InstrStage<1, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [3, 2, 1, 1]>, + // + // Scaled register offset with update, issues over 2 cycles + InstrItinData<IIC_iLoadsiu , [InstrStage<2, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0], 0>, + InstrStage<1, [A8_Pipe1]>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [4, 3, 1, 1]>, + // + // Load multiple + InstrItinData<IIC_iLoadm , [InstrStage<2, [A8_Issue], 0>, + InstrStage<2, [A8_Pipe0], 0>, + InstrStage<2, [A8_Pipe1]>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>]>, + + // Integer store pipeline + // + // use A8_Issue to enforce the 1 load/store per cycle limit + // + // Immediate offset + InstrItinData<IIC_iStorei , [InstrStage<1, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [3, 1]>, + // + // Register offset + InstrItinData<IIC_iStorer , [InstrStage<1, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [3, 1, 1]>, + // + // Scaled register offset, issues over 2 cycles + InstrItinData<IIC_iStoresi , [InstrStage<2, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0], 0>, + InstrStage<1, [A8_Pipe1]>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [3, 1, 1]>, + // + // Immediate offset with update + InstrItinData<IIC_iStoreiu , [InstrStage<1, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [2, 3, 1]>, + // + // Register offset with update + InstrItinData<IIC_iStoreru , [InstrStage<1, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [2, 3, 1, 1]>, + // + // Scaled register offset with update, issues over 2 cycles + InstrItinData<IIC_iStoresiu, [InstrStage<2, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0], 0>, + InstrStage<1, [A8_Pipe1]>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>], [3, 3, 1, 1]>, + // + // Store multiple + InstrItinData<IIC_iStorem , [InstrStage<2, [A8_Issue], 0>, + InstrStage<2, [A8_Pipe0], 0>, + InstrStage<2, [A8_Pipe1]>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0]>]>, + + // Branch + // + // no delay slots, so the latency of a branch is unimportant + InstrItinData<IIC_Br , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>]>, + + // VFP + // Issue through integer pipeline, and execute in NEON unit. We assume + // RunFast mode so that NFP pipeline is used for single-precision when + // possible. + // + // FP Special Register to Integer Register File Move + InstrItinData<IIC_fpSTAT , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NLSPipe]>]>, + // + // Single-precision FP Unary + InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [7, 1]>, + // + // Double-precision FP Unary + InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<4, [A8_NPipe], 0>, + InstrStage<4, [A8_NLSPipe]>], [4, 1]>, + // + // Single-precision FP Compare + InstrItinData<IIC_fpCMP32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [1, 1]>, + // + // Double-precision FP Compare + InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<4, [A8_NPipe], 0>, + InstrStage<4, [A8_NLSPipe]>], [4, 1]>, + // + // Single to Double FP Convert + InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<7, [A8_NPipe], 0>, + InstrStage<7, [A8_NLSPipe]>], [7, 1]>, + // + // Double to Single FP Convert + InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<5, [A8_NPipe], 0>, + InstrStage<5, [A8_NLSPipe]>], [5, 1]>, + // + // Single-Precision FP to Integer Convert + InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [7, 1]>, + // + // Double-Precision FP to Integer Convert + InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<8, [A8_NPipe], 0>, + InstrStage<8, [A8_NLSPipe]>], [8, 1]>, + // + // Integer to Single-Precision FP Convert + InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [7, 1]>, + // + // Integer to Double-Precision FP Convert + InstrItinData<IIC_fpCVTID , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<8, [A8_NPipe], 0>, + InstrStage<8, [A8_NLSPipe]>], [8, 1]>, + // + // Single-precision FP ALU + InstrItinData<IIC_fpALU32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [7, 1, 1]>, + // + // Double-precision FP ALU + InstrItinData<IIC_fpALU64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<9, [A8_NPipe], 0>, + InstrStage<9, [A8_NLSPipe]>], [9, 1, 1]>, + // + // Single-precision FP Multiply + InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [7, 1, 1]>, + // + // Double-precision FP Multiply + InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<11, [A8_NPipe], 0>, + InstrStage<11, [A8_NLSPipe]>], [11, 1, 1]>, + // + // Single-precision FP MAC + InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [7, 2, 1, 1]>, + // + // Double-precision FP MAC + InstrItinData<IIC_fpMAC64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<19, [A8_NPipe], 0>, + InstrStage<19, [A8_NLSPipe]>], [19, 2, 1, 1]>, + // + // Single-precision FP DIV + InstrItinData<IIC_fpDIV32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<20, [A8_NPipe], 0>, + InstrStage<20, [A8_NLSPipe]>], [20, 1, 1]>, + // + // Double-precision FP DIV + InstrItinData<IIC_fpDIV64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<29, [A8_NPipe], 0>, + InstrStage<29, [A8_NLSPipe]>], [29, 1, 1]>, + // + // Single-precision FP SQRT + InstrItinData<IIC_fpSQRT32, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<19, [A8_NPipe], 0>, + InstrStage<19, [A8_NLSPipe]>], [19, 1]>, + // + // Double-precision FP SQRT + InstrItinData<IIC_fpSQRT64, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<29, [A8_NPipe], 0>, + InstrStage<29, [A8_NLSPipe]>], [29, 1]>, + // + // Single-precision FP Load + // use A8_Issue to enforce the 1 load/store per cycle limit + InstrItinData<IIC_fpLoad32, [InstrStage<1, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0], 0>, + InstrStage<1, [A8_NLSPipe]>]>, + // + // Double-precision FP Load + // use A8_Issue to enforce the 1 load/store per cycle limit + InstrItinData<IIC_fpLoad64, [InstrStage<2, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0], 0>, + InstrStage<1, [A8_Pipe1]>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0], 0>, + InstrStage<1, [A8_NLSPipe]>]>, + // + // FP Load Multiple + // use A8_Issue to enforce the 1 load/store per cycle limit + InstrItinData<IIC_fpLoadm, [InstrStage<3, [A8_Issue], 0>, + InstrStage<2, [A8_Pipe0], 0>, + InstrStage<2, [A8_Pipe1]>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0], 0>, + InstrStage<1, [A8_NLSPipe]>]>, + // + // Single-precision FP Store + // use A8_Issue to enforce the 1 load/store per cycle limit + InstrItinData<IIC_fpStore32,[InstrStage<1, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0], 0>, + InstrStage<1, [A8_NLSPipe]>]>, + // + // Double-precision FP Store + // use A8_Issue to enforce the 1 load/store per cycle limit + InstrItinData<IIC_fpStore64,[InstrStage<2, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0], 0>, + InstrStage<1, [A8_Pipe1]>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0], 0>, + InstrStage<1, [A8_NLSPipe]>]>, + // + // FP Store Multiple + // use A8_Issue to enforce the 1 load/store per cycle limit + InstrItinData<IIC_fpStorem, [InstrStage<3, [A8_Issue], 0>, + InstrStage<2, [A8_Pipe0], 0>, + InstrStage<2, [A8_Pipe1]>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0], 0>, + InstrStage<1, [A8_NLSPipe]>]>, + + // NEON + // Issue through integer pipeline, and execute in NEON unit. + // + // VLD1 + // FIXME: We don't model this instruction properly + InstrItinData<IIC_VLD1, [InstrStage<1, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0], 0>, + InstrStage<1, [A8_NLSPipe]>]>, + // + // VLD2 + // FIXME: We don't model this instruction properly + InstrItinData<IIC_VLD2, [InstrStage<1, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0], 0>, + InstrStage<1, [A8_NLSPipe]>], [2, 2, 1]>, + // + // VLD3 + // FIXME: We don't model this instruction properly + InstrItinData<IIC_VLD3, [InstrStage<1, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0], 0>, + InstrStage<1, [A8_NLSPipe]>], [2, 2, 2, 1]>, + // + // VLD4 + // FIXME: We don't model this instruction properly + InstrItinData<IIC_VLD4, [InstrStage<1, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0], 0>, + InstrStage<1, [A8_NLSPipe]>], [2, 2, 2, 2, 1]>, + // + // VST + // FIXME: We don't model this instruction properly + InstrItinData<IIC_VST, [InstrStage<1, [A8_Issue], 0>, + InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_LdSt0], 0>, + InstrStage<1, [A8_NLSPipe]>]>, + // + // Double-register FP Unary + InstrItinData<IIC_VUNAD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [5, 2]>, + // + // Quad-register FP Unary + // Result written in N5, but that is relative to the last cycle of multicycle, + // so we use 6 for those cases + InstrItinData<IIC_VUNAQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<2, [A8_NPipe]>], [6, 2]>, + // + // Double-register FP Binary + InstrItinData<IIC_VBIND, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [5, 2, 2]>, + // + // Quad-register FP Binary + // Result written in N5, but that is relative to the last cycle of multicycle, + // so we use 6 for those cases + InstrItinData<IIC_VBINQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<2, [A8_NPipe]>], [6, 2, 2]>, + // + // Move Immediate + InstrItinData<IIC_VMOVImm, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [3]>, + // + // Double-register Permute Move + InstrItinData<IIC_VMOVD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NLSPipe]>], [2, 1]>, + // + // Quad-register Permute Move + // Result written in N2, but that is relative to the last cycle of multicycle, + // so we use 3 for those cases + InstrItinData<IIC_VMOVQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<2, [A8_NLSPipe]>], [3, 1]>, + // + // Integer to Single-precision Move + InstrItinData<IIC_VMOVIS , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NLSPipe]>], [2, 1]>, + // + // Integer to Double-precision Move + InstrItinData<IIC_VMOVID , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NLSPipe]>], [2, 1, 1]>, + // + // Single-precision to Integer Move + InstrItinData<IIC_VMOVSI , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NLSPipe]>], [20, 1]>, + // + // Double-precision to Integer Move + InstrItinData<IIC_VMOVDI , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NLSPipe]>], [20, 20, 1]>, + // + // Integer to Lane Move + InstrItinData<IIC_VMOVISL , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<2, [A8_NLSPipe]>], [3, 1, 1]>, + // + // Double-register Permute + InstrItinData<IIC_VPERMD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NLSPipe]>], [2, 2, 1, 1]>, + // + // Quad-register Permute + // Result written in N2, but that is relative to the last cycle of multicycle, + // so we use 3 for those cases + InstrItinData<IIC_VPERMQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<2, [A8_NLSPipe]>], [3, 3, 1, 1]>, + // + // Quad-register Permute (3 cycle issue) + // Result written in N2, but that is relative to the last cycle of multicycle, + // so we use 4 for those cases + InstrItinData<IIC_VPERMQ3, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NLSPipe]>, + InstrStage<1, [A8_NPipe], 0>, + InstrStage<2, [A8_NLSPipe]>], [4, 4, 1, 1]>, + // + // Double-register FP Multiple-Accumulate + InstrItinData<IIC_VMACD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [9, 3, 2, 2]>, + // + // Quad-register FP Multiple-Accumulate + // Result written in N9, but that is relative to the last cycle of multicycle, + // so we use 10 for those cases + InstrItinData<IIC_VMACQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<2, [A8_NPipe]>], [10, 3, 2, 2]>, + // + // Double-register Reciprical Step + InstrItinData<IIC_VRECSD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [9, 2, 2]>, + // + // Quad-register Reciprical Step + InstrItinData<IIC_VRECSQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<2, [A8_NPipe]>], [10, 2, 2]>, + // + // Double-register Integer Count + InstrItinData<IIC_VCNTiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [3, 2, 2]>, + // + // Quad-register Integer Count + // Result written in N3, but that is relative to the last cycle of multicycle, + // so we use 4 for those cases + InstrItinData<IIC_VCNTiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<2, [A8_NPipe]>], [4, 2, 2]>, + // + // Double-register Integer Unary + InstrItinData<IIC_VUNAiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [4, 2]>, + // + // Quad-register Integer Unary + InstrItinData<IIC_VUNAiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [4, 2]>, + // + // Double-register Integer Q-Unary + InstrItinData<IIC_VQUNAiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [4, 1]>, + // + // Quad-register Integer CountQ-Unary + InstrItinData<IIC_VQUNAiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [4, 1]>, + // + // Double-register Integer Binary + InstrItinData<IIC_VBINiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [3, 2, 2]>, + // + // Quad-register Integer Binary + InstrItinData<IIC_VBINiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [3, 2, 2]>, + // + // Double-register Integer Binary (4 cycle) + InstrItinData<IIC_VBINi4D, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [4, 2, 1]>, + // + // Quad-register Integer Binary (4 cycle) + InstrItinData<IIC_VBINi4Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [4, 2, 1]>, + + // + // Double-register Integer Subtract + InstrItinData<IIC_VSUBiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [3, 2, 1]>, + // + // Quad-register Integer Subtract + InstrItinData<IIC_VSUBiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [3, 2, 1]>, + // + // Double-register Integer Subtract + InstrItinData<IIC_VSUBi4D, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [4, 2, 1]>, + // + // Quad-register Integer Subtract + InstrItinData<IIC_VSUBi4Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [4, 2, 1]>, + // + // Double-register Integer Shift + InstrItinData<IIC_VSHLiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [3, 1, 1]>, + // + // Quad-register Integer Shift + InstrItinData<IIC_VSHLiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<2, [A8_NPipe]>], [4, 1, 1]>, + // + // Double-register Integer Shift (4 cycle) + InstrItinData<IIC_VSHLi4D, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [4, 1, 1]>, + // + // Quad-register Integer Shift (4 cycle) + InstrItinData<IIC_VSHLi4Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<2, [A8_NPipe]>], [5, 1, 1]>, + // + // Double-register Integer Pair Add Long + InstrItinData<IIC_VPALiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [6, 3, 1]>, + // + // Quad-register Integer Pair Add Long + InstrItinData<IIC_VPALiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<2, [A8_NPipe]>], [7, 3, 1]>, + // + // Double-register Absolute Difference and Accumulate + InstrItinData<IIC_VABAD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [6, 3, 2, 1]>, + // + // Quad-register Absolute Difference and Accumulate + InstrItinData<IIC_VABAQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<2, [A8_NPipe]>], [6, 3, 2, 1]>, + + // + // Double-register Integer Multiply (.8, .16) + InstrItinData<IIC_VMULi16D, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [6, 2, 2]>, + // + // Double-register Integer Multiply (.32) + InstrItinData<IIC_VMULi32D, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<2, [A8_NPipe]>], [7, 2, 1]>, + // + // Quad-register Integer Multiply (.8, .16) + InstrItinData<IIC_VMULi16Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<2, [A8_NPipe]>], [7, 2, 2]>, + // + // Quad-register Integer Multiply (.32) + InstrItinData<IIC_VMULi32Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>, + InstrStage<2, [A8_NLSPipe], 0>, + InstrStage<3, [A8_NPipe]>], [9, 2, 1]>, + // + // Double-register Integer Multiply-Accumulate (.8, .16) + InstrItinData<IIC_VMACi16D, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>], [6, 3, 2, 2]>, + // + // Double-register Integer Multiply-Accumulate (.32) + InstrItinData<IIC_VMACi32D, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<2, [A8_NPipe]>], [7, 3, 2, 1]>, + // + // Quad-register Integer Multiply-Accumulate (.8, .16) + InstrItinData<IIC_VMACi16Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<2, [A8_NPipe]>], [7, 3, 2, 2]>, + // + // Quad-register Integer Multiply-Accumulate (.32) + InstrItinData<IIC_VMACi32Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NPipe]>, + InstrStage<2, [A8_NLSPipe], 0>, + InstrStage<3, [A8_NPipe]>], [9, 3, 2, 1]>, + // + // Double-register VEXT + InstrItinData<IIC_VEXTD, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NLSPipe]>], [2, 1, 1]>, + // + // Quad-register VEXT + InstrItinData<IIC_VEXTQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<2, [A8_NLSPipe]>], [3, 1, 1]>, + // + // VTB + InstrItinData<IIC_VTB1, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<2, [A8_NLSPipe]>], [3, 2, 1]>, + InstrItinData<IIC_VTB2, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<2, [A8_NLSPipe]>], [3, 2, 2, 1]>, + InstrItinData<IIC_VTB3, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NLSPipe]>, + InstrStage<1, [A8_NPipe], 0>, + InstrStage<2, [A8_NLSPipe]>], [4, 2, 2, 3, 1]>, + InstrItinData<IIC_VTB4, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NLSPipe]>, + InstrStage<1, [A8_NPipe], 0>, + InstrStage<2, [A8_NLSPipe]>], [4, 2, 2, 3, 3, 1]>, + // + // VTBX + InstrItinData<IIC_VTBX1, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<2, [A8_NLSPipe]>], [3, 1, 2, 1]>, + InstrItinData<IIC_VTBX2, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<2, [A8_NLSPipe]>], [3, 1, 2, 2, 1]>, + InstrItinData<IIC_VTBX3, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NLSPipe]>, + InstrStage<1, [A8_NPipe], 0>, + InstrStage<2, [A8_NLSPipe]>], [4, 1, 2, 2, 3, 1]>, + InstrItinData<IIC_VTBX4, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, + InstrStage<1, [A8_NLSPipe]>, + InstrStage<1, [A8_NPipe], 0>, + InstrStage<2, [A8_NLSPipe]>], [4, 1, 2, 2, 3, 3, 1]> +]>; diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td new file mode 100644 index 0000000000000..75320d9299523 --- /dev/null +++ b/lib/Target/ARM/ARMScheduleA9.td @@ -0,0 +1,749 @@ +//=- ARMScheduleA9.td - ARM Cortex-A9 Scheduling Definitions -*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the itinerary class data for the ARM Cortex A9 processors. +// +//===----------------------------------------------------------------------===// + +// +// Ad-hoc scheduling information derived from pretty vague "Cortex-A9 Technical +// Reference Manual". +// +// Functional units +def A9_Issue : FuncUnit; // issue +def A9_Pipe0 : FuncUnit; // pipeline 0 +def A9_Pipe1 : FuncUnit; // pipeline 1 +def A9_LSPipe : FuncUnit; // LS pipe +def A9_NPipe : FuncUnit; // NEON ALU/MUL pipe +def A9_DRegsVFP: FuncUnit; // FP register set, VFP side +def A9_DRegsN : FuncUnit; // FP register set, NEON side + +// Dual issue pipeline represented by A9_Pipe0 | A9_Pipe1 +// +def CortexA9Itineraries : ProcessorItineraries< + [A9_NPipe, A9_DRegsN, A9_DRegsVFP, A9_LSPipe, A9_Pipe0, A9_Pipe1, A9_Issue], [ + // VFP and NEON shares the same register file. This means that every VFP + // instruction should wait for full completion of the consecutive NEON + // instruction and vice-versa. We model this behavior with two artificial FUs: + // DRegsVFP and DRegsVFP. + // + // Every VFP instruction: + // - Acquires DRegsVFP resource for 1 cycle + // - Reserves DRegsN resource for the whole duration (including time to + // register file writeback!). + // Every NEON instruction does the same but with FUs swapped. + // + // Since the reserved FU cannot be acquired this models precisly "cross-domain" + // stalls. + + // VFP + // Issue through integer pipeline, and execute in NEON unit. + + // FP Special Register to Integer Register File Move + InstrItinData<IIC_fpSTAT , [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<2, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>]>, + // + // Single-precision FP Unary + InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>, + // Extra latency cycles since wbck is 2 cycles + InstrStage<3, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [1, 1]>, + // + // Double-precision FP Unary + InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>, + // Extra latency cycles since wbck is 2 cycles + InstrStage<3, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [1, 1]>, + + // + // Single-precision FP Compare + InstrItinData<IIC_fpCMP32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>, + // Extra latency cycles since wbck is 4 cycles + InstrStage<5, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [1, 1]>, + // + // Double-precision FP Compare + InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>, + // Extra latency cycles since wbck is 4 cycles + InstrStage<5, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [1, 1]>, + // + // Single to Double FP Convert + InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<5, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [4, 1]>, + // + // Double to Single FP Convert + InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<5, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [4, 1]>, + + // + // Single to Half FP Convert + InstrItinData<IIC_fpCVTSH , [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<5, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [4, 1]>, + // + // Half to Single FP Convert + InstrItinData<IIC_fpCVTHS , [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<3, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [2, 1]>, + + // + // Single-Precision FP to Integer Convert + InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<5, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [4, 1]>, + // + // Double-Precision FP to Integer Convert + InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<5, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [4, 1]>, + // + // Integer to Single-Precision FP Convert + InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<5, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [4, 1]>, + // + // Integer to Double-Precision FP Convert + InstrItinData<IIC_fpCVTID , [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<5, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [4, 1]>, + // + // Single-precision FP ALU + InstrItinData<IIC_fpALU32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<5, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [4, 1, 1]>, + // + // Double-precision FP ALU + InstrItinData<IIC_fpALU64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<5, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [4, 1, 1]>, + // + // Single-precision FP Multiply + InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<6, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [5, 1, 1]>, + // + // Double-precision FP Multiply + InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<7, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [6, 1, 1]>, + // + // Single-precision FP MAC + InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<9, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [8, 0, 1, 1]>, + // + // Double-precision FP MAC + InstrItinData<IIC_fpMAC64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<10, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [9, 0, 1, 1]>, + // + // Single-precision FP DIV + InstrItinData<IIC_fpDIV32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<16, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<10, [A9_NPipe]>], [15, 1, 1]>, + // + // Double-precision FP DIV + InstrItinData<IIC_fpDIV64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<26, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<20, [A9_NPipe]>], [25, 1, 1]>, + // + // Single-precision FP SQRT + InstrItinData<IIC_fpSQRT32, [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<18, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<13, [A9_NPipe]>], [17, 1]>, + // + // Double-precision FP SQRT + InstrItinData<IIC_fpSQRT64, [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<33, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<28, [A9_NPipe]>], [32, 1]>, + + // + // Integer to Single-precision Move + InstrItinData<IIC_fpMOVIS, [InstrStage<1, [A9_DRegsVFP], 0, Required>, + // Extra 1 latency cycle since wbck is 2 cycles + InstrStage<3, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [1, 1]>, + // + // Integer to Double-precision Move + InstrItinData<IIC_fpMOVID, [InstrStage<1, [A9_DRegsVFP], 0, Required>, + // Extra 1 latency cycle since wbck is 2 cycles + InstrStage<3, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [1, 1, 1]>, + // + // Single-precision to Integer Move + InstrItinData<IIC_fpMOVSI, [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<2, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [1, 1]>, + // + // Double-precision to Integer Move + InstrItinData<IIC_fpMOVDI, [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<2, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [1, 1, 1]>, + // + // Single-precision FP Load + // use A9_Issue to enforce the 1 load/store per cycle limit + InstrItinData<IIC_fpLoad32, [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<2, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Issue], 0>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_LSPipe], 0>, + InstrStage<1, [A9_NPipe]>]>, + // + // Double-precision FP Load + // use A9_Issue to enforce the 1 load/store per cycle limit + InstrItinData<IIC_fpLoad64, [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<2, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Issue], 0>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_LSPipe], 0>, + InstrStage<1, [A9_NPipe]>]>, + // + // FP Load Multiple + // use A9_Issue to enforce the 1 load/store per cycle limit + InstrItinData<IIC_fpLoadm, [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<2, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Issue], 0>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_LSPipe], 0>, + InstrStage<1, [A9_NPipe]>]>, + // + // Single-precision FP Store + // use A9_Issue to enforce the 1 load/store per cycle limit + InstrItinData<IIC_fpStore32,[InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<2, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Issue], 0>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_LSPipe], 0>, + InstrStage<1, [A9_NPipe]>]>, + // + // Double-precision FP Store + // use A9_Issue to enforce the 1 load/store per cycle limit + InstrItinData<IIC_fpStore64,[InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<2, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Issue], 0>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_LSPipe], 0>, + InstrStage<1, [A9_NPipe]>]>, + // + // FP Store Multiple + // use A9_Issue to enforce the 1 load/store per cycle limit + InstrItinData<IIC_fpStorem, [InstrStage<1, [A9_DRegsVFP], 0, Required>, + InstrStage<2, [A9_DRegsN], 0, Reserved>, + InstrStage<1, [A9_Issue], 0>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_LSPipe], 0>, + InstrStage<1, [A9_NPipe]>]>, + // NEON + // Issue through integer pipeline, and execute in NEON unit. + // FIXME: Neon pipeline and LdSt unit are multiplexed. + // Add some syntactic sugar to model this! + // VLD1 + // FIXME: We don't model this instruction properly + InstrItinData<IIC_VLD1, [InstrStage<1, [A9_DRegsN], 0, Required>, + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Issue], 0>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_LSPipe], 0>, + InstrStage<1, [A9_NPipe]>]>, + // + // VLD2 + // FIXME: We don't model this instruction properly + InstrItinData<IIC_VLD2, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Issue], 0>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_LSPipe], 0>, + InstrStage<1, [A9_NPipe]>], [2, 2, 1]>, + // + // VLD3 + // FIXME: We don't model this instruction properly + InstrItinData<IIC_VLD3, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Issue], 0>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_LSPipe], 0>, + InstrStage<1, [A9_NPipe]>], [2, 2, 2, 1]>, + // + // VLD4 + // FIXME: We don't model this instruction properly + InstrItinData<IIC_VLD4, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Issue], 0>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_LSPipe], 0>, + InstrStage<1, [A9_NPipe]>], [2, 2, 2, 2, 1]>, + // + // VST + // FIXME: We don't model this instruction properly + InstrItinData<IIC_VST, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Issue], 0>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_LSPipe], 0>, + InstrStage<1, [A9_NPipe]>]>, + // + // Double-register Integer Unary + InstrItinData<IIC_VUNAiD, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [4, 2]>, + // + // Quad-register Integer Unary + InstrItinData<IIC_VUNAiQ, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [4, 2]>, + // + // Double-register Integer Q-Unary + InstrItinData<IIC_VQUNAiD, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [4, 1]>, + // + // Quad-register Integer CountQ-Unary + InstrItinData<IIC_VQUNAiQ, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [4, 1]>, + // + // Double-register Integer Binary + InstrItinData<IIC_VBINiD, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [3, 2, 2]>, + // + // Quad-register Integer Binary + InstrItinData<IIC_VBINiQ, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [3, 2, 2]>, + // + // Double-register Integer Subtract + InstrItinData<IIC_VSUBiD, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [3, 2, 1]>, + // + // Quad-register Integer Subtract + InstrItinData<IIC_VSUBiQ, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [3, 2, 1]>, + // + // Double-register Integer Shift + InstrItinData<IIC_VSHLiD, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [3, 1, 1]>, + // + // Quad-register Integer Shift + InstrItinData<IIC_VSHLiQ, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [3, 1, 1]>, + // + // Double-register Integer Shift (4 cycle) + InstrItinData<IIC_VSHLi4D, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [4, 1, 1]>, + // + // Quad-register Integer Shift (4 cycle) + InstrItinData<IIC_VSHLi4Q, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [4, 1, 1]>, + // + // Double-register Integer Binary (4 cycle) + InstrItinData<IIC_VBINi4D, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [4, 2, 2]>, + // + // Quad-register Integer Binary (4 cycle) + InstrItinData<IIC_VBINi4Q, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [4, 2, 2]>, + // + // Double-register Integer Subtract (4 cycle) + InstrItinData<IIC_VSUBiD, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [4, 2, 1]>, + // + // Quad-register Integer Subtract (4 cycle) + InstrItinData<IIC_VSUBiQ, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [4, 2, 1]>, + + // + // Double-register Integer Count + InstrItinData<IIC_VCNTiD, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [3, 2, 2]>, + // + // Quad-register Integer Count + // Result written in N3, but that is relative to the last cycle of multicycle, + // so we use 4 for those cases + InstrItinData<IIC_VCNTiQ, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 7 cycles + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [4, 2, 2]>, + // + // Double-register Absolute Difference and Accumulate + InstrItinData<IIC_VABAD, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [6, 3, 2, 1]>, + // + // Quad-register Absolute Difference and Accumulate + InstrItinData<IIC_VABAQ, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [6, 3, 2, 1]>, + // + // Double-register Integer Pair Add Long + InstrItinData<IIC_VPALiD, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [6, 3, 1]>, + // + // Quad-register Integer Pair Add Long + InstrItinData<IIC_VPALiQ, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [6, 3, 1]>, + + // + // Double-register Integer Multiply (.8, .16) + InstrItinData<IIC_VMULi16D, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [6, 2, 2]>, + // + // Quad-register Integer Multiply (.8, .16) + InstrItinData<IIC_VMULi16Q, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 7 cycles + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [7, 2, 2]>, + + // + // Double-register Integer Multiply (.32) + InstrItinData<IIC_VMULi32D, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 7 cycles + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [7, 2, 1]>, + // + // Quad-register Integer Multiply (.32) + InstrItinData<IIC_VMULi32Q, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 9 cycles + InstrStage<10, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<4, [A9_NPipe]>], [9, 2, 1]>, + // + // Double-register Integer Multiply-Accumulate (.8, .16) + InstrItinData<IIC_VMACi16D, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [6, 3, 2, 2]>, + // + // Double-register Integer Multiply-Accumulate (.32) + InstrItinData<IIC_VMACi32D, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 7 cycles + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [7, 3, 2, 1]>, + // + // Quad-register Integer Multiply-Accumulate (.8, .16) + InstrItinData<IIC_VMACi16Q, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 7 cycles + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [7, 3, 2, 2]>, + // + // Quad-register Integer Multiply-Accumulate (.32) + InstrItinData<IIC_VMACi32Q, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 9 cycles + InstrStage<10, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<4, [A9_NPipe]>], [9, 3, 2, 1]>, + // + // Move Immediate + InstrItinData<IIC_VMOVImm, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [3]>, + // + // Double-register Permute Move + InstrItinData<IIC_VMOVD, [InstrStage<1, [A9_DRegsN], 0, Required>, + // FIXME: all latencies are arbitrary, no information is available + InstrStage<3, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_LSPipe]>], [2, 1]>, + // + // Quad-register Permute Move + // Result written in N2, but that is relative to the last cycle of multicycle, + // so we use 3 for those cases + InstrItinData<IIC_VMOVQ, [InstrStage<1, [A9_DRegsN], 0, Required>, + // FIXME: all latencies are arbitrary, no information is available + InstrStage<4, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [3, 1]>, + // + // Integer to Single-precision Move + InstrItinData<IIC_VMOVIS , [InstrStage<1, [A9_DRegsN], 0, Required>, + // FIXME: all latencies are arbitrary, no information is available + InstrStage<3, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [2, 1]>, + // + // Integer to Double-precision Move + InstrItinData<IIC_VMOVID , [InstrStage<1, [A9_DRegsN], 0, Required>, + // FIXME: all latencies are arbitrary, no information is available + InstrStage<3, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [2, 1, 1]>, + // + // Single-precision to Integer Move + InstrItinData<IIC_VMOVSI , [InstrStage<1, [A9_DRegsN], 0, Required>, + // FIXME: all latencies are arbitrary, no information is available + InstrStage<3, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [2, 1]>, + // + // Double-precision to Integer Move + InstrItinData<IIC_VMOVDI , [InstrStage<1, [A9_DRegsN], 0, Required>, + // FIXME: all latencies are arbitrary, no information is available + InstrStage<3, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [2, 2, 1]>, + // + // Integer to Lane Move + InstrItinData<IIC_VMOVISL , [InstrStage<1, [A9_DRegsN], 0, Required>, + // FIXME: all latencies are arbitrary, no information is available + InstrStage<4, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [3, 1, 1]>, + + // + // Double-register FP Unary + InstrItinData<IIC_VUNAD, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [5, 2]>, + // + // Quad-register FP Unary + // Result written in N5, but that is relative to the last cycle of multicycle, + // so we use 6 for those cases + InstrItinData<IIC_VUNAQ, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 7 cycles + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [6, 2]>, + // + // Double-register FP Binary + // FIXME: We're using this itin for many instructions and [2, 2] here is too + // optimistic. + InstrItinData<IIC_VBIND, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 7 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [5, 2, 2]>, + // + // Quad-register FP Binary + // Result written in N5, but that is relative to the last cycle of multicycle, + // so we use 6 for those cases + // FIXME: We're using this itin for many instructions and [2, 2] here is too + // optimistic. + InstrItinData<IIC_VBINQ, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 8 cycles + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [6, 2, 2]>, + // + // Double-register FP Multiple-Accumulate + InstrItinData<IIC_VMACD, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 7 cycles + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [6, 3, 2, 1]>, + // + // Quad-register FP Multiple-Accumulate + // Result written in N9, but that is relative to the last cycle of multicycle, + // so we use 10 for those cases + InstrItinData<IIC_VMACQ, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 9 cycles + InstrStage<10, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<4, [A9_NPipe]>], [8, 4, 2, 1]>, + // + // Double-register Reciprical Step + InstrItinData<IIC_VRECSD, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 7 cycles + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [6, 2, 2]>, + // + // Quad-register Reciprical Step + InstrItinData<IIC_VRECSQ, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 9 cycles + InstrStage<10, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<4, [A9_NPipe]>], [8, 2, 2]>, + // + // Double-register Permute + InstrItinData<IIC_VPERMD, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 6 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [2, 2, 1, 1]>, + // + // Quad-register Permute + // Result written in N2, but that is relative to the last cycle of multicycle, + // so we use 3 for those cases + InstrItinData<IIC_VPERMQ, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 7 cycles + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [3, 3, 1, 1]>, + // + // Quad-register Permute (3 cycle issue) + // Result written in N2, but that is relative to the last cycle of multicycle, + // so we use 4 for those cases + InstrItinData<IIC_VPERMQ3, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 8 cycles + InstrStage<9, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<3, [A9_LSPipe]>], [4, 4, 1, 1]>, + + // + // Double-register VEXT + InstrItinData<IIC_VEXTD, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 7 cycles + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_NPipe]>], [2, 1, 1]>, + // + // Quad-register VEXT + InstrItinData<IIC_VEXTQ, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 9 cycles + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [3, 1, 1]>, + // + // VTB + InstrItinData<IIC_VTB1, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 7 cycles + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [3, 2, 1]>, + InstrItinData<IIC_VTB2, [InstrStage<2, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 7 cycles + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [3, 2, 2, 1]>, + InstrItinData<IIC_VTB3, [InstrStage<2, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 8 cycles + InstrStage<9, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<3, [A9_NPipe]>], [4, 2, 2, 3, 1]>, + InstrItinData<IIC_VTB4, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 8 cycles + InstrStage<9, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<3, [A9_NPipe]>], [4, 2, 2, 3, 3, 1]>, + // + // VTBX + InstrItinData<IIC_VTBX1, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 7 cycles + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [3, 1, 2, 1]>, + InstrItinData<IIC_VTBX2, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 7 cycles + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [3, 1, 2, 2, 1]>, + InstrItinData<IIC_VTBX3, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 8 cycles + InstrStage<9, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<3, [A9_NPipe]>], [4, 1, 2, 2, 3, 1]>, + InstrItinData<IIC_VTBX4, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 8 cycles + InstrStage<9, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [4, 1, 2, 2, 3, 3, 1]> +]>; diff --git a/lib/Target/ARM/ARMScheduleV6.td b/lib/Target/ARM/ARMScheduleV6.td index 0fef466ad18cf..f813022d8741c 100644 --- a/lib/Target/ARM/ARMScheduleV6.td +++ b/lib/Target/ARM/ARMScheduleV6.td @@ -13,103 +13,107 @@ // Model based on ARM1176 // +// Functional Units +def V6_Pipe : FuncUnit; // pipeline + // Scheduling information derived from "ARM1176JZF-S Technical Reference Manual". // -def ARMV6Itineraries : ProcessorItineraries<[ +def ARMV6Itineraries : ProcessorItineraries< + [V6_Pipe], [ // // No operand cycles - InstrItinData<IIC_iALUx , [InstrStage<1, [FU_Pipe0]>]>, + InstrItinData<IIC_iALUx , [InstrStage<1, [V6_Pipe]>]>, // // Binary Instructions that produce a result - InstrItinData<IIC_iALUi , [InstrStage<1, [FU_Pipe0]>], [2, 2]>, - InstrItinData<IIC_iALUr , [InstrStage<1, [FU_Pipe0]>], [2, 2, 2]>, - InstrItinData<IIC_iALUsi , [InstrStage<1, [FU_Pipe0]>], [2, 2, 1]>, - InstrItinData<IIC_iALUsr , [InstrStage<2, [FU_Pipe0]>], [3, 3, 2, 1]>, + InstrItinData<IIC_iALUi , [InstrStage<1, [V6_Pipe]>], [2, 2]>, + InstrItinData<IIC_iALUr , [InstrStage<1, [V6_Pipe]>], [2, 2, 2]>, + InstrItinData<IIC_iALUsi , [InstrStage<1, [V6_Pipe]>], [2, 2, 1]>, + InstrItinData<IIC_iALUsr , [InstrStage<2, [V6_Pipe]>], [3, 3, 2, 1]>, // // Unary Instructions that produce a result - InstrItinData<IIC_iUNAr , [InstrStage<1, [FU_Pipe0]>], [2, 2]>, - InstrItinData<IIC_iUNAsi , [InstrStage<1, [FU_Pipe0]>], [2, 1]>, - InstrItinData<IIC_iUNAsr , [InstrStage<2, [FU_Pipe0]>], [3, 2, 1]>, + InstrItinData<IIC_iUNAr , [InstrStage<1, [V6_Pipe]>], [2, 2]>, + InstrItinData<IIC_iUNAsi , [InstrStage<1, [V6_Pipe]>], [2, 1]>, + InstrItinData<IIC_iUNAsr , [InstrStage<2, [V6_Pipe]>], [3, 2, 1]>, // // Compare instructions - InstrItinData<IIC_iCMPi , [InstrStage<1, [FU_Pipe0]>], [2]>, - InstrItinData<IIC_iCMPr , [InstrStage<1, [FU_Pipe0]>], [2, 2]>, - InstrItinData<IIC_iCMPsi , [InstrStage<1, [FU_Pipe0]>], [2, 1]>, - InstrItinData<IIC_iCMPsr , [InstrStage<2, [FU_Pipe0]>], [3, 2, 1]>, + InstrItinData<IIC_iCMPi , [InstrStage<1, [V6_Pipe]>], [2]>, + InstrItinData<IIC_iCMPr , [InstrStage<1, [V6_Pipe]>], [2, 2]>, + InstrItinData<IIC_iCMPsi , [InstrStage<1, [V6_Pipe]>], [2, 1]>, + InstrItinData<IIC_iCMPsr , [InstrStage<2, [V6_Pipe]>], [3, 2, 1]>, // // Move instructions, unconditional - InstrItinData<IIC_iMOVi , [InstrStage<1, [FU_Pipe0]>], [2]>, - InstrItinData<IIC_iMOVr , [InstrStage<1, [FU_Pipe0]>], [2, 2]>, - InstrItinData<IIC_iMOVsi , [InstrStage<1, [FU_Pipe0]>], [2, 1]>, - InstrItinData<IIC_iMOVsr , [InstrStage<2, [FU_Pipe0]>], [3, 2, 1]>, + InstrItinData<IIC_iMOVi , [InstrStage<1, [V6_Pipe]>], [2]>, + InstrItinData<IIC_iMOVr , [InstrStage<1, [V6_Pipe]>], [2, 2]>, + InstrItinData<IIC_iMOVsi , [InstrStage<1, [V6_Pipe]>], [2, 1]>, + InstrItinData<IIC_iMOVsr , [InstrStage<2, [V6_Pipe]>], [3, 2, 1]>, // // Move instructions, conditional - InstrItinData<IIC_iCMOVi , [InstrStage<1, [FU_Pipe0]>], [3]>, - InstrItinData<IIC_iCMOVr , [InstrStage<1, [FU_Pipe0]>], [3, 2]>, - InstrItinData<IIC_iCMOVsi , [InstrStage<1, [FU_Pipe0]>], [3, 1]>, - InstrItinData<IIC_iCMOVsr , [InstrStage<1, [FU_Pipe0]>], [4, 2, 1]>, + InstrItinData<IIC_iCMOVi , [InstrStage<1, [V6_Pipe]>], [3]>, + InstrItinData<IIC_iCMOVr , [InstrStage<1, [V6_Pipe]>], [3, 2]>, + InstrItinData<IIC_iCMOVsi , [InstrStage<1, [V6_Pipe]>], [3, 1]>, + InstrItinData<IIC_iCMOVsr , [InstrStage<1, [V6_Pipe]>], [4, 2, 1]>, // Integer multiply pipeline // - InstrItinData<IIC_iMUL16 , [InstrStage<1, [FU_Pipe0]>], [4, 1, 1]>, - InstrItinData<IIC_iMAC16 , [InstrStage<1, [FU_Pipe0]>], [4, 1, 1, 2]>, - InstrItinData<IIC_iMUL32 , [InstrStage<2, [FU_Pipe0]>], [5, 1, 1]>, - InstrItinData<IIC_iMAC32 , [InstrStage<2, [FU_Pipe0]>], [5, 1, 1, 2]>, - InstrItinData<IIC_iMUL64 , [InstrStage<3, [FU_Pipe0]>], [6, 1, 1]>, - InstrItinData<IIC_iMAC64 , [InstrStage<3, [FU_Pipe0]>], [6, 1, 1, 2]>, + InstrItinData<IIC_iMUL16 , [InstrStage<1, [V6_Pipe]>], [4, 1, 1]>, + InstrItinData<IIC_iMAC16 , [InstrStage<1, [V6_Pipe]>], [4, 1, 1, 2]>, + InstrItinData<IIC_iMUL32 , [InstrStage<2, [V6_Pipe]>], [5, 1, 1]>, + InstrItinData<IIC_iMAC32 , [InstrStage<2, [V6_Pipe]>], [5, 1, 1, 2]>, + InstrItinData<IIC_iMUL64 , [InstrStage<3, [V6_Pipe]>], [6, 1, 1]>, + InstrItinData<IIC_iMAC64 , [InstrStage<3, [V6_Pipe]>], [6, 1, 1, 2]>, // Integer load pipeline // // Immediate offset - InstrItinData<IIC_iLoadi , [InstrStage<1, [FU_Pipe0]>], [4, 1]>, + InstrItinData<IIC_iLoadi , [InstrStage<1, [V6_Pipe]>], [4, 1]>, // // Register offset - InstrItinData<IIC_iLoadr , [InstrStage<1, [FU_Pipe0]>], [4, 1, 1]>, + InstrItinData<IIC_iLoadr , [InstrStage<1, [V6_Pipe]>], [4, 1, 1]>, // // Scaled register offset, issues over 2 cycles - InstrItinData<IIC_iLoadsi , [InstrStage<2, [FU_Pipe0]>], [5, 2, 1]>, + InstrItinData<IIC_iLoadsi , [InstrStage<2, [V6_Pipe]>], [5, 2, 1]>, // // Immediate offset with update - InstrItinData<IIC_iLoadiu , [InstrStage<1, [FU_Pipe0]>], [4, 2, 1]>, + InstrItinData<IIC_iLoadiu , [InstrStage<1, [V6_Pipe]>], [4, 2, 1]>, // // Register offset with update - InstrItinData<IIC_iLoadru , [InstrStage<1, [FU_Pipe0]>], [4, 2, 1, 1]>, + InstrItinData<IIC_iLoadru , [InstrStage<1, [V6_Pipe]>], [4, 2, 1, 1]>, // // Scaled register offset with update, issues over 2 cycles - InstrItinData<IIC_iLoadsiu , [InstrStage<2, [FU_Pipe0]>], [5, 2, 2, 1]>, + InstrItinData<IIC_iLoadsiu , [InstrStage<2, [V6_Pipe]>], [5, 2, 2, 1]>, // // Load multiple - InstrItinData<IIC_iLoadm , [InstrStage<3, [FU_Pipe0]>]>, + InstrItinData<IIC_iLoadm , [InstrStage<3, [V6_Pipe]>]>, // Integer store pipeline // // Immediate offset - InstrItinData<IIC_iStorei , [InstrStage<1, [FU_Pipe0]>], [2, 1]>, + InstrItinData<IIC_iStorei , [InstrStage<1, [V6_Pipe]>], [2, 1]>, // // Register offset - InstrItinData<IIC_iStorer , [InstrStage<1, [FU_Pipe0]>], [2, 1, 1]>, + InstrItinData<IIC_iStorer , [InstrStage<1, [V6_Pipe]>], [2, 1, 1]>, // // Scaled register offset, issues over 2 cycles - InstrItinData<IIC_iStoresi , [InstrStage<2, [FU_Pipe0]>], [2, 2, 1]>, + InstrItinData<IIC_iStoresi , [InstrStage<2, [V6_Pipe]>], [2, 2, 1]>, // // Immediate offset with update - InstrItinData<IIC_iStoreiu , [InstrStage<1, [FU_Pipe0]>], [2, 2, 1]>, + InstrItinData<IIC_iStoreiu , [InstrStage<1, [V6_Pipe]>], [2, 2, 1]>, // // Register offset with update - InstrItinData<IIC_iStoreru , [InstrStage<1, [FU_Pipe0]>], [2, 2, 1, 1]>, + InstrItinData<IIC_iStoreru , [InstrStage<1, [V6_Pipe]>], [2, 2, 1, 1]>, // // Scaled register offset with update, issues over 2 cycles - InstrItinData<IIC_iStoresiu, [InstrStage<2, [FU_Pipe0]>], [2, 2, 2, 1]>, + InstrItinData<IIC_iStoresiu, [InstrStage<2, [V6_Pipe]>], [2, 2, 2, 1]>, // // Store multiple - InstrItinData<IIC_iStorem , [InstrStage<3, [FU_Pipe0]>]>, + InstrItinData<IIC_iStorem , [InstrStage<3, [V6_Pipe]>]>, // Branch // // no delay slots, so the latency of a branch is unimportant - InstrItinData<IIC_Br , [InstrStage<1, [FU_Pipe0]>]>, + InstrItinData<IIC_Br , [InstrStage<1, [V6_Pipe]>]>, // VFP // Issue through integer pipeline, and execute in NEON unit. We assume @@ -117,84 +121,84 @@ def ARMV6Itineraries : ProcessorItineraries<[ // possible. // // FP Special Register to Integer Register File Move - InstrItinData<IIC_fpSTAT , [InstrStage<1, [FU_Pipe0]>], [3]>, + InstrItinData<IIC_fpSTAT , [InstrStage<1, [V6_Pipe]>], [3]>, // // Single-precision FP Unary - InstrItinData<IIC_fpUNA32 , [InstrStage<1, [FU_Pipe0]>], [5, 2]>, + InstrItinData<IIC_fpUNA32 , [InstrStage<1, [V6_Pipe]>], [5, 2]>, // // Double-precision FP Unary - InstrItinData<IIC_fpUNA64 , [InstrStage<1, [FU_Pipe0]>], [5, 2]>, + InstrItinData<IIC_fpUNA64 , [InstrStage<1, [V6_Pipe]>], [5, 2]>, // // Single-precision FP Compare - InstrItinData<IIC_fpCMP32 , [InstrStage<1, [FU_Pipe0]>], [2, 2]>, + InstrItinData<IIC_fpCMP32 , [InstrStage<1, [V6_Pipe]>], [2, 2]>, // // Double-precision FP Compare - InstrItinData<IIC_fpCMP64 , [InstrStage<1, [FU_Pipe0]>], [2, 2]>, + InstrItinData<IIC_fpCMP64 , [InstrStage<1, [V6_Pipe]>], [2, 2]>, // // Single to Double FP Convert - InstrItinData<IIC_fpCVTSD , [InstrStage<1, [FU_Pipe0]>], [5, 2]>, + InstrItinData<IIC_fpCVTSD , [InstrStage<1, [V6_Pipe]>], [5, 2]>, // // Double to Single FP Convert - InstrItinData<IIC_fpCVTDS , [InstrStage<1, [FU_Pipe0]>], [5, 2]>, + InstrItinData<IIC_fpCVTDS , [InstrStage<1, [V6_Pipe]>], [5, 2]>, // // Single-Precision FP to Integer Convert - InstrItinData<IIC_fpCVTSI , [InstrStage<1, [FU_Pipe0]>], [9, 2]>, + InstrItinData<IIC_fpCVTSI , [InstrStage<1, [V6_Pipe]>], [9, 2]>, // // Double-Precision FP to Integer Convert - InstrItinData<IIC_fpCVTDI , [InstrStage<1, [FU_Pipe0]>], [9, 2]>, + InstrItinData<IIC_fpCVTDI , [InstrStage<1, [V6_Pipe]>], [9, 2]>, // // Integer to Single-Precision FP Convert - InstrItinData<IIC_fpCVTIS , [InstrStage<1, [FU_Pipe0]>], [9, 2]>, + InstrItinData<IIC_fpCVTIS , [InstrStage<1, [V6_Pipe]>], [9, 2]>, // // Integer to Double-Precision FP Convert - InstrItinData<IIC_fpCVTID , [InstrStage<1, [FU_Pipe0]>], [9, 2]>, + InstrItinData<IIC_fpCVTID , [InstrStage<1, [V6_Pipe]>], [9, 2]>, // // Single-precision FP ALU - InstrItinData<IIC_fpALU32 , [InstrStage<1, [FU_Pipe0]>], [9, 2, 2]>, + InstrItinData<IIC_fpALU32 , [InstrStage<1, [V6_Pipe]>], [9, 2, 2]>, // // Double-precision FP ALU - InstrItinData<IIC_fpALU64 , [InstrStage<1, [FU_Pipe0]>], [9, 2, 2]>, + InstrItinData<IIC_fpALU64 , [InstrStage<1, [V6_Pipe]>], [9, 2, 2]>, // // Single-precision FP Multiply - InstrItinData<IIC_fpMUL32 , [InstrStage<1, [FU_Pipe0]>], [9, 2, 2]>, + InstrItinData<IIC_fpMUL32 , [InstrStage<1, [V6_Pipe]>], [9, 2, 2]>, // // Double-precision FP Multiply - InstrItinData<IIC_fpMUL64 , [InstrStage<2, [FU_Pipe0]>], [9, 2, 2]>, + InstrItinData<IIC_fpMUL64 , [InstrStage<2, [V6_Pipe]>], [9, 2, 2]>, // // Single-precision FP MAC - InstrItinData<IIC_fpMAC32 , [InstrStage<1, [FU_Pipe0]>], [9, 2, 2, 2]>, + InstrItinData<IIC_fpMAC32 , [InstrStage<1, [V6_Pipe]>], [9, 2, 2, 2]>, // // Double-precision FP MAC - InstrItinData<IIC_fpMAC64 , [InstrStage<2, [FU_Pipe0]>], [9, 2, 2, 2]>, + InstrItinData<IIC_fpMAC64 , [InstrStage<2, [V6_Pipe]>], [9, 2, 2, 2]>, // // Single-precision FP DIV - InstrItinData<IIC_fpDIV32 , [InstrStage<15, [FU_Pipe0]>], [20, 2, 2]>, + InstrItinData<IIC_fpDIV32 , [InstrStage<15, [V6_Pipe]>], [20, 2, 2]>, // // Double-precision FP DIV - InstrItinData<IIC_fpDIV64 , [InstrStage<29, [FU_Pipe0]>], [34, 2, 2]>, + InstrItinData<IIC_fpDIV64 , [InstrStage<29, [V6_Pipe]>], [34, 2, 2]>, // // Single-precision FP SQRT - InstrItinData<IIC_fpSQRT32 , [InstrStage<15, [FU_Pipe0]>], [20, 2, 2]>, + InstrItinData<IIC_fpSQRT32 , [InstrStage<15, [V6_Pipe]>], [20, 2, 2]>, // // Double-precision FP SQRT - InstrItinData<IIC_fpSQRT64 , [InstrStage<29, [FU_Pipe0]>], [34, 2, 2]>, + InstrItinData<IIC_fpSQRT64 , [InstrStage<29, [V6_Pipe]>], [34, 2, 2]>, // // Single-precision FP Load - InstrItinData<IIC_fpLoad32 , [InstrStage<1, [FU_Pipe0]>], [5, 2, 2]>, + InstrItinData<IIC_fpLoad32 , [InstrStage<1, [V6_Pipe]>], [5, 2, 2]>, // // Double-precision FP Load - InstrItinData<IIC_fpLoad64 , [InstrStage<1, [FU_Pipe0]>], [5, 2, 2]>, + InstrItinData<IIC_fpLoad64 , [InstrStage<1, [V6_Pipe]>], [5, 2, 2]>, // // FP Load Multiple - InstrItinData<IIC_fpLoadm , [InstrStage<3, [FU_Pipe0]>]>, + InstrItinData<IIC_fpLoadm , [InstrStage<3, [V6_Pipe]>]>, // // Single-precision FP Store - InstrItinData<IIC_fpStore32 , [InstrStage<1, [FU_Pipe0]>], [2, 2, 2]>, + InstrItinData<IIC_fpStore32 , [InstrStage<1, [V6_Pipe]>], [2, 2, 2]>, // // Double-precision FP Store // use FU_Issue to enforce the 1 load/store per cycle limit - InstrItinData<IIC_fpStore64 , [InstrStage<1, [FU_Pipe0]>], [2, 2, 2]>, + InstrItinData<IIC_fpStore64 , [InstrStage<1, [V6_Pipe]>], [2, 2, 2]>, // // FP Store Multiple - InstrItinData<IIC_fpStorem , [InstrStage<3, [FU_Pipe0]>]> + InstrItinData<IIC_fpStorem , [InstrStage<3, [V6_Pipe]>]> ]>; diff --git a/lib/Target/ARM/ARMScheduleV7.td b/lib/Target/ARM/ARMScheduleV7.td deleted file mode 100644 index bbbf41397566f..0000000000000 --- a/lib/Target/ARM/ARMScheduleV7.td +++ /dev/null @@ -1,587 +0,0 @@ -//===- ARMScheduleV7.td - ARM v7 Scheduling Definitions ----*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the itinerary class data for the ARM v7 processors. -// -//===----------------------------------------------------------------------===// - -// -// Scheduling information derived from "Cortex-A8 Technical Reference Manual". -// -// Dual issue pipeline represented by FU_Pipe0 | FU_Pipe1 -// -def CortexA8Itineraries : ProcessorItineraries<[ - - // Two fully-pipelined integer ALU pipelines - // - // No operand cycles - InstrItinData<IIC_iALUx , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>]>, - // - // Binary Instructions that produce a result - InstrItinData<IIC_iALUi , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 2]>, - InstrItinData<IIC_iALUr , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 2, 2]>, - InstrItinData<IIC_iALUsi , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 2, 1]>, - InstrItinData<IIC_iALUsr , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 2, 1, 1]>, - // - // Unary Instructions that produce a result - InstrItinData<IIC_iUNAr , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 2]>, - InstrItinData<IIC_iUNAsi , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 1]>, - InstrItinData<IIC_iUNAsr , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 1, 1]>, - // - // Compare instructions - InstrItinData<IIC_iCMPi , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2]>, - InstrItinData<IIC_iCMPr , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 2]>, - InstrItinData<IIC_iCMPsi , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 1]>, - InstrItinData<IIC_iCMPsr , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 1, 1]>, - // - // Move instructions, unconditional - InstrItinData<IIC_iMOVi , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [1]>, - InstrItinData<IIC_iMOVr , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [1, 1]>, - InstrItinData<IIC_iMOVsi , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [1, 1]>, - InstrItinData<IIC_iMOVsr , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [1, 1, 1]>, - // - // Move instructions, conditional - InstrItinData<IIC_iCMOVi , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2]>, - InstrItinData<IIC_iCMOVr , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 1]>, - InstrItinData<IIC_iCMOVsi , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 1]>, - InstrItinData<IIC_iCMOVsr , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>], [2, 1, 1]>, - - // Integer multiply pipeline - // Result written in E5, but that is relative to the last cycle of multicycle, - // so we use 6 for those cases - // - InstrItinData<IIC_iMUL16 , [InstrStage<1, [FU_Pipe0]>], [5, 1, 1]>, - InstrItinData<IIC_iMAC16 , [InstrStage<1, [FU_Pipe1], 0>, - InstrStage<2, [FU_Pipe0]>], [6, 1, 1, 4]>, - InstrItinData<IIC_iMUL32 , [InstrStage<1, [FU_Pipe1], 0>, - InstrStage<2, [FU_Pipe0]>], [6, 1, 1]>, - InstrItinData<IIC_iMAC32 , [InstrStage<1, [FU_Pipe1], 0>, - InstrStage<2, [FU_Pipe0]>], [6, 1, 1, 4]>, - InstrItinData<IIC_iMUL64 , [InstrStage<2, [FU_Pipe1], 0>, - InstrStage<3, [FU_Pipe0]>], [6, 6, 1, 1]>, - InstrItinData<IIC_iMAC64 , [InstrStage<2, [FU_Pipe1], 0>, - InstrStage<3, [FU_Pipe0]>], [6, 6, 1, 1]>, - - // Integer load pipeline - // - // loads have an extra cycle of latency, but are fully pipelined - // use FU_Issue to enforce the 1 load/store per cycle limit - // - // Immediate offset - InstrItinData<IIC_iLoadi , [InstrStage<1, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0]>], [3, 1]>, - // - // Register offset - InstrItinData<IIC_iLoadr , [InstrStage<1, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0]>], [3, 1, 1]>, - // - // Scaled register offset, issues over 2 cycles - InstrItinData<IIC_iLoadsi , [InstrStage<2, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0], 0>, - InstrStage<1, [FU_Pipe1]>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0]>], [4, 1, 1]>, - // - // Immediate offset with update - InstrItinData<IIC_iLoadiu , [InstrStage<1, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0]>], [3, 2, 1]>, - // - // Register offset with update - InstrItinData<IIC_iLoadru , [InstrStage<1, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0]>], [3, 2, 1, 1]>, - // - // Scaled register offset with update, issues over 2 cycles - InstrItinData<IIC_iLoadsiu , [InstrStage<2, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0], 0>, - InstrStage<1, [FU_Pipe1]>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0]>], [4, 3, 1, 1]>, - // - // Load multiple - InstrItinData<IIC_iLoadm , [InstrStage<2, [FU_Issue], 0>, - InstrStage<2, [FU_Pipe0], 0>, - InstrStage<2, [FU_Pipe1]>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0]>]>, - - // Integer store pipeline - // - // use FU_Issue to enforce the 1 load/store per cycle limit - // - // Immediate offset - InstrItinData<IIC_iStorei , [InstrStage<1, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0]>], [3, 1]>, - // - // Register offset - InstrItinData<IIC_iStorer , [InstrStage<1, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0]>], [3, 1, 1]>, - // - // Scaled register offset, issues over 2 cycles - InstrItinData<IIC_iStoresi , [InstrStage<2, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0], 0>, - InstrStage<1, [FU_Pipe1]>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0]>], [3, 1, 1]>, - // - // Immediate offset with update - InstrItinData<IIC_iStoreiu , [InstrStage<1, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0]>], [2, 3, 1]>, - // - // Register offset with update - InstrItinData<IIC_iStoreru , [InstrStage<1, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0]>], [2, 3, 1, 1]>, - // - // Scaled register offset with update, issues over 2 cycles - InstrItinData<IIC_iStoresiu, [InstrStage<2, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0], 0>, - InstrStage<1, [FU_Pipe1]>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0]>], [3, 3, 1, 1]>, - // - // Store multiple - InstrItinData<IIC_iStorem , [InstrStage<2, [FU_Issue], 0>, - InstrStage<2, [FU_Pipe0], 0>, - InstrStage<2, [FU_Pipe1]>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0]>]>, - - // Branch - // - // no delay slots, so the latency of a branch is unimportant - InstrItinData<IIC_Br , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>]>, - - // VFP - // Issue through integer pipeline, and execute in NEON unit. We assume - // RunFast mode so that NFP pipeline is used for single-precision when - // possible. - // - // FP Special Register to Integer Register File Move - InstrItinData<IIC_fpSTAT , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NLSPipe]>]>, - // - // Single-precision FP Unary - InstrItinData<IIC_fpUNA32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [7, 1]>, - // - // Double-precision FP Unary - InstrItinData<IIC_fpUNA64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<4, [FU_NPipe], 0>, - InstrStage<4, [FU_NLSPipe]>], [4, 1]>, - // - // Single-precision FP Compare - InstrItinData<IIC_fpCMP32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [1, 1]>, - // - // Double-precision FP Compare - InstrItinData<IIC_fpCMP64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<4, [FU_NPipe], 0>, - InstrStage<4, [FU_NLSPipe]>], [4, 1]>, - // - // Single to Double FP Convert - InstrItinData<IIC_fpCVTSD , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<7, [FU_NPipe], 0>, - InstrStage<7, [FU_NLSPipe]>], [7, 1]>, - // - // Double to Single FP Convert - InstrItinData<IIC_fpCVTDS , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<5, [FU_NPipe], 0>, - InstrStage<5, [FU_NLSPipe]>], [5, 1]>, - // - // Single-Precision FP to Integer Convert - InstrItinData<IIC_fpCVTSI , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [7, 1]>, - // - // Double-Precision FP to Integer Convert - InstrItinData<IIC_fpCVTDI , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<8, [FU_NPipe], 0>, - InstrStage<8, [FU_NLSPipe]>], [8, 1]>, - // - // Integer to Single-Precision FP Convert - InstrItinData<IIC_fpCVTIS , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [7, 1]>, - // - // Integer to Double-Precision FP Convert - InstrItinData<IIC_fpCVTID , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<8, [FU_NPipe], 0>, - InstrStage<8, [FU_NLSPipe]>], [8, 1]>, - // - // Single-precision FP ALU - InstrItinData<IIC_fpALU32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [7, 1, 1]>, - // - // Double-precision FP ALU - InstrItinData<IIC_fpALU64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<9, [FU_NPipe], 0>, - InstrStage<9, [FU_NLSPipe]>], [9, 1, 1]>, - // - // Single-precision FP Multiply - InstrItinData<IIC_fpMUL32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [7, 1, 1]>, - // - // Double-precision FP Multiply - InstrItinData<IIC_fpMUL64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<11, [FU_NPipe], 0>, - InstrStage<11, [FU_NLSPipe]>], [11, 1, 1]>, - // - // Single-precision FP MAC - InstrItinData<IIC_fpMAC32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [7, 2, 1, 1]>, - // - // Double-precision FP MAC - InstrItinData<IIC_fpMAC64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<19, [FU_NPipe], 0>, - InstrStage<19, [FU_NLSPipe]>], [19, 2, 1, 1]>, - // - // Single-precision FP DIV - InstrItinData<IIC_fpDIV32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<20, [FU_NPipe], 0>, - InstrStage<20, [FU_NLSPipe]>], [20, 1, 1]>, - // - // Double-precision FP DIV - InstrItinData<IIC_fpDIV64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<29, [FU_NPipe], 0>, - InstrStage<29, [FU_NLSPipe]>], [29, 1, 1]>, - // - // Single-precision FP SQRT - InstrItinData<IIC_fpSQRT32, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<19, [FU_NPipe], 0>, - InstrStage<19, [FU_NLSPipe]>], [19, 1]>, - // - // Double-precision FP SQRT - InstrItinData<IIC_fpSQRT64, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<29, [FU_NPipe], 0>, - InstrStage<29, [FU_NLSPipe]>], [29, 1]>, - // - // Single-precision FP Load - // use FU_Issue to enforce the 1 load/store per cycle limit - InstrItinData<IIC_fpLoad32, [InstrStage<1, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0], 0>, - InstrStage<1, [FU_NLSPipe]>]>, - // - // Double-precision FP Load - // use FU_Issue to enforce the 1 load/store per cycle limit - InstrItinData<IIC_fpLoad64, [InstrStage<2, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0], 0>, - InstrStage<1, [FU_Pipe1]>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0], 0>, - InstrStage<1, [FU_NLSPipe]>]>, - // - // FP Load Multiple - // use FU_Issue to enforce the 1 load/store per cycle limit - InstrItinData<IIC_fpLoadm, [InstrStage<3, [FU_Issue], 0>, - InstrStage<2, [FU_Pipe0], 0>, - InstrStage<2, [FU_Pipe1]>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0], 0>, - InstrStage<1, [FU_NLSPipe]>]>, - // - // Single-precision FP Store - // use FU_Issue to enforce the 1 load/store per cycle limit - InstrItinData<IIC_fpStore32,[InstrStage<1, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0], 0>, - InstrStage<1, [FU_NLSPipe]>]>, - // - // Double-precision FP Store - // use FU_Issue to enforce the 1 load/store per cycle limit - InstrItinData<IIC_fpStore64,[InstrStage<2, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0], 0>, - InstrStage<1, [FU_Pipe1]>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0], 0>, - InstrStage<1, [FU_NLSPipe]>]>, - // - // FP Store Multiple - // use FU_Issue to enforce the 1 load/store per cycle limit - InstrItinData<IIC_fpStorem, [InstrStage<3, [FU_Issue], 0>, - InstrStage<2, [FU_Pipe0], 0>, - InstrStage<2, [FU_Pipe1]>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0], 0>, - InstrStage<1, [FU_NLSPipe]>]>, - - // NEON - // Issue through integer pipeline, and execute in NEON unit. - // - // VLD1 - InstrItinData<IIC_VLD1, [InstrStage<1, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0], 0>, - InstrStage<1, [FU_NLSPipe]>]>, - // - // VLD2 - InstrItinData<IIC_VLD2, [InstrStage<1, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0], 0>, - InstrStage<1, [FU_NLSPipe]>], [2, 2, 1]>, - // - // VLD3 - InstrItinData<IIC_VLD3, [InstrStage<1, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0], 0>, - InstrStage<1, [FU_NLSPipe]>], [2, 2, 2, 1]>, - // - // VLD4 - InstrItinData<IIC_VLD4, [InstrStage<1, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0], 0>, - InstrStage<1, [FU_NLSPipe]>], [2, 2, 2, 2, 1]>, - // - // VST - InstrItinData<IIC_VST, [InstrStage<1, [FU_Issue], 0>, - InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_LdSt0], 0>, - InstrStage<1, [FU_NLSPipe]>]>, - // - // Double-register FP Unary - InstrItinData<IIC_VUNAD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [5, 2]>, - // - // Quad-register FP Unary - // Result written in N5, but that is relative to the last cycle of multicycle, - // so we use 6 for those cases - InstrItinData<IIC_VUNAQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NPipe]>], [6, 2]>, - // - // Double-register FP Binary - InstrItinData<IIC_VBIND, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [5, 2, 2]>, - // - // Quad-register FP Binary - // Result written in N5, but that is relative to the last cycle of multicycle, - // so we use 6 for those cases - InstrItinData<IIC_VBINQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NPipe]>], [6, 2, 2]>, - // - // Move Immediate - InstrItinData<IIC_VMOVImm, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [3]>, - // - // Double-register Permute Move - InstrItinData<IIC_VMOVD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NLSPipe]>], [2, 1]>, - // - // Quad-register Permute Move - // Result written in N2, but that is relative to the last cycle of multicycle, - // so we use 3 for those cases - InstrItinData<IIC_VMOVQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NLSPipe]>], [3, 1]>, - // - // Integer to Single-precision Move - InstrItinData<IIC_VMOVIS , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NLSPipe]>], [2, 1]>, - // - // Integer to Double-precision Move - InstrItinData<IIC_VMOVID , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NLSPipe]>], [2, 1, 1]>, - // - // Single-precision to Integer Move - InstrItinData<IIC_VMOVSI , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NLSPipe]>], [20, 1]>, - // - // Double-precision to Integer Move - InstrItinData<IIC_VMOVDI , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NLSPipe]>], [20, 20, 1]>, - // - // Integer to Lane Move - InstrItinData<IIC_VMOVISL , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NLSPipe]>], [3, 1, 1]>, - // - // Double-register Permute - InstrItinData<IIC_VPERMD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NLSPipe]>], [2, 2, 1, 1]>, - // - // Quad-register Permute - // Result written in N2, but that is relative to the last cycle of multicycle, - // so we use 3 for those cases - InstrItinData<IIC_VPERMQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NLSPipe]>], [3, 3, 1, 1]>, - // - // Quad-register Permute (3 cycle issue) - // Result written in N2, but that is relative to the last cycle of multicycle, - // so we use 4 for those cases - InstrItinData<IIC_VPERMQ3, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NLSPipe]>, - InstrStage<1, [FU_NPipe], 0>, - InstrStage<2, [FU_NLSPipe]>], [4, 4, 1, 1]>, - // - // Double-register FP Multiple-Accumulate - InstrItinData<IIC_VMACD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [9, 2, 2, 3]>, - // - // Quad-register FP Multiple-Accumulate - // Result written in N9, but that is relative to the last cycle of multicycle, - // so we use 10 for those cases - InstrItinData<IIC_VMACQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NPipe]>], [10, 2, 2, 3]>, - // - // Double-register Reciprical Step - InstrItinData<IIC_VRECSD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [9, 2, 2]>, - // - // Quad-register Reciprical Step - InstrItinData<IIC_VRECSQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NPipe]>], [10, 2, 2]>, - // - // Double-register Integer Count - InstrItinData<IIC_VCNTiD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [3, 2, 2]>, - // - // Quad-register Integer Count - // Result written in N3, but that is relative to the last cycle of multicycle, - // so we use 4 for those cases - InstrItinData<IIC_VCNTiQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NPipe]>], [4, 2, 2]>, - // - // Double-register Integer Unary - InstrItinData<IIC_VUNAiD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [4, 2]>, - // - // Quad-register Integer Unary - InstrItinData<IIC_VUNAiQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [4, 2]>, - // - // Double-register Integer Q-Unary - InstrItinData<IIC_VQUNAiD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [4, 1]>, - // - // Quad-register Integer CountQ-Unary - InstrItinData<IIC_VQUNAiQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [4, 1]>, - // - // Double-register Integer Binary - InstrItinData<IIC_VBINiD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [3, 2, 2]>, - // - // Quad-register Integer Binary - InstrItinData<IIC_VBINiQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [3, 2, 2]>, - // - // Double-register Integer Binary (4 cycle) - InstrItinData<IIC_VBINi4D, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [4, 2, 1]>, - // - // Quad-register Integer Binary (4 cycle) - InstrItinData<IIC_VBINi4Q, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [4, 2, 1]>, - // - // Double-register Integer Subtract - InstrItinData<IIC_VSUBiD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [3, 2, 1]>, - // - // Quad-register Integer Subtract - InstrItinData<IIC_VSUBiQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [3, 2, 1]>, - // - // Double-register Integer Shift - InstrItinData<IIC_VSHLiD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [3, 1, 1]>, - // - // Quad-register Integer Shift - InstrItinData<IIC_VSHLiQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NPipe]>], [4, 1, 1]>, - // - // Double-register Integer Shift (4 cycle) - InstrItinData<IIC_VSHLi4D, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [4, 1, 1]>, - // - // Quad-register Integer Shift (4 cycle) - InstrItinData<IIC_VSHLi4Q, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NPipe]>], [5, 1, 1]>, - // - // Double-register Integer Pair Add Long - InstrItinData<IIC_VPALiD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [6, 3, 2, 1]>, - // - // Quad-register Integer Pair Add Long - InstrItinData<IIC_VPALiQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NPipe]>], [7, 3, 2, 1]>, - // - // Double-register Integer Multiply (.8, .16) - InstrItinData<IIC_VMULi16D, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [6, 2, 2]>, - // - // Double-register Integer Multiply (.32) - InstrItinData<IIC_VMULi32D, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NPipe]>], [7, 2, 1]>, - // - // Quad-register Integer Multiply (.8, .16) - InstrItinData<IIC_VMULi16Q, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NPipe]>], [7, 2, 2]>, - // - // Quad-register Integer Multiply (.32) - InstrItinData<IIC_VMULi32Q, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>, - InstrStage<2, [FU_NLSPipe], 0>, - InstrStage<3, [FU_NPipe]>], [9, 2, 1]>, - // - // Double-register Integer Multiply-Accumulate (.8, .16) - InstrItinData<IIC_VMACi16D, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [6, 2, 2, 3]>, - // - // Double-register Integer Multiply-Accumulate (.32) - InstrItinData<IIC_VMACi32D, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NPipe]>], [7, 2, 1, 3]>, - // - // Quad-register Integer Multiply-Accumulate (.8, .16) - InstrItinData<IIC_VMACi16Q, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NPipe]>], [7, 2, 2, 3]>, - // - // Quad-register Integer Multiply-Accumulate (.32) - InstrItinData<IIC_VMACi32Q, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>, - InstrStage<2, [FU_NLSPipe], 0>, - InstrStage<3, [FU_NPipe]>], [9, 2, 1, 3]>, - // - // Double-register VEXT - InstrItinData<IIC_VEXTD, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NLSPipe]>], [2, 1, 1]>, - // - // Quad-register VEXT - InstrItinData<IIC_VEXTQ, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NLSPipe]>], [3, 1, 1]>, - // - // VTB - InstrItinData<IIC_VTB1, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NLSPipe]>], [3, 2, 1]>, - InstrItinData<IIC_VTB2, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NLSPipe]>], [3, 2, 2, 1]>, - InstrItinData<IIC_VTB3, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NLSPipe]>, - InstrStage<1, [FU_NPipe], 0>, - InstrStage<2, [FU_NLSPipe]>], [4, 2, 2, 3, 1]>, - InstrItinData<IIC_VTB4, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NLSPipe]>, - InstrStage<1, [FU_NPipe], 0>, - InstrStage<2, [FU_NLSPipe]>], [4, 2, 2, 3, 3, 1]>, - // - // VTBX - InstrItinData<IIC_VTBX1, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NLSPipe]>], [3, 1, 2, 1]>, - InstrItinData<IIC_VTBX2, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<2, [FU_NLSPipe]>], [3, 1, 2, 2, 1]>, - InstrItinData<IIC_VTBX3, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NLSPipe]>, - InstrStage<1, [FU_NPipe], 0>, - InstrStage<2, [FU_NLSPipe]>], [4, 1, 2, 2, 3, 1]>, - InstrItinData<IIC_VTBX4, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NLSPipe]>, - InstrStage<1, [FU_NPipe], 0>, - InstrStage<2, [FU_NLSPipe]>], [4, 1, 2, 2, 3, 3, 1]> -]>; diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/lib/Target/ARM/ARMSelectionDAGInfo.cpp new file mode 100644 index 0000000000000..c04ee3829c1d5 --- /dev/null +++ b/lib/Target/ARM/ARMSelectionDAGInfo.cpp @@ -0,0 +1,22 @@ +//===-- ARMSelectionDAGInfo.cpp - ARM SelectionDAG Info -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the ARMSelectionDAGInfo class. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "arm-selectiondag-info" +#include "ARMSelectionDAGInfo.h" +using namespace llvm; + +ARMSelectionDAGInfo::ARMSelectionDAGInfo() { +} + +ARMSelectionDAGInfo::~ARMSelectionDAGInfo() { +} diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.h b/lib/Target/ARM/ARMSelectionDAGInfo.h new file mode 100644 index 0000000000000..afe9a47201bb6 --- /dev/null +++ b/lib/Target/ARM/ARMSelectionDAGInfo.h @@ -0,0 +1,29 @@ +//===-- ARMSelectionDAGInfo.h - ARM SelectionDAG Info -----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the ARM subclass for TargetSelectionDAGInfo. +// +//===----------------------------------------------------------------------===// + +#ifndef ARMSELECTIONDAGINFO_H +#define ARMSELECTIONDAGINFO_H + +#include "llvm/Target/TargetSelectionDAGInfo.h" + +namespace llvm { + +class ARMSelectionDAGInfo : public TargetSelectionDAGInfo { +public: + ARMSelectionDAGInfo(); + ~ARMSelectionDAGInfo(); +}; + +} + +#endif diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index 9e55cd870031b..b11580a65c009 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -116,7 +116,8 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS, /// GVIsIndirectSymbol - true if the GV will be accessed via an indirect symbol. bool -ARMSubtarget::GVIsIndirectSymbol(GlobalValue *GV, Reloc::Model RelocM) const { +ARMSubtarget::GVIsIndirectSymbol(const GlobalValue *GV, + Reloc::Model RelocM) const { if (RelocM == Reloc::Static) return false; diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index fa56a9195bc26..288a19a45712b 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -160,7 +160,7 @@ protected: /// GVIsIndirectSymbol - true if the GV will be accessed via an indirect /// symbol. - bool GVIsIndirectSymbol(GlobalValue *GV, Reloc::Model RelocM) const; + bool GVIsIndirectSymbol(const GlobalValue *GV, Reloc::Model RelocM) const; }; } // End llvm namespace diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index 95f57b7b34fa2..662e61e36c4b1 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -102,8 +102,12 @@ bool ARMBaseTargetMachine::addPreRegAlloc(PassManagerBase &PM, bool ARMBaseTargetMachine::addPreSched2(PassManagerBase &PM, CodeGenOpt::Level OptLevel) { // FIXME: temporarily disabling load / store optimization pass for Thumb1. - if (OptLevel != CodeGenOpt::None && !Subtarget.isThumb1Only()) - PM.add(createARMLoadStoreOptimizationPass()); + if (OptLevel != CodeGenOpt::None) { + if (!Subtarget.isThumb1Only()) + PM.add(createARMLoadStoreOptimizationPass()); + if (Subtarget.hasNEON()) + PM.add(createNEONMoveFixPass()); + } // Expand some pseudo instructions into multiple instructions to allow // proper scheduling. @@ -118,8 +122,6 @@ bool ARMBaseTargetMachine::addPreEmitPass(PassManagerBase &PM, if (OptLevel != CodeGenOpt::None) { if (!Subtarget.isThumb1Only()) PM.add(createIfConverterPass()); - if (Subtarget.hasNEON()) - PM.add(createNEONMoveFixPass()); } if (Subtarget.isThumb2()) { diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h index c32f16c77a244..4e205df9a3131 100644 --- a/lib/Target/ARM/ARMTargetMachine.h +++ b/lib/Target/ARM/ARMTargetMachine.h @@ -71,8 +71,8 @@ public: return &InstrInfo.getRegisterInfo(); } - virtual ARMTargetLowering *getTargetLowering() const { - return const_cast<ARMTargetLowering*>(&TLInfo); + virtual const ARMTargetLowering *getTargetLowering() const { + return &TLInfo; } virtual const ARMInstrInfo *getInstrInfo() const { return &InstrInfo; } @@ -97,8 +97,8 @@ public: return &InstrInfo->getRegisterInfo(); } - virtual ARMTargetLowering *getTargetLowering() const { - return const_cast<ARMTargetLowering*>(&TLInfo); + virtual const ARMTargetLowering *getTargetLowering() const { + return &TLInfo; } /// returns either Thumb1InstrInfo or Thumb2InstrInfo diff --git a/lib/Target/ARM/ARMTargetObjectFile.cpp b/lib/Target/ARM/ARMTargetObjectFile.cpp index 680d03254f10c..091a3b3d8497b 100644 --- a/lib/Target/ARM/ARMTargetObjectFile.cpp +++ b/lib/Target/ARM/ARMTargetObjectFile.cpp @@ -9,6 +9,7 @@ #include "ARMTargetObjectFile.h" #include "ARMSubtarget.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/Support/Dwarf.h" #include "llvm/Target/TargetMachine.h" @@ -25,12 +26,14 @@ void ARMElfTargetObjectFile::Initialize(MCContext &Ctx, if (TM.getSubtarget<ARMSubtarget>().isAAPCS_ABI()) { StaticCtorSection = - getELFSection(".init_array", MCSectionELF::SHT_INIT_ARRAY, - MCSectionELF::SHF_WRITE | MCSectionELF::SHF_ALLOC, - SectionKind::getDataRel()); + getContext().getELFSection(".init_array", MCSectionELF::SHT_INIT_ARRAY, + MCSectionELF::SHF_WRITE | + MCSectionELF::SHF_ALLOC, + SectionKind::getDataRel()); StaticDtorSection = - getELFSection(".fini_array", MCSectionELF::SHT_FINI_ARRAY, - MCSectionELF::SHF_WRITE | MCSectionELF::SHF_ALLOC, - SectionKind::getDataRel()); + getContext().getELFSection(".fini_array", MCSectionELF::SHT_FINI_ARRAY, + MCSectionELF::SHF_WRITE | + MCSectionELF::SHF_ALLOC, + SectionKind::getDataRel()); } } diff --git a/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp b/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp new file mode 100644 index 0000000000000..f859d1b1c95f0 --- /dev/null +++ b/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp @@ -0,0 +1,140 @@ +//===-- ARMAsmLexer.cpp - Tokenize ARM assembly to AsmTokens --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "ARM.h" +#include "ARMTargetMachine.h" + +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" + +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCParser/MCAsmLexer.h" +#include "llvm/MC/MCParser/MCParsedAsmOperand.h" + +#include "llvm/Target/TargetAsmLexer.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegistry.h" + +#include <string> +#include <map> + +using namespace llvm; + +namespace { + + class ARMBaseAsmLexer : public TargetAsmLexer { + const MCAsmInfo &AsmInfo; + + const AsmToken &lexDefinite() { + return getLexer()->Lex(); + } + + AsmToken LexTokenUAL(); + protected: + typedef std::map <std::string, unsigned> rmap_ty; + + rmap_ty RegisterMap; + + void InitRegisterMap(const TargetRegisterInfo *info) { + unsigned numRegs = info->getNumRegs(); + + for (unsigned i = 0; i < numRegs; ++i) { + const char *regName = info->getName(i); + if (regName) + RegisterMap[regName] = i; + } + } + + unsigned MatchRegisterName(StringRef Name) { + rmap_ty::iterator iter = RegisterMap.find(Name.str()); + if (iter != RegisterMap.end()) + return iter->second; + else + return 0; + } + + AsmToken LexToken() { + if (!Lexer) { + SetError(SMLoc(), "No MCAsmLexer installed"); + return AsmToken(AsmToken::Error, "", 0); + } + + switch (AsmInfo.getAssemblerDialect()) { + default: + SetError(SMLoc(), "Unhandled dialect"); + return AsmToken(AsmToken::Error, "", 0); + case 0: + return LexTokenUAL(); + } + } + public: + ARMBaseAsmLexer(const Target &T, const MCAsmInfo &MAI) + : TargetAsmLexer(T), AsmInfo(MAI) { + } + }; + + class ARMAsmLexer : public ARMBaseAsmLexer { + public: + ARMAsmLexer(const Target &T, const MCAsmInfo &MAI) + : ARMBaseAsmLexer(T, MAI) { + std::string tripleString("arm-unknown-unknown"); + std::string featureString; + OwningPtr<const TargetMachine> + targetMachine(T.createTargetMachine(tripleString, featureString)); + InitRegisterMap(targetMachine->getRegisterInfo()); + } + }; + + class ThumbAsmLexer : public ARMBaseAsmLexer { + public: + ThumbAsmLexer(const Target &T, const MCAsmInfo &MAI) + : ARMBaseAsmLexer(T, MAI) { + std::string tripleString("thumb-unknown-unknown"); + std::string featureString; + OwningPtr<const TargetMachine> + targetMachine(T.createTargetMachine(tripleString, featureString)); + InitRegisterMap(targetMachine->getRegisterInfo()); + } + }; +} + +AsmToken ARMBaseAsmLexer::LexTokenUAL() { + const AsmToken &lexedToken = lexDefinite(); + + switch (lexedToken.getKind()) { + default: + return AsmToken(lexedToken); + case AsmToken::Error: + SetError(Lexer->getErrLoc(), Lexer->getErr()); + return AsmToken(lexedToken); + case AsmToken::Identifier: + { + std::string upperCase = lexedToken.getString().str(); + std::string lowerCase = LowercaseString(upperCase); + StringRef lowerRef(lowerCase); + + unsigned regID = MatchRegisterName(lowerRef); + + if (regID) { + return AsmToken(AsmToken::Register, + lexedToken.getString(), + static_cast<int64_t>(regID)); + } else { + return AsmToken(lexedToken); + } + } + } +} + +extern "C" void LLVMInitializeARMAsmLexer() { + RegisterAsmLexer<ARMAsmLexer> X(TheARMTarget); + RegisterAsmLexer<ThumbAsmLexer> Y(TheThumbTarget); +} + diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index cf55377bc6776..bfa89c4a4696e 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -812,8 +812,11 @@ bool ARMAsmParser::ParseDirectiveCode(SMLoc L) { return false; } +extern "C" void LLVMInitializeARMAsmLexer(); + /// Force static initialization. extern "C" void LLVMInitializeARMAsmParser() { RegisterAsmParser<ARMAsmParser> X(TheARMTarget); RegisterAsmParser<ARMAsmParser> Y(TheThumbTarget); + LLVMInitializeARMAsmLexer(); } diff --git a/lib/Target/ARM/AsmParser/CMakeLists.txt b/lib/Target/ARM/AsmParser/CMakeLists.txt index 308c6cff8da90..9ba7c0125d7f5 100644 --- a/lib/Target/ARM/AsmParser/CMakeLists.txt +++ b/lib/Target/ARM/AsmParser/CMakeLists.txt @@ -1,6 +1,7 @@ include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) add_llvm_library(LLVMARMAsmParser + ARMAsmLexer.cpp ARMAsmParser.cpp ) diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp index 15c52946cf589..80a9d2d714e6e 100644 --- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp @@ -21,6 +21,7 @@ #include "ARMMachineFunctionInfo.h" #include "ARMMCInstLower.h" #include "ARMTargetMachine.h" +#include "llvm/Analysis/DebugInfo.h" #include "llvm/Constants.h" #include "llvm/Module.h" #include "llvm/Type.h" @@ -239,7 +240,7 @@ namespace { } else if (ACPV->isBlockAddress()) { O << *GetBlockAddressSymbol(ACPV->getBlockAddress()); } else if (ACPV->isGlobalValue()) { - GlobalValue *GV = ACPV->getGV(); + const GlobalValue *GV = ACPV->getGV(); bool isIndirect = Subtarget->isTargetDarwin() && Subtarget->GVIsIndirectSymbol(GV, TM.getRelocationModel()); if (!isIndirect) @@ -352,7 +353,7 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, return; case MachineOperand::MO_GlobalAddress: { bool isCallOp = Modifier && !strcmp(Modifier, "call"); - GlobalValue *GV = MO.getGlobal(); + const GlobalValue *GV = MO.getGlobal(); if ((Modifier && strcmp(Modifier, "lo16") == 0) || (TF & ARMII::MO_LO16)) @@ -504,7 +505,6 @@ void ARMAsmPrinter::printAddrMode2OffsetOperand(const MachineInstr *MI, int Op, if (!MO1.getReg()) { unsigned ImmOffs = ARM_AM::getAM2Offset(MO2.getImm()); - assert(ImmOffs && "Malformed indexed load / store!"); O << "#" << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO2.getImm())) << ImmOffs; @@ -556,7 +556,6 @@ void ARMAsmPrinter::printAddrMode3OffsetOperand(const MachineInstr *MI, int Op, } unsigned ImmOffs = ARM_AM::getAM3Offset(MO2.getImm()); - assert(ImmOffs && "Malformed indexed load / store!"); O << "#" << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO2.getImm())) << ImmOffs; @@ -1110,6 +1109,24 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { SmallString<128> Str; raw_svector_ostream OS(Str); + if (MI->getOpcode() == ARM::DBG_VALUE) { + unsigned NOps = MI->getNumOperands(); + assert(NOps==4); + OS << '\t' << MAI->getCommentString() << "DEBUG_VALUE: "; + // cast away const; DIetc do not take const operands for some reason. + DIVariable V(const_cast<MDNode *>(MI->getOperand(NOps-1).getMetadata())); + OS << V.getName(); + OS << " <- "; + // Frame address. Currently handles register +- offset only. + assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm()); + OS << '['; printOperand(MI, 0, OS); OS << '+'; printOperand(MI, 1, OS); + OS << ']'; + OS << "+"; + printOperand(MI, NOps-2, OS); + OutStreamer.EmitRawText(OS.str()); + return; + } + printInstruction(MI, OS); OutStreamer.EmitRawText(OS.str()); @@ -1129,22 +1146,23 @@ void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) { // avoid out-of-range branches that are due a fundamental limitation of // the way symbol offsets are encoded with the current Darwin ARM // relocations. - TargetLoweringObjectFileMachO &TLOFMacho = - static_cast<TargetLoweringObjectFileMachO &>(getObjFileLowering()); + const TargetLoweringObjectFileMachO &TLOFMacho = + static_cast<const TargetLoweringObjectFileMachO &>( + getObjFileLowering()); OutStreamer.SwitchSection(TLOFMacho.getTextSection()); OutStreamer.SwitchSection(TLOFMacho.getTextCoalSection()); OutStreamer.SwitchSection(TLOFMacho.getConstTextCoalSection()); if (RelocM == Reloc::DynamicNoPIC) { const MCSection *sect = - TLOFMacho.getMachOSection("__TEXT", "__symbol_stub4", - MCSectionMachO::S_SYMBOL_STUBS, - 12, SectionKind::getText()); + OutContext.getMachOSection("__TEXT", "__symbol_stub4", + MCSectionMachO::S_SYMBOL_STUBS, + 12, SectionKind::getText()); OutStreamer.SwitchSection(sect); } else { const MCSection *sect = - TLOFMacho.getMachOSection("__TEXT", "__picsymbolstub4", - MCSectionMachO::S_SYMBOL_STUBS, - 16, SectionKind::getText()); + OutContext.getMachOSection("__TEXT", "__picsymbolstub4", + MCSectionMachO::S_SYMBOL_STUBS, + 16, SectionKind::getText()); OutStreamer.SwitchSection(sect); } } @@ -1201,8 +1219,8 @@ void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) { void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) { if (Subtarget->isTargetDarwin()) { // All darwin targets use mach-o. - TargetLoweringObjectFileMachO &TLOFMacho = - static_cast<TargetLoweringObjectFileMachO &>(getObjFileLowering()); + const TargetLoweringObjectFileMachO &TLOFMacho = + static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering()); MachineModuleInfoMachO &MMIMacho = MMI->getObjFileInfo<MachineModuleInfoMachO>(); diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp index ef5ead6e473b6..ac6331f931fc9 100644 --- a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp +++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp @@ -18,6 +18,7 @@ #include "llvm/MC/MCInst.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -330,7 +331,6 @@ void ARMInstPrinter::printAddrMode2OffsetOperand(const MCInst *MI, if (!MO1.getReg()) { unsigned ImmOffs = ARM_AM::getAM2Offset(MO2.getImm()); - assert(ImmOffs && "Malformed indexed load / store!"); O << '#' << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO2.getImm())) << ImmOffs; @@ -380,7 +380,6 @@ void ARMInstPrinter::printAddrMode3OffsetOperand(const MCInst *MI, } unsigned ImmOffs = ARM_AM::getAM3Offset(MO2.getImm()); - assert(ImmOffs && "Malformed indexed load / store!"); O << '#' << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO2.getImm())) << ImmOffs; @@ -779,3 +778,22 @@ void ARMInstPrinter::printVFPf64ImmOperand(const MCInst *MI, unsigned OpNum, O << '#' << MI->getOperand(OpNum).getImm(); } +void ARMInstPrinter::printHex8ImmOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + O << "#0x" << utohexstr(MI->getOperand(OpNum).getImm() & 0xff); +} + +void ARMInstPrinter::printHex16ImmOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + O << "#0x" << utohexstr(MI->getOperand(OpNum).getImm() & 0xffff); +} + +void ARMInstPrinter::printHex32ImmOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + O << "#0x" << utohexstr(MI->getOperand(OpNum).getImm() & 0xffffffff); +} + +void ARMInstPrinter::printHex64ImmOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + O << "#0x" << utohexstr(MI->getOperand(OpNum).getImm()); +} diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h index dd006fc52e747..be0b7c1eb0278 100644 --- a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h +++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h @@ -104,10 +104,10 @@ public: void printNoHashImmediate(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printVFPf32ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printVFPf64ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printHex8ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O) {} - void printHex16ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O) {} - void printHex32ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O) {} - void printHex64ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O) {} + void printHex8ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printHex16ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printHex32ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printHex64ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printPCLabel(const MCInst *MI, unsigned OpNum, raw_ostream &O); // FIXME: Implement. diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt index bbc0095f6ae7a..29e66e13b168e 100644 --- a/lib/Target/ARM/CMakeLists.txt +++ b/lib/Target/ARM/CMakeLists.txt @@ -10,6 +10,7 @@ tablegen(ARMGenAsmWriter.inc -gen-asm-writer) tablegen(ARMGenDAGISel.inc -gen-dag-isel) tablegen(ARMGenCallingConv.inc -gen-callingconv) tablegen(ARMGenSubtarget.inc -gen-subtarget) +tablegen(ARMGenEDInfo.inc -gen-enhanced-disassembly-info) add_llvm_target(ARMCodeGen ARMBaseInstrInfo.cpp @@ -36,6 +37,7 @@ add_llvm_target(ARMCodeGen Thumb2InstrInfo.cpp Thumb2RegisterInfo.cpp Thumb2SizeReduction.cpp + ARMSelectionDAGInfo.cpp ) target_link_libraries (LLVMARMCodeGen LLVMSelectionDAG) diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index 47c31043d9c05..4de697e8bf676 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -18,6 +18,7 @@ #include "ARMDisassembler.h" #include "ARMDisassemblerCore.h" +#include "llvm/MC/EDInstInfo.h" #include "llvm/MC/MCInst.h" #include "llvm/Target/TargetRegistry.h" #include "llvm/Support/Debug.h" @@ -38,7 +39,9 @@ /// #include "../ARMGenDecoderTables.inc" -namespace llvm { +#include "../ARMGenEDInfo.inc" + +using namespace llvm; /// showBitVector - Use the raw_ostream to log a diagnostic message describing /// the inidividual bits of the instruction. @@ -247,27 +250,27 @@ static unsigned T2Morph2LoadLiteral(unsigned Opcode) { case ARM::t2LDR_POST: case ARM::t2LDR_PRE: case ARM::t2LDRi12: case ARM::t2LDRi8: - case ARM::t2LDRs: + case ARM::t2LDRs: case ARM::t2LDRT: return ARM::t2LDRpci; case ARM::t2LDRB_POST: case ARM::t2LDRB_PRE: case ARM::t2LDRBi12: case ARM::t2LDRBi8: - case ARM::t2LDRBs: + case ARM::t2LDRBs: case ARM::t2LDRBT: return ARM::t2LDRBpci; case ARM::t2LDRH_POST: case ARM::t2LDRH_PRE: case ARM::t2LDRHi12: case ARM::t2LDRHi8: - case ARM::t2LDRHs: + case ARM::t2LDRHs: case ARM::t2LDRHT: return ARM::t2LDRHpci; case ARM::t2LDRSB_POST: case ARM::t2LDRSB_PRE: case ARM::t2LDRSBi12: case ARM::t2LDRSBi8: - case ARM::t2LDRSBs: + case ARM::t2LDRSBs: case ARM::t2LDRSBT: return ARM::t2LDRSBpci; case ARM::t2LDRSH_POST: case ARM::t2LDRSH_PRE: case ARM::t2LDRSHi12: case ARM::t2LDRSHi8: - case ARM::t2LDRSHs: + case ARM::t2LDRSHs: case ARM::t2LDRSHT: return ARM::t2LDRSHpci; } } @@ -404,7 +407,6 @@ bool ARMDisassembler::getInstruction(MCInst &MI, }); ARMBasicMCBuilder *Builder = CreateMCBuilder(Opcode, Format); - if (!Builder) return false; @@ -492,11 +494,11 @@ bool ThumbDisassembler::getInstruction(MCInst &MI, }); ARMBasicMCBuilder *Builder = CreateMCBuilder(Opcode, Format); - Builder->setSession(const_cast<Session *>(&SO)); - if (!Builder) return false; + Builder->SetSession(const_cast<Session *>(&SO)); + if (!Builder->Build(MI, insn)) return false; @@ -506,17 +508,37 @@ bool ThumbDisassembler::getInstruction(MCInst &MI, } // A8.6.50 +// Valid return values are {1, 2, 3, 4}, with 0 signifying an error condition. static unsigned short CountITSize(unsigned ITMask) { // First count the trailing zeros of the IT mask. unsigned TZ = CountTrailingZeros_32(ITMask); - assert(TZ <= 3 && "Encoding error"); + if (TZ > 3) { + DEBUG(errs() << "Encoding error: IT Mask '0000'"); + return 0; + } return (4 - TZ); } -/// Init ITState. -void Session::InitIT(unsigned short bits7_0) { +/// Init ITState. Note that at least one bit is always 1 in mask. +bool Session::InitIT(unsigned short bits7_0) { ITCounter = CountITSize(slice(bits7_0, 3, 0)); + if (ITCounter == 0) + return false; + + // A8.6.50 IT + unsigned short FirstCond = slice(bits7_0, 7, 4); + if (FirstCond == 0xF) { + DEBUG(errs() << "Encoding error: IT FirstCond '1111'"); + return false; + } + if (FirstCond == 0xE && ITCounter != 1) { + DEBUG(errs() << "Encoding error: IT FirstCond '1110' && Mask != '1000'"); + return false; + } + ITState = bits7_0; + + return true; } /// Update ITState if necessary. @@ -547,4 +569,10 @@ extern "C" void LLVMInitializeARMDisassembler() { createThumbDisassembler); } -} // namespace llvm +EDInstInfo *ARMDisassembler::getEDInfo() const { + return instInfoARM; +} + +EDInstInfo *ThumbDisassembler::getEDInfo() const { + return instInfoARM; +} diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.h b/lib/Target/ARM/Disassembler/ARMDisassembler.h index 44592e0f15672..0a74a3866eedc 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.h +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.h @@ -24,6 +24,8 @@ class MCInst; class MemoryObject; class raw_ostream; +struct EDInstInfo; + /// ARMDisassembler - ARM disassembler for all ARM platforms. class ARMDisassembler : public MCDisassembler { public: @@ -42,6 +44,9 @@ public: const MemoryObject ®ion, uint64_t address, raw_ostream &vStream) const; + + /// getEDInfo - See MCDisassembler. + EDInstInfo *getEDInfo() const; private: }; @@ -55,7 +60,7 @@ public: Session() : ITCounter(0), ITState(0) {} ~Session() {} /// InitIT - Initializes ITCounter/ITState. - void InitIT(unsigned short bits7_0); + bool InitIT(unsigned short bits7_0); /// UpdateIT - Updates ITCounter/ITState as IT Block progresses. void UpdateIT(); @@ -82,6 +87,9 @@ public: const MemoryObject ®ion, uint64_t address, raw_ostream &vStream) const; + + /// getEDInfo - See MCDisassembler. + EDInstInfo *getEDInfo() const; private: Session SO; }; diff --git a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp index db921ef0b628a..adb7795a746c3 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp @@ -13,8 +13,12 @@ // //===----------------------------------------------------------------------===// +#define DEBUG_TYPE "arm-disassembler" + #include "ARMDisassemblerCore.h" #include "ARMAddressingModes.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" /// ARMGenInstrInfo.inc - ARMGenInstrInfo.inc contains the static const /// TargetInstrDesc ARMInsts[] definition and the TargetOperandInfo[]'s @@ -75,7 +79,7 @@ const char *ARMUtils::OpcodeName(unsigned Opcode) { // Return the register enum Based on RegClass and the raw register number. // For DRegPair, see comments below. // FIXME: Auto-gened? -static unsigned getRegisterEnum(unsigned RegClassID, unsigned RawRegister, +static unsigned getRegisterEnum(BO B, unsigned RegClassID, unsigned RawRegister, bool DRegPair = false) { if (DRegPair && RegClassID == ARM::QPRRegClassID) { @@ -345,7 +349,9 @@ static unsigned getRegisterEnum(unsigned RegClassID, unsigned RawRegister, } break; } - assert(0 && "Invalid (RegClassID, RawRegister) combination"); + DEBUG(errs() << "Invalid (RegClassID, RawRegister) combination\n"); + // Encoding error. Mark the builder with error code != 0. + B->SetErr(-1); return 0; } @@ -509,7 +515,7 @@ static bool DisassemblePseudo(MCInst &MI, unsigned Opcode, uint32_t insn, // Inst{3-0} => Rm // Inst{11-8} => Rs static bool DisassembleMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetInstrDesc &TID = ARMInsts[Opcode]; unsigned short NumDefs = TID.getNumDefs(); @@ -529,26 +535,26 @@ static bool DisassembleMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn, if (NumDefs == 2) { assert(NumOps >= 4 && OpInfo[3].RegClass == ARM::GPRRegClassID && "Expect 4th register operand"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)))); ++OpIdx; } // The destination register: RdHi{19-16} or Rd{19-16}. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); // The two src regsiters: Rn{3-0}, then Rm{11-8}. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRs(insn)))); OpIdx += 3; // Many multiply instructions (e.g., MLA) have three src registers. // The third register operand is Ra{15-12}. if (OpIdx < NumOps && OpInfo[OpIdx].RegClass == ARM::GPRRegClassID) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)))); ++OpIdx; } @@ -610,7 +616,7 @@ static inline unsigned GetCopOpc(uint32_t insn) { // and friends // static bool DisassembleCoprocessor(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { assert(NumOps >= 5 && "Num of operands >= 5 for coprocessor instr"); @@ -631,7 +637,7 @@ static bool DisassembleCoprocessor(MCInst &MI, unsigned Opcode, uint32_t insn, MI.addOperand(MCOperand::CreateImm(decodeRd(insn))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); if (PW) { @@ -651,11 +657,11 @@ static bool DisassembleCoprocessor(MCInst &MI, unsigned Opcode, uint32_t insn, MI.addOperand(NoGPR ? MCOperand::CreateImm(decodeRd(insn)) : MCOperand::CreateReg( - getRegisterEnum(ARM::GPRRegClassID, + getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)))); MI.addOperand(OneCopOpc ? MCOperand::CreateReg( - getRegisterEnum(ARM::GPRRegClassID, + getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn))) : MCOperand::CreateImm(decodeRn(insn))); @@ -688,18 +694,19 @@ static bool DisassembleCoprocessor(MCInst &MI, unsigned Opcode, uint32_t insn, // SRSW/SRS: addrmode4:$addr mode_imm // RFEW/RFE: addrmode4:$addr Rn static bool DisassembleBrFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { if (CoprocessorOpcode(Opcode)) - return DisassembleCoprocessor(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleCoprocessor(MI, Opcode, insn, NumOps, NumOpsAdded, B); const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + if (!OpInfo) return false; // MRS and MRSsys take one GPR reg Rd. if (Opcode == ARM::MRS || Opcode == ARM::MRSsys) { assert(NumOps >= 1 && OpInfo[0].RegClass == ARM::GPRRegClassID && "Reg operand expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)))); NumOpsAdded = 1; return true; @@ -708,7 +715,7 @@ static bool DisassembleBrFrm(MCInst &MI, unsigned Opcode, uint32_t insn, if (Opcode == ARM::BXJ) { assert(NumOps >= 1 && OpInfo[0].RegClass == ARM::GPRRegClassID && "Reg operand expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); NumOpsAdded = 1; return true; @@ -717,7 +724,7 @@ static bool DisassembleBrFrm(MCInst &MI, unsigned Opcode, uint32_t insn, if (Opcode == ARM::MSR || Opcode == ARM::MSRsys) { assert(NumOps >= 1 && OpInfo[0].RegClass == ARM::GPRRegClassID && "Reg operand expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); MI.addOperand(MCOperand::CreateImm(slice(insn, 19, 16))); NumOpsAdded = 2; @@ -748,7 +755,7 @@ static bool DisassembleBrFrm(MCInst &MI, unsigned Opcode, uint32_t insn, if (Opcode == ARM::SRSW || Opcode == ARM::SRS) MI.addOperand(MCOperand::CreateImm(slice(insn, 4, 0))); else - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); NumOpsAdded = 3; return true; @@ -791,9 +798,11 @@ static bool DisassembleBrFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // BLXr9, BXr9 // BRIND, BX_RET static bool DisassembleBrMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + if (!OpInfo) return false; + unsigned &OpIdx = NumOpsAdded; OpIdx = 0; @@ -806,7 +815,7 @@ static bool DisassembleBrMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, if (Opcode == ARM::BLXr9 || Opcode == ARM::BRIND) { assert(NumOps >= 1 && OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && "Reg operand expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); OpIdx = 1; return true; @@ -817,9 +826,9 @@ static bool DisassembleBrMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // InOperandList with GPR:$target and GPR:$idx regs. assert(NumOps == 4 && "Expect 4 operands"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); // Fill in the two remaining imm operands to signify build completion. @@ -835,7 +844,7 @@ static bool DisassembleBrMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // InOperandList with GPR::$target reg. assert(NumOps == 3 && "Expect 3 operands"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); // Fill in the two remaining imm operands to signify build completion. @@ -852,13 +861,13 @@ static bool DisassembleBrMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // See also ARMAddressingModes.h (Addressing Mode #2). assert(NumOps == 5 && getIBit(insn) == 1 && "Expect 5 operands && I-bit=1"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); ARM_AM::AddrOpc AddrOpcode = getUBit(insn) ? ARM_AM::add : ARM_AM::sub; // Disassemble the offset reg (Rm), shift type, and immediate shift length. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); // Inst{6-5} encodes the shift opcode. ARM_AM::ShiftOpc ShOp = getShiftOpcForBits(slice(insn, 6, 5)); @@ -882,14 +891,19 @@ static bool DisassembleBrMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, return false; } -static inline uint32_t getBFCInvMask(uint32_t insn) { +static inline bool getBFCInvMask(uint32_t insn, uint32_t &mask) { uint32_t lsb = slice(insn, 11, 7); uint32_t msb = slice(insn, 20, 16); uint32_t Val = 0; - assert(lsb <= msb && "Encoding error: lsb > msb"); + if (msb < lsb) { + DEBUG(errs() << "Encoding error: msb < lsb\n"); + return false; + } + for (uint32_t i = lsb; i <= msb; ++i) Val |= (1 << i); - return ~Val; + mask = ~Val; + return true; } static inline bool SaturateOpcode(unsigned Opcode) { @@ -924,7 +938,7 @@ static inline unsigned decodeSaturatePos(unsigned Opcode, uint32_t insn) { // operations have Rd Rm Rn, instead of the "normal" Rd Rn Rm. // They are QADD, QDADD, QDSUB, and QSUB. static bool DisassembleDPFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetInstrDesc &TID = ARMInsts[Opcode]; unsigned short NumDefs = TID.getNumDefs(); @@ -936,7 +950,7 @@ static bool DisassembleDPFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // Disassemble register def if there is one. if (NumDefs && (OpInfo[OpIdx].RegClass == ARM::GPRRegClassID)) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)))); ++OpIdx; } @@ -949,7 +963,7 @@ static bool DisassembleDPFrm(MCInst &MI, unsigned Opcode, uint32_t insn, if (SaturateOpcode(Opcode)) { MI.addOperand(MCOperand::CreateImm(decodeSaturatePos(Opcode, insn))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); if (Opcode == ARM::SSAT16 || Opcode == ARM::USAT16) { @@ -977,14 +991,18 @@ static bool DisassembleDPFrm(MCInst &MI, unsigned Opcode, uint32_t insn, if (Opcode == ARM::BFC || Opcode == ARM::BFI) { // TIED_TO operand skipped for BFC and Inst{3-0} (Reg) for BFI. MI.addOperand(MCOperand::CreateReg(Opcode == ARM::BFC ? 0 - : getRegisterEnum(ARM::GPRRegClassID, + : getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); - MI.addOperand(MCOperand::CreateImm(getBFCInvMask(insn))); + uint32_t mask = 0; + if (!getBFCInvMask(insn, mask)) + return false; + + MI.addOperand(MCOperand::CreateImm(mask)); OpIdx += 2; return true; } if (Opcode == ARM::SBFX || Opcode == ARM::UBFX) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); MI.addOperand(MCOperand::CreateImm(slice(insn, 11, 7))); MI.addOperand(MCOperand::CreateImm(slice(insn, 20, 16) + 1)); @@ -1000,7 +1018,7 @@ static bool DisassembleDPFrm(MCInst &MI, unsigned Opcode, uint32_t insn, assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && "Reg operand expected"); MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(ARM::GPRRegClassID, + getRegisterEnum(B, ARM::GPRRegClassID, RmRn ? decodeRm(insn) : decodeRn(insn)))); ++OpIdx; } @@ -1021,7 +1039,7 @@ static bool DisassembleDPFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // routed here as well. // assert(getIBit(insn) == 0 && "I_Bit != '0' reg/reg form"); MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(ARM::GPRRegClassID, + getRegisterEnum(B, ARM::GPRRegClassID, RmRn? decodeRn(insn) : decodeRm(insn)))); ++OpIdx; } else if (Opcode == ARM::MOVi16 || Opcode == ARM::MOVTi16) { @@ -1046,7 +1064,7 @@ static bool DisassembleDPFrm(MCInst &MI, unsigned Opcode, uint32_t insn, } static bool DisassembleDPSoRegFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetInstrDesc &TID = ARMInsts[Opcode]; unsigned short NumDefs = TID.getNumDefs(); @@ -1058,7 +1076,7 @@ static bool DisassembleDPSoRegFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // Disassemble register def if there is one. if (NumDefs && (OpInfo[OpIdx].RegClass == ARM::GPRRegClassID)) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)))); ++OpIdx; } @@ -1071,7 +1089,7 @@ static bool DisassembleDPSoRegFrm(MCInst &MI, unsigned Opcode, uint32_t insn, if (!isUnary) { assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && "Reg operand expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); ++OpIdx; } @@ -1094,11 +1112,11 @@ static bool DisassembleDPSoRegFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // Register-controlled shifts have Inst{7} = 0 and Inst{4} = 1. unsigned Rs = slice(insn, 4, 4); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); if (Rs) { // Register-controlled shifts: [Rm, Rs, shift]. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRs(insn)))); // Inst{6-5} encodes the shift opcode. ARM_AM::ShiftOpc ShOp = getShiftOpcForBits(slice(insn, 6, 5)); @@ -1121,24 +1139,26 @@ static bool DisassembleDPSoRegFrm(MCInst &MI, unsigned Opcode, uint32_t insn, } static bool DisassembleLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, bool isStore) { + unsigned short NumOps, unsigned &NumOpsAdded, bool isStore, BO B) { const TargetInstrDesc &TID = ARMInsts[Opcode]; - unsigned short NumDefs = TID.getNumDefs(); bool isPrePost = isPrePostLdSt(TID.TSFlags); const TargetOperandInfo *OpInfo = TID.OpInfo; + if (!OpInfo) return false; + unsigned &OpIdx = NumOpsAdded; OpIdx = 0; - assert(((!isStore && NumDefs > 0) || (isStore && (NumDefs == 0 || isPrePost))) + assert(((!isStore && TID.getNumDefs() > 0) || + (isStore && (TID.getNumDefs() == 0 || isPrePost))) && "Invalid arguments"); // Operand 0 of a pre- and post-indexed store is the address base writeback. if (isPrePost && isStore) { assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && "Reg operand expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); ++OpIdx; } @@ -1149,7 +1169,7 @@ static bool DisassembleLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn, assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && "Reg operand expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)))); ++OpIdx; @@ -1157,7 +1177,7 @@ static bool DisassembleLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn, if (isPrePost && !isStore) { assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && "Reg operand expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); ++OpIdx; } @@ -1170,7 +1190,7 @@ static bool DisassembleLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn, "Reg operand expected"); assert((!isPrePost || (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1)) && "Index mode or tied_to operand expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); ++OpIdx; @@ -1194,7 +1214,7 @@ static bool DisassembleLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn, MI.addOperand(MCOperand::CreateImm(Offset)); } else { // Disassemble the offset reg (Rm), shift type, and immediate shift length. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); // Inst{6-5} encodes the shift opcode. ARM_AM::ShiftOpc ShOp = getShiftOpcForBits(slice(insn, 6, 5)); @@ -1212,13 +1232,13 @@ static bool DisassembleLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn, } static bool DisassembleLdFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { - return DisassembleLdStFrm(MI, Opcode, insn, NumOps, NumOpsAdded, false); + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { + return DisassembleLdStFrm(MI, Opcode, insn, NumOps, NumOpsAdded, false, B); } static bool DisassembleStFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { - return DisassembleLdStFrm(MI, Opcode, insn, NumOps, NumOpsAdded, true); + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { + return DisassembleLdStFrm(MI, Opcode, insn, NumOps, NumOpsAdded, true, B); } static bool HasDualReg(unsigned Opcode) { @@ -1232,24 +1252,26 @@ static bool HasDualReg(unsigned Opcode) { } static bool DisassembleLdStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, bool isStore) { + unsigned short NumOps, unsigned &NumOpsAdded, bool isStore, BO B) { const TargetInstrDesc &TID = ARMInsts[Opcode]; - unsigned short NumDefs = TID.getNumDefs(); bool isPrePost = isPrePostLdSt(TID.TSFlags); const TargetOperandInfo *OpInfo = TID.OpInfo; + if (!OpInfo) return false; + unsigned &OpIdx = NumOpsAdded; OpIdx = 0; - assert(((!isStore && NumDefs > 0) || (isStore && (NumDefs == 0 || isPrePost))) + assert(((!isStore && TID.getNumDefs() > 0) || + (isStore && (TID.getNumDefs() == 0 || isPrePost))) && "Invalid arguments"); // Operand 0 of a pre- and post-indexed store is the address base writeback. if (isPrePost && isStore) { assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && "Reg operand expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); ++OpIdx; } @@ -1262,13 +1284,13 @@ static bool DisassembleLdStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && "Reg operand expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)))); ++OpIdx; // Fill in LDRD and STRD's second operand. if (DualReg) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn) + 1))); ++OpIdx; } @@ -1277,7 +1299,7 @@ static bool DisassembleLdStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, if (isPrePost && !isStore) { assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && "Reg operand expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); ++OpIdx; } @@ -1290,7 +1312,7 @@ static bool DisassembleLdStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, "Reg operand expected"); assert((!isPrePost || (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1)) && "Index mode or tied_to operand expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); ++OpIdx; @@ -1315,7 +1337,7 @@ static bool DisassembleLdStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, MI.addOperand(MCOperand::CreateImm(Offset)); } else { // Disassemble the offset reg (Rm). - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); unsigned Offset = ARM_AM::getAM3Opc(AddrOpcode, 0); MI.addOperand(MCOperand::CreateImm(Offset)); @@ -1326,13 +1348,14 @@ static bool DisassembleLdStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, } static bool DisassembleLdMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { - return DisassembleLdStMiscFrm(MI, Opcode, insn, NumOps, NumOpsAdded, false); + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { + return DisassembleLdStMiscFrm(MI, Opcode, insn, NumOps, NumOpsAdded, false, + B); } static bool DisassembleStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { - return DisassembleLdStMiscFrm(MI, Opcode, insn, NumOps, NumOpsAdded, true); + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { + return DisassembleLdStMiscFrm(MI, Opcode, insn, NumOps, NumOpsAdded, true, B); } // The algorithm for disassembly of LdStMulFrm is different from others because @@ -1340,7 +1363,7 @@ static bool DisassembleStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // and operand 1 (the AM4 mode imm). After operand 3, we need to populate the // reglist with each affected register encoded as an MCOperand. static bool DisassembleLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { assert(NumOps >= 5 && "LdStMulFrm expects NumOps >= 5"); @@ -1348,7 +1371,7 @@ static bool DisassembleLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn, OpIdx = 0; - unsigned Base = getRegisterEnum(ARM::GPRRegClassID, decodeRn(insn)); + unsigned Base = getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)); // Writeback to base, if necessary. if (Opcode == ARM::LDM_UPD || Opcode == ARM::STM_UPD) { @@ -1372,7 +1395,7 @@ static bool DisassembleLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned RegListBits = insn & ((1 << 16) - 1); for (unsigned i = 0; i < 16; ++i) { if ((RegListBits >> i) & 1) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, i))); ++OpIdx; } @@ -1388,9 +1411,11 @@ static bool DisassembleLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // // SWP, SWPB: Rd Rm Rn static bool DisassembleLdStExFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + if (!OpInfo) return false; + unsigned &OpIdx = NumOpsAdded; OpIdx = 0; @@ -1404,29 +1429,29 @@ static bool DisassembleLdStExFrm(MCInst &MI, unsigned Opcode, uint32_t insn, bool isDW = (Opcode == ARM::LDREXD || Opcode == ARM::STREXD); // Add the destination operand. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)))); ++OpIdx; // Store register Exclusive needs a source operand. if (isStore) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); ++OpIdx; if (isDW) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)+1))); ++OpIdx; } } else if (isDW) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)+1))); ++OpIdx; } // Finally add the pointer operand. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); ++OpIdx; @@ -1438,7 +1463,7 @@ static bool DisassembleLdStExFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // PKHBT, PKHTB: Rd Rn Rm , LSL/ASR #imm5 // RBIT, REV, REV16, REVSH: Rd Rm static bool DisassembleArithMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; unsigned &OpIdx = NumOpsAdded; @@ -1452,18 +1477,18 @@ static bool DisassembleArithMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, bool ThreeReg = NumOps > 2 && OpInfo[2].RegClass == ARM::GPRRegClassID; - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)))); ++OpIdx; if (ThreeReg) { assert(NumOps >= 4 && "Expect >= 4 operands"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); ++OpIdx; } - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); ++OpIdx; @@ -1485,7 +1510,7 @@ static bool DisassembleArithMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // The 2nd operand register is Rn and the 3rd operand regsiter is Rm for the // three register operand form. Otherwise, Rn=0b1111 and only Rm is used. static bool DisassembleExtFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; unsigned &OpIdx = NumOpsAdded; @@ -1499,17 +1524,17 @@ static bool DisassembleExtFrm(MCInst &MI, unsigned Opcode, uint32_t insn, bool ThreeReg = NumOps > 2 && OpInfo[2].RegClass == ARM::GPRRegClassID; - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)))); ++OpIdx; if (ThreeReg) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); ++OpIdx; } - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); ++OpIdx; @@ -1591,7 +1616,7 @@ static uint64_t VFPExpandImm(unsigned char byte, unsigned N) { // VCVTDS, VCVTSD: converts between double-precision and single-precision // The rest of the instructions have homogeneous [VFP]Rd and [VFP]Rm registers. static bool DisassembleVFPUnaryFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { assert(NumOps >= 1 && "VFPUnaryFrm expects NumOps >= 1"); @@ -1606,7 +1631,7 @@ static bool DisassembleVFPUnaryFrm(MCInst &MI, unsigned Opcode, uint32_t insn, bool isSP = (RegClass == ARM::SPRRegClassID); MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(RegClass, decodeVFPRd(insn, isSP)))); + getRegisterEnum(B, RegClass, decodeVFPRd(insn, isSP)))); ++OpIdx; // Early return for compare with zero instructions. @@ -1620,7 +1645,7 @@ static bool DisassembleVFPUnaryFrm(MCInst &MI, unsigned Opcode, uint32_t insn, isSP = (RegClass == ARM::SPRRegClassID); MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(RegClass, decodeVFPRm(insn, isSP)))); + getRegisterEnum(B, RegClass, decodeVFPRm(insn, isSP)))); ++OpIdx; return true; @@ -1631,7 +1656,7 @@ static bool DisassembleVFPUnaryFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // InOperandList to that of the dst. As far as asm printing is concerned, this // tied_to operand is simply skipped. static bool DisassembleVFPBinaryFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { assert(NumOps >= 3 && "VFPBinaryFrm expects NumOps >= 3"); @@ -1647,7 +1672,7 @@ static bool DisassembleVFPBinaryFrm(MCInst &MI, unsigned Opcode, uint32_t insn, bool isSP = (RegClass == ARM::SPRRegClassID); MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(RegClass, decodeVFPRd(insn, isSP)))); + getRegisterEnum(B, RegClass, decodeVFPRd(insn, isSP)))); ++OpIdx; // Skip tied_to operand constraint. @@ -1658,11 +1683,11 @@ static bool DisassembleVFPBinaryFrm(MCInst &MI, unsigned Opcode, uint32_t insn, } MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(RegClass, decodeVFPRn(insn, isSP)))); + getRegisterEnum(B, RegClass, decodeVFPRn(insn, isSP)))); ++OpIdx; MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(RegClass, decodeVFPRm(insn, isSP)))); + getRegisterEnum(B, RegClass, decodeVFPRm(insn, isSP)))); ++OpIdx; return true; @@ -1675,12 +1700,13 @@ static bool DisassembleVFPBinaryFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // A8.6.297 vcvt (floating-point and fixed-point) // Dd|Sd Dd|Sd(TIED_TO) #fbits(= 16|32 - UInt(imm4:i)) static bool DisassembleVFPConv1Frm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { assert(NumOps >= 2 && "VFPConv1Frm expects NumOps >= 2"); const TargetInstrDesc &TID = ARMInsts[Opcode]; const TargetOperandInfo *OpInfo = TID.OpInfo; + if (!OpInfo) return false; bool SP = slice(insn, 8, 8) == 0; // A8.6.295 & A8.6.297 bool fixed_point = slice(insn, 17, 17) == 1; // A8.6.297 @@ -1692,7 +1718,7 @@ static bool DisassembleVFPConv1Frm(MCInst &MI, unsigned Opcode, uint32_t insn, int size = slice(insn, 7, 7) == 0 ? 16 : 32; int fbits = size - (slice(insn,3,0) << 1 | slice(insn,5,5)); MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(RegClassID, + getRegisterEnum(B, RegClassID, decodeVFPRd(insn, SP)))); assert(TID.getOperandConstraint(1, TOI::TIED_TO) != -1 && @@ -1712,15 +1738,15 @@ static bool DisassembleVFPConv1Frm(MCInst &MI, unsigned Opcode, uint32_t insn, if (slice(insn, 18, 18) == 1) { // to_integer operation d = decodeVFPRd(insn, true /* Is Single Precision */); MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(ARM::SPRRegClassID, d))); + getRegisterEnum(B, ARM::SPRRegClassID, d))); m = decodeVFPRm(insn, SP); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(RegClassID, m))); + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClassID, m))); } else { d = decodeVFPRd(insn, SP); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(RegClassID, d))); + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClassID, d))); m = decodeVFPRm(insn, true /* Is Single Precision */); MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(ARM::SPRRegClassID, m))); + getRegisterEnum(B, ARM::SPRRegClassID, m))); } NumOpsAdded = 2; } @@ -1731,13 +1757,13 @@ static bool DisassembleVFPConv1Frm(MCInst &MI, unsigned Opcode, uint32_t insn, // VMOVRS - A8.6.330 // Rt => Rd; Sn => UInt(Vn:N) static bool DisassembleVFPConv2Frm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { assert(NumOps >= 2 && "VFPConv2Frm expects NumOps >= 2"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::SPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::SPRRegClassID, decodeVFPRn(insn, true)))); NumOpsAdded = 2; return true; @@ -1749,29 +1775,29 @@ static bool DisassembleVFPConv2Frm(MCInst &MI, unsigned Opcode, uint32_t insn, // VMOVRRS - A8.6.331 // Rt => Rd; Rt2 => Rn; Sm => UInt(Vm:M); Sm1 = Sm+1 static bool DisassembleVFPConv3Frm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { assert(NumOps >= 3 && "VFPConv3Frm expects NumOps >= 3"); const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; unsigned &OpIdx = NumOpsAdded; - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); OpIdx = 2; if (OpInfo[OpIdx].RegClass == ARM::SPRRegClassID) { unsigned Sm = decodeVFPRm(insn, true); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::SPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::SPRRegClassID, Sm))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::SPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::SPRRegClassID, Sm+1))); OpIdx += 2; } else { MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(ARM::DPRRegClassID, + getRegisterEnum(B, ARM::DPRRegClassID, decodeVFPRm(insn, false)))); ++OpIdx; } @@ -1781,13 +1807,13 @@ static bool DisassembleVFPConv3Frm(MCInst &MI, unsigned Opcode, uint32_t insn, // VMOVSR - A8.6.330 // Rt => Rd; Sn => UInt(Vn:N) static bool DisassembleVFPConv4Frm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { assert(NumOps >= 2 && "VFPConv4Frm expects NumOps >= 2"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::SPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::SPRRegClassID, decodeVFPRn(insn, true)))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)))); NumOpsAdded = 2; return true; @@ -1799,7 +1825,7 @@ static bool DisassembleVFPConv4Frm(MCInst &MI, unsigned Opcode, uint32_t insn, // VMOVRRS - A8.6.331 // Rt => Rd; Rt2 => Rn; Sm => UInt(Vm:M); Sm1 = Sm+1 static bool DisassembleVFPConv5Frm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { assert(NumOps >= 3 && "VFPConv5Frm expects NumOps >= 3"); @@ -1810,21 +1836,21 @@ static bool DisassembleVFPConv5Frm(MCInst &MI, unsigned Opcode, uint32_t insn, if (OpInfo[OpIdx].RegClass == ARM::SPRRegClassID) { unsigned Sm = decodeVFPRm(insn, true); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::SPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::SPRRegClassID, Sm))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::SPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::SPRRegClassID, Sm+1))); OpIdx += 2; } else { MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(ARM::DPRRegClassID, + getRegisterEnum(B, ARM::DPRRegClassID, decodeVFPRm(insn, false)))); ++OpIdx; } - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); OpIdx += 2; return true; @@ -1833,7 +1859,7 @@ static bool DisassembleVFPConv5Frm(MCInst &MI, unsigned Opcode, uint32_t insn, // VFP Load/Store Instructions. // VLDRD, VLDRS, VSTRD, VSTRS static bool DisassembleVFPLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { assert(NumOps >= 3 && "VFPLdStFrm expects NumOps >= 3"); @@ -1843,9 +1869,9 @@ static bool DisassembleVFPLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // Extract Dd/Sd for operand 0. unsigned RegD = decodeVFPRd(insn, isSPVFP); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(RegClassID, RegD))); + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClassID, RegD))); - unsigned Base = getRegisterEnum(ARM::GPRRegClassID, decodeRn(insn)); + unsigned Base = getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)); MI.addOperand(MCOperand::CreateReg(Base)); // Next comes the AM5 Opcode. @@ -1865,7 +1891,7 @@ static bool DisassembleVFPLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // // VLDMD[_UPD], VLDMS[_UPD], VSTMD[_UPD], VSTMS[_UPD] static bool DisassembleVFPLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { assert(NumOps >= 5 && "VFPLdStMulFrm expects NumOps >= 5"); @@ -1873,7 +1899,7 @@ static bool DisassembleVFPLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn, OpIdx = 0; - unsigned Base = getRegisterEnum(ARM::GPRRegClassID, decodeRn(insn)); + unsigned Base = getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)); // Writeback to base, if necessary. if (Opcode == ARM::VLDMD_UPD || Opcode == ARM::VLDMS_UPD || @@ -1886,6 +1912,12 @@ static bool DisassembleVFPLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // Next comes the AM5 Opcode. ARM_AM::AMSubMode SubMode = getAMSubModeForBits(getPUBits(insn)); + // Must be either "ia" or "db" submode. + if (SubMode != ARM_AM::ia && SubMode != ARM_AM::db) { + DEBUG(errs() << "Illegal addressing mode 5 sub-mode!\n"); + return false; + } + unsigned char Imm8 = insn & 0xFF; MI.addOperand(MCOperand::CreateImm(ARM_AM::getAM5Opc(SubMode, Imm8))); @@ -1906,7 +1938,7 @@ static bool DisassembleVFPLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // Fill the variadic part of reglist. unsigned Regs = isSPVFP ? Imm8 : Imm8/2; for (unsigned i = 0; i < Regs; ++i) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(RegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClassID, RegD + i))); ++OpIdx; } @@ -1920,7 +1952,7 @@ static bool DisassembleVFPLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // FCONSTS (SPR and a VFPf32Imm operand) // VMRS/VMSR (GPR operand) static bool DisassembleVFPMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; unsigned &OpIdx = NumOpsAdded; @@ -1935,13 +1967,13 @@ static bool DisassembleVFPMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned RegEnum = 0; switch (OpInfo[0].RegClass) { case ARM::DPRRegClassID: - RegEnum = getRegisterEnum(ARM::DPRRegClassID, decodeVFPRd(insn, false)); + RegEnum = getRegisterEnum(B, ARM::DPRRegClassID, decodeVFPRd(insn, false)); break; case ARM::SPRRegClassID: - RegEnum = getRegisterEnum(ARM::SPRRegClassID, decodeVFPRd(insn, true)); + RegEnum = getRegisterEnum(B, ARM::SPRRegClassID, decodeVFPRd(insn, true)); break; case ARM::GPRRegClassID: - RegEnum = getRegisterEnum(ARM::GPRRegClassID, decodeRd(insn)); + RegEnum = getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)); break; default: assert(0 && "Invalid reg class id"); @@ -1986,7 +2018,7 @@ static bool DisassembleVFPMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // D = Inst{22}, Vd = Inst{15-12} static unsigned decodeNEONRd(uint32_t insn) { return ((insn >> ARMII::NEON_D_BitShift) & 1) << 4 - | (insn >> ARMII::NEON_RegRdShift) & ARMII::NEONRegMask; + | ((insn >> ARMII::NEON_RegRdShift) & ARMII::NEONRegMask); } // Extract/Decode NEON N/Vn: @@ -1997,7 +2029,7 @@ static unsigned decodeNEONRd(uint32_t insn) { // N = Inst{7}, Vn = Inst{19-16} static unsigned decodeNEONRn(uint32_t insn) { return ((insn >> ARMII::NEON_N_BitShift) & 1) << 4 - | (insn >> ARMII::NEON_RegRnShift) & ARMII::NEONRegMask; + | ((insn >> ARMII::NEON_RegRnShift) & ARMII::NEONRegMask); } // Extract/Decode NEON M/Vm: @@ -2008,7 +2040,7 @@ static unsigned decodeNEONRn(uint32_t insn) { // M = Inst{5}, Vm = Inst{3-0} static unsigned decodeNEONRm(uint32_t insn) { return ((insn >> ARMII::NEON_M_BitShift) & 1) << 4 - | (insn >> ARMII::NEON_RegRmShift) & ARMII::NEONRegMask; + | ((insn >> ARMII::NEON_RegRmShift) & ARMII::NEONRegMask); } namespace { @@ -2072,7 +2104,7 @@ static uint64_t decodeN1VImm(uint32_t insn, ElemSize esize) { case ESize64: { for (unsigned i = 0; i < 8; ++i) if ((Imm8 >> i) & 1) - Imm64 |= 0xFF << 8*i; + Imm64 |= (uint64_t)0xFF << 8*i; break; } default: @@ -2200,6 +2232,22 @@ static unsigned decodeN3VImm(uint32_t insn) { return (insn >> 8) & 0xF; } +static bool UseDRegPair(unsigned Opcode) { + switch (Opcode) { + default: + return false; + case ARM::VLD1q8_UPD: + case ARM::VLD1q16_UPD: + case ARM::VLD1q32_UPD: + case ARM::VLD1q64_UPD: + case ARM::VST1q8_UPD: + case ARM::VST1q16_UPD: + case ARM::VST1q32_UPD: + case ARM::VST1q64_UPD: + return true; + } +} + // VLD* // D[d] D[d2] ... Rn [TIED_TO Rn] align [Rm] // VLD*LN* @@ -2211,7 +2259,8 @@ static unsigned decodeN3VImm(uint32_t insn) { // // Correctly set VLD*/VST*'s TIED_TO GPR, as the asm printer needs it. static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, bool Store, bool DblSpaced) { + unsigned short NumOps, unsigned &NumOpsAdded, bool Store, bool DblSpaced, + BO B) { const TargetInstrDesc &TID = ARMInsts[Opcode]; const TargetOperandInfo *OpInfo = TID.OpInfo; @@ -2239,7 +2288,7 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn, // LLVM Addressing Mode #6. unsigned RmEnum = 0; if (WB && Rm != 13) - RmEnum = getRegisterEnum(ARM::GPRRegClassID, Rm); + RmEnum = getRegisterEnum(B, ARM::GPRRegClassID, Rm); if (Store) { // Consume possible WB, AddrMode6, possible increment reg, the DPR/QPR's, @@ -2248,14 +2297,14 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn, "Reg operand expected"); if (WB) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, Rn))); ++OpIdx; } assert((OpIdx+1) < NumOps && OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && OpInfo[OpIdx + 1].RegClass == 0 && "Addrmode #6 Operands expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, Rn))); MI.addOperand(MCOperand::CreateImm(0)); // Alignment ignored? OpIdx += 2; @@ -2272,10 +2321,9 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn, RegClass = OpInfo[OpIdx].RegClass; while (OpIdx < NumOps && OpInfo[OpIdx].RegClass == RegClass) { - if (Opcode >= ARM::VST1q16 && Opcode <= ARM::VST1q8) - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(RegClass,Rd,true))); - else - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(RegClass,Rd))); + MI.addOperand(MCOperand::CreateReg( + getRegisterEnum(B, RegClass, Rd, + UseDRegPair(Opcode)))); Rd += Inc; ++OpIdx; } @@ -2293,23 +2341,22 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn, RegClass = OpInfo[0].RegClass; while (OpIdx < NumOps && OpInfo[OpIdx].RegClass == RegClass) { - if (Opcode >= ARM::VLD1q16 && Opcode <= ARM::VLD1q8) - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(RegClass,Rd,true))); - else - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(RegClass,Rd))); + MI.addOperand(MCOperand::CreateReg( + getRegisterEnum(B, RegClass, Rd, + UseDRegPair(Opcode)))); Rd += Inc; ++OpIdx; } if (WB) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, Rn))); ++OpIdx; } assert((OpIdx+1) < NumOps && OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && OpInfo[OpIdx + 1].RegClass == 0 && "Addrmode #6 Operands expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, Rn))); MI.addOperand(MCOperand::CreateImm(0)); // Alignment ignored? OpIdx += 2; @@ -2342,7 +2389,7 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn, // Find out about double-spaced-ness of the Opcode and pass it on to // DisassembleNLdSt0(). static bool DisassembleNLdSt(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const StringRef Name = ARMInsts[Opcode].Name; bool DblSpaced = false; @@ -2377,13 +2424,13 @@ static bool DisassembleNLdSt(MCInst &MI, unsigned Opcode, uint32_t insn, } return DisassembleNLdSt0(MI, Opcode, insn, NumOps, NumOpsAdded, - slice(insn, 21, 21) == 0, DblSpaced); + slice(insn, 21, 21) == 0, DblSpaced, B); } // VMOV (immediate) // Qd/Dd imm static bool DisassembleN1RegModImmFrm(MCInst &MI, unsigned Opcode, - uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO) { + uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetInstrDesc &TID = ARMInsts[Opcode]; const TargetOperandInfo *OpInfo = TID.OpInfo; @@ -2395,7 +2442,7 @@ static bool DisassembleN1RegModImmFrm(MCInst &MI, unsigned Opcode, "Expect 1 reg operand followed by 1 imm operand"); // Qd/Dd = Inst{22:15-12} => NEON Rd - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(OpInfo[0].RegClass, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[0].RegClass, decodeNEONRd(insn)))); ElemSize esize = ESizeNA; @@ -2415,6 +2462,7 @@ static bool DisassembleN1RegModImmFrm(MCInst &MI, unsigned Opcode, case ARM::VMOVv1i64: case ARM::VMOVv2i64: esize = ESize64; + break; default: assert(0 && "Unreachable code!"); return false; @@ -2451,7 +2499,7 @@ enum N2VFlag { // // Others static bool DisassembleNVdVmOptImm(MCInst &MI, unsigned Opc, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, N2VFlag Flag = N2V_None) { + unsigned short NumOps, unsigned &NumOpsAdded, N2VFlag Flag, BO B) { const TargetInstrDesc &TID = ARMInsts[Opc]; const TargetOperandInfo *OpInfo = TID.OpInfo; @@ -2478,7 +2526,7 @@ static bool DisassembleNVdVmOptImm(MCInst &MI, unsigned Opc, uint32_t insn, } // Qd/Dd = Inst{22:15-12} => NEON Rd - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(OpInfo[OpIdx].RegClass, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass, decodeNEONRd(insn)))); ++OpIdx; @@ -2490,7 +2538,7 @@ static bool DisassembleNVdVmOptImm(MCInst &MI, unsigned Opc, uint32_t insn, } // Dm = Inst{5:3-0} => NEON Rm - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(OpInfo[OpIdx].RegClass, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass, decodeNEONRm(insn)))); ++OpIdx; @@ -2523,21 +2571,22 @@ static bool DisassembleNVdVmOptImm(MCInst &MI, unsigned Opc, uint32_t insn, } static bool DisassembleN2RegFrm(MCInst &MI, unsigned Opc, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - return DisassembleNVdVmOptImm(MI, Opc, insn, NumOps, NumOpsAdded); + return DisassembleNVdVmOptImm(MI, Opc, insn, NumOps, NumOpsAdded, + N2V_None, B); } static bool DisassembleNVCVTFrm(MCInst &MI, unsigned Opc, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { return DisassembleNVdVmOptImm(MI, Opc, insn, NumOps, NumOpsAdded, - N2V_VectorConvert_Between_Float_Fixed); + N2V_VectorConvert_Between_Float_Fixed, B); } static bool DisassembleNVecDupLnFrm(MCInst &MI, unsigned Opc, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { return DisassembleNVdVmOptImm(MI, Opc, insn, NumOps, NumOpsAdded, - N2V_VectorDupLane); + N2V_VectorDupLane, B); } // Vector Shift [Accumulate] Instructions. @@ -2547,7 +2596,7 @@ static bool DisassembleNVecDupLnFrm(MCInst &MI, unsigned Opc, uint32_t insn, // VSHLLi16, VSHLLi32, VSHLLi8: Qd Dm imm (== size) // static bool DisassembleNVectorShift(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, bool LeftShift) { + unsigned short NumOps, unsigned &NumOpsAdded, bool LeftShift, BO B) { const TargetInstrDesc &TID = ARMInsts[Opcode]; const TargetOperandInfo *OpInfo = TID.OpInfo; @@ -2564,7 +2613,7 @@ static bool DisassembleNVectorShift(MCInst &MI, unsigned Opcode, uint32_t insn, OpIdx = 0; // Qd/Dd = Inst{22:15-12} => NEON Rd - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(OpInfo[OpIdx].RegClass, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass, decodeNEONRd(insn)))); ++OpIdx; @@ -2579,7 +2628,7 @@ static bool DisassembleNVectorShift(MCInst &MI, unsigned Opcode, uint32_t insn, "Reg operand expected"); // Qm/Dm = Inst{5:3-0} => NEON Rm - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(OpInfo[OpIdx].RegClass, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass, decodeNEONRm(insn)))); ++OpIdx; @@ -2611,15 +2660,17 @@ static bool DisassembleNVectorShift(MCInst &MI, unsigned Opcode, uint32_t insn, // Left shift instructions. static bool DisassembleN2RegVecShLFrm(MCInst &MI, unsigned Opcode, - uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO) { + uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - return DisassembleNVectorShift(MI, Opcode, insn, NumOps, NumOpsAdded, true); + return DisassembleNVectorShift(MI, Opcode, insn, NumOps, NumOpsAdded, true, + B); } // Right shift instructions have different shift amount interpretation. static bool DisassembleN2RegVecShRFrm(MCInst &MI, unsigned Opcode, - uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO) { + uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - return DisassembleNVectorShift(MI, Opcode, insn, NumOps, NumOpsAdded, false); + return DisassembleNVectorShift(MI, Opcode, insn, NumOps, NumOpsAdded, false, + B); } namespace { @@ -2644,7 +2695,7 @@ enum N3VFlag { // // Others static bool DisassembleNVdVnVmOptImm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, N3VFlag Flag = N3V_None) { + unsigned short NumOps, unsigned &NumOpsAdded, N3VFlag Flag, BO B) { const TargetInstrDesc &TID = ARMInsts[Opcode]; const TargetOperandInfo *OpInfo = TID.OpInfo; @@ -2673,7 +2724,7 @@ static bool DisassembleNVdVnVmOptImm(MCInst &MI, unsigned Opcode, uint32_t insn, } // Qd/Dd = Inst{22:15-12} => NEON Rd - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(OpInfo[OpIdx].RegClass, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass, decodeNEONRd(insn)))); ++OpIdx; @@ -2688,7 +2739,7 @@ static bool DisassembleNVdVnVmOptImm(MCInst &MI, unsigned Opcode, uint32_t insn, // or // Dm = Inst{5:3-0} => NEON Rm MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(OpInfo[OpIdx].RegClass, + getRegisterEnum(B, OpInfo[OpIdx].RegClass, VdVnVm ? decodeNEONRn(insn) : decodeNEONRm(insn)))); ++OpIdx; @@ -2708,7 +2759,7 @@ static bool DisassembleNVdVnVmOptImm(MCInst &MI, unsigned Opcode, uint32_t insn, : decodeNEONRn(insn); MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(OpInfo[OpIdx].RegClass, m))); + getRegisterEnum(B, OpInfo[OpIdx].RegClass, m))); ++OpIdx; if (OpIdx < NumOps && OpInfo[OpIdx].RegClass == 0 @@ -2732,27 +2783,28 @@ static bool DisassembleNVdVnVmOptImm(MCInst &MI, unsigned Opcode, uint32_t insn, } static bool DisassembleN3RegFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - return DisassembleNVdVnVmOptImm(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleNVdVnVmOptImm(MI, Opcode, insn, NumOps, NumOpsAdded, + N3V_None, B); } static bool DisassembleN3RegVecShFrm(MCInst &MI, unsigned Opcode, - uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO) { + uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { return DisassembleNVdVnVmOptImm(MI, Opcode, insn, NumOps, NumOpsAdded, - N3V_VectorShift); + N3V_VectorShift, B); } static bool DisassembleNVecExtractFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { return DisassembleNVdVnVmOptImm(MI, Opcode, insn, NumOps, NumOpsAdded, - N3V_VectorExtract); + N3V_VectorExtract, B); } static bool DisassembleNVecMulScalarFrm(MCInst &MI, unsigned Opcode, - uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO) { + uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { return DisassembleNVdVnVmOptImm(MI, Opcode, insn, NumOps, NumOpsAdded, - N3V_Multiply_By_Scalar); + N3V_Multiply_By_Scalar, B); } // Vector Table Lookup @@ -2762,10 +2814,11 @@ static bool DisassembleNVecMulScalarFrm(MCInst &MI, unsigned Opcode, // VTBL3, VTBX3: Dd [Dd(TIED_TO)] Dn Dn+1 Dn+2 Dm // VTBL4, VTBX4: Dd [Dd(TIED_TO)] Dn Dn+1 Dn+2 Dn+3 Dm static bool DisassembleNVTBLFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetInstrDesc &TID = ARMInsts[Opcode]; const TargetOperandInfo *OpInfo = TID.OpInfo; + if (!OpInfo) return false; assert(NumOps >= 3 && OpInfo[0].RegClass == ARM::DPRRegClassID && @@ -2786,7 +2839,7 @@ static bool DisassembleNVTBLFrm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned Len = slice(insn, 9, 8) + 1; // Dd (the destination vector) - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::DPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::DPRRegClassID, decodeNEONRd(insn)))); ++OpIdx; @@ -2801,7 +2854,7 @@ static bool DisassembleNVTBLFrm(MCInst &MI, unsigned Opcode, uint32_t insn, for (unsigned i = 0; i < Len; ++i) { assert(OpIdx < NumOps && OpInfo[OpIdx].RegClass == ARM::DPRRegClassID && "Reg operand expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::DPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::DPRRegClassID, Rn + i))); ++OpIdx; } @@ -2809,7 +2862,7 @@ static bool DisassembleNVTBLFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // Dm (the index vector) assert(OpIdx < NumOps && OpInfo[OpIdx].RegClass == ARM::DPRRegClassID && "Reg operand (index vector) expected"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::DPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::DPRRegClassID, decodeNEONRm(insn)))); ++OpIdx; @@ -2825,13 +2878,13 @@ static bool DisassembleNEONFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // Vector Get Lane (move scalar to ARM core register) Instructions. // VGETLNi32, VGETLNs16, VGETLNs8, VGETLNu16, VGETLNu8: Rt Dn index static bool DisassembleNEONGetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetInstrDesc &TID = ARMInsts[Opcode]; - unsigned short NumDefs = TID.getNumDefs(); const TargetOperandInfo *OpInfo = TID.OpInfo; + if (!OpInfo) return false; - assert(NumDefs == 1 && NumOps >= 3 && + assert(TID.getNumDefs() == 1 && NumOps >= 3 && OpInfo[0].RegClass == ARM::GPRRegClassID && OpInfo[1].RegClass == ARM::DPRRegClassID && OpInfo[2].RegClass == 0 && @@ -2843,11 +2896,11 @@ static bool DisassembleNEONGetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn, : ESize32); // Rt = Inst{15-12} => ARM Rd - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)))); // Dn = Inst{7:19-16} => NEON Rn - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::DPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::DPRRegClassID, decodeNEONRn(insn)))); MI.addOperand(MCOperand::CreateImm(decodeNVLaneOpIndex(insn, esize))); @@ -2859,13 +2912,13 @@ static bool DisassembleNEONGetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // Vector Set Lane (move ARM core register to scalar) Instructions. // VSETLNi16, VSETLNi32, VSETLNi8: Dd Dd (TIED_TO) Rt index static bool DisassembleNEONSetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetInstrDesc &TID = ARMInsts[Opcode]; - unsigned short NumDefs = TID.getNumDefs(); const TargetOperandInfo *OpInfo = TID.OpInfo; + if (!OpInfo) return false; - assert(NumDefs == 1 && NumOps >= 3 && + assert(TID.getNumDefs() == 1 && NumOps >= 3 && OpInfo[0].RegClass == ARM::DPRRegClassID && OpInfo[1].RegClass == ARM::DPRRegClassID && TID.getOperandConstraint(1, TOI::TIED_TO) != -1 && @@ -2879,14 +2932,14 @@ static bool DisassembleNEONSetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn, : ESize32); // Dd = Inst{7:19-16} => NEON Rn - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::DPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::DPRRegClassID, decodeNEONRn(insn)))); // TIED_TO operand. MI.addOperand(MCOperand::CreateReg(0)); // Rt = Inst{15-12} => ARM Rd - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)))); MI.addOperand(MCOperand::CreateImm(decodeNVLaneOpIndex(insn, esize))); @@ -2898,7 +2951,7 @@ static bool DisassembleNEONSetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // Vector Duplicate Instructions (from ARM core register to all elements). // VDUP8d, VDUP16d, VDUP32d, VDUP8q, VDUP16q, VDUP32q: Qd/Dd Rt static bool DisassembleNEONDupFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; @@ -2911,11 +2964,11 @@ static bool DisassembleNEONDupFrm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned RegClass = OpInfo[0].RegClass; // Qd/Dd = Inst{7:19-16} => NEON Rn - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(RegClass, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClass, decodeNEONRn(insn)))); // Rt = Inst{15-12} => ARM Rd - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)))); NumOpsAdded = 2; @@ -2945,13 +2998,13 @@ static inline bool PreLoadOpcode(unsigned Opcode) { } static bool DisassemblePreLoadFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { // Preload Data/Instruction requires either 2 or 4 operands. // PLDi, PLDWi, PLIi: Rn [+/-]imm12 add = (U == '1') // PLDr[a|m], PLDWr[a|m], PLIr[a|m]: Rn Rm addrmode2_opc - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); if (Opcode == ARM::PLDi || Opcode == ARM::PLDWi || Opcode == ARM::PLIi) { @@ -2961,7 +3014,7 @@ static bool DisassemblePreLoadFrm(MCInst &MI, unsigned Opcode, uint32_t insn, MI.addOperand(MCOperand::CreateImm(Offset)); NumOpsAdded = 2; } else { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); ARM_AM::AddrOpc AddrOpcode = getUBit(insn) ? ARM_AM::add : ARM_AM::sub; @@ -2982,7 +3035,7 @@ static bool DisassemblePreLoadFrm(MCInst &MI, unsigned Opcode, uint32_t insn, } static bool DisassembleMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { if (MemBarrierInstr(insn)) return true; @@ -3031,7 +3084,7 @@ static bool DisassembleMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, } if (PreLoadOpcode(Opcode)) - return DisassemblePreLoadFrm(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassemblePreLoadFrm(MI, Opcode, insn, NumOps, NumOpsAdded, B); assert(0 && "Unexpected misc instruction!"); return false; @@ -3147,7 +3200,7 @@ bool ARMBasicMCBuilder::BuildIt(MCInst &MI, uint32_t insn) { unsigned NumOpsAdded = 0; bool OK = (*Disasm)(MI, Opcode, insn, NumOps, NumOpsAdded, this); - if (!OK) return false; + if (!OK || this->Err != 0) return false; if (NumOpsAdded >= NumOps) return true; @@ -3156,6 +3209,49 @@ bool ARMBasicMCBuilder::BuildIt(MCInst &MI, uint32_t insn) { return TryPredicateAndSBitModifier(MI, Opcode, insn, NumOps - NumOpsAdded); } +// A8.3 Conditional execution +// A8.3.1 Pseudocode details of conditional execution +// Condition bits '111x' indicate the instruction is always executed. +static uint32_t CondCode(uint32_t CondField) { + if (CondField == 0xF) + return ARMCC::AL; + return CondField; +} + +/// DoPredicateOperands - DoPredicateOperands process the predicate operands +/// of some Thumb instructions which come before the reglist operands. It +/// returns true if the two predicate operands have been processed. +bool ARMBasicMCBuilder::DoPredicateOperands(MCInst& MI, unsigned Opcode, + uint32_t /* insn */, unsigned short NumOpsRemaining) { + + assert(NumOpsRemaining > 0 && "Invalid argument"); + + const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + unsigned Idx = MI.getNumOperands(); + + // First, we check whether this instr specifies the PredicateOperand through + // a pair of TargetOperandInfos with isPredicate() property. + if (NumOpsRemaining >= 2 && + OpInfo[Idx].isPredicate() && OpInfo[Idx+1].isPredicate() && + OpInfo[Idx].RegClass == 0 && OpInfo[Idx+1].RegClass == ARM::CCRRegClassID) + { + // If we are inside an IT block, get the IT condition bits maintained via + // ARMBasicMCBuilder::ITState[7:0], through ARMBasicMCBuilder::GetITCond(). + // See also A2.5.2. + if (InITBlock()) + MI.addOperand(MCOperand::CreateImm(GetITCond())); + else + MI.addOperand(MCOperand::CreateImm(ARMCC::AL)); + MI.addOperand(MCOperand::CreateReg(ARM::CPSR)); + return true; + } + + return false; +} + +/// TryPredicateAndSBitModifier - TryPredicateAndSBitModifier tries to process +/// the possible Predicate and SBitModifier, to build the remaining MCOperand +/// constituents. bool ARMBasicMCBuilder::TryPredicateAndSBitModifier(MCInst& MI, unsigned Opcode, uint32_t insn, unsigned short NumOpsRemaining) { @@ -3183,27 +3279,24 @@ bool ARMBasicMCBuilder::TryPredicateAndSBitModifier(MCInst& MI, unsigned Opcode, // // A8.6.16 B if (Name == "t2Bcc") - MI.addOperand(MCOperand::CreateImm(slice(insn, 25, 22))); + MI.addOperand(MCOperand::CreateImm(CondCode(slice(insn, 25, 22)))); else if (Name == "tBcc") - MI.addOperand(MCOperand::CreateImm(slice(insn, 11, 8))); + MI.addOperand(MCOperand::CreateImm(CondCode(slice(insn, 11, 8)))); else MI.addOperand(MCOperand::CreateImm(ARMCC::AL)); } else { - // ARM Instructions. Check condition field. - int64_t CondVal = getCondField(insn); - if (CondVal == 0xF) - MI.addOperand(MCOperand::CreateImm(ARMCC::AL)); - else - MI.addOperand(MCOperand::CreateImm(CondVal)); + // ARM instructions get their condition field from Inst{31-28}. + MI.addOperand(MCOperand::CreateImm(CondCode(getCondField(insn)))); } } MI.addOperand(MCOperand::CreateReg(ARM::CPSR)); Idx += 2; NumOpsRemaining -= 2; - if (NumOpsRemaining == 0) - return true; } + if (NumOpsRemaining == 0) + return true; + // Next, if OptionalDefOperand exists, we check whether the 'S' bit is set. if (OpInfo[Idx].isOptionalDef() && OpInfo[Idx].RegClass==ARM::CCRRegClassID) { MI.addOperand(MCOperand::CreateReg(getSBit(insn) == 1 ? ARM::CPSR : 0)); @@ -3224,7 +3317,7 @@ bool ARMBasicMCBuilder::RunBuildAfterHook(bool Status, MCInst &MI, if (!SP) return Status; if (Opcode == ARM::t2IT) - SP->InitIT(slice(insn, 7, 0)); + Status = SP->InitIT(slice(insn, 7, 0)) ? Status : false; else if (InITBlock()) SP->UpdateIT(); @@ -3234,7 +3327,7 @@ bool ARMBasicMCBuilder::RunBuildAfterHook(bool Status, MCInst &MI, /// Opcode, Format, and NumOperands make up an ARM Basic MCBuilder. ARMBasicMCBuilder::ARMBasicMCBuilder(unsigned opc, ARMFormat format, unsigned short num) - : Opcode(opc), Format(format), NumOps(num), SP(0) { + : Opcode(opc), Format(format), NumOps(num), SP(0), Err(0) { unsigned Idx = (unsigned)format; assert(Idx < (array_lengthof(FuncPtrs) - 1) && "Unknown format"); Disasm = FuncPtrs[Idx]; @@ -3246,6 +3339,11 @@ ARMBasicMCBuilder::ARMBasicMCBuilder(unsigned opc, ARMFormat format, /// are responsible for freeing up of the allocated memory. Cacheing can be /// performed by the API clients to improve performance. ARMBasicMCBuilder *llvm::CreateMCBuilder(unsigned Opcode, ARMFormat Format) { + // For "Unknown format", fail by returning a NULL pointer. + if ((unsigned)Format >= (array_lengthof(FuncPtrs) - 1)) { + DEBUG(errs() << "Unknown format\n"); + return 0; + } return new ARMBasicMCBuilder(Opcode, Format, ARMInsts[Opcode].getNumOperands()); diff --git a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h index 307523037089d..b1d90df341775 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h +++ b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h @@ -171,30 +171,51 @@ typedef ARMBasicMCBuilder *BO; typedef bool (*DisassembleFP)(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO Builder); +/// CreateMCBuilder - Return an ARMBasicMCBuilder that can build up the MC +/// infrastructure of an MCInst given the Opcode and Format of the instr. +/// Return NULL if it fails to create/return a proper builder. API clients +/// are responsible for freeing up of the allocated memory. Cacheing can be +/// performed by the API clients to improve performance. +extern ARMBasicMCBuilder *CreateMCBuilder(unsigned Opcode, ARMFormat Format); + /// ARMBasicMCBuilder - ARMBasicMCBuilder represents an ARM MCInst builder that /// knows how to build up the MCOperand list. class ARMBasicMCBuilder { + friend ARMBasicMCBuilder *CreateMCBuilder(unsigned Opcode, ARMFormat Format); unsigned Opcode; ARMFormat Format; unsigned short NumOps; DisassembleFP Disasm; Session *SP; + int Err; // !=0 if the builder encounters some error condition during build. + +private: + /// Opcode, Format, and NumOperands make up an ARM Basic MCBuilder. + ARMBasicMCBuilder(unsigned opc, ARMFormat format, unsigned short num); public: ARMBasicMCBuilder(ARMBasicMCBuilder &B) : Opcode(B.Opcode), Format(B.Format), NumOps(B.NumOps), Disasm(B.Disasm), - SP(B.SP) - {} - - /// Opcode, Format, and NumOperands make up an ARM Basic MCBuilder. - ARMBasicMCBuilder(unsigned opc, ARMFormat format, unsigned short num); + SP(B.SP) { + Err = 0; + } virtual ~ARMBasicMCBuilder() {} - void setSession(Session *sp) { + void SetSession(Session *sp) { SP = sp; } + void SetErr(int ErrCode) { + Err = ErrCode; + } + + /// DoPredicateOperands - DoPredicateOperands process the predicate operands + /// of some Thumb instructions which come before the reglist operands. It + /// returns true if the two predicate operands have been processed. + bool DoPredicateOperands(MCInst& MI, unsigned Opcode, + uint32_t insn, unsigned short NumOpsRemaning); + /// TryPredicateAndSBitModifier - TryPredicateAndSBitModifier tries to process /// the possible Predicate and SBitModifier, to build the remaining MCOperand /// constituents. @@ -236,13 +257,6 @@ private: } }; -/// CreateMCBuilder - Return an ARMBasicMCBuilder that can build up the MC -/// infrastructure of an MCInst given the Opcode and Format of the instr. -/// Return NULL if it fails to create/return a proper builder. API clients -/// are responsible for freeing up of the allocated memory. Cacheing can be -/// performed by the API clients to improve performance. -extern ARMBasicMCBuilder *CreateMCBuilder(unsigned Opcode, ARMFormat Format); - } // namespace llvm #endif diff --git a/lib/Target/ARM/Disassembler/Makefile b/lib/Target/ARM/Disassembler/Makefile new file mode 100644 index 0000000000000..031b6aca5a484 --- /dev/null +++ b/lib/Target/ARM/Disassembler/Makefile @@ -0,0 +1,16 @@ +##===- lib/Target/ARM/Disassembler/Makefile ----------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../../.. +LIBRARYNAME = LLVMARMDisassembler + +# Hack: we need to include 'main' arm target directory to grab private headers +CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. + +include $(LEVEL)/Makefile.common diff --git a/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h b/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h index 481f25d6f4866..4b2e308248c58 100644 --- a/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h +++ b/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h @@ -193,14 +193,18 @@ static inline unsigned getShiftAmtBits(uint32_t insn) { // A8.6.17 BFC // Encoding T1 ARMv6T2, ARMv7 // LLVM-specific encoding for #<lsb> and #<width> -static inline uint32_t getBitfieldInvMask(uint32_t insn) { +static inline bool getBitfieldInvMask(uint32_t insn, uint32_t &mask) { uint32_t lsb = getImm3(insn) << 2 | getImm2(insn); uint32_t msb = getMsb(insn); uint32_t Val = 0; - assert(lsb <= msb && "Encoding error: lsb > msb"); + if (msb < lsb) { + DEBUG(errs() << "Encoding error: msb < lsb\n"); + return false; + } for (uint32_t i = lsb; i <= msb; ++i) Val |= (1 << i); - return ~Val; + mask = ~Val; + return true; } // A8.4 Shifts applied to a register @@ -342,7 +346,7 @@ static inline unsigned decodeRotate(uint32_t insn) { // Special case: // tMOVSr: tRd tRn static bool DisassembleThumb1General(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO Builder) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; unsigned &OpIdx = NumOpsAdded; @@ -360,14 +364,14 @@ static bool DisassembleThumb1General(MCInst &MI, unsigned Opcode, uint32_t insn, // Add the destination operand. MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(ARM::tGPRRegClassID, + getRegisterEnum(B, ARM::tGPRRegClassID, UseRt ? getT1tRt(insn) : getT1tRd(insn)))); ++OpIdx; // Check whether the next operand to be added is a CCR Register. if (OpInfo[OpIdx].RegClass == ARM::CCRRegClassID) { assert(OpInfo[OpIdx].isOptionalDef() && "Optional def operand expected"); - MI.addOperand(MCOperand::CreateReg(Builder->InITBlock() ? 0 : ARM::CPSR)); + MI.addOperand(MCOperand::CreateReg(B->InITBlock() ? 0 : ARM::CPSR)); ++OpIdx; } @@ -376,7 +380,7 @@ static bool DisassembleThumb1General(MCInst &MI, unsigned Opcode, uint32_t insn, if (OpInfo[OpIdx].RegClass == ARM::tGPRRegClassID) { // For UseRt, the reg operand is tied to the first reg operand. MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(ARM::tGPRRegClassID, + getRegisterEnum(B, ARM::tGPRRegClassID, UseRt ? getT1tRt(insn) : getT1tRn(insn)))); ++OpIdx; } @@ -388,7 +392,7 @@ static bool DisassembleThumb1General(MCInst &MI, unsigned Opcode, uint32_t insn, // The next available operand is either a reg operand or an imm operand. if (OpInfo[OpIdx].RegClass == ARM::tGPRRegClassID) { // Three register operand instructions. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::tGPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID, getT1tRm(insn)))); } else { assert(OpInfo[OpIdx].RegClass == 0 && @@ -409,7 +413,7 @@ static bool DisassembleThumb1General(MCInst &MI, unsigned Opcode, uint32_t insn, // tMVN, tRSB: tRd CPSR tRn // Others: tRd CPSR tRd(TIED_TO) tRn static bool DisassembleThumb1DP(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO Builder) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetInstrDesc &TID = ARMInsts[Opcode]; const TargetOperandInfo *OpInfo = TID.OpInfo; @@ -423,14 +427,14 @@ static bool DisassembleThumb1DP(MCInst &MI, unsigned Opcode, uint32_t insn, && "Invalid arguments"); // Add the destination operand. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::tGPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID, getT1tRd(insn)))); ++OpIdx; // Check whether the next operand to be added is a CCR Register. if (OpInfo[OpIdx].RegClass == ARM::CCRRegClassID) { assert(OpInfo[OpIdx].isOptionalDef() && "Optional def operand expected"); - MI.addOperand(MCOperand::CreateReg(Builder->InITBlock() ? 0 : ARM::CPSR)); + MI.addOperand(MCOperand::CreateReg(B->InITBlock() ? 0 : ARM::CPSR)); ++OpIdx; } @@ -449,7 +453,7 @@ static bool DisassembleThumb1DP(MCInst &MI, unsigned Opcode, uint32_t insn, // Process possible next reg operand. if (OpIdx < NumOps && OpInfo[OpIdx].RegClass == ARM::tGPRRegClassID) { // Add tRn operand. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::tGPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID, getT1tRn(insn)))); ++OpIdx; } @@ -466,7 +470,7 @@ static bool DisassembleThumb1DP(MCInst &MI, unsigned Opcode, uint32_t insn, // tBX_RET_vararg: Rm // tBLXr_r9: Rm static bool DisassembleThumb1Special(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { // tBX_RET has 0 operand. if (NumOps == 0) @@ -474,7 +478,7 @@ static bool DisassembleThumb1Special(MCInst &MI, unsigned Opcode, uint32_t insn, // BX/BLX has 1 reg operand: Rm. if (NumOps == 1) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, getT1Rm(insn)))); NumOpsAdded = 1; return true; @@ -489,7 +493,7 @@ static bool DisassembleThumb1Special(MCInst &MI, unsigned Opcode, uint32_t insn, // Add the destination operand. unsigned RegClass = OpInfo[OpIdx].RegClass; MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(RegClass, + getRegisterEnum(B, RegClass, IsGPR(RegClass) ? getT1Rd(insn) : getT1tRd(insn)))); ++OpIdx; @@ -509,7 +513,7 @@ static bool DisassembleThumb1Special(MCInst &MI, unsigned Opcode, uint32_t insn, assert(OpIdx < NumOps && "More operands expected"); RegClass = OpInfo[OpIdx].RegClass; MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(RegClass, + getRegisterEnum(B, RegClass, IsGPR(RegClass) ? getT1Rm(insn) : getT1tRn(insn)))); ++OpIdx; @@ -521,9 +525,10 @@ static bool DisassembleThumb1Special(MCInst &MI, unsigned Opcode, uint32_t insn, // // tLDRpci: tRt imm8*4 static bool DisassembleThumb1LdPC(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + if (!OpInfo) return false; assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::tGPRRegClassID && (OpInfo[1].RegClass == 0 && @@ -532,7 +537,7 @@ static bool DisassembleThumb1LdPC(MCInst &MI, unsigned Opcode, uint32_t insn, && "Invalid arguments"); // Add the destination operand. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::tGPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID, getT1tRt(insn)))); // And the (imm8 << 2) operand. @@ -564,7 +569,7 @@ static bool DisassembleThumb1LdPC(MCInst &MI, unsigned Opcode, uint32_t insn, // Load/Store Register (reg|imm): tRd tRn imm5 tRm // Load Register Signed Byte|Halfword: tRd tRn tRm static bool DisassembleThumb1LdSt(unsigned opA, MCInst &MI, unsigned Opcode, - uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded) { + uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetInstrDesc &TID = ARMInsts[Opcode]; const TargetOperandInfo *OpInfo = TID.OpInfo; @@ -581,9 +586,9 @@ static bool DisassembleThumb1LdSt(unsigned opA, MCInst &MI, unsigned Opcode, && "Expect >= 2 operands and first two as thumb reg operands"); // Add the destination reg and the base reg. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::tGPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID, getT1tRd(insn)))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::tGPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID, getT1tRn(insn)))); OpIdx = 2; @@ -603,9 +608,10 @@ static bool DisassembleThumb1LdSt(unsigned opA, MCInst &MI, unsigned Opcode, // The next reg operand is tRm, the offset. assert(OpIdx < NumOps && OpInfo[OpIdx].RegClass == ARM::tGPRRegClassID && "Thumb reg operand expected"); - MI.addOperand(MCOperand::CreateReg(Imm5 ? 0 - : getRegisterEnum(ARM::tGPRRegClassID, - getT1tRm(insn)))); + MI.addOperand(MCOperand::CreateReg( + Imm5 ? 0 + : getRegisterEnum(B, ARM::tGPRRegClassID, + getT1tRm(insn)))); ++OpIdx; return true; @@ -615,12 +621,13 @@ static bool DisassembleThumb1LdSt(unsigned opA, MCInst &MI, unsigned Opcode, // // Load/Store Register SP relative: tRt ARM::SP imm8 static bool DisassembleThumb1LdStSP(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { assert((Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) - && "Invalid opcode"); + && "Unexpected opcode"); const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + if (!OpInfo) return false; assert(NumOps >= 3 && OpInfo[0].RegClass == ARM::tGPRRegClassID && @@ -630,7 +637,7 @@ static bool DisassembleThumb1LdStSP(MCInst &MI, unsigned Opcode, uint32_t insn, !OpInfo[2].isOptionalDef()) && "Invalid arguments"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::tGPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID, getT1tRt(insn)))); MI.addOperand(MCOperand::CreateReg(ARM::SP)); MI.addOperand(MCOperand::CreateImm(getT1Imm8(insn))); @@ -643,11 +650,12 @@ static bool DisassembleThumb1LdStSP(MCInst &MI, unsigned Opcode, uint32_t insn, // // tADDrPCi: tRt imm8 static bool DisassembleThumb1AddPCi(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - assert(Opcode == ARM::tADDrPCi && "Invalid opcode"); + assert(Opcode == ARM::tADDrPCi && "Unexpected opcode"); const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + if (!OpInfo) return false; assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::tGPRRegClassID && (OpInfo[1].RegClass == 0 && @@ -655,7 +663,7 @@ static bool DisassembleThumb1AddPCi(MCInst &MI, unsigned Opcode, uint32_t insn, !OpInfo[1].isOptionalDef()) && "Invalid arguments"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::tGPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID, getT1tRt(insn)))); MI.addOperand(MCOperand::CreateImm(getT1Imm8(insn))); NumOpsAdded = 2; @@ -667,11 +675,12 @@ static bool DisassembleThumb1AddPCi(MCInst &MI, unsigned Opcode, uint32_t insn, // // tADDrSPi: tRt ARM::SP imm8 static bool DisassembleThumb1AddSPi(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - assert(Opcode == ARM::tADDrSPi && "Invalid opcode"); + assert(Opcode == ARM::tADDrSPi && "Unexpected opcode"); const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + if (!OpInfo) return false; assert(NumOps >= 3 && OpInfo[0].RegClass == ARM::tGPRRegClassID && @@ -681,7 +690,7 @@ static bool DisassembleThumb1AddSPi(MCInst &MI, unsigned Opcode, uint32_t insn, !OpInfo[2].isOptionalDef()) && "Invalid arguments"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::tGPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID, getT1tRt(insn)))); MI.addOperand(MCOperand::CreateReg(ARM::SP)); MI.addOperand(MCOperand::CreateImm(getT1Imm8(insn))); @@ -697,23 +706,27 @@ static bool DisassembleThumb1AddSPi(MCInst &MI, unsigned Opcode, uint32_t insn, // "low registers" is specified by Inst{7-0} // lr|pc is specified by Inst{8} static bool DisassembleThumb1PushPop(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - assert((Opcode == ARM::tPUSH || Opcode == ARM::tPOP) && "Invalid opcode"); + assert((Opcode == ARM::tPUSH || Opcode == ARM::tPOP) && "Unexpected opcode"); unsigned &OpIdx = NumOpsAdded; // Handling the two predicate operands before the reglist. - MI.addOperand(MCOperand::CreateImm(ARMCC::AL)); - MI.addOperand(MCOperand::CreateReg(ARM::CPSR)); - OpIdx = 2; + if (B->DoPredicateOperands(MI, Opcode, insn, NumOps)) + OpIdx += 2; + else { + DEBUG(errs() << "Expected predicate operands not found.\n"); + return false; + } - // Fill the variadic part of reglist. unsigned RegListBits = slice(insn, 8, 8) << (Opcode == ARM::tPUSH ? 14 : 15) | slice(insn, 7, 0); + + // Fill the variadic part of reglist. for (unsigned i = 0; i < 16; ++i) { if ((RegListBits >> i) & 1) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, i))); ++OpIdx; } @@ -735,13 +748,13 @@ static bool DisassembleThumb1PushPop(MCInst &MI, unsigned Opcode, uint32_t insn, // no operand // Others: tRd tRn static bool DisassembleThumb1Misc(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { if (NumOps == 0) return true; if (Opcode == ARM::tPUSH || Opcode == ARM::tPOP) - return DisassembleThumb1PushPop(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb1PushPop(MI, Opcode, insn, NumOps, NumOpsAdded, B); const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; @@ -799,16 +812,16 @@ static bool DisassembleThumb1Misc(MCInst &MI, unsigned Opcode, uint32_t insn, && "Expect >=2 operands"); // Add the destination operand. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::tGPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID, getT1tRd(insn)))); if (OpInfo[1].RegClass == ARM::tGPRRegClassID) { // Two register instructions. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::tGPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID, getT1tRn(insn)))); } else { // CBNZ, CBZ - assert((Opcode == ARM::tCBNZ || Opcode == ARM::tCBZ) && "Invalid opcode"); + assert((Opcode == ARM::tCBNZ || Opcode == ARM::tCBZ) &&"Unexpected opcode"); MI.addOperand(MCOperand::CreateImm(getT1Imm6(insn) * 2)); } @@ -823,42 +836,47 @@ static bool DisassembleThumb1Misc(MCInst &MI, unsigned Opcode, uint32_t insn, // tLDM_UPD/tSTM_UPD: tRt tRt AM4ModeImm Pred-Imm Pred-CCR register_list // tLDM: tRt AM4ModeImm Pred-Imm Pred-CCR register_list static bool DisassembleThumb1LdStMul(bool Ld, MCInst &MI, unsigned Opcode, - uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded) { + uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { assert((Opcode == ARM::tLDM || Opcode == ARM::tLDM_UPD || - Opcode == ARM::tSTM_UPD) && "Invalid opcode"); + Opcode == ARM::tSTM_UPD) && "Unexpected opcode"); unsigned &OpIdx = NumOpsAdded; unsigned tRt = getT1tRt(insn); - unsigned RegListBits = slice(insn, 7, 0); OpIdx = 0; // WB register, if necessary. if (Opcode == ARM::tLDM_UPD || Opcode == ARM::tSTM_UPD) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, tRt))); ++OpIdx; } - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, tRt))); ++OpIdx; // A8.6.53 LDM / LDMIA / LDMFD - Encoding T1 + // A8.6.53 STM / STMIA / STMEA - Encoding T1 MI.addOperand(MCOperand::CreateImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))); ++OpIdx; // Handling the two predicate operands before the reglist. - MI.addOperand(MCOperand::CreateImm(ARMCC::AL)); - MI.addOperand(MCOperand::CreateReg(ARM::CPSR)); - OpIdx += 2; + if (B->DoPredicateOperands(MI, Opcode, insn, NumOps)) + OpIdx += 2; + else { + DEBUG(errs() << "Expected predicate operands not found.\n"); + return false; + } + + unsigned RegListBits = slice(insn, 7, 0); // Fill the variadic part of reglist. for (unsigned i = 0; i < 8; ++i) { if ((RegListBits >> i) & 1) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::tGPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID, i))); ++OpIdx; } @@ -868,13 +886,15 @@ static bool DisassembleThumb1LdStMul(bool Ld, MCInst &MI, unsigned Opcode, } static bool DisassembleThumb1LdMul(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded) { - return DisassembleThumb1LdStMul(true, MI, Opcode, insn, NumOps, NumOpsAdded); + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { + return DisassembleThumb1LdStMul(true, MI, Opcode, insn, NumOps, NumOpsAdded, + B); } static bool DisassembleThumb1StMul(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded) { - return DisassembleThumb1LdStMul(false, MI, Opcode, insn, NumOps, NumOpsAdded); + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { + return DisassembleThumb1LdStMul(false, MI, Opcode, insn, NumOps, NumOpsAdded, + B); } // A8.6.16 B Encoding T1 @@ -885,12 +905,14 @@ static bool DisassembleThumb1StMul(MCInst &MI, unsigned Opcode, uint32_t insn, // tSVC: imm8 Pred-Imm Pred-CCR // tTRAP: 0 operand (early return) static bool DisassembleThumb1CondBr(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded) { + unsigned short NumOps, unsigned &NumOpsAdded, BO) { if (Opcode == ARM::tTRAP) return true; const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + if (!OpInfo) return false; + assert(NumOps == 3 && OpInfo[0].RegClass == 0 && OpInfo[1].isPredicate() && OpInfo[2].RegClass == ARM::CCRRegClassID && "Exactly 3 operands expected"); @@ -912,9 +934,11 @@ static bool DisassembleThumb1CondBr(MCInst &MI, unsigned Opcode, uint32_t insn, // // tB: offset static bool DisassembleThumb1Br(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded) { + unsigned short NumOps, unsigned &NumOpsAdded, BO) { const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + if (!OpInfo) return false; + assert(NumOps == 1 && OpInfo[0].RegClass == 0 && "1 imm operand expected"); unsigned Imm11 = getT1Imm11(insn); @@ -952,9 +976,8 @@ static bool DisassembleThumb1Br(MCInst &MI, unsigned Opcode, uint32_t insn, // 1101xx Conditional branch, and Supervisor Call on page A6-13 // 11100x Unconditional Branch, see B on page A8-44 // -static bool DisassembleThumb1(uint16_t op, - MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO Builder) { +static bool DisassembleThumb1(uint16_t op, MCInst &MI, unsigned Opcode, + uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { unsigned op1 = slice(op, 5, 4); unsigned op2 = slice(op, 3, 2); @@ -963,27 +986,27 @@ static bool DisassembleThumb1(uint16_t op, switch (op1) { case 0: // A6.2.1 Shift (immediate), add, subtract, move, and compare - return DisassembleThumb1General(MI, Opcode, insn, NumOps, NumOpsAdded, - Builder); + return DisassembleThumb1General(MI, Opcode, insn, NumOps, NumOpsAdded, B); case 1: switch (op2) { case 0: switch (op3) { case 0: // A6.2.2 Data-processing - return DisassembleThumb1DP(MI, Opcode, insn, NumOps, NumOpsAdded, - Builder); + return DisassembleThumb1DP(MI, Opcode, insn, NumOps, NumOpsAdded, B); case 1: // A6.2.3 Special data instructions and branch and exchange - return DisassembleThumb1Special(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb1Special(MI, Opcode, insn, NumOps, NumOpsAdded, + B); default: // A8.6.59 LDR (literal) - return DisassembleThumb1LdPC(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb1LdPC(MI, Opcode, insn, NumOps, NumOpsAdded, B); } break; default: // A6.2.4 Load/store single data item - return DisassembleThumb1LdSt(opA, MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb1LdSt(opA, MI, Opcode, insn, NumOps, NumOpsAdded, + B); break; } break; @@ -991,21 +1014,24 @@ static bool DisassembleThumb1(uint16_t op, switch (op2) { case 0: // A6.2.4 Load/store single data item - return DisassembleThumb1LdSt(opA, MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb1LdSt(opA, MI, Opcode, insn, NumOps, NumOpsAdded, + B); case 1: // A6.2.4 Load/store single data item - return DisassembleThumb1LdStSP(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb1LdStSP(MI, Opcode, insn, NumOps, NumOpsAdded, B); case 2: if (op3 <= 1) { // A8.6.10 ADR - return DisassembleThumb1AddPCi(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb1AddPCi(MI, Opcode, insn, NumOps, NumOpsAdded, + B); } else { // A8.6.8 ADD (SP plus immediate) - return DisassembleThumb1AddSPi(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb1AddSPi(MI, Opcode, insn, NumOps, NumOpsAdded, + B); } default: // A6.2.5 Miscellaneous 16-bit instructions - return DisassembleThumb1Misc(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb1Misc(MI, Opcode, insn, NumOps, NumOpsAdded, B); } break; case 3: @@ -1013,17 +1039,17 @@ static bool DisassembleThumb1(uint16_t op, case 0: if (op3 <= 1) { // A8.6.189 STM / STMIA / STMEA - return DisassembleThumb1StMul(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb1StMul(MI, Opcode, insn, NumOps, NumOpsAdded, B); } else { // A8.6.53 LDM / LDMIA / LDMFD - return DisassembleThumb1LdMul(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb1LdMul(MI, Opcode, insn, NumOps, NumOpsAdded, B); } case 1: // A6.2.6 Conditional branch, and Supervisor Call - return DisassembleThumb1CondBr(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb1CondBr(MI, Opcode, insn, NumOps, NumOpsAdded, B); case 2: // Unconditional Branch, see B on page A8-44 - return DisassembleThumb1Br(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb1Br(MI, Opcode, insn, NumOps, NumOpsAdded, B); default: assert(0 && "Unreachable code"); break; @@ -1079,32 +1105,32 @@ static bool DisassembleThumb2SRS(MCInst &MI, unsigned Opcode, uint32_t insn, // t2RFE[IA|DB]W/t2RFE[IA|DB]: Rn static bool DisassembleThumb2RFE(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); NumOpsAdded = 1; return true; } static bool DisassembleThumb2LdStMul(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { if (Thumb2SRSOpcode(Opcode)) return DisassembleThumb2SRS(MI, Opcode, insn, NumOps, NumOpsAdded); if (Thumb2RFEOpcode(Opcode)) - return DisassembleThumb2RFE(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb2RFE(MI, Opcode, insn, NumOps, NumOpsAdded, B); assert((Opcode == ARM::t2LDM || Opcode == ARM::t2LDM_UPD || Opcode == ARM::t2STM || Opcode == ARM::t2STM_UPD) - && "Invalid opcode"); + && "Unexpected opcode"); assert(NumOps >= 5 && "Thumb2 LdStMul expects NumOps >= 5"); unsigned &OpIdx = NumOpsAdded; OpIdx = 0; - unsigned Base = getRegisterEnum(ARM::GPRRegClassID, decodeRn(insn)); + unsigned Base = getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)); // Writeback to base. if (Opcode == ARM::t2LDM_UPD || Opcode == ARM::t2STM_UPD) { @@ -1120,15 +1146,19 @@ static bool DisassembleThumb2LdStMul(MCInst &MI, unsigned Opcode, uint32_t insn, ++OpIdx; // Handling the two predicate operands before the reglist. - MI.addOperand(MCOperand::CreateImm(ARMCC::AL)); - MI.addOperand(MCOperand::CreateReg(ARM::CPSR)); - OpIdx += 2; + if (B->DoPredicateOperands(MI, Opcode, insn, NumOps)) + OpIdx += 2; + else { + DEBUG(errs() << "Expected predicate operands not found.\n"); + return false; + } - // Fill the variadic part of reglist. unsigned RegListBits = insn & ((1 << 16) - 1); + + // Fill the variadic part of reglist. for (unsigned i = 0; i < 16; ++i) { if ((RegListBits >> i) & 1) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, i))); ++OpIdx; } @@ -1144,9 +1174,11 @@ static bool DisassembleThumb2LdStMul(MCInst &MI, unsigned Opcode, uint32_t insn, // t2STREXD: Rm Rd Rs Rn // t2STREXB, t2STREXH: Rm Rd Rn static bool DisassembleThumb2LdStEx(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + if (!OpInfo) return false; + unsigned &OpIdx = NumOpsAdded; OpIdx = 0; @@ -1163,25 +1195,25 @@ static bool DisassembleThumb2LdStEx(MCInst &MI, unsigned Opcode, uint32_t insn, // Add the destination operand for store. if (isStore) { MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(ARM::GPRRegClassID, + getRegisterEnum(B, ARM::GPRRegClassID, isSW ? decodeRs(insn) : decodeRm(insn)))); ++OpIdx; } // Source operand for store and destination operand for load. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)))); ++OpIdx; // Thumb2 doubleword complication: with an extra source/destination operand. if (isDW) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRs(insn)))); ++OpIdx; } // Finally add the pointer operand. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); ++OpIdx; @@ -1198,9 +1230,10 @@ static bool DisassembleThumb2LdStEx(MCInst &MI, unsigned Opcode, uint32_t insn, // Ditto for t2LDRD_PRE, t2LDRD_POST, t2STRD_PRE, t2STRD_POST, which are for // disassembly only and do not have a tied_to writeback base register operand. static bool DisassembleThumb2LdStDual(MCInst &MI, unsigned Opcode, - uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded) { + uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + if (!OpInfo) return false; assert(NumOps >= 4 && OpInfo[0].RegClass == ARM::GPRRegClassID @@ -1210,11 +1243,11 @@ static bool DisassembleThumb2LdStDual(MCInst &MI, unsigned Opcode, && "Expect >= 4 operands and first 3 as reg operands"); // Add the <Rt> <Rt2> operands. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRs(insn)))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); // Finally add (+/-)imm8*4, depending on the U bit. @@ -1235,15 +1268,15 @@ static bool DisassembleThumb2LdStDual(MCInst &MI, unsigned Opcode, // // t2TBBgen, t2TBHgen: Rn Rm Pred-Imm Pred-CCR static bool DisassembleThumb2TB(MCInst &MI, unsigned Opcode, - uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded) { + uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { assert(NumOps >= 2 && "Expect >= 2 operands"); // The generic version of TBB/TBH needs a base register. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); // Add the index register. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); NumOpsAdded = 2; @@ -1278,7 +1311,7 @@ static inline bool Thumb2ShiftOpcode(unsigned Opcode) { // nothing else, because the shift amount is already specified. // Similar case holds for t2MOVrx, t2ADDrr, ..., etc. static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetInstrDesc &TID = ARMInsts[Opcode]; const TargetOperandInfo *OpInfo = TID.OpInfo; @@ -1293,7 +1326,7 @@ static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn, && OpInfo[3].RegClass == 0 && "Exactlt 4 operands expect and first two as reg operands"); // Only need to populate the src reg operand. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); MI.addOperand(MCOperand::CreateReg(0)); MI.addOperand(MCOperand::CreateImm(0)); @@ -1315,7 +1348,7 @@ static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn, // Build the register operands, followed by the constant shift specifier. MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(ARM::GPRRegClassID, + getRegisterEnum(B, ARM::GPRRegClassID, NoDstReg ? decodeRn(insn) : decodeRs(insn)))); ++OpIdx; @@ -1324,15 +1357,18 @@ static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn, if ((Idx = TID.getOperandConstraint(OpIdx, TOI::TIED_TO)) != -1) { // Process tied_to operand constraint. MI.addOperand(MI.getOperand(Idx)); - } else { - assert(!NoDstReg && "Internal error"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + ++OpIdx; + } else if (!NoDstReg) { + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); + ++OpIdx; + } else { + DEBUG(errs() << "Thumb2 encoding error: d==15 for three-reg operands.\n"); + return false; } - ++OpIdx; } - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); ++OpIdx; @@ -1373,7 +1409,7 @@ static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn, // // ModImm = ThumbExpandImm(i:imm3:imm8) static bool DisassembleThumb2DPModImm(MCInst &MI, unsigned Opcode, - uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded) { + uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; unsigned &OpIdx = NumOpsAdded; @@ -1389,13 +1425,16 @@ static bool DisassembleThumb2DPModImm(MCInst &MI, unsigned Opcode, // Build the register operands, followed by the modified immediate. MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(ARM::GPRRegClassID, + getRegisterEnum(B, ARM::GPRRegClassID, NoDstReg ? decodeRn(insn) : decodeRs(insn)))); ++OpIdx; if (TwoReg) { - assert(!NoDstReg && "Internal error"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + if (NoDstReg) { + DEBUG(errs()<<"Thumb2 encoding error: d==15 for DPModImm 2-reg instr.\n"); + return false; + } + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); ++OpIdx; } @@ -1437,7 +1476,7 @@ static inline unsigned decodeThumb2SaturatePos(unsigned Opcode, uint32_t insn) { case ARM::t2USAT16: return slice(insn, 3, 0); default: - assert(0 && "Invalid opcode passed in"); + assert(0 && "Unexpected opcode"); return 0; } } @@ -1459,7 +1498,7 @@ static inline unsigned decodeThumb2SaturatePos(unsigned Opcode, uint32_t insn) { // o t2SSAT[lsl|asr], t2USAT[lsl|asr]: Rs sat_pos Rn shamt // o t2SSAT16, t2USAT16: Rs sat_pos Rn static bool DisassembleThumb2DPBinImm(MCInst &MI, unsigned Opcode, - uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded) { + uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetInstrDesc &TID = ARMInsts[Opcode]; const TargetOperandInfo *OpInfo = TID.OpInfo; @@ -1474,7 +1513,7 @@ static bool DisassembleThumb2DPBinImm(MCInst &MI, unsigned Opcode, // Build the register operand(s), followed by the immediate(s). - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRs(insn)))); ++OpIdx; @@ -1482,7 +1521,7 @@ static bool DisassembleThumb2DPBinImm(MCInst &MI, unsigned Opcode, if (Thumb2SaturateOpcode(Opcode)) { MI.addOperand(MCOperand::CreateImm(decodeThumb2SaturatePos(Opcode, insn))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); if (Opcode == ARM::t2SSAT16 || Opcode == ARM::t2USAT16) { @@ -1510,7 +1549,7 @@ static bool DisassembleThumb2DPBinImm(MCInst &MI, unsigned Opcode, MI.addOperand(MI.getOperand(Idx)); } else { // Add src reg operand. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); } ++OpIdx; @@ -1528,15 +1567,22 @@ static bool DisassembleThumb2DPBinImm(MCInst &MI, unsigned Opcode, MI.addOperand(MCOperand::CreateImm(getIImm3Imm8(insn))); else if (Opcode == ARM::t2MOVi16 || Opcode == ARM::t2MOVTi16) MI.addOperand(MCOperand::CreateImm(getImm16(insn))); - else if (Opcode == ARM::t2BFC) - MI.addOperand(MCOperand::CreateImm(getBitfieldInvMask(insn))); - else { + else if (Opcode == ARM::t2BFC) { + uint32_t mask = 0; + if (getBitfieldInvMask(insn, mask)) + MI.addOperand(MCOperand::CreateImm(mask)); + else + return false; + } else { // Handle the case of: lsb width assert((Opcode == ARM::t2SBFX || Opcode == ARM::t2UBFX || - Opcode == ARM::t2BFI) && "Invalid opcode"); + Opcode == ARM::t2BFI) && "Unexpected opcode"); MI.addOperand(MCOperand::CreateImm(getLsb(insn))); if (Opcode == ARM::t2BFI) { - assert(getMsb(insn) >= getLsb(insn) && "Encoding error"); + if (getMsb(insn) < getLsb(insn)) { + DEBUG(errs() << "Encoding error: msb < lsb\n"); + return false; + } MI.addOperand(MCOperand::CreateImm(getMsb(insn) - getLsb(insn) + 1)); } else MI.addOperand(MCOperand::CreateImm(getWidthMinus1(insn) + 1)); @@ -1585,7 +1631,7 @@ static inline bool t2MiscCtrlInstr(uint32_t insn) { // t2MSR/t2MSRsys -> Rn mask=Inst{11-8} // t2SMC -> imm4 = Inst{19-16} static bool DisassembleThumb2BrMiscCtrl(MCInst &MI, unsigned Opcode, - uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded) { + uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { if (NumOps == 0) return true; @@ -1627,21 +1673,21 @@ static bool DisassembleThumb2BrMiscCtrl(MCInst &MI, unsigned Opcode, // MRS and MRSsys take one GPR reg Rs. if (Opcode == ARM::t2MRS || Opcode == ARM::t2MRSsys) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRs(insn)))); NumOpsAdded = 1; return true; } // BXJ takes one GPR reg Rn. if (Opcode == ARM::t2BXJ) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); NumOpsAdded = 1; return true; } // MSR and MSRsys take one GPR reg Rn, followed by the mask. if (Opcode == ARM::t2MSR || Opcode == ARM::t2MSRsys || Opcode == ARM::t2BXJ) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); MI.addOperand(MCOperand::CreateImm(slice(insn, 11, 8))); NumOpsAdded = 2; @@ -1659,7 +1705,7 @@ static bool DisassembleThumb2BrMiscCtrl(MCInst &MI, unsigned Opcode, switch (Opcode) { default: - assert(0 && "Unreachable code"); + assert(0 && "Unexpected opcode"); return false; case ARM::t2B: Offset = decodeImm32_B_EncodingT4(insn); @@ -1700,7 +1746,7 @@ static inline bool Thumb2PreloadOpcode(unsigned Opcode) { } static bool DisassembleThumb2PreLoad(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { // Preload Data/Instruction requires either 2 or 3 operands. // t2PLDi12, t2PLDi8, t2PLDpci: Rn [+/-]imm12/imm8 @@ -1718,12 +1764,12 @@ static bool DisassembleThumb2PreLoad(MCInst &MI, unsigned Opcode, uint32_t insn, OpInfo[0].RegClass == ARM::GPRRegClassID && "Expect >= 2 operands and first one as reg operand"); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); ++OpIdx; if (OpInfo[OpIdx].RegClass == ARM::GPRRegClassID) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); } else { assert(OpInfo[OpIdx].RegClass == 0 && !OpInfo[OpIdx].isPredicate() @@ -1765,9 +1811,10 @@ static bool DisassembleThumb2PreLoad(MCInst &MI, unsigned Opcode, uint32_t insn, // These instrs calculate an address from the PC value and an immediate offset. // Rd Rn=PC (+/-)imm12 (+ if Inst{23} == 0b1) static bool DisassembleThumb2Ldpci(MCInst &MI, unsigned Opcode, - uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded) { + uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + if (!OpInfo) return false; assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::GPRRegClassID && @@ -1776,7 +1823,7 @@ static bool DisassembleThumb2Ldpci(MCInst &MI, unsigned Opcode, // Build the register operand, followed by the (+/-)imm12 immediate. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)))); MI.addOperand(MCOperand::CreateImm(decodeImm12(insn))); @@ -1812,16 +1859,16 @@ static bool DisassembleThumb2Ldpci(MCInst &MI, unsigned Opcode, // Delegates to DisassembleThumb2PreLoad() for preload data/instruction. // Delegates to DisassembleThumb2Ldpci() for load * literal operations. static bool DisassembleThumb2LdSt(bool Load, MCInst &MI, unsigned Opcode, - uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded) { + uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { unsigned Rn = decodeRn(insn); if (Thumb2PreloadOpcode(Opcode)) - return DisassembleThumb2PreLoad(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb2PreLoad(MI, Opcode, insn, NumOps, NumOpsAdded, B); // See, for example, A6.3.7 Load word: Table A6-18 Load word. if (Load && Rn == 15) - return DisassembleThumb2Ldpci(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb2Ldpci(MI, Opcode, insn, NumOps, NumOpsAdded, B); const TargetInstrDesc &TID = ARMInsts[Opcode]; const TargetOperandInfo *OpInfo = TID.OpInfo; @@ -1870,13 +1917,16 @@ static bool DisassembleThumb2LdSt(bool Load, MCInst &MI, unsigned Opcode, Imm = decodeImm8(insn); } - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, R0))); + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, + R0))); ++OpIdx; - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, R1))); + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, + R1))); ++OpIdx; if (ThreeReg) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID,R2))); + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, + R2))); ++OpIdx; } @@ -1900,7 +1950,7 @@ static bool DisassembleThumb2LdSt(bool Load, MCInst &MI, unsigned Opcode, // // Miscellaneous operations: Rs [Rn] Rm static bool DisassembleThumb2DPReg(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetInstrDesc &TID = ARMInsts[Opcode]; const TargetOperandInfo *OpInfo = TID.OpInfo; @@ -1917,17 +1967,17 @@ static bool DisassembleThumb2DPReg(MCInst &MI, unsigned Opcode, uint32_t insn, bool ThreeReg = NumOps > 2 && OpInfo[2].RegClass == ARM::GPRRegClassID; - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRs(insn)))); ++OpIdx; if (ThreeReg) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); ++OpIdx; } - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); ++OpIdx; @@ -1954,7 +2004,7 @@ static bool DisassembleThumb2DPReg(MCInst &MI, unsigned Opcode, uint32_t insn, // Unsigned Sum of Absolute Differences [and Accumulate] // Rs Rn Rm [Ra=Inst{15-12}] static bool DisassembleThumb2Mul(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; @@ -1968,17 +2018,17 @@ static bool DisassembleThumb2Mul(MCInst &MI, unsigned Opcode, uint32_t insn, bool FourReg = NumOps > 3 && OpInfo[3].RegClass == ARM::GPRRegClassID; - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRs(insn)))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); if (FourReg) - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)))); NumOpsAdded = FourReg ? 4 : 3; @@ -1999,7 +2049,7 @@ static bool DisassembleThumb2Mul(MCInst &MI, unsigned Opcode, uint32_t insn, // // Signed/Unsigned divide: t2SDIV, t2UDIV: Rs Rn Rm static bool DisassembleThumb2LongMul(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded) { + unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; @@ -2014,16 +2064,16 @@ static bool DisassembleThumb2LongMul(MCInst &MI, unsigned Opcode, uint32_t insn, // Build the register operands. if (FourReg) - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRs(insn)))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); if (FourReg) @@ -2059,38 +2109,41 @@ static bool DisassembleThumb2LongMul(MCInst &MI, unsigned Opcode, uint32_t insn, // 1xxxxxx - Coprocessor instructions on page A6-40 // static bool DisassembleThumb2(uint16_t op1, uint16_t op2, uint16_t op, - MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded) { + MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, + unsigned &NumOpsAdded, BO B) { switch (op1) { case 1: if (slice(op2, 6, 5) == 0) { if (slice(op2, 2, 2) == 0) { // Load/store multiple. - return DisassembleThumb2LdStMul(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb2LdStMul(MI, Opcode, insn, NumOps, NumOpsAdded, + B); } // Load/store dual, load/store exclusive, table branch, otherwise. - assert(slice(op2, 2, 2) == 1 && "Encoding error"); + assert(slice(op2, 2, 2) == 1 && "Thumb2 encoding error!"); if ((ARM::t2LDREX <= Opcode && Opcode <= ARM::t2LDREXH) || (ARM::t2STREX <= Opcode && Opcode <= ARM::t2STREXH)) { // Load/store exclusive. - return DisassembleThumb2LdStEx(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb2LdStEx(MI, Opcode, insn, NumOps, NumOpsAdded, + B); } if (Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2LDRD_PRE || Opcode == ARM::t2LDRD_POST || Opcode == ARM::t2STRDi8 || Opcode == ARM::t2STRD_PRE || Opcode == ARM::t2STRD_POST) { // Load/store dual. - return DisassembleThumb2LdStDual(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb2LdStDual(MI, Opcode, insn, NumOps, NumOpsAdded, + B); } if (Opcode == ARM::t2TBBgen || Opcode == ARM::t2TBHgen) { // Table branch. - return DisassembleThumb2TB(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb2TB(MI, Opcode, insn, NumOps, NumOpsAdded, B); } } else if (slice(op2, 6, 5) == 1) { // Data-processing (shifted register). - return DisassembleThumb2DPSoReg(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb2DPSoReg(MI, Opcode, insn, NumOps, NumOpsAdded, B); } // FIXME: A6.3.18 Coprocessor instructions @@ -2101,14 +2154,17 @@ static bool DisassembleThumb2(uint16_t op1, uint16_t op2, uint16_t op, if (op == 0) { if (slice(op2, 5, 5) == 0) { // Data-processing (modified immediate) - return DisassembleThumb2DPModImm(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb2DPModImm(MI, Opcode, insn, NumOps, NumOpsAdded, + B); } else { // Data-processing (plain binary immediate) - return DisassembleThumb2DPBinImm(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb2DPBinImm(MI, Opcode, insn, NumOps, NumOpsAdded, + B); } } else { // Branches and miscellaneous control on page A6-20. - return DisassembleThumb2BrMiscCtrl(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb2BrMiscCtrl(MI, Opcode, insn, NumOps, NumOpsAdded, + B); } break; @@ -2119,7 +2175,8 @@ static bool DisassembleThumb2(uint16_t op1, uint16_t op2, uint16_t op, if (slice(op2, 0, 0) == 0) { if (slice(op2, 4, 4) == 0) { // Store single data item on page A6-30 - return DisassembleThumb2LdSt(false, MI,Opcode,insn,NumOps,NumOpsAdded); + return DisassembleThumb2LdSt(false, MI,Opcode,insn,NumOps,NumOpsAdded, + B); } else { // FIXME: Advanced SIMD element or structure load/store instructions. // But see ThumbDisassembler::getInstruction(). @@ -2127,19 +2184,20 @@ static bool DisassembleThumb2(uint16_t op1, uint16_t op2, uint16_t op, } } else { // Table A6-9 32-bit Thumb instruction encoding: Load byte|halfword|word - return DisassembleThumb2LdSt(true, MI,Opcode,insn,NumOps,NumOpsAdded); + return DisassembleThumb2LdSt(true, MI,Opcode,insn,NumOps,NumOpsAdded, B); } break; case 1: if (slice(op2, 4, 4) == 0) { // A6.3.12 Data-processing (register) - return DisassembleThumb2DPReg(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb2DPReg(MI, Opcode, insn, NumOps, NumOpsAdded, B); } else if (slice(op2, 3, 3) == 0) { // A6.3.16 Multiply, multiply accumulate, and absolute difference - return DisassembleThumb2Mul(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb2Mul(MI, Opcode, insn, NumOps, NumOpsAdded, B); } else { // A6.3.17 Long multiply, long multiply accumulate, and divide - return DisassembleThumb2LongMul(MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb2LongMul(MI, Opcode, insn, NumOps, NumOpsAdded, + B); } break; default: @@ -2151,7 +2209,7 @@ static bool DisassembleThumb2(uint16_t op1, uint16_t op2, uint16_t op, break; default: - assert(0 && "Encoding error for Thumb2 instruction!"); + assert(0 && "Thumb2 encoding error!"); break; } @@ -2174,8 +2232,10 @@ static bool DisassembleThumbFrm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned bits15_11 = slice(HalfWord, 15, 11); // A6.1 Thumb instruction set encoding - assert((bits15_11 == 0x1D || bits15_11 == 0x1E || bits15_11 == 0x1F) && - "Bits [15:11] of first halfword of a Thumb2 instruction out of range"); + if (!(bits15_11 == 0x1D || bits15_11 == 0x1E || bits15_11 == 0x1F)) { + assert("Bits[15:11] first halfword of Thumb2 instruction is out of range"); + return false; + } // A6.3 32-bit Thumb instruction encoding @@ -2183,5 +2243,6 @@ static bool DisassembleThumbFrm(MCInst &MI, unsigned Opcode, uint32_t insn, uint16_t op2 = slice(HalfWord, 10, 4); uint16_t op = slice(insn, 15, 15); - return DisassembleThumb2(op1, op2, op, MI, Opcode, insn, NumOps, NumOpsAdded); + return DisassembleThumb2(op1, op2, op, MI, Opcode, insn, NumOps, NumOpsAdded, + Builder); } diff --git a/lib/Target/ARM/Makefile b/lib/Target/ARM/Makefile index a8dd38cb362e2..9e3ff29e07c42 100644 --- a/lib/Target/ARM/Makefile +++ b/lib/Target/ARM/Makefile @@ -16,8 +16,9 @@ BUILT_SOURCES = ARMGenRegisterInfo.h.inc ARMGenRegisterNames.inc \ ARMGenRegisterInfo.inc ARMGenInstrNames.inc \ ARMGenInstrInfo.inc ARMGenAsmWriter.inc \ ARMGenDAGISel.inc ARMGenSubtarget.inc \ - ARMGenCodeEmitter.inc ARMGenCallingConv.inc + ARMGenCodeEmitter.inc ARMGenCallingConv.inc \ + ARMGenDecoderTables.inc ARMGenEDInfo.inc -DIRS = AsmPrinter AsmParser TargetInfo +DIRS = AsmPrinter AsmParser Disassembler TargetInfo include $(LEVEL)/Makefile.common diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp index a5dfcb34f7bc6..2f635fe50ad5f 100644 --- a/lib/Target/ARM/Thumb1RegisterInfo.cpp +++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp @@ -36,9 +36,12 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -using namespace llvm; +namespace llvm { extern cl::opt<bool> ReuseFrameIndexVals; +} + +using namespace llvm; Thumb1RegisterInfo::Thumb1RegisterInfo(const ARMBaseInstrInfo &tii, const ARMSubtarget &sti) @@ -56,7 +59,7 @@ void Thumb1RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB, unsigned PredReg) const { MachineFunction &MF = *MBB.getParent(); MachineConstantPool *ConstantPool = MF.getConstantPool(); - Constant *C = ConstantInt::get( + const Constant *C = ConstantInt::get( Type::getInt32Ty(MBB.getParent()->getFunction()->getContext()), Val); unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4); @@ -461,6 +464,13 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, Offset -= AFI->getFramePtrSpillOffset(); } + // Special handling of dbg_value instructions. + if (MI.isDebugValue()) { + MI.getOperand(i). ChangeToRegister(FrameReg, false /*isDef*/); + MI.getOperand(i+1).ChangeToImmediate(Offset); + return 0; + } + unsigned Opcode = MI.getOpcode(); const TargetInstrDesc &Desc = MI.getDesc(); unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask); diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp index de4605669315c..b143bd978ba5e 100644 --- a/lib/Target/ARM/Thumb2InstrInfo.cpp +++ b/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -44,18 +44,22 @@ Thumb2InstrInfo::copyRegToReg(MachineBasicBlock &MBB, DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); - if (DestRC == ARM::GPRRegisterClass && - SrcRC == ARM::GPRRegisterClass) { - BuildMI(MBB, I, DL, get(ARM::tMOVgpr2gpr), DestReg).addReg(SrcReg); - return true; - } else if (DestRC == ARM::GPRRegisterClass && - SrcRC == ARM::tGPRRegisterClass) { - BuildMI(MBB, I, DL, get(ARM::tMOVtgpr2gpr), DestReg).addReg(SrcReg); - return true; - } else if (DestRC == ARM::tGPRRegisterClass && - SrcRC == ARM::GPRRegisterClass) { - BuildMI(MBB, I, DL, get(ARM::tMOVgpr2tgpr), DestReg).addReg(SrcReg); - return true; + if (DestRC == ARM::GPRRegisterClass) { + if (SrcRC == ARM::GPRRegisterClass) { + BuildMI(MBB, I, DL, get(ARM::tMOVgpr2gpr), DestReg).addReg(SrcReg); + return true; + } else if (SrcRC == ARM::tGPRRegisterClass) { + BuildMI(MBB, I, DL, get(ARM::tMOVtgpr2gpr), DestReg).addReg(SrcReg); + return true; + } + } else if (DestRC == ARM::tGPRRegisterClass) { + if (SrcRC == ARM::GPRRegisterClass) { + BuildMI(MBB, I, DL, get(ARM::tMOVgpr2tgpr), DestReg).addReg(SrcReg); + return true; + } else if (SrcRC == ARM::tGPRRegisterClass) { + BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg).addReg(SrcReg); + return true; + } } // Handle SPR, DPR, and QPR copies. diff --git a/lib/Target/ARM/Thumb2RegisterInfo.cpp b/lib/Target/ARM/Thumb2RegisterInfo.cpp index f24d3e256ff3f..07dd0be078d7c 100644 --- a/lib/Target/ARM/Thumb2RegisterInfo.cpp +++ b/lib/Target/ARM/Thumb2RegisterInfo.cpp @@ -52,7 +52,7 @@ void Thumb2RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB, unsigned PredReg) const { MachineFunction &MF = *MBB.getParent(); MachineConstantPool *ConstantPool = MF.getConstantPool(); - Constant *C = ConstantInt::get( + const Constant *C = ConstantInt::get( Type::getInt32Ty(MBB.getParent()->getFunction()->getContext()), Val); unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4); diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp index 2bc75f28d5039..8fe2e42a7cdd9 100644 --- a/lib/Target/ARM/Thumb2SizeReduction.cpp +++ b/lib/Target/ARM/Thumb2SizeReduction.cpp @@ -656,15 +656,8 @@ static bool UpdateCPSRUse(MachineInstr &MI, bool LiveCPSR) { bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) { bool Modified = false; - bool LiveCPSR = false; // Yes, CPSR could be livein. - for (MachineBasicBlock::const_livein_iterator I = MBB.livein_begin(), - E = MBB.livein_end(); I != E; ++I) { - if (*I == ARM::CPSR) { - LiveCPSR = true; - break; - } - } + bool LiveCPSR = MBB.isLiveIn(ARM::CPSR); MachineBasicBlock::iterator MII = MBB.begin(), E = MBB.end(); MachineBasicBlock::iterator NextMII; diff --git a/lib/Target/Alpha/AlphaCodeEmitter.cpp b/lib/Target/Alpha/AlphaCodeEmitter.cpp index eb5e4290ee4f0..a6c6f52704f6b 100644 --- a/lib/Target/Alpha/AlphaCodeEmitter.cpp +++ b/lib/Target/Alpha/AlphaCodeEmitter.cpp @@ -192,10 +192,13 @@ unsigned AlphaCodeEmitter::getMachineOpValue(const MachineInstr &MI, llvm_unreachable("unknown relocatable instruction"); } if (MO.isGlobal()) - MCE.addRelocation(MachineRelocation::getGV(MCE.getCurrentPCOffset(), - Reloc, MO.getGlobal(), Offset, - isa<Function>(MO.getGlobal()), - useGOT)); + MCE.addRelocation(MachineRelocation::getGV( + MCE.getCurrentPCOffset(), + Reloc, + const_cast<GlobalValue *>(MO.getGlobal()), + Offset, + isa<Function>(MO.getGlobal()), + useGOT)); else if (MO.isSymbol()) MCE.addRelocation(MachineRelocation::getExtSym(MCE.getCurrentPCOffset(), Reloc, MO.getSymbolName(), diff --git a/lib/Target/Alpha/AlphaISelDAGToDAG.cpp b/lib/Target/Alpha/AlphaISelDAGToDAG.cpp index 5303d853cca2c..d526dc0827b26 100644 --- a/lib/Target/Alpha/AlphaISelDAGToDAG.cpp +++ b/lib/Target/Alpha/AlphaISelDAGToDAG.cpp @@ -14,7 +14,6 @@ #include "Alpha.h" #include "AlphaTargetMachine.h" -#include "AlphaISelLowering.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -309,7 +308,7 @@ SDNode *AlphaDAGToDAGISel::Select(SDNode *N) { T, CurDAG->getRegister(Alpha::F31, T), CurDAG->getRegister(Alpha::F31, T)); } else { - llvm_report_error("Unhandled FP constant type"); + report_fatal_error("Unhandled FP constant type"); } break; } diff --git a/lib/Target/Alpha/AlphaISelLowering.cpp b/lib/Target/Alpha/AlphaISelLowering.cpp index 94c6f80c03617..1d85f12c3ef9a 100644 --- a/lib/Target/Alpha/AlphaISelLowering.cpp +++ b/lib/Target/Alpha/AlphaISelLowering.cpp @@ -13,6 +13,7 @@ #include "AlphaISelLowering.h" #include "AlphaTargetMachine.h" +#include "AlphaMachineFunctionInfo.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -225,7 +226,7 @@ AlphaTargetLowering::LowerCall(SDValue Chain, SDValue Callee, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { // Alpha target does not yet support tail call optimization. isTailCall = false; @@ -342,7 +343,7 @@ AlphaTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { // Assign locations to each value returned by this call. SmallVector<CCValAssign, 16> RVLocs; @@ -385,10 +386,12 @@ AlphaTargetLowering::LowerFormalArguments(SDValue Chain, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) + const { MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); + AlphaMachineFunctionInfo *FuncInfo = MF.getInfo<AlphaMachineFunctionInfo>(); unsigned args_int[] = { Alpha::R16, Alpha::R17, Alpha::R18, Alpha::R19, Alpha::R20, Alpha::R21}; @@ -435,14 +438,14 @@ AlphaTargetLowering::LowerFormalArguments(SDValue Chain, // If the functions takes variable number of arguments, copy all regs to stack if (isVarArg) { - VarArgsOffset = Ins.size() * 8; + FuncInfo->setVarArgsOffset(Ins.size() * 8); std::vector<SDValue> LS; for (int i = 0; i < 6; ++i) { if (TargetRegisterInfo::isPhysicalRegister(args_int[i])) args_int[i] = AddLiveIn(MF, args_int[i], &Alpha::GPRCRegClass); SDValue argt = DAG.getCopyFromReg(Chain, dl, args_int[i], MVT::i64); int FI = MFI->CreateFixedObject(8, -8 * (6 - i), true, false); - if (i == 0) VarArgsBase = FI; + if (i == 0) FuncInfo->setVarArgsBase(FI); SDValue SDFI = DAG.getFrameIndex(FI, MVT::i64); LS.push_back(DAG.getStore(Chain, dl, argt, SDFI, NULL, 0, false, false, 0)); @@ -467,7 +470,7 @@ SDValue AlphaTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, - DebugLoc dl, SelectionDAG &DAG) { + DebugLoc dl, SelectionDAG &DAG) const { SDValue Copy = DAG.getCopyToReg(Chain, dl, Alpha::R26, DAG.getNode(AlphaISD::GlobalRetAddr, @@ -525,7 +528,8 @@ AlphaTargetLowering::LowerReturn(SDValue Chain, } void AlphaTargetLowering::LowerVAARG(SDNode *N, SDValue &Chain, - SDValue &DataPtr, SelectionDAG &DAG) { + SDValue &DataPtr, + SelectionDAG &DAG) const { Chain = N->getOperand(0); SDValue VAListP = N->getOperand(1); const Value *VAListS = cast<SrcValueSDNode>(N->getOperand(2))->getValue(); @@ -556,7 +560,8 @@ void AlphaTargetLowering::LowerVAARG(SDNode *N, SDValue &Chain, /// LowerOperation - Provide custom lowering hooks for some operations. /// -SDValue AlphaTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { +SDValue AlphaTargetLowering::LowerOperation(SDValue Op, + SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); switch (Op.getOpcode()) { default: llvm_unreachable("Wasn't expecting to be able to lower this!"); @@ -624,7 +629,7 @@ SDValue AlphaTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { } case ISD::ConstantPool: { ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); - Constant *C = CP->getConstVal(); + const Constant *C = CP->getConstVal(); SDValue CPI = DAG.getTargetConstantPool(C, MVT::i64, CP->getAlignment()); // FIXME there isn't really any debug info here @@ -637,7 +642,7 @@ SDValue AlphaTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { llvm_unreachable("TLS not implemented for Alpha."); case ISD::GlobalAddress: { GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op); - GlobalValue *GV = GSDN->getGlobal(); + const GlobalValue *GV = GSDN->getGlobal(); SDValue GA = DAG.getTargetGlobalAddress(GV, MVT::i64, GSDN->getOffset()); // FIXME there isn't really any debug info here @@ -725,17 +730,22 @@ SDValue AlphaTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { false, false, 0); } case ISD::VASTART: { + MachineFunction &MF = DAG.getMachineFunction(); + AlphaMachineFunctionInfo *FuncInfo = MF.getInfo<AlphaMachineFunctionInfo>(); + SDValue Chain = Op.getOperand(0); SDValue VAListP = Op.getOperand(1); const Value *VAListS = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); // vastart stores the address of the VarArgsBase and VarArgsOffset - SDValue FR = DAG.getFrameIndex(VarArgsBase, MVT::i64); + SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsBase(), MVT::i64); SDValue S1 = DAG.getStore(Chain, dl, FR, VAListP, VAListS, 0, false, false, 0); SDValue SA2 = DAG.getNode(ISD::ADD, dl, MVT::i64, VAListP, DAG.getConstant(8, MVT::i64)); - return DAG.getTruncStore(S1, dl, DAG.getConstant(VarArgsOffset, MVT::i64), + return DAG.getTruncStore(S1, dl, + DAG.getConstant(FuncInfo->getVarArgsOffset(), + MVT::i64), SA2, NULL, 0, MVT::i32, false, false, 0); } case ISD::RETURNADDR: @@ -749,7 +759,7 @@ SDValue AlphaTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { void AlphaTargetLowering::ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { DebugLoc dl = N->getDebugLoc(); assert(N->getValueType(0) == MVT::i32 && N->getOpcode() == ISD::VAARG && @@ -822,8 +832,7 @@ getRegClassForInlineAsmConstraint(const std::string &Constraint, MachineBasicBlock * AlphaTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *BB, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const { + MachineBasicBlock *BB) const { const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); assert((MI->getOpcode() == Alpha::CAS32 || MI->getOpcode() == Alpha::CAS64 || @@ -854,11 +863,6 @@ AlphaTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *llscMBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); - // Inform sdisel of the edge changes. - for (MachineBasicBlock::succ_iterator I = BB->succ_begin(), - E = BB->succ_end(); I != E; ++I) - EM->insert(std::make_pair(*I, sinkMBB)); - sinkMBB->transferSuccessors(thisMBB); F->insert(It, llscMBB); diff --git a/lib/Target/Alpha/AlphaISelLowering.h b/lib/Target/Alpha/AlphaISelLowering.h index 0f17025b77476..7ee823a86a4d8 100644 --- a/lib/Target/Alpha/AlphaISelLowering.h +++ b/lib/Target/Alpha/AlphaISelLowering.h @@ -60,8 +60,6 @@ namespace llvm { } class AlphaTargetLowering : public TargetLowering { - int VarArgsOffset; // What is the offset to the first vaarg - int VarArgsBase; // What is the base FrameIndex public: explicit AlphaTargetLowering(TargetMachine &TM); @@ -70,13 +68,13 @@ namespace llvm { /// LowerOperation - Provide custom lowering hooks for some operations. /// - virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG); + virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; /// ReplaceNodeResults - Replace the results of node with an illegal result /// type with new values built out of custom code. /// virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results, - SelectionDAG &DAG); + SelectionDAG &DAG) const; // Friendly names for dumps const char *getTargetNodeName(unsigned Opcode) const; @@ -85,7 +83,7 @@ namespace llvm { CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; ConstraintType getConstraintType(const std::string &Constraint) const; @@ -93,9 +91,9 @@ namespace llvm { getRegClassForInlineAsmConstraint(const std::string &Constraint, EVT VT) const; - MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *BB, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const; + MachineBasicBlock * + EmitInstrWithCustomInserter(MachineInstr *MI, + MachineBasicBlock *BB) const; virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const; @@ -110,14 +108,14 @@ namespace llvm { private: // Helpers for custom lowering. void LowerVAARG(SDNode *N, SDValue &Chain, SDValue &DataPtr, - SelectionDAG &DAG); + SelectionDAG &DAG) const; virtual SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerCall(SDValue Chain, SDValue Callee, @@ -125,13 +123,13 @@ namespace llvm { const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, - DebugLoc dl, SelectionDAG &DAG); + DebugLoc dl, SelectionDAG &DAG) const; }; } diff --git a/lib/Target/Alpha/AlphaJITInfo.cpp b/lib/Target/Alpha/AlphaJITInfo.cpp index cb8eb514656fa..12685ed17e3c5 100644 --- a/lib/Target/Alpha/AlphaJITInfo.cpp +++ b/lib/Target/Alpha/AlphaJITInfo.cpp @@ -103,7 +103,7 @@ extern "C" { asm( ".text\n" - ".globl AlphaComilationCallbackC\n" + ".globl AlphaCompilationCallbackC\n" ".align 4\n" ".globl AlphaCompilationCallback\n" ".ent AlphaCompilationCallback\n" diff --git a/lib/Target/Alpha/AlphaMachineFunctionInfo.h b/lib/Target/Alpha/AlphaMachineFunctionInfo.h index 8221fc7a7c97a..186738c20c706 100644 --- a/lib/Target/Alpha/AlphaMachineFunctionInfo.h +++ b/lib/Target/Alpha/AlphaMachineFunctionInfo.h @@ -30,17 +30,31 @@ class AlphaMachineFunctionInfo : public MachineFunctionInfo { /// the return address value. unsigned GlobalRetAddr; + /// VarArgsOffset - What is the offset to the first vaarg + int VarArgsOffset; + /// VarArgsBase - What is the base FrameIndex + int VarArgsBase; + public: - AlphaMachineFunctionInfo() : GlobalBaseReg(0), GlobalRetAddr(0) {} + AlphaMachineFunctionInfo() : GlobalBaseReg(0), GlobalRetAddr(0), + VarArgsOffset(0), VarArgsBase(0) {} explicit AlphaMachineFunctionInfo(MachineFunction &MF) : GlobalBaseReg(0), - GlobalRetAddr(0) {} + GlobalRetAddr(0), + VarArgsOffset(0), + VarArgsBase(0) {} unsigned getGlobalBaseReg() const { return GlobalBaseReg; } void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; } unsigned getGlobalRetAddr() const { return GlobalRetAddr; } void setGlobalRetAddr(unsigned Reg) { GlobalRetAddr = Reg; } + + int getVarArgsOffset() const { return VarArgsOffset; } + void setVarArgsOffset(int Offset) { VarArgsOffset = Offset; } + + int getVarArgsBase() const { return VarArgsBase; } + void setVarArgsBase(int Base) { VarArgsBase = Base; } }; } // End llvm namespace diff --git a/lib/Target/Alpha/AlphaRegisterInfo.cpp b/lib/Target/Alpha/AlphaRegisterInfo.cpp index 16a23cc120fb8..c083d8c30b16b 100644 --- a/lib/Target/Alpha/AlphaRegisterInfo.cpp +++ b/lib/Target/Alpha/AlphaRegisterInfo.cpp @@ -212,15 +212,14 @@ void AlphaRegisterInfo::emitPrologue(MachineFunction &MF) const { //handle GOP offset BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDAHg), Alpha::R29) - .addGlobalAddress(const_cast<Function*>(MF.getFunction())) + .addGlobalAddress(MF.getFunction()) .addReg(Alpha::R27).addImm(++curgpdist); BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDAg), Alpha::R29) - .addGlobalAddress(const_cast<Function*>(MF.getFunction())) + .addGlobalAddress(MF.getFunction()) .addReg(Alpha::R29).addImm(curgpdist); - //evil const_cast until MO stuff setup to handle const BuildMI(MBB, MBBI, dl, TII.get(Alpha::ALTENT)) - .addGlobalAddress(const_cast<Function*>(MF.getFunction())); + .addGlobalAddress(MF.getFunction()); // Get the number of bytes to allocate from the FrameInfo long NumBytes = MFI->getStackSize(); @@ -248,10 +247,7 @@ void AlphaRegisterInfo::emitPrologue(MachineFunction &MF) const { BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDA), Alpha::R30) .addImm(getLower16(NumBytes)).addReg(Alpha::R30); } else { - std::string msg; - raw_string_ostream Msg(msg); - Msg << "Too big a stack frame at " << NumBytes; - llvm_report_error(Msg.str()); + report_fatal_error("Too big a stack frame at " + Twine(NumBytes)); } //now if we need to, save the old FP and set the new @@ -300,10 +296,7 @@ void AlphaRegisterInfo::emitEpilogue(MachineFunction &MF, BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDA), Alpha::R30) .addImm(getLower16(NumBytes)).addReg(Alpha::R30); } else { - std::string msg; - raw_string_ostream Msg(msg); - Msg << "Too big a stack frame at " << NumBytes; - llvm_report_error(Msg.str()); + report_fatal_error("Too big a stack frame at " + Twine(NumBytes)); } } } diff --git a/lib/Target/Alpha/AlphaSchedule.td b/lib/Target/Alpha/AlphaSchedule.td index b7b4560847090..4dc04b88a70b2 100644 --- a/lib/Target/Alpha/AlphaSchedule.td +++ b/lib/Target/Alpha/AlphaSchedule.td @@ -53,7 +53,8 @@ def s_pseudo : InstrItinClass; //Table 24 Instruction Class Latency in Cycles //modified some -def Alpha21264Itineraries : ProcessorItineraries<[ +def Alpha21264Itineraries : ProcessorItineraries< + [L0, L1, FST0, FST1, U0, U1, FA, FM], [ InstrItinData<s_ild , [InstrStage<3, [L0, L1]>]>, InstrItinData<s_fld , [InstrStage<4, [L0, L1]>]>, InstrItinData<s_ist , [InstrStage<0, [L0, L1]>]>, diff --git a/lib/Target/Alpha/AlphaSelectionDAGInfo.cpp b/lib/Target/Alpha/AlphaSelectionDAGInfo.cpp new file mode 100644 index 0000000000000..0eb7b8f28a687 --- /dev/null +++ b/lib/Target/Alpha/AlphaSelectionDAGInfo.cpp @@ -0,0 +1,22 @@ +//===-- AlphaSelectionDAGInfo.cpp - Alpha SelectionDAG Info ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the AlphaSelectionDAGInfo class. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "alpha-selectiondag-info" +#include "AlphaSelectionDAGInfo.h" +using namespace llvm; + +AlphaSelectionDAGInfo::AlphaSelectionDAGInfo() { +} + +AlphaSelectionDAGInfo::~AlphaSelectionDAGInfo() { +} diff --git a/lib/Target/Alpha/AlphaSelectionDAGInfo.h b/lib/Target/Alpha/AlphaSelectionDAGInfo.h new file mode 100644 index 0000000000000..70889ae422e07 --- /dev/null +++ b/lib/Target/Alpha/AlphaSelectionDAGInfo.h @@ -0,0 +1,29 @@ +//===-- AlphaSelectionDAGInfo.h - Alpha SelectionDAG Info -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the Alpha subclass for TargetSelectionDAGInfo. +// +//===----------------------------------------------------------------------===// + +#ifndef ALPHASELECTIONDAGINFO_H +#define ALPHASELECTIONDAGINFO_H + +#include "llvm/Target/TargetSelectionDAGInfo.h" + +namespace llvm { + +class AlphaSelectionDAGInfo : public TargetSelectionDAGInfo { +public: + AlphaSelectionDAGInfo(); + ~AlphaSelectionDAGInfo(); +}; + +} + +#endif diff --git a/lib/Target/Alpha/AlphaTargetMachine.h b/lib/Target/Alpha/AlphaTargetMachine.h index 6f3a774a1eaf8..0990f6d7032bd 100644 --- a/lib/Target/Alpha/AlphaTargetMachine.h +++ b/lib/Target/Alpha/AlphaTargetMachine.h @@ -44,8 +44,8 @@ public: virtual const AlphaRegisterInfo *getRegisterInfo() const { return &InstrInfo.getRegisterInfo(); } - virtual AlphaTargetLowering* getTargetLowering() const { - return const_cast<AlphaTargetLowering*>(&TLInfo); + virtual const AlphaTargetLowering* getTargetLowering() const { + return &TLInfo; } virtual const TargetData *getTargetData() const { return &DataLayout; } virtual AlphaJITInfo* getJITInfo() { diff --git a/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp b/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp index 2a1f5559053f1..9f4aff6fd5f53 100644 --- a/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp +++ b/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp @@ -73,7 +73,7 @@ namespace { void AlphaAsmPrinter::printOperand(const MachineInstr *MI, int opNum, raw_ostream &O) { const MachineOperand &MO = MI->getOperand(opNum); - if (MO.getType() == MachineOperand::MO_Register) { + if (MO.isReg()) { assert(TargetRegisterInfo::isPhysicalRegister(MO.getReg()) && "Not physreg??"); O << getRegisterName(MO.getReg()); diff --git a/lib/Target/Alpha/CMakeLists.txt b/lib/Target/Alpha/CMakeLists.txt index b4f41aebd8db0..fbf7f3ab6b30f 100644 --- a/lib/Target/Alpha/CMakeLists.txt +++ b/lib/Target/Alpha/CMakeLists.txt @@ -23,6 +23,7 @@ add_llvm_target(AlphaCodeGen AlphaRegisterInfo.cpp AlphaSubtarget.cpp AlphaTargetMachine.cpp + AlphaSelectionDAGInfo.cpp ) target_link_libraries (LLVMAlphaCodeGen LLVMSelectionDAG) diff --git a/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp b/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp index c8d71aabd1bdd..b4da96cba59ec 100644 --- a/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp +++ b/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #include "Blackfin.h" -#include "BlackfinISelLowering.h" #include "BlackfinTargetMachine.h" #include "BlackfinRegisterInfo.h" #include "llvm/Intrinsics.h" diff --git a/lib/Target/Blackfin/BlackfinISelLowering.cpp b/lib/Target/Blackfin/BlackfinISelLowering.cpp index 5ce201347dc88..adf2118818706 100644 --- a/lib/Target/Blackfin/BlackfinISelLowering.cpp +++ b/lib/Target/Blackfin/BlackfinISelLowering.cpp @@ -139,15 +139,16 @@ MVT::SimpleValueType BlackfinTargetLowering::getSetCCResultType(EVT VT) const { } SDValue BlackfinTargetLowering::LowerGlobalAddress(SDValue Op, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { DebugLoc DL = Op.getDebugLoc(); - GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); + const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); Op = DAG.getTargetGlobalAddress(GV, MVT::i32); return DAG.getNode(BFISD::Wrapper, DL, MVT::i32, Op); } -SDValue BlackfinTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) { +SDValue BlackfinTargetLowering::LowerJumpTable(SDValue Op, + SelectionDAG &DAG) const { DebugLoc DL = Op.getDebugLoc(); int JTI = cast<JumpTableSDNode>(Op)->getIndex(); @@ -161,7 +162,8 @@ BlackfinTargetLowering::LowerFormalArguments(SDValue Chain, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) + const { MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); @@ -218,7 +220,7 @@ SDValue BlackfinTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, - DebugLoc dl, SelectionDAG &DAG) { + DebugLoc dl, SelectionDAG &DAG) const { // CCValAssign - represent the assignment of the return value to locations. SmallVector<CCValAssign, 16> RVLocs; @@ -278,7 +280,7 @@ BlackfinTargetLowering::LowerCall(SDValue Chain, SDValue Callee, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { // Blackfin target does not yet support tail call optimization. isTailCall = false; @@ -414,7 +416,7 @@ BlackfinTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // Expansion of ADDE / SUBE. This is a bit involved since blackfin doesn't have // add-with-carry instructions. -SDValue BlackfinTargetLowering::LowerADDE(SDValue Op, SelectionDAG &DAG) { +SDValue BlackfinTargetLowering::LowerADDE(SDValue Op, SelectionDAG &DAG) const { // Operands: lhs, rhs, carry-in (AC0 flag) // Results: sum, carry-out (AC0 flag) DebugLoc dl = Op.getDebugLoc(); @@ -448,7 +450,8 @@ SDValue BlackfinTargetLowering::LowerADDE(SDValue Op, SelectionDAG &DAG) { return DAG.getMergeValues(ops, 2, dl); } -SDValue BlackfinTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { +SDValue BlackfinTargetLowering::LowerOperation(SDValue Op, + SelectionDAG &DAG) const { switch (Op.getOpcode()) { default: Op.getNode()->dump(); @@ -468,7 +471,7 @@ SDValue BlackfinTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { void BlackfinTargetLowering::ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { DebugLoc dl = N->getDebugLoc(); switch (N->getOpcode()) { default: diff --git a/lib/Target/Blackfin/BlackfinISelLowering.h b/lib/Target/Blackfin/BlackfinISelLowering.h index 5f399103f157e..a7842482687f6 100644 --- a/lib/Target/Blackfin/BlackfinISelLowering.h +++ b/lib/Target/Blackfin/BlackfinISelLowering.h @@ -30,16 +30,13 @@ namespace llvm { } class BlackfinTargetLowering : public TargetLowering { - int VarArgsFrameOffset; // Frame offset to start of varargs area. public: BlackfinTargetLowering(TargetMachine &TM); virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const; - virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG); + virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results, - SelectionDAG &DAG); - - int getVarArgsFrameOffset() const { return VarArgsFrameOffset; } + SelectionDAG &DAG) const; ConstraintType getConstraintType(const std::string &Constraint) const; std::pair<unsigned, const TargetRegisterClass*> @@ -52,29 +49,29 @@ namespace llvm { unsigned getFunctionAlignment(const Function *F) const; private: - SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG); - SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG); - SDValue LowerADDE(SDValue Op, SelectionDAG &DAG); + SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerADDE(SDValue Op, SelectionDAG &DAG) const; virtual SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, - DebugLoc dl, SelectionDAG &DAG); + DebugLoc dl, SelectionDAG &DAG) const; }; } // end namespace llvm diff --git a/lib/Target/Blackfin/BlackfinRegisterInfo.cpp b/lib/Target/Blackfin/BlackfinRegisterInfo.cpp index 6fd610fa3b537..2512c9b7fb1de 100644 --- a/lib/Target/Blackfin/BlackfinRegisterInfo.cpp +++ b/lib/Target/Blackfin/BlackfinRegisterInfo.cpp @@ -110,7 +110,8 @@ BlackfinRegisterInfo::getPhysicalRegisterRegClass(unsigned reg, EVT VT) const { // if frame pointer elimination is disabled. bool BlackfinRegisterInfo::hasFP(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); - return NoFramePointerElim || MFI->hasCalls() || MFI->hasVarSizedObjects(); + return DisableFramePointerElim(MF) || + MFI->hasCalls() || MFI->hasVarSizedObjects(); } bool BlackfinRegisterInfo:: diff --git a/lib/Target/Blackfin/BlackfinSelectionDAGInfo.cpp b/lib/Target/Blackfin/BlackfinSelectionDAGInfo.cpp new file mode 100644 index 0000000000000..f4bb25fb75297 --- /dev/null +++ b/lib/Target/Blackfin/BlackfinSelectionDAGInfo.cpp @@ -0,0 +1,22 @@ +//===-- BlackfinSelectionDAGInfo.cpp - Blackfin SelectionDAG Info ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the BlackfinSelectionDAGInfo class. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "blackfin-selectiondag-info" +#include "BlackfinSelectionDAGInfo.h" +using namespace llvm; + +BlackfinSelectionDAGInfo::BlackfinSelectionDAGInfo() { +} + +BlackfinSelectionDAGInfo::~BlackfinSelectionDAGInfo() { +} diff --git a/lib/Target/Blackfin/BlackfinSelectionDAGInfo.h b/lib/Target/Blackfin/BlackfinSelectionDAGInfo.h new file mode 100644 index 0000000000000..a620330d18d41 --- /dev/null +++ b/lib/Target/Blackfin/BlackfinSelectionDAGInfo.h @@ -0,0 +1,29 @@ +//===-- BlackfinSelectionDAGInfo.h - Blackfin SelectionDAG Info -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the Blackfin subclass for TargetSelectionDAGInfo. +// +//===----------------------------------------------------------------------===// + +#ifndef BLACKFINSELECTIONDAGINFO_H +#define BLACKFINSELECTIONDAGINFO_H + +#include "llvm/Target/TargetSelectionDAGInfo.h" + +namespace llvm { + +class BlackfinSelectionDAGInfo : public TargetSelectionDAGInfo { +public: + BlackfinSelectionDAGInfo(); + ~BlackfinSelectionDAGInfo(); +}; + +} + +#endif diff --git a/lib/Target/Blackfin/BlackfinTargetMachine.h b/lib/Target/Blackfin/BlackfinTargetMachine.h index a14052bc4db54..07e73944143da 100644 --- a/lib/Target/Blackfin/BlackfinTargetMachine.h +++ b/lib/Target/Blackfin/BlackfinTargetMachine.h @@ -43,8 +43,8 @@ namespace llvm { virtual const BlackfinRegisterInfo *getRegisterInfo() const { return &InstrInfo.getRegisterInfo(); } - virtual BlackfinTargetLowering* getTargetLowering() const { - return const_cast<BlackfinTargetLowering*>(&TLInfo); + virtual const BlackfinTargetLowering* getTargetLowering() const { + return &TLInfo; } virtual const TargetData *getTargetData() const { return &DataLayout; } virtual bool addInstSelector(PassManagerBase &PM, diff --git a/lib/Target/Blackfin/CMakeLists.txt b/lib/Target/Blackfin/CMakeLists.txt index deb005d89eb51..f8847d057da61 100644 --- a/lib/Target/Blackfin/CMakeLists.txt +++ b/lib/Target/Blackfin/CMakeLists.txt @@ -20,4 +20,5 @@ add_llvm_target(BlackfinCodeGen BlackfinRegisterInfo.cpp BlackfinSubtarget.cpp BlackfinTargetMachine.cpp + BlackfinSelectionDAGInfo.cpp ) diff --git a/lib/Target/CBackend/CBackend.cpp b/lib/Target/CBackend/CBackend.cpp index 0c265adf7419e..67f513b09860c 100644 --- a/lib/Target/CBackend/CBackend.cpp +++ b/lib/Target/CBackend/CBackend.cpp @@ -274,8 +274,8 @@ namespace { // isInlineAsm - Check if the instruction is a call to an inline asm chunk static bool isInlineAsm(const Instruction& I) { - if (isa<CallInst>(&I) && isa<InlineAsm>(I.getOperand(0))) - return true; + if (const CallInst *CI = dyn_cast<CallInst>(&I)) + return isa<InlineAsm>(CI->getCalledValue()); return false; } @@ -473,8 +473,9 @@ void CWriter::printStructReturnPointerFunctionType(raw_ostream &Out, PrintedType = true; } if (FTy->isVarArg()) { - if (PrintedType) - FunctionInnards << ", ..."; + if (!PrintedType) + FunctionInnards << " int"; //dummy argument for empty vararg functs + FunctionInnards << ", ..."; } else if (!PrintedType) { FunctionInnards << "void"; } @@ -568,8 +569,9 @@ raw_ostream &CWriter::printType(raw_ostream &Out, const Type *Ty, ++Idx; } if (FTy->isVarArg()) { - if (FTy->getNumParams()) - FunctionInnards << ", ..."; + if (!FTy->getNumParams()) + FunctionInnards << " int"; //dummy argument for empty vaarg functs + FunctionInnards << ", ..."; } else if (!FTy->getNumParams()) { FunctionInnards << "void"; } @@ -1344,7 +1346,7 @@ void CWriter::writeInstComputationInline(Instruction &I) { Ty!=Type::getInt16Ty(I.getContext()) && Ty!=Type::getInt32Ty(I.getContext()) && Ty!=Type::getInt64Ty(I.getContext()))) { - llvm_report_error("The C backend does not currently support integer " + report_fatal_error("The C backend does not currently support integer " "types of widths other than 1, 8, 16, 32, 64.\n" "This is being tracked as PR 4158."); } @@ -2237,12 +2239,16 @@ void CWriter::printFunctionSignature(const Function *F, bool Prototype) { } } + if (!PrintedArg && FT->isVarArg()) { + FunctionInnards << "int vararg_dummy_arg"; + PrintedArg = true; + } + // Finish printing arguments... if this is a vararg function, print the ..., // unless there are no known types, in which case, we just emit (). // if (FT->isVarArg() && PrintedArg) { - if (PrintedArg) FunctionInnards << ", "; - FunctionInnards << "..."; // Output varargs portion of signature! + FunctionInnards << ",..."; // Output varargs portion of signature! } else if (!FT->isVarArg() && !PrintedArg) { FunctionInnards << "void"; // ret() -> ret(void) in C. } @@ -2858,7 +2864,7 @@ void CWriter::lowerIntrinsics(Function &F) { } void CWriter::visitCallInst(CallInst &I) { - if (isa<InlineAsm>(I.getOperand(0))) + if (isa<InlineAsm>(I.getCalledValue())) return visitInlineAsm(I); bool WroteCallee = false; @@ -2928,6 +2934,12 @@ void CWriter::visitCallInst(CallInst &I) { Out << '('; + bool PrintedArg = false; + if(FTy->isVarArg() && !FTy->getNumParams()) { + Out << "0 /*dummy arg*/"; + PrintedArg = true; + } + unsigned NumDeclaredParams = FTy->getNumParams(); CallSite::arg_iterator AI = I.op_begin()+1, AE = I.op_end(); @@ -2937,7 +2949,7 @@ void CWriter::visitCallInst(CallInst &I) { ++ArgNo; } - bool PrintedArg = false; + for (; AI != AE; ++AI, ++ArgNo) { if (PrintedArg) Out << ", "; if (ArgNo < NumDeclaredParams && @@ -2987,15 +2999,10 @@ bool CWriter::visitBuiltinCall(CallInst &I, Intrinsic::ID ID, writeOperand(I.getOperand(1)); Out << ", "; // Output the last argument to the enclosing function. - if (I.getParent()->getParent()->arg_empty()) { - std::string msg; - raw_string_ostream Msg(msg); - Msg << "The C backend does not currently support zero " - << "argument varargs functions, such as '" - << I.getParent()->getParent()->getName() << "'!"; - llvm_report_error(Msg.str()); - } - writeOperand(--I.getParent()->getParent()->arg_end()); + if (I.getParent()->getParent()->arg_empty()) + Out << "vararg_dummy_arg"; + else + writeOperand(--I.getParent()->getParent()->arg_end()); Out << ')'; return true; case Intrinsic::vaend: @@ -3165,7 +3172,7 @@ static std::string gccifyAsm(std::string asmstr) { //TODO: assumptions about what consume arguments from the call are likely wrong // handle communitivity void CWriter::visitInlineAsm(CallInst &CI) { - InlineAsm* as = cast<InlineAsm>(CI.getOperand(0)); + InlineAsm* as = cast<InlineAsm>(CI.getCalledValue()); std::vector<InlineAsm::ConstraintInfo> Constraints = as->ParseConstraints(); std::vector<std::pair<Value*, int> > ResultVals; diff --git a/lib/Target/CellSPU/AsmPrinter/CMakeLists.txt b/lib/Target/CellSPU/AsmPrinter/CMakeLists.txt index 1e508fe18908c..8a2b59a88a68c 100644 --- a/lib/Target/CellSPU/AsmPrinter/CMakeLists.txt +++ b/lib/Target/CellSPU/AsmPrinter/CMakeLists.txt @@ -1,9 +1,9 @@ -include_directories(
- ${CMAKE_CURRENT_BINARY_DIR}/..
- ${CMAKE_CURRENT_SOURCE_DIR}/..
- )
-
-add_llvm_library(LLVMCellSPUAsmPrinter
+include_directories( + ${CMAKE_CURRENT_BINARY_DIR}/.. + ${CMAKE_CURRENT_SOURCE_DIR}/.. + ) + +add_llvm_library(LLVMCellSPUAsmPrinter SPUAsmPrinter.cpp - )
-add_dependencies(LLVMCellSPUAsmPrinter CellSPUCodeGenTable_gen)
\ No newline at end of file + ) +add_dependencies(LLVMCellSPUAsmPrinter CellSPUCodeGenTable_gen) diff --git a/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp b/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp index 0ef36e550d037..3e955310b513d 100644 --- a/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp +++ b/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp @@ -280,7 +280,7 @@ namespace { void SPUAsmPrinter::printOp(const MachineOperand &MO, raw_ostream &O) { switch (MO.getType()) { case MachineOperand::MO_Immediate: - llvm_report_error("printOp() does not handle immediate values"); + report_fatal_error("printOp() does not handle immediate values"); return; case MachineOperand::MO_MachineBasicBlock: @@ -307,7 +307,7 @@ void SPUAsmPrinter::printOp(const MachineOperand &MO, raw_ostream &O) { // External or weakly linked global variables need non-lazily-resolved // stubs if (TM.getRelocationModel() != Reloc::Static) { - GlobalValue *GV = MO.getGlobal(); + const GlobalValue *GV = MO.getGlobal(); if (((GV->isDeclaration() || GV->hasWeakLinkage() || GV->hasLinkOnceLinkage() || GV->hasCommonLinkage()))) { O << *GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr"); diff --git a/lib/Target/CellSPU/CMakeLists.txt b/lib/Target/CellSPU/CMakeLists.txt index 0cb6676d7df71..ddfca37d23e33 100644 --- a/lib/Target/CellSPU/CMakeLists.txt +++ b/lib/Target/CellSPU/CMakeLists.txt @@ -21,6 +21,7 @@ add_llvm_target(CellSPUCodeGen SPURegisterInfo.cpp SPUSubtarget.cpp SPUTargetMachine.cpp + SPUSelectionDAGInfo.cpp ) target_link_libraries (LLVMCellSPUCodeGen LLVMSelectionDAG) diff --git a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp index 90f83100cfabb..c3c2b3947e064 100644 --- a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp +++ b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp @@ -14,7 +14,6 @@ #include "SPU.h" #include "SPUTargetMachine.h" -#include "SPUISelLowering.h" #include "SPUHazardRecognizers.h" #include "SPUFrameInfo.h" #include "SPURegisterNames.h" @@ -194,11 +193,8 @@ namespace { #ifndef NDEBUG if (retval == 0) { - std::string msg; - raw_string_ostream Msg(msg); - Msg << "SPUISelDAGToDAG.cpp: getValueTypeMapEntry returns NULL for " - << VT.getEVTString(); - llvm_report_error(Msg.str()); + report_fatal_error("SPUISelDAGToDAG.cpp: getValueTypeMapEntry returns" + "NULL for " + Twine(VT.getEVTString())); } #endif @@ -242,8 +238,8 @@ namespace { class SPUDAGToDAGISel : public SelectionDAGISel { - SPUTargetMachine &TM; - SPUTargetLowering &SPUtli; + const SPUTargetMachine &TM; + const SPUTargetLowering &SPUtli; unsigned GlobalBaseReg; public: @@ -305,16 +301,15 @@ namespace { std::vector<Constant*> CV; for (size_t i = 0; i < bvNode->getNumOperands(); ++i) { - ConstantSDNode *V = dyn_cast<ConstantSDNode > (bvNode->getOperand(i)); + ConstantSDNode *V = cast<ConstantSDNode > (bvNode->getOperand(i)); CV.push_back(const_cast<ConstantInt *>(V->getConstantIntValue())); } - Constant *CP = ConstantVector::get(CV); + const Constant *CP = ConstantVector::get(CV); SDValue CPIdx = CurDAG->getConstantPool(CP, SPUtli.getPointerTy()); unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); SDValue CGPoolOffset = - SPU::LowerConstantPool(CPIdx, *CurDAG, - SPUtli.getSPUTargetMachine()); + SPU::LowerConstantPool(CPIdx, *CurDAG, TM); HandleSDNode Dummy(CurDAG->getLoad(vecVT, dl, CurDAG->getEntryNode(), CGPoolOffset, @@ -433,13 +428,13 @@ SPUDAGToDAGISel::SelectAFormAddr(SDNode *Op, SDValue N, SDValue &Base, case ISD::Constant: case ISD::ConstantPool: case ISD::GlobalAddress: - llvm_report_error("SPU SelectAFormAddr: Constant/Pool/Global not lowered."); + report_fatal_error("SPU SelectAFormAddr: Constant/Pool/Global not lowered."); /*NOTREACHED*/ case ISD::TargetConstant: case ISD::TargetGlobalAddress: case ISD::TargetJumpTable: - llvm_report_error("SPUSelectAFormAddr: Target Constant/Pool/Global " + report_fatal_error("SPUSelectAFormAddr: Target Constant/Pool/Global " "not wrapped as A-form address."); /*NOTREACHED*/ @@ -457,7 +452,7 @@ SPUDAGToDAGISel::SelectAFormAddr(SDNode *Op, SDValue N, SDValue &Base, case ISD::TargetGlobalAddress: { GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op0); - GlobalValue *GV = GSDN->getGlobal(); + const GlobalValue *GV = GSDN->getGlobal(); if (GV->getAlignment() == 16) { Base = Op0; Index = Zero; @@ -510,7 +505,7 @@ SPUDAGToDAGISel::DFormAddressPredicate(SDNode *Op, SDValue N, SDValue &Base, if (Opc == ISD::FrameIndex) { // Stack frame index must be less than 512 (divided by 16): - FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(N); + FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(N); int FI = int(FIN->getIndex()); DEBUG(errs() << "SelectDFormAddr: ISD::FrameIndex = " << FI << "\n"); @@ -531,11 +526,11 @@ SPUDAGToDAGISel::DFormAddressPredicate(SDNode *Op, SDValue N, SDValue &Base, return true; } else if (Op1.getOpcode() == ISD::Constant || Op1.getOpcode() == ISD::TargetConstant) { - ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1); + ConstantSDNode *CN = cast<ConstantSDNode>(Op1); int32_t offset = int32_t(CN->getSExtValue()); if (Op0.getOpcode() == ISD::FrameIndex) { - FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Op0); + FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(Op0); int FI = int(FIN->getIndex()); DEBUG(errs() << "SelectDFormAddr: ISD::ADD offset = " << offset << " frame index = " << FI << "\n"); @@ -552,11 +547,11 @@ SPUDAGToDAGISel::DFormAddressPredicate(SDNode *Op, SDValue N, SDValue &Base, } } else if (Op0.getOpcode() == ISD::Constant || Op0.getOpcode() == ISD::TargetConstant) { - ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op0); + ConstantSDNode *CN = cast<ConstantSDNode>(Op0); int32_t offset = int32_t(CN->getSExtValue()); if (Op1.getOpcode() == ISD::FrameIndex) { - FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Op1); + FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(Op1); int FI = int(FIN->getIndex()); DEBUG(errs() << "SelectDFormAddr: ISD::ADD offset = " << offset << " frame index = " << FI << "\n"); @@ -725,7 +720,7 @@ SPUDAGToDAGISel::Select(SDNode *N) { switch (Op0VT.getSimpleVT().SimpleTy) { default: - llvm_report_error("CellSPU Select: Unhandled zero/any extend EVT"); + report_fatal_error("CellSPU Select: Unhandled zero/any extend EVT"); /*NOTREACHED*/ case MVT::i32: shufMask = CurDAG->getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, @@ -915,11 +910,8 @@ SPUDAGToDAGISel::Select(SDNode *N) { const valtype_map_s *vtm = getValueTypeMapEntry(VT); if (vtm->ldresult_ins == 0) { - std::string msg; - raw_string_ostream Msg(msg); - Msg << "LDRESULT for unsupported type: " - << VT.getEVTString(); - llvm_report_error(Msg.str()); + report_fatal_error("LDRESULT for unsupported type: " + + Twine(VT.getEVTString())); } Opc = vtm->ldresult_ins; @@ -1252,7 +1244,7 @@ SDNode *SPUDAGToDAGISel::SelectI64Constant(uint64_t Value64, EVT OpVT, return CurDAG->getMachineNode(SPU::ORi64_v2i64, dl, OpVT, SDValue(emitBuildVector(i64vec.getNode()), 0)); } else { - llvm_report_error("SPUDAGToDAGISel::SelectI64Constant: Unhandled i64vec" + report_fatal_error("SPUDAGToDAGISel::SelectI64Constant: Unhandled i64vec" "condition"); } } diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp index 4b0d4429d28b3..5e04454f6a5a9 100644 --- a/lib/Target/CellSPU/SPUISelLowering.cpp +++ b/lib/Target/CellSPU/SPUISelLowering.cpp @@ -15,6 +15,7 @@ #include "SPUISelLowering.h" #include "SPUTargetMachine.h" #include "SPUFrameInfo.h" +#include "SPUMachineFunction.h" #include "llvm/Constants.h" #include "llvm/Function.h" #include "llvm/Intrinsics.h" @@ -71,11 +72,8 @@ namespace { #ifndef NDEBUG if (retval == 0) { - std::string msg; - raw_string_ostream Msg(msg); - Msg << "getValueTypeMapEntry returns NULL for " - << VT.getEVTString(); - llvm_report_error(Msg.str()); + report_fatal_error("getValueTypeMapEntry returns NULL for " + + Twine(VT.getEVTString())); } #endif @@ -91,7 +89,7 @@ namespace { SDValue ExpandLibCall(RTLIB::Libcall LC, SDValue Op, SelectionDAG &DAG, - bool isSigned, SDValue &Hi, SPUTargetLowering &TLI) { + bool isSigned, SDValue &Hi, const SPUTargetLowering &TLI) { // The input chain to this libcall is the entry node of the function. // Legalizing the call will automatically add the previous call to the // dependence. @@ -714,12 +712,9 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { case ISD::POST_DEC: case ISD::LAST_INDEXED_MODE: { - std::string msg; - raw_string_ostream Msg(msg); - Msg << "LowerLOAD: Got a LoadSDNode with an addr mode other than " - "UNINDEXED\n"; - Msg << (unsigned) LN->getAddressingMode(); - llvm_report_error(Msg.str()); + report_fatal_error("LowerLOAD: Got a LoadSDNode with an addr mode other " + "than UNINDEXED\n" + + Twine((unsigned)LN->getAddressingMode())); /*NOTREACHED*/ } } @@ -884,12 +879,9 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { case ISD::POST_DEC: case ISD::LAST_INDEXED_MODE: { - std::string msg; - raw_string_ostream Msg(msg); - Msg << "LowerLOAD: Got a LoadSDNode with an addr mode other than " - "UNINDEXED\n"; - Msg << (unsigned) SN->getAddressingMode(); - llvm_report_error(Msg.str()); + report_fatal_error("LowerLOAD: Got a LoadSDNode with an addr mode other " + "than UNINDEXED\n" + + Twine((unsigned)SN->getAddressingMode())); /*NOTREACHED*/ } } @@ -902,7 +894,7 @@ static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { EVT PtrVT = Op.getValueType(); ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); - Constant *C = CP->getConstVal(); + const Constant *C = CP->getConstVal(); SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment()); SDValue Zero = DAG.getConstant(0, PtrVT); const TargetMachine &TM = DAG.getTarget(); @@ -960,7 +952,7 @@ static SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { EVT PtrVT = Op.getValueType(); GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op); - GlobalValue *GV = GSDN->getGlobal(); + const GlobalValue *GV = GSDN->getGlobal(); SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset()); const TargetMachine &TM = DAG.getTarget(); SDValue Zero = DAG.getConstant(0, PtrVT); @@ -976,7 +968,7 @@ LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo); } } else { - llvm_report_error("LowerGlobalAddress: Relocation model other than static" + report_fatal_error("LowerGlobalAddress: Relocation model other than static" "not supported."); /*NOTREACHED*/ } @@ -1013,11 +1005,13 @@ SPUTargetLowering::LowerFormalArguments(SDValue Chain, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) + const { MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); MachineRegisterInfo &RegInfo = MF.getRegInfo(); + SPUFunctionInfo *FuncInfo = MF.getInfo<SPUFunctionInfo>(); const unsigned *ArgRegs = SPURegisterInfo::getArgRegs(); const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs(); @@ -1038,13 +1032,9 @@ SPUTargetLowering::LowerFormalArguments(SDValue Chain, const TargetRegisterClass *ArgRegClass; switch (ObjectVT.getSimpleVT().SimpleTy) { - default: { - std::string msg; - raw_string_ostream Msg(msg); - Msg << "LowerFormalArguments Unhandled argument type: " - << ObjectVT.getEVTString(); - llvm_report_error(Msg.str()); - } + default: + report_fatal_error("LowerFormalArguments Unhandled argument type: " + + Twine(ObjectVT.getEVTString())); case MVT::i8: ArgRegClass = &SPU::R8CRegClass; break; @@ -1104,9 +1094,10 @@ SPUTargetLowering::LowerFormalArguments(SDValue Chain, // Create the frame slot for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) { - VarArgsFrameIndex = MFI->CreateFixedObject(StackSlotSize, ArgOffset, - true, false); - SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT); + FuncInfo->setVarArgsFrameIndex( + MFI->CreateFixedObject(StackSlotSize, ArgOffset, + true, false)); + SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); unsigned VReg = MF.addLiveIn(ArgRegs[ArgRegIdx], &SPU::R32CRegClass); SDValue ArgVal = DAG.getRegister(VReg, MVT::v16i8); SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, NULL, 0, @@ -1146,7 +1137,7 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { // CellSPU target does not yet support tail call optimization. isTailCall = false; @@ -1255,7 +1246,7 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol // node so that legalize doesn't hack it. if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { - GlobalValue *GV = G->getGlobal(); + const GlobalValue *GV = G->getGlobal(); EVT CalleeVT = Callee.getValueType(); SDValue Zero = DAG.getConstant(0, PtrVT); SDValue GA = DAG.getTargetGlobalAddress(GV, CalleeVT); @@ -1339,22 +1330,12 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee, InVals.push_back(Chain.getValue(0)); } break; + case MVT::i8: + case MVT::i16: case MVT::i64: - Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i64, - InFlag).getValue(1); - InVals.push_back(Chain.getValue(0)); - break; case MVT::i128: - Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i128, - InFlag).getValue(1); - InVals.push_back(Chain.getValue(0)); - break; case MVT::f32: case MVT::f64: - Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, Ins[0].VT, - InFlag).getValue(1); - InVals.push_back(Chain.getValue(0)); - break; case MVT::v2f64: case MVT::v2i64: case MVT::v4f32: @@ -1374,7 +1355,7 @@ SDValue SPUTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, - DebugLoc dl, SelectionDAG &DAG) { + DebugLoc dl, SelectionDAG &DAG) const { SmallVector<CCValAssign, 16> RVLocs; CCState CCInfo(CallConv, isVarArg, getTargetMachine(), @@ -1581,14 +1562,10 @@ LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { uint64_t SplatBits = APSplatBits.getZExtValue(); switch (VT.getSimpleVT().SimpleTy) { - default: { - std::string msg; - raw_string_ostream Msg(msg); - Msg << "CellSPU: Unhandled VT in LowerBUILD_VECTOR, VT = " - << VT.getEVTString(); - llvm_report_error(Msg.str()); + default: + report_fatal_error("CellSPU: Unhandled VT in LowerBUILD_VECTOR, VT = " + + Twine(VT.getEVTString())); /*NOTREACHED*/ - } case MVT::v4f32: { uint32_t Value32 = uint32_t(SplatBits); assert(SplatBitSize == 32 @@ -2004,7 +1981,7 @@ static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { // slot 0 across the vector EVT VecVT = N.getValueType(); if (!VecVT.isSimple() || !VecVT.isVector() || !VecVT.is128BitVector()) { - llvm_report_error("LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit" + report_fatal_error("LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit" "vector type!"); } @@ -2032,7 +2009,7 @@ static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { switch (VT.getSimpleVT().SimpleTy) { default: - llvm_report_error("LowerEXTRACT_VECTOR_ELT(varable): Unhandled vector" + report_fatal_error("LowerEXTRACT_VECTOR_ELT(varable): Unhandled vector" "type"); /*NOTREACHED*/ case MVT::i8: { @@ -2368,7 +2345,7 @@ static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) { All conversions to i64 are expanded to a libcall. */ static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, - SPUTargetLowering &TLI) { + const SPUTargetLowering &TLI) { EVT OpVT = Op.getValueType(); SDValue Op0 = Op.getOperand(0); EVT Op0VT = Op0.getValueType(); @@ -2394,7 +2371,7 @@ static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, All conversions from i64 are expanded to a libcall. */ static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG, - SPUTargetLowering &TLI) { + const SPUTargetLowering &TLI) { EVT OpVT = Op.getValueType(); SDValue Op0 = Op.getOperand(0); EVT Op0VT = Op0.getValueType(); @@ -2515,7 +2492,7 @@ static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG, case ISD::SETONE: compareOp = ISD::SETNE; break; default: - llvm_report_error("CellSPU ISel Select: unimplemented f64 condition"); + report_fatal_error("CellSPU ISel Select: unimplemented f64 condition"); } SDValue result = @@ -2670,7 +2647,7 @@ static SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG) lowering of nodes. */ SDValue -SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) +SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { unsigned Opc = (unsigned) Op.getOpcode(); EVT VT = Op.getValueType(); @@ -2766,7 +2743,7 @@ SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) void SPUTargetLowering::ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results, - SelectionDAG &DAG) + SelectionDAG &DAG) const { #if 0 unsigned Opc = (unsigned) N->getOpcode(); diff --git a/lib/Target/CellSPU/SPUISelLowering.h b/lib/Target/CellSPU/SPUISelLowering.h index 3c511772680d4..9ebd442b43c70 100644 --- a/lib/Target/CellSPU/SPUISelLowering.h +++ b/lib/Target/CellSPU/SPUISelLowering.h @@ -109,11 +109,11 @@ namespace llvm { virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const; //! Custom lowering hooks - virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG); + virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; //! Custom lowering hook for nodes with illegal result types. virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results, - SelectionDAG &DAG); + SelectionDAG &DAG) const; virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; @@ -153,7 +153,7 @@ namespace llvm { CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerCall(SDValue Chain, SDValue Callee, @@ -162,13 +162,13 @@ namespace llvm { const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, - DebugLoc dl, SelectionDAG &DAG); + DebugLoc dl, SelectionDAG &DAG) const; }; } diff --git a/lib/Target/CellSPU/SPUMCAsmInfo.cpp b/lib/Target/CellSPU/SPUMCAsmInfo.cpp index 3e17a51b505f5..68445cf6bf9d7 100644 --- a/lib/Target/CellSPU/SPUMCAsmInfo.cpp +++ b/lib/Target/CellSPU/SPUMCAsmInfo.cpp @@ -18,7 +18,6 @@ SPULinuxMCAsmInfo::SPULinuxMCAsmInfo(const Target &T, const StringRef &TT) { ZeroDirective = "\t.space\t"; Data64bitsDirective = "\t.quad\t"; AlignmentIsInBytes = false; - HasLCOMMDirective = true; PCSymbol = "."; CommentString = "#"; diff --git a/lib/Target/CellSPU/SPUMachineFunction.h b/lib/Target/CellSPU/SPUMachineFunction.h index 6a66967bc050d..3ef3ccbcaaee2 100644 --- a/lib/Target/CellSPU/SPUMachineFunction.h +++ b/lib/Target/CellSPU/SPUMachineFunction.h @@ -26,14 +26,20 @@ private: /// bool UsesLR; + // VarArgsFrameIndex - FrameIndex for start of varargs area. + int VarArgsFrameIndex; + public: SPUFunctionInfo(MachineFunction& MF) - : UsesLR(false) + : UsesLR(false), + VarArgsFrameIndex(0) {} void setUsesLR(bool U) { UsesLR = U; } bool usesLR() { return UsesLR; } + int getVarArgsFrameIndex() const { return VarArgsFrameIndex; } + void setVarArgsFrameIndex(int Index) { VarArgsFrameIndex = Index; } }; } // end of namespace llvm diff --git a/lib/Target/CellSPU/SPURegisterInfo.cpp b/lib/Target/CellSPU/SPURegisterInfo.cpp index ffac58182aec7..fdbe10f84a7ae 100644 --- a/lib/Target/CellSPU/SPURegisterInfo.cpp +++ b/lib/Target/CellSPU/SPURegisterInfo.cpp @@ -179,7 +179,7 @@ unsigned SPURegisterInfo::getRegisterNumbering(unsigned RegEnum) { case SPU::R126: return 126; case SPU::R127: return 127; default: - llvm_report_error("Unhandled reg in SPURegisterInfo::getRegisterNumbering"); + report_fatal_error("Unhandled reg in SPURegisterInfo::getRegisterNumbering"); } } @@ -303,7 +303,7 @@ BitVector SPURegisterInfo::getReservedRegs(const MachineFunction &MF) const { // static bool needsFP(const MachineFunction &MF) { const MachineFrameInfo *MFI = MF.getFrameInfo(); - return NoFramePointerElim || MFI->hasVarSizedObjects(); + return DisableFramePointerElim(MF) || MFI->hasVarSizedObjects(); } //-------------------------------------------------------------------------- @@ -509,10 +509,7 @@ void SPURegisterInfo::emitPrologue(MachineFunction &MF) const .addReg(SPU::R2) .addReg(SPU::R1); } else { - std::string msg; - raw_string_ostream Msg(msg); - Msg << "Unhandled frame size: " << FrameSize; - llvm_report_error(Msg.str()); + report_fatal_error("Unhandled frame size: " + Twine(FrameSize)); } if (hasDebugInfo) { @@ -605,10 +602,7 @@ SPURegisterInfo::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const .addReg(SPU::R2) .addReg(SPU::R1); } else { - std::string msg; - raw_string_ostream Msg(msg); - Msg << "Unhandled frame size: " << FrameSize; - llvm_report_error(Msg.str()); + report_fatal_error("Unhandled frame size: " + Twine(FrameSize)); } } } diff --git a/lib/Target/CellSPU/SPUSchedule.td b/lib/Target/CellSPU/SPUSchedule.td index 785dc46601107..a0b581f1632b6 100644 --- a/lib/Target/CellSPU/SPUSchedule.td +++ b/lib/Target/CellSPU/SPUSchedule.td @@ -36,7 +36,7 @@ def RotateShift : InstrItinClass; // EVEN_UNIT def ImmLoad : InstrItinClass; // EVEN_UNIT /* Note: The itinerary for the Cell SPU is somewhat contrived... */ -def SPUItineraries : ProcessorItineraries<[ +def SPUItineraries : ProcessorItineraries<[ODD_UNIT, EVEN_UNIT], [ InstrItinData<LoadStore , [InstrStage<6, [ODD_UNIT]>]>, InstrItinData<BranchHints , [InstrStage<6, [ODD_UNIT]>]>, InstrItinData<BranchResolv, [InstrStage<4, [ODD_UNIT]>]>, diff --git a/lib/Target/CellSPU/SPUSelectionDAGInfo.cpp b/lib/Target/CellSPU/SPUSelectionDAGInfo.cpp new file mode 100644 index 0000000000000..ca2a4bf2199a0 --- /dev/null +++ b/lib/Target/CellSPU/SPUSelectionDAGInfo.cpp @@ -0,0 +1,22 @@ +//===-- SPUSelectionDAGInfo.cpp - CellSPU SelectionDAG Info ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the SPUSelectionDAGInfo class. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "cellspu-selectiondag-info" +#include "SPUSelectionDAGInfo.h" +using namespace llvm; + +SPUSelectionDAGInfo::SPUSelectionDAGInfo() { +} + +SPUSelectionDAGInfo::~SPUSelectionDAGInfo() { +} diff --git a/lib/Target/CellSPU/SPUSelectionDAGInfo.h b/lib/Target/CellSPU/SPUSelectionDAGInfo.h new file mode 100644 index 0000000000000..0a6b4c171f18b --- /dev/null +++ b/lib/Target/CellSPU/SPUSelectionDAGInfo.h @@ -0,0 +1,29 @@ +//===-- SPUSelectionDAGInfo.h - CellSPU SelectionDAG Info -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the CellSPU subclass for TargetSelectionDAGInfo. +// +//===----------------------------------------------------------------------===// + +#ifndef CELLSPUSELECTIONDAGINFO_H +#define CELLSPUSELECTIONDAGINFO_H + +#include "llvm/Target/TargetSelectionDAGInfo.h" + +namespace llvm { + +class SPUSelectionDAGInfo : public TargetSelectionDAGInfo { +public: + SPUSelectionDAGInfo(); + ~SPUSelectionDAGInfo(); +}; + +} + +#endif diff --git a/lib/Target/CellSPU/SPUTargetMachine.h b/lib/Target/CellSPU/SPUTargetMachine.h index 9fdcfe9ab619b..37e7cd2b7b3ad 100644 --- a/lib/Target/CellSPU/SPUTargetMachine.h +++ b/lib/Target/CellSPU/SPUTargetMachine.h @@ -57,8 +57,8 @@ public: return NULL; } - virtual SPUTargetLowering *getTargetLowering() const { - return const_cast<SPUTargetLowering*>(&TLInfo); + virtual const SPUTargetLowering *getTargetLowering() const { + return &TLInfo; } virtual const SPURegisterInfo *getRegisterInfo() const { diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp index 9c5893cec0d0d..e739b26bc5c39 100644 --- a/lib/Target/CppBackend/CPPBackend.cpp +++ b/lib/Target/CppBackend/CPPBackend.cpp @@ -210,7 +210,7 @@ namespace { } void CppWriter::error(const std::string& msg) { - llvm_report_error(msg); + report_fatal_error(msg); } // printCFP - Print a floating point constant .. very carefully :) @@ -1082,8 +1082,9 @@ namespace { // Before we emit this instruction, we need to take care of generating any // forward references. So, we get the names of all the operands in advance - std::string* opNames = new std::string[I->getNumOperands()]; - for (unsigned i = 0; i < I->getNumOperands(); i++) { + const unsigned Ops(I->getNumOperands()); + std::string* opNames = new std::string[Ops]; + for (unsigned i = 0; i < Ops; i++) { opNames[i] = getOpName(I->getOperand(i)); } @@ -1144,15 +1145,15 @@ namespace { const InvokeInst* inv = cast<InvokeInst>(I); Out << "std::vector<Value*> " << iName << "_params;"; nl(Out); - for (unsigned i = 3; i < inv->getNumOperands(); ++i) { + for (unsigned i = 0; i < inv->getNumOperands() - 3; ++i) { Out << iName << "_params.push_back(" << opNames[i] << ");"; nl(Out); } Out << "InvokeInst *" << iName << " = InvokeInst::Create(" - << opNames[0] << ", " - << opNames[1] << ", " - << opNames[2] << ", " + << opNames[Ops - 3] << ", " + << opNames[Ops - 2] << ", " + << opNames[Ops - 1] << ", " << iName << "_params.begin(), " << iName << "_params.end(), \""; printEscapedString(inv->getName()); Out << "\", " << bbname << ");"; diff --git a/lib/Target/MBlaze/AsmPrinter/CMakeLists.txt b/lib/Target/MBlaze/AsmPrinter/CMakeLists.txt index cfb2fc8bf950d..fac2c1959d7a7 100644 --- a/lib/Target/MBlaze/AsmPrinter/CMakeLists.txt +++ b/lib/Target/MBlaze/AsmPrinter/CMakeLists.txt @@ -1,9 +1,9 @@ -include_directories(
- ${CMAKE_CURRENT_BINARY_DIR}/..
- ${CMAKE_CURRENT_SOURCE_DIR}/..
- )
-
-add_llvm_library(LLVMMBlazeAsmPrinter
+include_directories( + ${CMAKE_CURRENT_BINARY_DIR}/.. + ${CMAKE_CURRENT_SOURCE_DIR}/.. + ) + +add_llvm_library(LLVMMBlazeAsmPrinter MBlazeAsmPrinter.cpp - )
-add_dependencies(LLVMMBlazeAsmPrinter MBlazeCodeGenTable_gen)
\ No newline at end of file + ) +add_dependencies(LLVMMBlazeAsmPrinter MBlazeCodeGenTable_gen) diff --git a/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp b/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp index b1df926864e67..04dfb0ae6f9bc 100644 --- a/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp +++ b/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp @@ -268,7 +268,7 @@ void MBlazeAsmPrinter::printOperand(const MachineInstr *MI, int opNum, void MBlazeAsmPrinter::printUnsignedImm(const MachineInstr *MI, int opNum, raw_ostream &O) { const MachineOperand &MO = MI->getOperand(opNum); - if (MO.getType() == MachineOperand::MO_Immediate) + if (MO.isImm()) O << (unsigned int)MO.getImm(); else printOperand(MI, opNum, O); @@ -277,7 +277,7 @@ void MBlazeAsmPrinter::printUnsignedImm(const MachineInstr *MI, int opNum, void MBlazeAsmPrinter::printFSLImm(const MachineInstr *MI, int opNum, raw_ostream &O) { const MachineOperand &MO = MI->getOperand(opNum); - if (MO.getType() == MachineOperand::MO_Immediate) + if (MO.isImm()) O << "rfsl" << (unsigned int)MO.getImm(); else printOperand(MI, opNum, O); diff --git a/lib/Target/MBlaze/CMakeLists.txt b/lib/Target/MBlaze/CMakeLists.txt index c93e3dfc7879b..7f85bf82518d4 100644 --- a/lib/Target/MBlaze/CMakeLists.txt +++ b/lib/Target/MBlaze/CMakeLists.txt @@ -22,6 +22,7 @@ add_llvm_target(MBlazeCodeGen MBlazeTargetMachine.cpp MBlazeTargetObjectFile.cpp MBlazeIntrinsicInfo.cpp + MBlazeSelectionDAGInfo.cpp ) target_link_libraries (LLVMMBlazeCodeGen LLVMSelectionDAG) diff --git a/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp b/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp index 7e59c4a164df7..c7cd5f4e44a98 100644 --- a/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp +++ b/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp @@ -13,7 +13,6 @@ #define DEBUG_TYPE "mblaze-isel" #include "MBlaze.h" -#include "MBlazeISelLowering.h" #include "MBlazeMachineFunction.h" #include "MBlazeRegisterInfo.h" #include "MBlazeSubtarget.h" diff --git a/lib/Target/MBlaze/MBlazeISelLowering.cpp b/lib/Target/MBlaze/MBlazeISelLowering.cpp index f0864d0f49221..23889b128ffea 100644 --- a/lib/Target/MBlaze/MBlazeISelLowering.cpp +++ b/lib/Target/MBlaze/MBlazeISelLowering.cpp @@ -185,7 +185,8 @@ unsigned MBlazeTargetLowering::getFunctionAlignment(const Function *) const { return 2; } -SDValue MBlazeTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { +SDValue MBlazeTargetLowering::LowerOperation(SDValue Op, + SelectionDAG &DAG) const { switch (Op.getOpcode()) { case ISD::ConstantPool: return LowerConstantPool(Op, DAG); @@ -201,10 +202,9 @@ SDValue MBlazeTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { //===----------------------------------------------------------------------===// // Lower helper functions //===----------------------------------------------------------------------===// -MachineBasicBlock* MBlazeTargetLowering:: -EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB, - DenseMap<MachineBasicBlock*, - MachineBasicBlock*> *EM) const { +MachineBasicBlock* +MBlazeTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, + MachineBasicBlock *BB) const { const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); DebugLoc dl = MI->getDebugLoc(); @@ -254,12 +254,9 @@ EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB, // Update machine-CFG edges by first adding all successors of the current // block to the new block which will contain the Phi node for the select. - // Also inform sdisel of the edge changes. for(MachineBasicBlock::succ_iterator i = BB->succ_begin(), - e = BB->succ_end(); i != e; ++i) { - EM->insert(std::make_pair(*i, finish)); + e = BB->succ_end(); i != e; ++i) finish->addSuccessor(*i); - } // Next, remove all successors of the current block, and add the true // and fallthrough blocks as its successors. @@ -350,12 +347,9 @@ EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB, // Update machine-CFG edges by first adding all successors of the current // block to the new block which will contain the Phi node for the select. - // Also inform sdisel of the edge changes. for(MachineBasicBlock::succ_iterator i = BB->succ_begin(), - e = BB->succ_end(); i != e; ++i) { - EM->insert(std::make_pair(*i, dneBB)); + e = BB->succ_end(); i != e; ++i) dneBB->addSuccessor(*i); - } // Next, remove all successors of the current block, and add the true // and fallthrough blocks as its successors. @@ -387,7 +381,8 @@ EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB, //===----------------------------------------------------------------------===// // -SDValue MBlazeTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) { +SDValue MBlazeTargetLowering::LowerSELECT_CC(SDValue Op, + SelectionDAG &DAG) const { SDValue LHS = Op.getOperand(0); SDValue RHS = Op.getOperand(1); SDValue TrueVal = Op.getOperand(2); @@ -409,23 +404,23 @@ SDValue MBlazeTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) { } SDValue MBlazeTargetLowering:: -LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) { +LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { // FIXME there isn't actually debug info here DebugLoc dl = Op.getDebugLoc(); - GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); + const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); SDValue GA = DAG.getTargetGlobalAddress(GV, MVT::i32); return DAG.getNode(MBlazeISD::Wrap, dl, MVT::i32, GA); } SDValue MBlazeTargetLowering:: -LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) { +LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { llvm_unreachable("TLS not implemented for MicroBlaze."); return SDValue(); // Not reached } SDValue MBlazeTargetLowering:: -LowerJumpTable(SDValue Op, SelectionDAG &DAG) { +LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { SDValue ResNode; SDValue HiPart; // FIXME there isn't actually debug info here @@ -442,11 +437,11 @@ LowerJumpTable(SDValue Op, SelectionDAG &DAG) { } SDValue MBlazeTargetLowering:: -LowerConstantPool(SDValue Op, SelectionDAG &DAG) { +LowerConstantPool(SDValue Op, SelectionDAG &DAG) const { SDValue ResNode; EVT PtrVT = Op.getValueType(); ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op); - Constant *C = N->getConstVal(); + const Constant *C = N->getConstVal(); SDValue Zero = DAG.getConstant(0, PtrVT); DebugLoc dl = Op.getDebugLoc(); @@ -455,9 +450,14 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG) { return DAG.getNode(MBlazeISD::Wrap, dl, MVT::i32, CP); } -SDValue MBlazeTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) { +SDValue MBlazeTargetLowering::LowerVASTART(SDValue Op, + SelectionDAG &DAG) const { + MachineFunction &MF = DAG.getMachineFunction(); + MBlazeFunctionInfo *FuncInfo = MF.getInfo<MBlazeFunctionInfo>(); + DebugLoc dl = Op.getDebugLoc(); - SDValue FI = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy()); + SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), + getPointerTy()); // vastart just stores the address of the VarArgsFrameIndex slot into the // memory location argument. @@ -533,7 +533,7 @@ LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { // MBlaze does not yet support tail call optimization isTailCall = false; @@ -669,7 +669,7 @@ SDValue MBlazeTargetLowering:: LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { // Assign locations to each value returned by this call. SmallVector<CCValAssign, 16> RVLocs; CCState CCInfo(CallConv, isVarArg, getTargetMachine(), @@ -699,13 +699,13 @@ SDValue MBlazeTargetLowering:: LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>(); unsigned StackReg = MF.getTarget().getRegisterInfo()->getFrameRegister(MF); - VarArgsFrameIndex = 0; + MBlazeFI->setVarArgsFrameIndex(0); // Used with vargs to acumulate store chains. std::vector<SDValue> OutChains; @@ -818,8 +818,8 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, // Record the frame index of the first variable argument // which is a value necessary to VASTART. - if (!VarArgsFrameIndex) - VarArgsFrameIndex = FI; + if (!MBlazeFI->getVarArgsFrameIndex()) + MBlazeFI->setVarArgsFrameIndex(FI); } } @@ -841,7 +841,7 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, SDValue MBlazeTargetLowering:: LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, - DebugLoc dl, SelectionDAG &DAG) { + DebugLoc dl, SelectionDAG &DAG) const { // CCValAssign - represent the assignment of // the return value to a location SmallVector<CCValAssign, 16> RVLocs; diff --git a/lib/Target/MBlaze/MBlazeISelLowering.h b/lib/Target/MBlaze/MBlazeISelLowering.h index f8b147024de41..9f9ac899c6fc6 100644 --- a/lib/Target/MBlaze/MBlazeISelLowering.h +++ b/lib/Target/MBlaze/MBlazeISelLowering.h @@ -63,14 +63,11 @@ namespace llvm { //===--------------------------------------------------------------------===// class MBlazeTargetLowering : public TargetLowering { - int VarArgsFrameIndex; // FrameIndex for start of varargs area. - public: - explicit MBlazeTargetLowering(MBlazeTargetMachine &TM); /// LowerOperation - Provide custom lowering hooks for some operations. - virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG); + virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; /// getTargetNodeName - This method returns the name of a target specific // DAG node. @@ -90,22 +87,22 @@ namespace llvm { CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; // Lower Operand specifics - SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG); - SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG); - SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG); - SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG); - SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG); - SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG); + SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; virtual SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerCall(SDValue Chain, SDValue Callee, @@ -114,17 +111,17 @@ namespace llvm { const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, - DebugLoc dl, SelectionDAG &DAG); + DebugLoc dl, SelectionDAG &DAG) const; - virtual MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *MBB, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const; + virtual MachineBasicBlock * + EmitInstrWithCustomInserter(MachineInstr *MI, + MachineBasicBlock *MBB) const; // Inline asm support ConstraintType getConstraintType(const std::string &Constraint) const; diff --git a/lib/Target/MBlaze/MBlazeMachineFunction.h b/lib/Target/MBlaze/MBlazeMachineFunction.h index 08d4dcad6e3ff..1f956c1f90fb5 100644 --- a/lib/Target/MBlaze/MBlazeMachineFunction.h +++ b/lib/Target/MBlaze/MBlazeMachineFunction.h @@ -79,11 +79,14 @@ private: /// relocation models. unsigned GlobalBaseReg; + // VarArgsFrameIndex - FrameIndex for start of varargs area. + int VarArgsFrameIndex; + public: MBlazeFunctionInfo(MachineFunction& MF) : FPStackOffset(0), RAStackOffset(0), CPUTopSavedRegOff(0), GPHolder(-1,-1), HasLoadArgs(false), HasStoreVarArgs(false), - SRetReturnReg(0), GlobalBaseReg(0) + SRetReturnReg(0), GlobalBaseReg(0), VarArgsFrameIndex(0) {} int getFPStackOffset() const { return FPStackOffset; } @@ -129,6 +132,9 @@ public: unsigned getGlobalBaseReg() const { return GlobalBaseReg; } void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; } + + int getVarArgsFrameIndex() const { return VarArgsFrameIndex; } + void setVarArgsFrameIndex(int Index) { VarArgsFrameIndex = Index; } }; } // end of namespace llvm diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp index a12310a2bd326..e15176eb06bec 100644 --- a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp +++ b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp @@ -243,7 +243,7 @@ void MBlazeRegisterInfo::adjustMBlazeStackFrame(MachineFunction &MF) const { // if frame pointer elimination is disabled. bool MBlazeRegisterInfo::hasFP(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); - return NoFramePointerElim || MFI->hasVarSizedObjects(); + return DisableFramePointerElim(MF) || MFI->hasVarSizedObjects(); } // This function eliminate ADJCALLSTACKDOWN, diff --git a/lib/Target/MBlaze/MBlazeSchedule.td b/lib/Target/MBlaze/MBlazeSchedule.td index 6a94491db4032..1fec9e694005c 100644 --- a/lib/Target/MBlaze/MBlazeSchedule.td +++ b/lib/Target/MBlaze/MBlazeSchedule.td @@ -40,7 +40,8 @@ def IIPseudo : InstrItinClass; //===----------------------------------------------------------------------===// // MBlaze Generic instruction itineraries. //===----------------------------------------------------------------------===// -def MBlazeGenericItineraries : ProcessorItineraries<[ +def MBlazeGenericItineraries : ProcessorItineraries< + [ALU, IMULDIV], [ InstrItinData<IIAlu , [InstrStage<1, [ALU]>]>, InstrItinData<IILoad , [InstrStage<3, [ALU]>]>, InstrItinData<IIStore , [InstrStage<1, [ALU]>]>, diff --git a/lib/Target/MBlaze/MBlazeSelectionDAGInfo.cpp b/lib/Target/MBlaze/MBlazeSelectionDAGInfo.cpp new file mode 100644 index 0000000000000..105e42a639dad --- /dev/null +++ b/lib/Target/MBlaze/MBlazeSelectionDAGInfo.cpp @@ -0,0 +1,22 @@ +//===-- MBlazeSelectionDAGInfo.cpp - MBlaze SelectionDAG Info -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the MBlazeSelectionDAGInfo class. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "mblaze-selectiondag-info" +#include "MBlazeSelectionDAGInfo.h" +using namespace llvm; + +MBlazeSelectionDAGInfo::MBlazeSelectionDAGInfo() { +} + +MBlazeSelectionDAGInfo::~MBlazeSelectionDAGInfo() { +} diff --git a/lib/Target/MBlaze/MBlazeSelectionDAGInfo.h b/lib/Target/MBlaze/MBlazeSelectionDAGInfo.h new file mode 100644 index 0000000000000..11e6879911e5d --- /dev/null +++ b/lib/Target/MBlaze/MBlazeSelectionDAGInfo.h @@ -0,0 +1,29 @@ +//===-- MBlazeSelectionDAGInfo.h - MBlaze SelectionDAG Info -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the MBlaze subclass for TargetSelectionDAGInfo. +// +//===----------------------------------------------------------------------===// + +#ifndef MBLAZESELECTIONDAGINFO_H +#define MBLAZESELECTIONDAGINFO_H + +#include "llvm/Target/TargetSelectionDAGInfo.h" + +namespace llvm { + +class MBlazeSelectionDAGInfo : public TargetSelectionDAGInfo { +public: + MBlazeSelectionDAGInfo(); + ~MBlazeSelectionDAGInfo(); +}; + +} + +#endif diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.h b/lib/Target/MBlaze/MBlazeTargetMachine.h index 85c975cae95c4..9bf989849c4d5 100644 --- a/lib/Target/MBlaze/MBlazeTargetMachine.h +++ b/lib/Target/MBlaze/MBlazeTargetMachine.h @@ -51,8 +51,8 @@ namespace llvm { virtual const MBlazeRegisterInfo *getRegisterInfo() const { return &InstrInfo.getRegisterInfo(); } - virtual MBlazeTargetLowering *getTargetLowering() const - { return const_cast<MBlazeTargetLowering*>(&TLInfo); } + virtual const MBlazeTargetLowering *getTargetLowering() const + { return &TLInfo; } const TargetIntrinsicInfo *getIntrinsicInfo() const { return &IntrinsicInfo; } diff --git a/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp b/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp index 79c9494c4d4bd..05c01ef7a5d9b 100644 --- a/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp +++ b/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp @@ -11,6 +11,7 @@ #include "MBlazeSubtarget.h" #include "llvm/DerivedTypes.h" #include "llvm/GlobalVariable.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetMachine.h" @@ -22,14 +23,14 @@ Initialize(MCContext &Ctx, const TargetMachine &TM) { TargetLoweringObjectFileELF::Initialize(Ctx, TM); SmallDataSection = - getELFSection(".sdata", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_WRITE | MCSectionELF::SHF_ALLOC, - SectionKind::getDataRel()); + getContext().getELFSection(".sdata", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_WRITE |MCSectionELF::SHF_ALLOC, + SectionKind::getDataRel()); SmallBSSSection = - getELFSection(".sbss", MCSectionELF::SHT_NOBITS, - MCSectionELF::SHF_WRITE | MCSectionELF::SHF_ALLOC, - SectionKind::getBSS()); + getContext().getELFSection(".sbss", MCSectionELF::SHT_NOBITS, + MCSectionELF::SHF_WRITE |MCSectionELF::SHF_ALLOC, + SectionKind::getBSS()); } diff --git a/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.cpp b/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.cpp index f4d7d8a52886e..d1d9a11586354 100644 --- a/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.cpp +++ b/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.cpp @@ -78,6 +78,16 @@ GetConstantPoolIndexSymbol(const MachineOperand &MO) const { return Ctx.GetOrCreateSymbol(Name.str()); } +MCSymbol *MSP430MCInstLower:: +GetBlockAddressSymbol(const MachineOperand &MO) const { + switch (MO.getTargetFlags()) { + default: assert(0 && "Unknown target flag on GV operand"); + case 0: break; + } + + return Printer.GetBlockAddressSymbol(MO.getBlockAddress()); +} + MCOperand MSP430MCInstLower:: LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const { // FIXME: We would like an efficient form for this, so we don't have to do a @@ -131,6 +141,8 @@ void MSP430MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { case MachineOperand::MO_ConstantPoolIndex: MCOp = LowerSymbolOperand(MO, GetConstantPoolIndexSymbol(MO)); break; + case MachineOperand::MO_BlockAddress: + MCOp = LowerSymbolOperand(MO, GetBlockAddressSymbol(MO)); } OutMI.addOperand(MCOp); diff --git a/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.h b/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.h index a2b99ae83c01b..f9620e8ccfd21 100644 --- a/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.h +++ b/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.h @@ -42,6 +42,7 @@ public: MCSymbol *GetExternalSymbolSymbol(const MachineOperand &MO) const; MCSymbol *GetJumpTableSymbol(const MachineOperand &MO) const; MCSymbol *GetConstantPoolIndexSymbol(const MachineOperand &MO) const; + MCSymbol *GetBlockAddressSymbol(const MachineOperand &MO) const; }; } diff --git a/lib/Target/MSP430/CMakeLists.txt b/lib/Target/MSP430/CMakeLists.txt index 29abe46c880a5..a3f60d2a44f1e 100644 --- a/lib/Target/MSP430/CMakeLists.txt +++ b/lib/Target/MSP430/CMakeLists.txt @@ -19,6 +19,7 @@ add_llvm_target(MSP430CodeGen MSP430RegisterInfo.cpp MSP430Subtarget.cpp MSP430TargetMachine.cpp + MSP430SelectionDAGInfo.cpp ) target_link_libraries (LLVMMSP430CodeGen LLVMSelectionDAG) diff --git a/lib/Target/MSP430/MSP430BranchSelector.cpp b/lib/Target/MSP430/MSP430BranchSelector.cpp index 836e4250abae0..68cb342b08f45 100644 --- a/lib/Target/MSP430/MSP430BranchSelector.cpp +++ b/lib/Target/MSP430/MSP430BranchSelector.cpp @@ -157,7 +157,7 @@ bool MSP430BSel::runOnMachineFunction(MachineFunction &Fn) { NewSize = 6; } // Uncond branch to the real destination. - I = BuildMI(MBB, I, dl, TII->get(MSP430::B)).addMBB(Dest); + I = BuildMI(MBB, I, dl, TII->get(MSP430::Bi)).addMBB(Dest); // Remove the old branch from the function. OldBranch->eraseFromParent(); diff --git a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp index 911cfcbe6d966..7b328bb12c3d7 100644 --- a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp +++ b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #include "MSP430.h" -#include "MSP430ISelLowering.h" #include "MSP430TargetMachine.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" @@ -45,9 +44,9 @@ namespace { } Base; int16_t Disp; - GlobalValue *GV; - Constant *CP; - BlockAddress *BlockAddr; + const GlobalValue *GV; + const Constant *CP; + const BlockAddress *BlockAddr; const char *ES; int JT; unsigned Align; // CP alignment. @@ -100,7 +99,7 @@ namespace { /// namespace { class MSP430DAGToDAGISel : public SelectionDAGISel { - MSP430TargetLowering &Lowering; + const MSP430TargetLowering &Lowering; const MSP430Subtarget &Subtarget; public: @@ -364,7 +363,7 @@ SDNode *MSP430DAGToDAGISel::SelectIndexedBinOp(SDNode *Op, unsigned Opc8, unsigned Opc16) { if (N1.getOpcode() == ISD::LOAD && N1.hasOneUse() && - IsLegalToFold(N1, Op, Op)) { + IsLegalToFold(N1, Op, Op, OptLevel)) { LoadSDNode *LD = cast<LoadSDNode>(N1); if (!isValidIndexedLoad(LD)) return NULL; diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp index e6c7e1ecd8139..c3e2bdf7b46fe 100644 --- a/lib/Target/MSP430/MSP430ISelLowering.cpp +++ b/lib/Target/MSP430/MSP430ISelLowering.cpp @@ -110,8 +110,8 @@ MSP430TargetLowering::MSP430TargetLowering(MSP430TargetMachine &tm) : setOperationAction(ISD::ROTR, MVT::i16, Expand); setOperationAction(ISD::GlobalAddress, MVT::i16, Custom); setOperationAction(ISD::ExternalSymbol, MVT::i16, Custom); + setOperationAction(ISD::BlockAddress, MVT::i16, Custom); setOperationAction(ISD::BR_JT, MVT::Other, Expand); - setOperationAction(ISD::BRIND, MVT::Other, Expand); setOperationAction(ISD::BR_CC, MVT::i8, Custom); setOperationAction(ISD::BR_CC, MVT::i16, Custom); setOperationAction(ISD::BRCOND, MVT::Other, Expand); @@ -176,12 +176,14 @@ MSP430TargetLowering::MSP430TargetLowering(MSP430TargetMachine &tm) : } } -SDValue MSP430TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { +SDValue MSP430TargetLowering::LowerOperation(SDValue Op, + SelectionDAG &DAG) const { switch (Op.getOpcode()) { case ISD::SHL: // FALLTHROUGH case ISD::SRL: case ISD::SRA: return LowerShifts(Op, DAG); case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); + case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); case ISD::ExternalSymbol: return LowerExternalSymbol(Op, DAG); case ISD::SETCC: return LowerSETCC(Op, DAG); case ISD::BR_CC: return LowerBR_CC(Op, DAG); @@ -252,7 +254,8 @@ MSP430TargetLowering::LowerFormalArguments(SDValue Chain, &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) + const { switch (CallConv) { default: @@ -264,7 +267,7 @@ MSP430TargetLowering::LowerFormalArguments(SDValue Chain, if (Ins.empty()) return Chain; else { - llvm_report_error("ISRs cannot have arguments"); + report_fatal_error("ISRs cannot have arguments"); return SDValue(); } } @@ -277,7 +280,7 @@ MSP430TargetLowering::LowerCall(SDValue Chain, SDValue Callee, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { // MSP430 target does not yet support tail call optimization. isTailCall = false; @@ -289,7 +292,7 @@ MSP430TargetLowering::LowerCall(SDValue Chain, SDValue Callee, return LowerCCCCallTo(Chain, Callee, CallConv, isVarArg, isTailCall, Outs, Ins, dl, DAG, InVals); case CallingConv::MSP430_INTR: - llvm_report_error("ISRs cannot be called directly"); + report_fatal_error("ISRs cannot be called directly"); return SDValue(); } } @@ -306,7 +309,8 @@ MSP430TargetLowering::LowerCCCArguments(SDValue Chain, &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) + const { MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); MachineRegisterInfo &RegInfo = MF.getRegInfo(); @@ -383,14 +387,14 @@ SDValue MSP430TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, - DebugLoc dl, SelectionDAG &DAG) { + DebugLoc dl, SelectionDAG &DAG) const { // CCValAssign - represent the assignment of the return value to a location SmallVector<CCValAssign, 16> RVLocs; // ISRs cannot return any value. if (CallConv == CallingConv::MSP430_INTR && !Outs.empty()) { - llvm_report_error("ISRs cannot return any value"); + report_fatal_error("ISRs cannot return any value"); return SDValue(); } @@ -445,7 +449,7 @@ MSP430TargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { // Analyze operands of the call, assigning locations to each operand. SmallVector<CCValAssign, 16> ArgLocs; CCState CCInfo(CallConv, isVarArg, getTargetMachine(), @@ -568,7 +572,7 @@ MSP430TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { // Assign locations to each value returned by this call. SmallVector<CCValAssign, 16> RVLocs; @@ -589,7 +593,7 @@ MSP430TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, } SDValue MSP430TargetLowering::LowerShifts(SDValue Op, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { unsigned Opc = Op.getOpcode(); SDNode* N = Op.getNode(); EVT VT = Op.getValueType(); @@ -632,7 +636,8 @@ SDValue MSP430TargetLowering::LowerShifts(SDValue Op, return Victim; } -SDValue MSP430TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) { +SDValue MSP430TargetLowering::LowerGlobalAddress(SDValue Op, + SelectionDAG &DAG) const { const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); int64_t Offset = cast<GlobalAddressSDNode>(Op)->getOffset(); @@ -643,7 +648,7 @@ SDValue MSP430TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) } SDValue MSP430TargetLowering::LowerExternalSymbol(SDValue Op, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol(); SDValue Result = DAG.getTargetExternalSymbol(Sym, getPointerTy()); @@ -651,6 +656,15 @@ SDValue MSP430TargetLowering::LowerExternalSymbol(SDValue Op, return DAG.getNode(MSP430ISD::Wrapper, dl, getPointerTy(), Result);; } +SDValue MSP430TargetLowering::LowerBlockAddress(SDValue Op, + SelectionDAG &DAG) const { + DebugLoc dl = Op.getDebugLoc(); + const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); + SDValue Result = DAG.getBlockAddress(BA, getPointerTy(), /*isTarget=*/true); + + return DAG.getNode(MSP430ISD::Wrapper, dl, getPointerTy(), Result);; +} + static SDValue EmitCMP(SDValue &LHS, SDValue &RHS, SDValue &TargetCC, ISD::CondCode CC, DebugLoc dl, SelectionDAG &DAG) { @@ -734,7 +748,7 @@ static SDValue EmitCMP(SDValue &LHS, SDValue &RHS, SDValue &TargetCC, } -SDValue MSP430TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) { +SDValue MSP430TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = Op.getOperand(0); ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get(); SDValue LHS = Op.getOperand(2); @@ -749,8 +763,7 @@ SDValue MSP430TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) { Chain, Dest, TargetCC, Flag); } - -SDValue MSP430TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) { +SDValue MSP430TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { SDValue LHS = Op.getOperand(0); SDValue RHS = Op.getOperand(1); DebugLoc dl = Op.getDebugLoc(); @@ -830,7 +843,8 @@ SDValue MSP430TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) { } } -SDValue MSP430TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) { +SDValue MSP430TargetLowering::LowerSELECT_CC(SDValue Op, + SelectionDAG &DAG) const { SDValue LHS = Op.getOperand(0); SDValue RHS = Op.getOperand(1); SDValue TrueV = Op.getOperand(2); @@ -852,7 +866,7 @@ SDValue MSP430TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) { } SDValue MSP430TargetLowering::LowerSIGN_EXTEND(SDValue Op, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { SDValue Val = Op.getOperand(0); EVT VT = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); @@ -864,7 +878,8 @@ SDValue MSP430TargetLowering::LowerSIGN_EXTEND(SDValue Op, DAG.getValueType(Val.getValueType())); } -SDValue MSP430TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) { +SDValue +MSP430TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); MSP430MachineFunctionInfo *FuncInfo = MF.getInfo<MSP430MachineFunctionInfo>(); int ReturnAddrIndex = FuncInfo->getRAIndex(); @@ -880,7 +895,8 @@ SDValue MSP430TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) { return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy()); } -SDValue MSP430TargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) { +SDValue MSP430TargetLowering::LowerRETURNADDR(SDValue Op, + SelectionDAG &DAG) const { unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); DebugLoc dl = Op.getDebugLoc(); @@ -900,7 +916,8 @@ SDValue MSP430TargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) { RetAddrFI, NULL, 0, false, false, 0); } -SDValue MSP430TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) { +SDValue MSP430TargetLowering::LowerFRAMEADDR(SDValue Op, + SelectionDAG &DAG) const { MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); MFI->setFrameAddressIsTaken(true); EVT VT = Op.getValueType(); @@ -999,8 +1016,7 @@ bool MSP430TargetLowering::isZExtFree(EVT VT1, EVT VT2) const { MachineBasicBlock* MSP430TargetLowering::EmitShiftInstr(MachineInstr *MI, - MachineBasicBlock *BB, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const { + MachineBasicBlock *BB) const { MachineFunction *F = BB->getParent(); MachineRegisterInfo &RI = F->getRegInfo(); DebugLoc dl = MI->getDebugLoc(); @@ -1052,11 +1068,6 @@ MSP430TargetLowering::EmitShiftInstr(MachineInstr *MI, // block to the block containing instructions after shift. RemBB->transferSuccessors(BB); - // Inform sdisel of the edge changes. - for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), - SE = BB->succ_end(); SI != SE; ++SI) - EM->insert(std::make_pair(*SI, RemBB)); - // Add adges BB => LoopBB => RemBB, BB => RemBB, LoopBB => LoopBB BB->addSuccessor(LoopBB); BB->addSuccessor(RemBB); @@ -1111,14 +1122,13 @@ MSP430TargetLowering::EmitShiftInstr(MachineInstr *MI, MachineBasicBlock* MSP430TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *BB, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const { + MachineBasicBlock *BB) const { unsigned Opc = MI->getOpcode(); if (Opc == MSP430::Shl8 || Opc == MSP430::Shl16 || Opc == MSP430::Sra8 || Opc == MSP430::Sra16 || Opc == MSP430::Srl8 || Opc == MSP430::Srl16) - return EmitShiftInstr(MI, BB, EM); + return EmitShiftInstr(MI, BB); const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo(); DebugLoc dl = MI->getDebugLoc(); @@ -1149,10 +1159,6 @@ MSP430TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, .addImm(MI->getOperand(3).getImm()); F->insert(I, copy0MBB); F->insert(I, copy1MBB); - // Inform sdisel of the edge changes. - for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), - SE = BB->succ_end(); SI != SE; ++SI) - EM->insert(std::make_pair(*SI, copy1MBB)); // Update machine-CFG edges by transferring all successors of the current // block to the new block which will contain the Phi node for the select. copy1MBB->transferSuccessors(BB); diff --git a/lib/Target/MSP430/MSP430ISelLowering.h b/lib/Target/MSP430/MSP430ISelLowering.h index 87a790b047b72..01c5071622a7a 100644 --- a/lib/Target/MSP430/MSP430ISelLowering.h +++ b/lib/Target/MSP430/MSP430ISelLowering.h @@ -74,7 +74,7 @@ namespace llvm { explicit MSP430TargetLowering(MSP430TargetMachine &TM); /// LowerOperation - Provide custom lowering hooks for some operations. - virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG); + virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; /// getTargetNodeName - This method returns the name of a target specific /// DAG node. @@ -83,16 +83,17 @@ namespace llvm { /// getFunctionAlignment - Return the Log2 alignment of this function. virtual unsigned getFunctionAlignment(const Function *F) const; - SDValue LowerShifts(SDValue Op, SelectionDAG &DAG); - SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG); - SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG); - SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG); - SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG); - SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG); - SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG); - SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG); - SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG); - SDValue getReturnAddressFrameIndex(SelectionDAG &DAG); + SDValue LowerShifts(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; + SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const; TargetLowering::ConstraintType getConstraintType(const std::string &Constraint) const; @@ -117,11 +118,9 @@ namespace llvm { virtual bool isZExtFree(EVT VT1, EVT VT2) const; MachineBasicBlock* EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *BB, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const; + MachineBasicBlock *BB) const; MachineBasicBlock* EmitShiftInstr(MachineInstr *MI, - MachineBasicBlock *BB, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const; + MachineBasicBlock *BB) const; private: SDValue LowerCCCCallTo(SDValue Chain, SDValue Callee, @@ -130,7 +129,7 @@ namespace llvm { const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; SDValue LowerCCCArguments(SDValue Chain, CallingConv::ID CallConv, @@ -138,33 +137,33 @@ namespace llvm { const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; SDValue LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, - DebugLoc dl, SelectionDAG &DAG); + DebugLoc dl, SelectionDAG &DAG) const; virtual bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base, diff --git a/lib/Target/MSP430/MSP430InstrInfo.cpp b/lib/Target/MSP430/MSP430InstrInfo.cpp index 03819041067c6..2b09b3d5268d4 100644 --- a/lib/Target/MSP430/MSP430InstrInfo.cpp +++ b/lib/Target/MSP430/MSP430InstrInfo.cpp @@ -176,7 +176,9 @@ unsigned MSP430InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { if (I->isDebugValue()) continue; if (I->getOpcode() != MSP430::JMP && - I->getOpcode() != MSP430::JCC) + I->getOpcode() != MSP430::JCC && + I->getOpcode() != MSP430::Br && + I->getOpcode() != MSP430::Bm) break; // Remove the branch. I->eraseFromParent(); @@ -256,6 +258,11 @@ bool MSP430InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, if (!I->getDesc().isBranch()) return true; + // Cannot handle indirect branches. + if (I->getOpcode() == MSP430::Br || + I->getOpcode() == MSP430::Bm) + return true; + // Handle unconditional branches. if (I->getOpcode() == MSP430::JMP) { if (!AllowModify) { @@ -365,6 +372,7 @@ unsigned MSP430InstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { case TargetOpcode::EH_LABEL: case TargetOpcode::IMPLICIT_DEF: case TargetOpcode::KILL: + case TargetOpcode::DBG_VALUE: return 0; case TargetOpcode::INLINEASM: { const MachineFunction *MF = MI->getParent()->getParent(); diff --git a/lib/Target/MSP430/MSP430InstrInfo.td b/lib/Target/MSP430/MSP430InstrInfo.td index 144ba26cfeb4e..6b9a2f2f29f4c 100644 --- a/lib/Target/MSP430/MSP430InstrInfo.td +++ b/lib/Target/MSP430/MSP430InstrInfo.td @@ -76,8 +76,8 @@ def memdst : Operand<i16> { let MIOperandInfo = (ops GR16, i16imm); } -// Branch targets have OtherVT type. -def brtarget : Operand<OtherVT> { +// Short jump targets have OtherVT type and are printed as pcrel imm values. +def jmptarget : Operand<OtherVT> { let PrintMethod = "printPCRelImmOperand"; } @@ -169,21 +169,27 @@ let isBranch = 1, isTerminator = 1 in { // Direct branch let isBarrier = 1 in { // Short branch - def JMP : CJForm<0, 0, - (outs), (ins brtarget:$dst), + def JMP : CJForm<0, 0, (outs), (ins jmptarget:$dst), "jmp\t$dst", [(br bb:$dst)]>; - // Long branch - def B : I16ri<0, - (outs), (ins brtarget:$dst), - "br\t$dst", - []>; + let isIndirectBranch = 1 in { + // Long branches + def Bi : I16ri<0, (outs), (ins i16imm:$brdst), + "br\t$brdst", + [(brind tblockaddress:$brdst)]>; + def Br : I16rr<0, (outs), (ins GR16:$brdst), + "mov.w\t{$brdst, pc}", + [(brind GR16:$brdst)]>; + def Bm : I16rm<0, (outs), (ins memsrc:$brdst), + "mov.w\t{$brdst, pc}", + [(brind (load addr:$brdst))]>; + } } // Conditional branches let Uses = [SRW] in def JCC : CJForm<0, 0, - (outs), (ins brtarget:$dst, cc:$cc), + (outs), (ins jmptarget:$dst, cc:$cc), "j$cc\t$dst", [(MSP430brcc bb:$dst, imm:$cc)]>; } // isBranch, isTerminator @@ -1126,16 +1132,21 @@ def : Pat<(i8 (trunc GR16:$src)), // GlobalAddress, ExternalSymbol def : Pat<(i16 (MSP430Wrapper tglobaladdr:$dst)), (MOV16ri tglobaladdr:$dst)>; def : Pat<(i16 (MSP430Wrapper texternalsym:$dst)), (MOV16ri texternalsym:$dst)>; +def : Pat<(i16 (MSP430Wrapper tblockaddress:$dst)), (MOV16ri tblockaddress:$dst)>; def : Pat<(add GR16:$src1, (MSP430Wrapper tglobaladdr :$src2)), (ADD16ri GR16:$src1, tglobaladdr:$src2)>; def : Pat<(add GR16:$src1, (MSP430Wrapper texternalsym:$src2)), (ADD16ri GR16:$src1, texternalsym:$src2)>; +def : Pat<(add GR16:$src1, (MSP430Wrapper tblockaddress:$src2)), + (ADD16ri GR16:$src1, tblockaddress:$src2)>; def : Pat<(store (i16 (MSP430Wrapper tglobaladdr:$src)), addr:$dst), (MOV16mi addr:$dst, tglobaladdr:$src)>; def : Pat<(store (i16 (MSP430Wrapper texternalsym:$src)), addr:$dst), (MOV16mi addr:$dst, texternalsym:$src)>; +def : Pat<(store (i16 (MSP430Wrapper tblockaddress:$src)), addr:$dst), + (MOV16mi addr:$dst, tblockaddress:$src)>; // calls def : Pat<(MSP430call (i16 tglobaladdr:$dst)), diff --git a/lib/Target/MSP430/MSP430RegisterInfo.cpp b/lib/Target/MSP430/MSP430RegisterInfo.cpp index d91783a80c831..0cae26714bfc7 100644 --- a/lib/Target/MSP430/MSP430RegisterInfo.cpp +++ b/lib/Target/MSP430/MSP430RegisterInfo.cpp @@ -138,7 +138,7 @@ MSP430RegisterInfo::getPointerRegClass(unsigned Kind) const { bool MSP430RegisterInfo::hasFP(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); - return (NoFramePointerElim || + return (DisableFramePointerElim(MF) || MF.getFrameInfo()->hasVarSizedObjects() || MFI->isFrameAddressTaken()); } diff --git a/lib/Target/MSP430/MSP430SelectionDAGInfo.cpp b/lib/Target/MSP430/MSP430SelectionDAGInfo.cpp new file mode 100644 index 0000000000000..a54c929e8356f --- /dev/null +++ b/lib/Target/MSP430/MSP430SelectionDAGInfo.cpp @@ -0,0 +1,22 @@ +//===-- MSP430SelectionDAGInfo.cpp - MSP430 SelectionDAG Info -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the MSP430SelectionDAGInfo class. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "msp430-selectiondag-info" +#include "MSP430SelectionDAGInfo.h" +using namespace llvm; + +MSP430SelectionDAGInfo::MSP430SelectionDAGInfo() { +} + +MSP430SelectionDAGInfo::~MSP430SelectionDAGInfo() { +} diff --git a/lib/Target/MSP430/MSP430SelectionDAGInfo.h b/lib/Target/MSP430/MSP430SelectionDAGInfo.h new file mode 100644 index 0000000000000..c952ab726afe1 --- /dev/null +++ b/lib/Target/MSP430/MSP430SelectionDAGInfo.h @@ -0,0 +1,29 @@ +//===-- MSP430SelectionDAGInfo.h - MSP430 SelectionDAG Info -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the MSP430 subclass for TargetSelectionDAGInfo. +// +//===----------------------------------------------------------------------===// + +#ifndef MSP430SELECTIONDAGINFO_H +#define MSP430SELECTIONDAGINFO_H + +#include "llvm/Target/TargetSelectionDAGInfo.h" + +namespace llvm { + +class MSP430SelectionDAGInfo : public TargetSelectionDAGInfo { +public: + MSP430SelectionDAGInfo(); + ~MSP430SelectionDAGInfo(); +}; + +} + +#endif diff --git a/lib/Target/MSP430/MSP430TargetMachine.h b/lib/Target/MSP430/MSP430TargetMachine.h index d93ac5c6efe91..68bde9a551560 100644 --- a/lib/Target/MSP430/MSP430TargetMachine.h +++ b/lib/Target/MSP430/MSP430TargetMachine.h @@ -50,8 +50,8 @@ public: return &InstrInfo.getRegisterInfo(); } - virtual MSP430TargetLowering *getTargetLowering() const { - return const_cast<MSP430TargetLowering*>(&TLInfo); + virtual const MSP430TargetLowering *getTargetLowering() const { + return &TLInfo; } virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel); diff --git a/lib/Target/Mangler.cpp b/lib/Target/Mangler.cpp index 1d5c51164e3f3..4ef017ab9295d 100644 --- a/lib/Target/Mangler.cpp +++ b/lib/Target/Mangler.cpp @@ -22,11 +22,12 @@ #include "llvm/ADT/Twine.h" using namespace llvm; -static bool isAcceptableChar(char C) { +static bool isAcceptableChar(char C, bool AllowPeriod) { if ((C < 'a' || C > 'z') && (C < 'A' || C > 'Z') && (C < '0' || C > '9') && - C != '_' && C != '$' && C != '.' && C != '@') + C != '_' && C != '$' && C != '@' && + !(AllowPeriod && C == '.')) return false; return true; } @@ -54,8 +55,9 @@ static bool NameNeedsEscaping(StringRef Str, const MCAsmInfo &MAI) { // If any of the characters in the string is an unacceptable character, force // quotes. + bool AllowPeriod = MAI.doesAllowPeriodsInName(); for (unsigned i = 0, e = Str.size(); i != e; ++i) - if (!isAcceptableChar(Str[i])) + if (!isAcceptableChar(Str[i], AllowPeriod)) return true; return false; } @@ -70,9 +72,10 @@ static void appendMangledName(SmallVectorImpl<char> &OutName, StringRef Str, MangleLetter(OutName, Str[0]); Str = Str.substr(1); } - + + bool AllowPeriod = MAI.doesAllowPeriodsInName(); for (unsigned i = 0, e = Str.size(); i != e; ++i) { - if (!isAcceptableChar(Str[i])) + if (!isAcceptableChar(Str[i], AllowPeriod)) MangleLetter(OutName, Str[i]); else OutName.push_back(Str[i]); diff --git a/lib/Target/Mips/AsmPrinter/CMakeLists.txt b/lib/Target/Mips/AsmPrinter/CMakeLists.txt index 56c68a6b41600..d3099d2a4e3e5 100644 --- a/lib/Target/Mips/AsmPrinter/CMakeLists.txt +++ b/lib/Target/Mips/AsmPrinter/CMakeLists.txt @@ -1,9 +1,9 @@ -include_directories(
- ${CMAKE_CURRENT_BINARY_DIR}/..
- ${CMAKE_CURRENT_SOURCE_DIR}/..
- )
-
-add_llvm_library(LLVMMipsAsmPrinter
+include_directories( + ${CMAKE_CURRENT_BINARY_DIR}/.. + ${CMAKE_CURRENT_SOURCE_DIR}/.. + ) + +add_llvm_library(LLVMMipsAsmPrinter MipsAsmPrinter.cpp - )
-add_dependencies(LLVMMipsAsmPrinter MipsCodeGenTable_gen)
\ No newline at end of file + ) +add_dependencies(LLVMMipsAsmPrinter MipsCodeGenTable_gen) diff --git a/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp b/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp index 69743715607d8..d269153ef0e76 100644 --- a/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp +++ b/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp @@ -306,7 +306,7 @@ void MipsAsmPrinter::printOperand(const MachineInstr *MI, int opNum, void MipsAsmPrinter::printUnsignedImm(const MachineInstr *MI, int opNum, raw_ostream &O) { const MachineOperand &MO = MI->getOperand(opNum); - if (MO.getType() == MachineOperand::MO_Immediate) + if (MO.isImm()) O << (unsigned short int)MO.getImm(); else printOperand(MI, opNum, O); diff --git a/lib/Target/Mips/CMakeLists.txt b/lib/Target/Mips/CMakeLists.txt index 0e3bf5a96d408..a77802aec52af 100644 --- a/lib/Target/Mips/CMakeLists.txt +++ b/lib/Target/Mips/CMakeLists.txt @@ -20,6 +20,7 @@ add_llvm_target(MipsCodeGen MipsSubtarget.cpp MipsTargetMachine.cpp MipsTargetObjectFile.cpp + MipsSelectionDAGInfo.cpp ) target_link_libraries (LLVMMipsCodeGen LLVMSelectionDAG) diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp index c4746dba32673..ee85a3f380070 100644 --- a/lib/Target/Mips/MipsISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp @@ -13,7 +13,6 @@ #define DEBUG_TYPE "mips-isel" #include "Mips.h" -#include "MipsISelLowering.h" #include "MipsMachineFunction.h" #include "MipsRegisterInfo.h" #include "MipsSubtarget.h" diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index 584b8875eef77..e979c3fe69fce 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -166,7 +166,7 @@ unsigned MipsTargetLowering::getFunctionAlignment(const Function *) const { } SDValue MipsTargetLowering:: -LowerOperation(SDValue Op, SelectionDAG &DAG) +LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { @@ -252,8 +252,7 @@ static Mips::CondCode FPCondCCodeToFCC(ISD::CondCode CC) { MachineBasicBlock * MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *BB, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const { + MachineBasicBlock *BB) const { const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); bool isFPCmp = false; DebugLoc dl = MI->getDebugLoc(); @@ -301,12 +300,9 @@ MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, F->insert(It, sinkMBB); // Update machine-CFG edges by first adding all successors of the current // block to the new block which will contain the Phi node for the select. - // Also inform sdisel of the edge changes. for(MachineBasicBlock::succ_iterator i = BB->succ_begin(), - e = BB->succ_end(); i != e; ++i) { - EM->insert(std::make_pair(*i, sinkMBB)); + e = BB->succ_end(); i != e; ++i) sinkMBB->addSuccessor(*i); - } // Next, remove all successors of the current block, and add the true // and fallthrough blocks as its successors. while(!BB->succ_empty()) @@ -341,7 +337,7 @@ MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, //===----------------------------------------------------------------------===// SDValue MipsTargetLowering:: -LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) +LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const { if (!Subtarget->isMips1()) return Op; @@ -374,7 +370,7 @@ LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) } SDValue MipsTargetLowering:: -LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) +LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = Op.getOperand(0); SDValue Size = Op.getOperand(1); @@ -398,7 +394,7 @@ LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) } SDValue MipsTargetLowering:: -LowerANDOR(SDValue Op, SelectionDAG &DAG) +LowerANDOR(SDValue Op, SelectionDAG &DAG) const { SDValue LHS = Op.getOperand(0); SDValue RHS = Op.getOperand(1); @@ -419,7 +415,7 @@ LowerANDOR(SDValue Op, SelectionDAG &DAG) } SDValue MipsTargetLowering:: -LowerBRCOND(SDValue Op, SelectionDAG &DAG) +LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { // The first operand is the chain, the second is the condition, the third is // the block to branch to if the condition is true. @@ -441,7 +437,7 @@ LowerBRCOND(SDValue Op, SelectionDAG &DAG) } SDValue MipsTargetLowering:: -LowerSETCC(SDValue Op, SelectionDAG &DAG) +LowerSETCC(SDValue Op, SelectionDAG &DAG) const { // The operands to this are the left and right operands to compare (ops #0, // and #1) and the condition code to compare them with (op #2) as a @@ -457,7 +453,7 @@ LowerSETCC(SDValue Op, SelectionDAG &DAG) } SDValue MipsTargetLowering:: -LowerSELECT(SDValue Op, SelectionDAG &DAG) +LowerSELECT(SDValue Op, SelectionDAG &DAG) const { SDValue Cond = Op.getOperand(0); SDValue True = Op.getOperand(1); @@ -481,10 +477,11 @@ LowerSELECT(SDValue Op, SelectionDAG &DAG) Cond, True, False, CCNode); } -SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) { +SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op, + SelectionDAG &DAG) const { // FIXME there isn't actually debug info here DebugLoc dl = Op.getDebugLoc(); - GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); + const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); if (getTargetMachine().getRelocationModel() != Reloc::PIC_) { SDVTList VTs = DAG.getVTList(MVT::i32); @@ -525,14 +522,14 @@ SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) { } SDValue MipsTargetLowering:: -LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) +LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { llvm_unreachable("TLS not implemented for MIPS."); return SDValue(); // Not reached } SDValue MipsTargetLowering:: -LowerJumpTable(SDValue Op, SelectionDAG &DAG) +LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { SDValue ResNode; SDValue HiPart; @@ -560,11 +557,11 @@ LowerJumpTable(SDValue Op, SelectionDAG &DAG) } SDValue MipsTargetLowering:: -LowerConstantPool(SDValue Op, SelectionDAG &DAG) +LowerConstantPool(SDValue Op, SelectionDAG &DAG) const { SDValue ResNode; ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op); - Constant *C = N->getConstVal(); + const Constant *C = N->getConstVal(); // FIXME there isn't actually debug info here DebugLoc dl = Op.getDebugLoc(); @@ -596,9 +593,13 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG) return ResNode; } -SDValue MipsTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) { +SDValue MipsTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { + MachineFunction &MF = DAG.getMachineFunction(); + MipsFunctionInfo *FuncInfo = MF.getInfo<MipsFunctionInfo>(); + DebugLoc dl = Op.getDebugLoc(); - SDValue FI = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy()); + SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), + getPointerTy()); // vastart just stores the address of the VarArgsFrameIndex slot into the // memory location argument. @@ -769,7 +770,7 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { // MIPs target does not yet support tail call optimization. isTailCall = false; @@ -963,7 +964,7 @@ MipsTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { // Assign locations to each value returned by this call. SmallVector<CCValAssign, 16> RVLocs; @@ -995,14 +996,15 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) + const { MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); unsigned StackReg = MF.getTarget().getRegisterInfo()->getFrameRegister(MF); - VarArgsFrameIndex = 0; + MipsFI->setVarArgsFrameIndex(0); // Used with vargs to acumulate store chains. std::vector<SDValue> OutChains; @@ -1143,8 +1145,8 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, // Record the frame index of the first variable argument // which is a value necessary to VASTART. - if (!VarArgsFrameIndex) - VarArgsFrameIndex = FI; + if (!MipsFI->getVarArgsFrameIndex()) + MipsFI->setVarArgsFrameIndex(FI); } } @@ -1167,7 +1169,7 @@ SDValue MipsTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, - DebugLoc dl, SelectionDAG &DAG) { + DebugLoc dl, SelectionDAG &DAG) const { // CCValAssign - represent the assignment of // the return value to a location diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h index 7256617242fac..f2de489a26430 100644 --- a/lib/Target/Mips/MipsISelLowering.h +++ b/lib/Target/Mips/MipsISelLowering.h @@ -68,14 +68,11 @@ namespace llvm { //===--------------------------------------------------------------------===// class MipsTargetLowering : public TargetLowering { - int VarArgsFrameIndex; // FrameIndex for start of varargs area. - public: - explicit MipsTargetLowering(MipsTargetMachine &TM); /// LowerOperation - Provide custom lowering hooks for some operations. - virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG); + virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; /// getTargetNodeName - This method returns the name of a target specific // DAG node. @@ -96,27 +93,27 @@ namespace llvm { CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; // Lower Operand specifics - SDValue LowerANDOR(SDValue Op, SelectionDAG &DAG); - SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG); - SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG); - SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG); - SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG); - SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG); - SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG); - SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG); - SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG); - SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG); - SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG); + SDValue LowerANDOR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; virtual SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerCall(SDValue Chain, SDValue Callee, @@ -125,17 +122,17 @@ namespace llvm { const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, - DebugLoc dl, SelectionDAG &DAG); + DebugLoc dl, SelectionDAG &DAG) const; - virtual MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *MBB, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const; + virtual MachineBasicBlock * + EmitInstrWithCustomInserter(MachineInstr *MI, + MachineBasicBlock *MBB) const; // Inline asm support ConstraintType getConstraintType(const std::string &Constraint) const; diff --git a/lib/Target/Mips/MipsMachineFunction.h b/lib/Target/Mips/MipsMachineFunction.h index a300f49ff5d47..5723f9ea15170 100644 --- a/lib/Target/Mips/MipsMachineFunction.h +++ b/lib/Target/Mips/MipsMachineFunction.h @@ -80,11 +80,15 @@ private: /// relocation models. unsigned GlobalBaseReg; + /// VarArgsFrameIndex - FrameIndex for start of varargs area. + int VarArgsFrameIndex; + public: MipsFunctionInfo(MachineFunction& MF) : FPStackOffset(0), RAStackOffset(0), CPUTopSavedRegOff(0), FPUTopSavedRegOff(0), GPHolder(-1,-1), HasLoadArgs(false), - HasStoreVarArgs(false), SRetReturnReg(0), GlobalBaseReg(0) + HasStoreVarArgs(false), SRetReturnReg(0), GlobalBaseReg(0), + VarArgsFrameIndex(0) {} int getFPStackOffset() const { return FPStackOffset; } @@ -133,6 +137,9 @@ public: unsigned getGlobalBaseReg() const { return GlobalBaseReg; } void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; } + + int getVarArgsFrameIndex() const { return VarArgsFrameIndex; } + void setVarArgsFrameIndex(int Index) { VarArgsFrameIndex = Index; } }; } // end of namespace llvm diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp index f43e69b354575..478da84cad0ce 100644 --- a/lib/Target/Mips/MipsRegisterInfo.cpp +++ b/lib/Target/Mips/MipsRegisterInfo.cpp @@ -338,7 +338,7 @@ void MipsRegisterInfo::adjustMipsStackFrame(MachineFunction &MF) const bool MipsRegisterInfo:: hasFP(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); - return NoFramePointerElim || MFI->hasVarSizedObjects(); + return DisableFramePointerElim(MF) || MFI->hasVarSizedObjects(); } // This function eliminate ADJCALLSTACKDOWN, diff --git a/lib/Target/Mips/MipsSchedule.td b/lib/Target/Mips/MipsSchedule.td index 0c3ca57361cdb..616a79bf831cb 100644 --- a/lib/Target/Mips/MipsSchedule.td +++ b/lib/Target/Mips/MipsSchedule.td @@ -40,7 +40,7 @@ def IIPseudo : InstrItinClass; //===----------------------------------------------------------------------===// // Mips Generic instruction itineraries. //===----------------------------------------------------------------------===// -def MipsGenericItineraries : ProcessorItineraries<[ +def MipsGenericItineraries : ProcessorItineraries<[ALU, IMULDIV], [ InstrItinData<IIAlu , [InstrStage<1, [ALU]>]>, InstrItinData<IILoad , [InstrStage<3, [ALU]>]>, InstrItinData<IIStore , [InstrStage<1, [ALU]>]>, diff --git a/lib/Target/Mips/MipsSelectionDAGInfo.cpp b/lib/Target/Mips/MipsSelectionDAGInfo.cpp new file mode 100644 index 0000000000000..72c149decc261 --- /dev/null +++ b/lib/Target/Mips/MipsSelectionDAGInfo.cpp @@ -0,0 +1,22 @@ +//===-- MipsSelectionDAGInfo.cpp - Mips SelectionDAG Info -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the MipsSelectionDAGInfo class. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "mips-selectiondag-info" +#include "MipsSelectionDAGInfo.h" +using namespace llvm; + +MipsSelectionDAGInfo::MipsSelectionDAGInfo() { +} + +MipsSelectionDAGInfo::~MipsSelectionDAGInfo() { +} diff --git a/lib/Target/Mips/MipsSelectionDAGInfo.h b/lib/Target/Mips/MipsSelectionDAGInfo.h new file mode 100644 index 0000000000000..6eaf0c930f2ef --- /dev/null +++ b/lib/Target/Mips/MipsSelectionDAGInfo.h @@ -0,0 +1,29 @@ +//===-- MipsSelectionDAGInfo.h - Mips SelectionDAG Info ---------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the Mips subclass for TargetSelectionDAGInfo. +// +//===----------------------------------------------------------------------===// + +#ifndef MIPSSELECTIONDAGINFO_H +#define MIPSSELECTIONDAGINFO_H + +#include "llvm/Target/TargetSelectionDAGInfo.h" + +namespace llvm { + +class MipsSelectionDAGInfo : public TargetSelectionDAGInfo { +public: + MipsSelectionDAGInfo(); + ~MipsSelectionDAGInfo(); +}; + +} + +#endif diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h index c3428be48f592..cd671cf3d064a 100644 --- a/lib/Target/Mips/MipsTargetMachine.h +++ b/lib/Target/Mips/MipsTargetMachine.h @@ -47,8 +47,8 @@ namespace llvm { return &InstrInfo.getRegisterInfo(); } - virtual MipsTargetLowering *getTargetLowering() const { - return const_cast<MipsTargetLowering*>(&TLInfo); + virtual const MipsTargetLowering *getTargetLowering() const { + return &TLInfo; } // Pass Pipeline Configuration diff --git a/lib/Target/Mips/MipsTargetObjectFile.cpp b/lib/Target/Mips/MipsTargetObjectFile.cpp index 0fb423dc1b240..405f41981fa31 100644 --- a/lib/Target/Mips/MipsTargetObjectFile.cpp +++ b/lib/Target/Mips/MipsTargetObjectFile.cpp @@ -11,6 +11,7 @@ #include "MipsSubtarget.h" #include "llvm/DerivedTypes.h" #include "llvm/GlobalVariable.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetMachine.h" @@ -26,14 +27,14 @@ void MipsTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM){ TargetLoweringObjectFileELF::Initialize(Ctx, TM); SmallDataSection = - getELFSection(".sdata", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_WRITE | MCSectionELF::SHF_ALLOC, - SectionKind::getDataRel()); + getContext().getELFSection(".sdata", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_WRITE |MCSectionELF::SHF_ALLOC, + SectionKind::getDataRel()); SmallBSSSection = - getELFSection(".sbss", MCSectionELF::SHT_NOBITS, - MCSectionELF::SHF_WRITE | MCSectionELF::SHF_ALLOC, - SectionKind::getBSS()); + getContext().getELFSection(".sbss", MCSectionELF::SHT_NOBITS, + MCSectionELF::SHF_WRITE |MCSectionELF::SHF_ALLOC, + SectionKind::getBSS()); } diff --git a/lib/Target/PIC16/AsmPrinter/CMakeLists.txt b/lib/Target/PIC16/AsmPrinter/CMakeLists.txt index 2e1b809b92d7d..d36bb8eb4a5fe 100644 --- a/lib/Target/PIC16/AsmPrinter/CMakeLists.txt +++ b/lib/Target/PIC16/AsmPrinter/CMakeLists.txt @@ -1,9 +1,9 @@ -include_directories(
- ${CMAKE_CURRENT_BINARY_DIR}/..
- ${CMAKE_CURRENT_SOURCE_DIR}/..
- )
-
-add_llvm_library(LLVMPIC16AsmPrinter
+include_directories( + ${CMAKE_CURRENT_BINARY_DIR}/.. + ${CMAKE_CURRENT_SOURCE_DIR}/.. + ) + +add_llvm_library(LLVMPIC16AsmPrinter PIC16AsmPrinter.cpp - )
+ ) add_dependencies(LLVMPIC16AsmPrinter PIC16CodeGenTable_gen) diff --git a/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp b/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp index c46db46ea5b44..b665817e614e2 100644 --- a/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp +++ b/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp @@ -16,6 +16,7 @@ #include "PIC16AsmPrinter.h" #include "PIC16Section.h" #include "PIC16MCAsmInfo.h" +#include "PIC16MachineFunctionInfo.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" #include "llvm/Module.h" @@ -36,9 +37,8 @@ using namespace llvm; PIC16AsmPrinter::PIC16AsmPrinter(TargetMachine &TM, MCStreamer &Streamer) : AsmPrinter(TM, Streamer), DbgInfo(Streamer, TM.getMCAsmInfo()) { - PTLI = static_cast<PIC16TargetLowering*>(TM.getTargetLowering()); PMAI = static_cast<const PIC16MCAsmInfo*>(TM.getMCAsmInfo()); - PTOF = (PIC16TargetObjectFile *)&PTLI->getObjFileLowering(); + PTOF = &getObjFileLowering(); } void PIC16AsmPrinter::EmitInstruction(const MachineInstr *MI) { @@ -379,6 +379,8 @@ bool PIC16AsmPrinter::doFinalization(Module &M) { void PIC16AsmPrinter::EmitFunctionFrame(MachineFunction &MF) { const Function *F = MF.getFunction(); const TargetData *TD = TM.getTargetData(); + PIC16MachineFunctionInfo *FuncInfo = MF.getInfo<PIC16MachineFunctionInfo>(); + // Emit the data section name. PIC16Section *fPDataSection = @@ -420,7 +422,7 @@ void PIC16AsmPrinter::EmitFunctionFrame(MachineFunction &MF) { Twine(" RES ") + Twine(ArgSize)); // Emit temporary space - int TempSize = PTLI->GetTmpSize(); + int TempSize = FuncInfo->getTmpSize(); if (TempSize > 0) OutStreamer.EmitRawText(PAN::getTempdataLabel(CurrentFnSym->getName()) + Twine(" RES ") + Twine(TempSize)); diff --git a/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h b/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h index e27778fbb0700..a424c270aaad4 100644 --- a/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h +++ b/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h @@ -37,8 +37,8 @@ namespace llvm { return "PIC16 Assembly Printer"; } - PIC16TargetObjectFile &getObjFileLowering() const { - return (PIC16TargetObjectFile &)AsmPrinter::getObjFileLowering(); + const PIC16TargetObjectFile &getObjFileLowering() const { + return (const PIC16TargetObjectFile &)AsmPrinter::getObjFileLowering(); } bool runOnMachineFunction(MachineFunction &F); @@ -76,8 +76,7 @@ namespace llvm { } private: - PIC16TargetObjectFile *PTOF; - PIC16TargetLowering *PTLI; + const PIC16TargetObjectFile *PTOF; PIC16DbgInfo DbgInfo; const PIC16MCAsmInfo *PMAI; std::set<std::string> LibcallDecls; // Sorted & uniqued set of extern decls. diff --git a/lib/Target/PIC16/CMakeLists.txt b/lib/Target/PIC16/CMakeLists.txt index 208b0678fec2c..cd4afe8e2342a 100644 --- a/lib/Target/PIC16/CMakeLists.txt +++ b/lib/Target/PIC16/CMakeLists.txt @@ -22,4 +22,5 @@ add_llvm_target(PIC16 PIC16Subtarget.cpp PIC16TargetMachine.cpp PIC16TargetObjectFile.cpp + PIC16SelectionDAGInfo.cpp ) diff --git a/lib/Target/PIC16/PIC16.h b/lib/Target/PIC16/PIC16.h index 8d067de676b69..cee55f4f260fe 100644 --- a/lib/Target/PIC16/PIC16.h +++ b/lib/Target/PIC16/PIC16.h @@ -21,6 +21,7 @@ #include <sstream> #include <cstring> #include <string> +#include <vector> namespace llvm { class PIC16TargetMachine; @@ -52,17 +53,34 @@ namespace PIC16CC { UDATA_SHR }; + class ESNames { + std::vector<char*> stk; + ESNames() {} + public: + ~ESNames() { + std::vector<char*>::iterator it = stk.end(); + it--; + while(stk.end() != stk.begin()) + { + char* p = *it; + delete [] p; + it--; + stk.pop_back(); + } + } - // External symbol names require memory to live till the program end. - // So we have to allocate it and keep. - // FIXME: Don't leak the allocated strings. - inline static const char *createESName (const std::string &name) { - char *tmpName = new char[name.size() + 1]; - memcpy(tmpName, name.c_str(), name.size() + 1); - return tmpName; - } - + // External symbol names require memory to live till the program end. + // So we have to allocate it and keep. Push all such allocations into a + // vector so that they get freed up on termination. + inline static const char *createESName (const std::string &name) { + static ESNames esn; + char *tmpName = new char[name.size() + 1]; + memcpy(tmpName, name.c_str(), name.size() + 1); + esn.stk.push_back(tmpName); + return tmpName; + } + }; inline static const char *PIC16CondCodeToString(PIC16CC::CondCodes CC) { switch (CC) { diff --git a/lib/Target/PIC16/PIC16ISelDAGToDAG.h b/lib/Target/PIC16/PIC16ISelDAGToDAG.h index 8ed5bf7172962..f1fcec5bad962 100644 --- a/lib/Target/PIC16/PIC16ISelDAGToDAG.h +++ b/lib/Target/PIC16/PIC16ISelDAGToDAG.h @@ -14,9 +14,9 @@ #define DEBUG_TYPE "pic16-isel" #include "PIC16.h" -#include "PIC16ISelLowering.h" #include "PIC16RegisterInfo.h" #include "PIC16TargetMachine.h" +#include "PIC16MachineFunctionInfo.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/raw_ostream.h" @@ -29,19 +29,16 @@ namespace { class VISIBILITY_HIDDEN PIC16DAGToDAGISel : public SelectionDAGISel { /// TM - Keep a reference to PIC16TargetMachine. - PIC16TargetMachine &TM; + const PIC16TargetMachine &TM; /// PIC16Lowering - This object fully describes how to lower LLVM code to an /// PIC16-specific SelectionDAG. - PIC16TargetLowering &PIC16Lowering; + const PIC16TargetLowering &PIC16Lowering; public: explicit PIC16DAGToDAGISel(PIC16TargetMachine &tm) : SelectionDAGISel(tm), - TM(tm), PIC16Lowering(*TM.getTargetLowering()) { - // Keep PIC16 specific DAGISel to use during the lowering - PIC16Lowering.ISel = this; - } + TM(tm), PIC16Lowering(*TM.getTargetLowering()) {} // Pass Name virtual const char *getPassName() const { diff --git a/lib/Target/PIC16/PIC16ISelLowering.cpp b/lib/Target/PIC16/PIC16ISelLowering.cpp index d17abb9ed0a88..f479f4626fd73 100644 --- a/lib/Target/PIC16/PIC16ISelLowering.cpp +++ b/lib/Target/PIC16/PIC16ISelLowering.cpp @@ -16,6 +16,7 @@ #include "PIC16ISelLowering.h" #include "PIC16TargetObjectFile.h" #include "PIC16TargetMachine.h" +#include "PIC16MachineFunctionInfo.h" #include "llvm/DerivedTypes.h" #include "llvm/GlobalValue.h" #include "llvm/Function.h" @@ -24,6 +25,7 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/Support/ErrorHandling.h" @@ -116,7 +118,7 @@ static const char *getIntrinsicName(unsigned opcode) { std::string Fullname = prefix + tagname + Basename; // The name has to live through program life. - return createESName(Fullname); + return ESNames::createESName(Fullname); } // getStdLibCallName - Get the name for the standard library function. @@ -139,12 +141,12 @@ static const char *getStdLibCallName(unsigned opcode) { std::string LibCallName = prefix + BaseName; // The name has to live through program life. - return createESName(LibCallName); + return ESNames::createESName(LibCallName); } // PIC16TargetLowering Constructor. PIC16TargetLowering::PIC16TargetLowering(PIC16TargetMachine &TM) - : TargetLowering(TM, new PIC16TargetObjectFile()), TmpSize(0) { + : TargetLowering(TM, new PIC16TargetObjectFile()) { Subtarget = &TM.getSubtarget<PIC16Subtarget>(); @@ -321,18 +323,29 @@ static SDValue getOutFlag(SDValue &Op) { return Flag; } // Get the TmpOffset for FrameIndex -unsigned PIC16TargetLowering::GetTmpOffsetForFI(unsigned FI, unsigned size) { +unsigned PIC16TargetLowering::GetTmpOffsetForFI(unsigned FI, unsigned size, + MachineFunction &MF) const { + PIC16MachineFunctionInfo *FuncInfo = MF.getInfo<PIC16MachineFunctionInfo>(); + std::map<unsigned, unsigned> &FiTmpOffsetMap = FuncInfo->getFiTmpOffsetMap(); + std::map<unsigned, unsigned>::iterator MapIt = FiTmpOffsetMap.find(FI); if (MapIt != FiTmpOffsetMap.end()) return MapIt->second; // This FI (FrameIndex) is not yet mapped, so map it - FiTmpOffsetMap[FI] = TmpSize; - TmpSize += size; + FiTmpOffsetMap[FI] = FuncInfo->getTmpSize(); + FuncInfo->setTmpSize(FuncInfo->getTmpSize() + size); return FiTmpOffsetMap[FI]; } +void PIC16TargetLowering::ResetTmpOffsetMap(SelectionDAG &DAG) const { + MachineFunction &MF = DAG.getMachineFunction(); + PIC16MachineFunctionInfo *FuncInfo = MF.getInfo<PIC16MachineFunctionInfo>(); + FuncInfo->getFiTmpOffsetMap().clear(); + FuncInfo->setTmpSize(0); +} + // To extract chain value from the SDValue Nodes // This function will help to maintain the chain extracting // code at one place. In case of any change in future it will @@ -390,7 +403,7 @@ PIC16TargetLowering::setPIC16LibcallName(PIC16ISD::PIC16Libcall Call, } const char * -PIC16TargetLowering::getPIC16LibcallName(PIC16ISD::PIC16Libcall Call) { +PIC16TargetLowering::getPIC16LibcallName(PIC16ISD::PIC16Libcall Call) const { return PIC16LibcallNames[Call]; } @@ -398,7 +411,7 @@ SDValue PIC16TargetLowering::MakePIC16Libcall(PIC16ISD::PIC16Libcall Call, EVT RetVT, const SDValue *Ops, unsigned NumOps, bool isSigned, - SelectionDAG &DAG, DebugLoc dl) { + SelectionDAG &DAG, DebugLoc dl) const { TargetLowering::ArgListTy Args; Args.reserve(NumOps); @@ -456,7 +469,7 @@ const char *PIC16TargetLowering::getTargetNodeName(unsigned Opcode) const { void PIC16TargetLowering::ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { switch (N->getOpcode()) { case ISD::GlobalAddress: @@ -483,7 +496,8 @@ void PIC16TargetLowering::ReplaceNodeResults(SDNode *N, } } -SDValue PIC16TargetLowering::ExpandFrameIndex(SDNode *N, SelectionDAG &DAG) { +SDValue PIC16TargetLowering::ExpandFrameIndex(SDNode *N, + SelectionDAG &DAG) const { // Currently handling FrameIndex of size MVT::i16 only // One example of this scenario is when return value is written on @@ -518,7 +532,7 @@ SDValue PIC16TargetLowering::ExpandFrameIndex(SDNode *N, SelectionDAG &DAG) { } -SDValue PIC16TargetLowering::ExpandStore(SDNode *N, SelectionDAG &DAG) { +SDValue PIC16TargetLowering::ExpandStore(SDNode *N, SelectionDAG &DAG) const { StoreSDNode *St = cast<StoreSDNode>(N); SDValue Chain = St->getChain(); SDValue Src = St->getValue(); @@ -636,8 +650,9 @@ SDValue PIC16TargetLowering::ExpandStore(SDNode *N, SelectionDAG &DAG) { } } -SDValue PIC16TargetLowering::ExpandExternalSymbol(SDNode *N, SelectionDAG &DAG) -{ +SDValue PIC16TargetLowering::ExpandExternalSymbol(SDNode *N, + SelectionDAG &DAG) + const { ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(SDValue(N, 0)); // FIXME there isn't really debug info here DebugLoc dl = ES->getDebugLoc(); @@ -651,7 +666,8 @@ SDValue PIC16TargetLowering::ExpandExternalSymbol(SDNode *N, SelectionDAG &DAG) } // ExpandGlobalAddress - -SDValue PIC16TargetLowering::ExpandGlobalAddress(SDNode *N, SelectionDAG &DAG) { +SDValue PIC16TargetLowering::ExpandGlobalAddress(SDNode *N, + SelectionDAG &DAG) const { GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(SDValue(N, 0)); // FIXME there isn't really debug info here DebugLoc dl = G->getDebugLoc(); @@ -666,7 +682,7 @@ SDValue PIC16TargetLowering::ExpandGlobalAddress(SDNode *N, SelectionDAG &DAG) { return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i16, Lo, Hi); } -bool PIC16TargetLowering::isDirectAddress(const SDValue &Op) { +bool PIC16TargetLowering::isDirectAddress(const SDValue &Op) const { assert (Op.getNode() != NULL && "Can't operate on NULL SDNode!!"); if (Op.getOpcode() == ISD::BUILD_PAIR) { @@ -677,7 +693,7 @@ bool PIC16TargetLowering::isDirectAddress(const SDValue &Op) { } // Return true if DirectAddress is in ROM_SPACE -bool PIC16TargetLowering::isRomAddress(const SDValue &Op) { +bool PIC16TargetLowering::isRomAddress(const SDValue &Op) const { // RomAddress is a GlobalAddress in ROM_SPACE_ // If the Op is not a GlobalAddress return NULL without checking @@ -703,7 +719,7 @@ bool PIC16TargetLowering::isRomAddress(const SDValue &Op) { // parts of Op in Lo and Hi. void PIC16TargetLowering::GetExpandedParts(SDValue Op, SelectionDAG &DAG, - SDValue &Lo, SDValue &Hi) { + SDValue &Lo, SDValue &Hi) const { SDNode *N = Op.getNode(); DebugLoc dl = N->getDebugLoc(); EVT NewVT = getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); @@ -720,11 +736,12 @@ void PIC16TargetLowering::GetExpandedParts(SDValue Op, SelectionDAG &DAG, // Legalize FrameIndex into ExternalSymbol and offset. void PIC16TargetLowering::LegalizeFrameIndex(SDValue Op, SelectionDAG &DAG, - SDValue &ES, int &Offset) { + SDValue &ES, int &Offset) const { MachineFunction &MF = DAG.getMachineFunction(); const Function *Func = MF.getFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); + PIC16MachineFunctionInfo *FuncInfo = MF.getInfo<PIC16MachineFunctionInfo>(); const std::string Name = Func->getName(); FrameIndexSDNode *FR = dyn_cast<FrameIndexSDNode>(Op); @@ -736,8 +753,8 @@ PIC16TargetLowering::LegalizeFrameIndex(SDValue Op, SelectionDAG &DAG, // the list and add their requested size. unsigned FIndex = FR->getIndex(); const char *tmpName; - if (FIndex < ReservedFrameCount) { - tmpName = createESName(PAN::getFrameLabel(Name)); + if (FIndex < FuncInfo->getReservedFrameCount()) { + tmpName = ESNames::createESName(PAN::getFrameLabel(Name)); ES = DAG.getTargetExternalSymbol(tmpName, MVT::i8); Offset = 0; for (unsigned i=0; i<FIndex ; ++i) { @@ -745,9 +762,9 @@ PIC16TargetLowering::LegalizeFrameIndex(SDValue Op, SelectionDAG &DAG, } } else { // FrameIndex has been made for some temporary storage - tmpName = createESName(PAN::getTempdataLabel(Name)); + tmpName = ESNames::createESName(PAN::getTempdataLabel(Name)); ES = DAG.getTargetExternalSymbol(tmpName, MVT::i8); - Offset = GetTmpOffsetForFI(FIndex, MFI->getObjectSize(FIndex)); + Offset = GetTmpOffsetForFI(FIndex, MFI->getObjectSize(FIndex), MF); } return; @@ -767,7 +784,7 @@ PIC16TargetLowering::LegalizeFrameIndex(SDValue Op, SelectionDAG &DAG, void PIC16TargetLowering::LegalizeAddress(SDValue Ptr, SelectionDAG &DAG, SDValue &Lo, SDValue &Hi, - unsigned &Offset, DebugLoc dl) { + unsigned &Offset, DebugLoc dl) const { // Offset, by default, should be 0 Offset = 0; @@ -846,7 +863,7 @@ void PIC16TargetLowering::LegalizeAddress(SDValue Ptr, SelectionDAG &DAG, return; } -SDValue PIC16TargetLowering::ExpandLoad(SDNode *N, SelectionDAG &DAG) { +SDValue PIC16TargetLowering::ExpandLoad(SDNode *N, SelectionDAG &DAG) const { LoadSDNode *LD = dyn_cast<LoadSDNode>(SDValue(N, 0)); SDValue Chain = LD->getChain(); SDValue Ptr = LD->getBasePtr(); @@ -961,7 +978,7 @@ SDValue PIC16TargetLowering::ExpandLoad(SDNode *N, SelectionDAG &DAG) { return DAG.getNode(ISD::MERGE_VALUES, dl, Tys, BP, Chain); } -SDValue PIC16TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) { +SDValue PIC16TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const { // We should have handled larger operands in type legalizer itself. assert (Op.getValueType() == MVT::i8 && "illegal shift to lower"); @@ -991,7 +1008,7 @@ SDValue PIC16TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) { return Call; } -SDValue PIC16TargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) { +SDValue PIC16TargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const { // We should have handled larger operands in type legalizer itself. assert (Op.getValueType() == MVT::i8 && "illegal multiply to lower"); @@ -1007,7 +1024,7 @@ SDValue PIC16TargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) { void PIC16TargetLowering::LowerOperationWrapper(SDNode *N, SmallVectorImpl<SDValue>&Results, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { SDValue Op = SDValue(N, 0); SDValue Res; unsigned i; @@ -1031,7 +1048,8 @@ PIC16TargetLowering::LowerOperationWrapper(SDNode *N, } } -SDValue PIC16TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { +SDValue PIC16TargetLowering::LowerOperation(SDValue Op, + SelectionDAG &DAG) const { switch (Op.getOpcode()) { case ISD::ADD: case ISD::ADDC: @@ -1065,7 +1083,7 @@ SDValue PIC16TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { SDValue PIC16TargetLowering::ConvertToMemOperand(SDValue Op, SelectionDAG &DAG, - DebugLoc dl) { + DebugLoc dl) const { assert (Op.getValueType() == MVT::i8 && "illegal value type to store on stack."); @@ -1077,7 +1095,7 @@ SDValue PIC16TargetLowering::ConvertToMemOperand(SDValue Op, // Put the value on stack. // Get a stack slot index and convert to es. int FI = MF.getFrameInfo()->CreateStackObject(1, 1, false); - const char *tmpName = createESName(PAN::getTempdataLabel(FuncName)); + const char *tmpName = ESNames::createESName(PAN::getTempdataLabel(FuncName)); SDValue ES = DAG.getTargetExternalSymbol(tmpName, MVT::i8); // Store the value to ES. @@ -1085,14 +1103,14 @@ SDValue PIC16TargetLowering::ConvertToMemOperand(SDValue Op, DAG.getEntryNode(), Op, ES, DAG.getConstant (1, MVT::i8), // Banksel. - DAG.getConstant (GetTmpOffsetForFI(FI, 1), + DAG.getConstant (GetTmpOffsetForFI(FI, 1, MF), MVT::i8)); // Load the value from ES. SDVTList Tys = DAG.getVTList(MVT::i8, MVT::Other); SDValue Load = DAG.getNode(PIC16ISD::PIC16Load, dl, Tys, Store, ES, DAG.getConstant (1, MVT::i8), - DAG.getConstant (GetTmpOffsetForFI(FI, 1), + DAG.getConstant (GetTmpOffsetForFI(FI, 1, MF), MVT::i8)); return Load.getValue(0); @@ -1103,7 +1121,7 @@ LowerIndirectCallArguments(SDValue Chain, SDValue InFlag, SDValue DataAddr_Lo, SDValue DataAddr_Hi, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, - DebugLoc dl, SelectionDAG &DAG) { + DebugLoc dl, SelectionDAG &DAG) const { unsigned NumOps = Outs.size(); // If call has no arguments then do nothing and return. @@ -1140,7 +1158,7 @@ LowerIndirectCallArguments(SDValue Chain, SDValue InFlag, SDValue PIC16TargetLowering:: LowerDirectCallArguments(SDValue ArgLabel, SDValue Chain, SDValue InFlag, const SmallVectorImpl<ISD::OutputArg> &Outs, - DebugLoc dl, SelectionDAG &DAG) { + DebugLoc dl, SelectionDAG &DAG) const { unsigned NumOps = Outs.size(); std::string Name; SDValue Arg, StoreAt; @@ -1197,7 +1215,7 @@ LowerIndirectCallReturn(SDValue Chain, SDValue InFlag, SDValue DataAddr_Lo, SDValue DataAddr_Hi, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { unsigned RetVals = Ins.size(); // If call does not have anything to return @@ -1224,7 +1242,7 @@ SDValue PIC16TargetLowering:: LowerDirectCallReturn(SDValue RetLabel, SDValue Chain, SDValue InFlag, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { // Currently handling primitive types only. They will come in // i8 parts @@ -1264,7 +1282,7 @@ SDValue PIC16TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, - DebugLoc dl, SelectionDAG &DAG) { + DebugLoc dl, SelectionDAG &DAG) const { // Number of values to return unsigned NumRet = Outs.size(); @@ -1275,7 +1293,7 @@ PIC16TargetLowering::LowerReturn(SDValue Chain, const Function *F = MF.getFunction(); std::string FuncName = F->getName(); - const char *tmpName = createESName(PAN::getFrameLabel(FuncName)); + const char *tmpName = ESNames::createESName(PAN::getFrameLabel(FuncName)); SDValue ES = DAG.getTargetExternalSymbol(tmpName, MVT::i8); SDValue BS = DAG.getConstant(1, MVT::i8); SDValue RetVal; @@ -1292,7 +1310,7 @@ PIC16TargetLowering::LowerReturn(SDValue Chain, void PIC16TargetLowering:: GetDataAddress(DebugLoc dl, SDValue Callee, SDValue &Chain, SDValue &DataAddr_Lo, SDValue &DataAddr_Hi, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { assert (Callee.getOpcode() == PIC16ISD::PIC16Connect && "Don't know what to do of such callee!!"); SDValue ZeroOperand = DAG.getConstant(0, MVT::i8); @@ -1358,7 +1376,7 @@ PIC16TargetLowering::LowerCall(SDValue Chain, SDValue Callee, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { // PIC16 target does not yet support tail call optimization. isTailCall = false; @@ -1409,7 +1427,7 @@ PIC16TargetLowering::LowerCall(SDValue Chain, SDValue Callee, if (IsDirectCall) { // Considering the GlobalAddressNode case here. if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { - GlobalValue *GV = G->getGlobal(); + const GlobalValue *GV = G->getGlobal(); Callee = DAG.getTargetGlobalAddress(GV, MVT::i8); Name = G->getGlobal()->getName(); } else {// Considering the ExternalSymbol case here @@ -1419,11 +1437,11 @@ PIC16TargetLowering::LowerCall(SDValue Chain, SDValue Callee, } // Label for argument passing - const char *argFrame = createESName(PAN::getArgsLabel(Name)); + const char *argFrame = ESNames::createESName(PAN::getArgsLabel(Name)); ArgLabel = DAG.getTargetExternalSymbol(argFrame, MVT::i8); // Label for reading return value - const char *retName = createESName(PAN::getRetvalLabel(Name)); + const char *retName = ESNames::createESName(PAN::getRetvalLabel(Name)); RetLabel = DAG.getTargetExternalSymbol(retName, MVT::i8); } else { // if indirect call @@ -1476,7 +1494,7 @@ PIC16TargetLowering::LowerCall(SDValue Chain, SDValue Callee, DataAddr_Hi, Ins, dl, DAG, InVals); } -bool PIC16TargetLowering::isDirectLoad(const SDValue Op) { +bool PIC16TargetLowering::isDirectLoad(const SDValue Op) const { if (Op.getOpcode() == PIC16ISD::PIC16Load) if (Op.getOperand(1).getOpcode() == ISD::TargetGlobalAddress || Op.getOperand(1).getOpcode() == ISD::TargetExternalSymbol) @@ -1490,7 +1508,7 @@ bool PIC16TargetLowering::isDirectLoad(const SDValue Op) { // no instruction that can operation on two registers. Most insns take // one register and one memory operand (addwf) / Constant (addlw). bool PIC16TargetLowering::NeedToConvertToMemOp(SDValue Op, unsigned &MemOp, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { // If one of the operand is a constant, return false. if (Op.getOperand(0).getOpcode() == ISD::Constant || Op.getOperand(1).getOpcode() == ISD::Constant) @@ -1512,7 +1530,9 @@ bool PIC16TargetLowering::NeedToConvertToMemOp(SDValue Op, unsigned &MemOp, // Direct load operands are folded in binary operations. But before folding // verify if this folding is legal. Fold only if it is legal otherwise // convert this direct load to a separate memory operation. - if(ISel->IsLegalToFold(Op.getOperand(0), Op.getNode(), Op.getNode())) + if (SelectionDAGISel::IsLegalToFold(Op.getOperand(0), + Op.getNode(), Op.getNode(), + CodeGenOpt::Default)) return false; else MemOp = 0; @@ -1539,7 +1559,9 @@ bool PIC16TargetLowering::NeedToConvertToMemOp(SDValue Op, unsigned &MemOp, // Direct load operands are folded in binary operations. But before folding // verify if this folding is legal. Fold only if it is legal otherwise // convert this direct load to a separate memory operation. - if(ISel->IsLegalToFold(Op.getOperand(1), Op.getNode(), Op.getNode())) + if (SelectionDAGISel::IsLegalToFold(Op.getOperand(1), + Op.getNode(), Op.getNode(), + CodeGenOpt::Default)) return false; else MemOp = 1; @@ -1550,7 +1572,7 @@ bool PIC16TargetLowering::NeedToConvertToMemOp(SDValue Op, unsigned &MemOp, // LowerBinOp - Lower a commutative binary operation that does not // affect status flag carry. -SDValue PIC16TargetLowering::LowerBinOp(SDValue Op, SelectionDAG &DAG) { +SDValue PIC16TargetLowering::LowerBinOp(SDValue Op, SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); // We should have handled larger operands in type legalizer itself. @@ -1571,7 +1593,7 @@ SDValue PIC16TargetLowering::LowerBinOp(SDValue Op, SelectionDAG &DAG) { // LowerADD - Lower all types of ADD operations including the ones // that affects carry. -SDValue PIC16TargetLowering::LowerADD(SDValue Op, SelectionDAG &DAG) { +SDValue PIC16TargetLowering::LowerADD(SDValue Op, SelectionDAG &DAG) const { // We should have handled larger operands in type legalizer itself. assert (Op.getValueType() == MVT::i8 && "illegal add to lower"); DebugLoc dl = Op.getDebugLoc(); @@ -1600,7 +1622,7 @@ SDValue PIC16TargetLowering::LowerADD(SDValue Op, SelectionDAG &DAG) { return Op; } -SDValue PIC16TargetLowering::LowerSUB(SDValue Op, SelectionDAG &DAG) { +SDValue PIC16TargetLowering::LowerSUB(SDValue Op, SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); // We should have handled larger operands in type legalizer itself. assert (Op.getValueType() == MVT::i8 && "illegal sub to lower"); @@ -1647,15 +1669,19 @@ SDValue PIC16TargetLowering::LowerSUB(SDValue Op, SelectionDAG &DAG) { return Op; } -void PIC16TargetLowering::InitReservedFrameCount(const Function *F) { +void PIC16TargetLowering::InitReservedFrameCount(const Function *F, + SelectionDAG &DAG) const { + MachineFunction &MF = DAG.getMachineFunction(); + PIC16MachineFunctionInfo *FuncInfo = MF.getInfo<PIC16MachineFunctionInfo>(); + unsigned NumArgs = F->arg_size(); bool isVoidFunc = (F->getReturnType()->getTypeID() == Type::VoidTyID); if (isVoidFunc) - ReservedFrameCount = NumArgs; + FuncInfo->setReservedFrameCount(NumArgs); else - ReservedFrameCount = NumArgs + 1; + FuncInfo->setReservedFrameCount(NumArgs + 1); } // LowerFormalArguments - Argument values are loaded from the @@ -1669,7 +1695,8 @@ PIC16TargetLowering::LowerFormalArguments(SDValue Chain, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) + const { unsigned NumArgVals = Ins.size(); // Get the callee's name to create the <fname>.args label to pass args. @@ -1678,12 +1705,12 @@ PIC16TargetLowering::LowerFormalArguments(SDValue Chain, std::string FuncName = F->getName(); // Reset the map of FI and TmpOffset - ResetTmpOffsetMap(); + ResetTmpOffsetMap(DAG); // Initialize the ReserveFrameCount - InitReservedFrameCount(F); + InitReservedFrameCount(F, DAG); // Create the <fname>.args external symbol. - const char *tmpName = createESName(PAN::getArgsLabel(FuncName)); + const char *tmpName = ESNames::createESName(PAN::getArgsLabel(FuncName)); SDValue ES = DAG.getTargetExternalSymbol(tmpName, MVT::i8); // Load arg values from the label + offset. @@ -1782,7 +1809,7 @@ static void LookThroughSetCC(SDValue &LHS, SDValue &RHS, // Returns appropriate CMP insn and corresponding condition code in PIC16CC SDValue PIC16TargetLowering::getPIC16Cmp(SDValue LHS, SDValue RHS, unsigned CC, SDValue &PIC16CC, - SelectionDAG &DAG, DebugLoc dl) { + SelectionDAG &DAG, DebugLoc dl) const { PIC16CC::CondCodes CondCode = (PIC16CC::CondCodes) CC; // PIC16 sub is literal - W. So Swap the operands and condition if needed. @@ -1846,7 +1873,8 @@ SDValue PIC16TargetLowering::getPIC16Cmp(SDValue LHS, SDValue RHS, } -SDValue PIC16TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) { +SDValue PIC16TargetLowering::LowerSELECT_CC(SDValue Op, + SelectionDAG &DAG) const { SDValue LHS = Op.getOperand(0); SDValue RHS = Op.getOperand(1); ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get(); @@ -1874,8 +1902,7 @@ SDValue PIC16TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) { MachineBasicBlock * PIC16TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *BB, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const { + MachineBasicBlock *BB) const { const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo(); unsigned CC = (PIC16CC::CondCodes)MI->getOperand(3).getImm(); DebugLoc dl = MI->getDebugLoc(); @@ -1903,12 +1930,9 @@ PIC16TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // Update machine-CFG edges by first adding all successors of the current // block to the new block which will contain the Phi node for the select. - // Also inform sdisel of the edge changes. for (MachineBasicBlock::succ_iterator I = BB->succ_begin(), - E = BB->succ_end(); I != E; ++I) { - EM->insert(std::make_pair(*I, sinkMBB)); + E = BB->succ_end(); I != E; ++I) sinkMBB->addSuccessor(*I); - } // Next, remove all successors of the current block, and add the true // and fallthrough blocks as its successors. while (!BB->succ_empty()) @@ -1938,7 +1962,7 @@ PIC16TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, } -SDValue PIC16TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) { +SDValue PIC16TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = Op.getOperand(0); ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get(); SDValue LHS = Op.getOperand(2); // LHS of the condition. diff --git a/lib/Target/PIC16/PIC16ISelLowering.h b/lib/Target/PIC16/PIC16ISelLowering.h index de1452015f60b..eea17f898365a 100644 --- a/lib/Target/PIC16/PIC16ISelLowering.h +++ b/lib/Target/PIC16/PIC16ISelLowering.h @@ -18,7 +18,6 @@ #include "PIC16.h" #include "PIC16Subtarget.h" #include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/Target/TargetLowering.h" #include <map> @@ -85,53 +84,52 @@ namespace llvm { /// getSetCCResultType - Return the ISD::SETCC ValueType virtual MVT::SimpleValueType getSetCCResultType(EVT ValType) const; virtual MVT::SimpleValueType getCmpLibcallReturnType() const; - SDValue LowerShift(SDValue Op, SelectionDAG &DAG); - SDValue LowerMUL(SDValue Op, SelectionDAG &DAG); - SDValue LowerADD(SDValue Op, SelectionDAG &DAG); - SDValue LowerSUB(SDValue Op, SelectionDAG &DAG); - SDValue LowerBinOp(SDValue Op, SelectionDAG &DAG); + SDValue LowerShift(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerADD(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSUB(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBinOp(SDValue Op, SelectionDAG &DAG) const; // Call returns SDValue LowerDirectCallReturn(SDValue RetLabel, SDValue Chain, SDValue InFlag, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; SDValue LowerIndirectCallReturn(SDValue Chain, SDValue InFlag, SDValue DataAddr_Lo, SDValue DataAddr_Hi, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; // Call arguments SDValue LowerDirectCallArguments(SDValue ArgLabel, SDValue Chain, SDValue InFlag, const SmallVectorImpl<ISD::OutputArg> &Outs, - DebugLoc dl, SelectionDAG &DAG); + DebugLoc dl, SelectionDAG &DAG) const; SDValue LowerIndirectCallArguments(SDValue Chain, SDValue InFlag, SDValue DataAddr_Lo, SDValue DataAddr_Hi, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, - DebugLoc dl, SelectionDAG &DAG); + DebugLoc dl, SelectionDAG &DAG) const; - SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG); - SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG); + SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; SDValue getPIC16Cmp(SDValue LHS, SDValue RHS, unsigned OrigCC, SDValue &CC, - SelectionDAG &DAG, DebugLoc dl); - virtual MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *MBB, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const; + SelectionDAG &DAG, DebugLoc dl) const; + virtual MachineBasicBlock * + EmitInstrWithCustomInserter(MachineInstr *MI, + MachineBasicBlock *MBB) const; - - virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG); + virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results, - SelectionDAG &DAG); + SelectionDAG &DAG) const; virtual void LowerOperationWrapper(SDNode *N, SmallVectorImpl<SDValue> &Results, - SelectionDAG &DAG); + SelectionDAG &DAG) const; virtual SDValue LowerFormalArguments(SDValue Chain, @@ -139,7 +137,7 @@ namespace llvm { bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerCall(SDValue Chain, SDValue Callee, @@ -147,19 +145,19 @@ namespace llvm { const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, - DebugLoc dl, SelectionDAG &DAG); + DebugLoc dl, SelectionDAG &DAG) const; - SDValue ExpandStore(SDNode *N, SelectionDAG &DAG); - SDValue ExpandLoad(SDNode *N, SelectionDAG &DAG); - SDValue ExpandGlobalAddress(SDNode *N, SelectionDAG &DAG); - SDValue ExpandExternalSymbol(SDNode *N, SelectionDAG &DAG); - SDValue ExpandFrameIndex(SDNode *N, SelectionDAG &DAG); + SDValue ExpandStore(SDNode *N, SelectionDAG &DAG) const; + SDValue ExpandLoad(SDNode *N, SelectionDAG &DAG) const; + SDValue ExpandGlobalAddress(SDNode *N, SelectionDAG &DAG) const; + SDValue ExpandExternalSymbol(SDNode *N, SelectionDAG &DAG) const; + SDValue ExpandFrameIndex(SDNode *N, SelectionDAG &DAG) const; SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; SDValue PerformPIC16LoadCombine(SDNode *N, DAGCombinerInfo &DCI) const; @@ -168,13 +166,11 @@ namespace llvm { // This function returns the Tmp Offset for FrameIndex. If any TmpOffset // already exists for the FI then it returns the same else it creates the // new offset and returns. - unsigned GetTmpOffsetForFI(unsigned FI, unsigned slot_size); - void ResetTmpOffsetMap() { FiTmpOffsetMap.clear(); SetTmpSize(0); } - void InitReservedFrameCount(const Function *F); - - // Return the size of Tmp variable - unsigned GetTmpSize() { return TmpSize; } - void SetTmpSize(unsigned Size) { TmpSize = Size; } + unsigned GetTmpOffsetForFI(unsigned FI, unsigned slot_size, + MachineFunction &MF) const; + void ResetTmpOffsetMap(SelectionDAG &DAG) const; + void InitReservedFrameCount(const Function *F, + SelectionDAG &DAG) const; /// getFunctionAlignment - Return the Log2 alignment of this function. virtual unsigned getFunctionAlignment(const Function *) const { @@ -184,43 +180,45 @@ namespace llvm { private: // If the Node is a BUILD_PAIR representing a direct Address, // then this function will return true. - bool isDirectAddress(const SDValue &Op); + bool isDirectAddress(const SDValue &Op) const; // If the Node is a DirectAddress in ROM_SPACE then this // function will return true - bool isRomAddress(const SDValue &Op); + bool isRomAddress(const SDValue &Op) const; // Extract the Lo and Hi component of Op. void GetExpandedParts(SDValue Op, SelectionDAG &DAG, SDValue &Lo, - SDValue &Hi); + SDValue &Hi) const; // Load pointer can be a direct or indirect address. In PIC16 direct // addresses need Banksel and Indirect addresses need to be loaded to // FSR first. Handle address specific cases here. void LegalizeAddress(SDValue Ptr, SelectionDAG &DAG, SDValue &Chain, - SDValue &NewPtr, unsigned &Offset, DebugLoc dl); + SDValue &NewPtr, unsigned &Offset, DebugLoc dl) const; // FrameIndex should be broken down into ExternalSymbol and FrameOffset. void LegalizeFrameIndex(SDValue Op, SelectionDAG &DAG, SDValue &ES, - int &Offset); + int &Offset) const; // For indirect calls data address of the callee frame need to be // extracted. This function fills the arguments DataAddr_Lo and // DataAddr_Hi with the address of the callee frame. void GetDataAddress(DebugLoc dl, SDValue Callee, SDValue &Chain, SDValue &DataAddr_Lo, SDValue &DataAddr_Hi, - SelectionDAG &DAG); + SelectionDAG &DAG) const; // We can not have both operands of a binary operation in W. // This function is used to put one operand on stack and generate a load. - SDValue ConvertToMemOperand(SDValue Op, SelectionDAG &DAG, DebugLoc dl); + SDValue ConvertToMemOperand(SDValue Op, SelectionDAG &DAG, + DebugLoc dl) const; // This function checks if we need to put an operand of an operation on // stack and generate a load or not. // DAG parameter is required to access DAG information during // analysis. - bool NeedToConvertToMemOp(SDValue Op, unsigned &MemOp, SelectionDAG &DAG); + bool NeedToConvertToMemOp(SDValue Op, unsigned &MemOp, + SelectionDAG &DAG) const; /// Subtarget - Keep a pointer to the PIC16Subtarget around so that we can /// make the right decision when generating code for different targets. @@ -233,31 +231,15 @@ namespace llvm { // To set and retrieve the lib call names. void setPIC16LibcallName(PIC16ISD::PIC16Libcall Call, const char *Name); - const char *getPIC16LibcallName(PIC16ISD::PIC16Libcall Call); + const char *getPIC16LibcallName(PIC16ISD::PIC16Libcall Call) const; // Make PIC16 Libcall. SDValue MakePIC16Libcall(PIC16ISD::PIC16Libcall Call, EVT RetVT, const SDValue *Ops, unsigned NumOps, bool isSigned, - SelectionDAG &DAG, DebugLoc dl); + SelectionDAG &DAG, DebugLoc dl) const; // Check if operation has a direct load operand. - inline bool isDirectLoad(const SDValue Op); - - public: - // Keep a pointer to SelectionDAGISel to access its public - // interface (It is required during legalization) - SelectionDAGISel *ISel; - - private: - // The frameindexes generated for spill/reload are stack based. - // This maps maintain zero based indexes for these FIs. - std::map<unsigned, unsigned> FiTmpOffsetMap; - unsigned TmpSize; - - // These are the frames for return value and argument passing - // These FrameIndices will be expanded to foo.frame external symbol - // and all others will be expanded to foo.tmp external symbol. - unsigned ReservedFrameCount; + inline bool isDirectLoad(const SDValue Op) const; }; } // namespace llvm diff --git a/lib/Target/PIC16/PIC16InstrInfo.cpp b/lib/Target/PIC16/PIC16InstrInfo.cpp index 365e8b20b7a1f..9e415e069a9c5 100644 --- a/lib/Target/PIC16/PIC16InstrInfo.cpp +++ b/lib/Target/PIC16/PIC16InstrInfo.cpp @@ -71,14 +71,14 @@ void PIC16InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned SrcReg, bool isKill, int FI, const TargetRegisterClass *RC) const { - PIC16TargetLowering *PTLI = TM.getTargetLowering(); + const PIC16TargetLowering *PTLI = TM.getTargetLowering(); DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); const Function *Func = MBB.getParent()->getFunction(); const std::string FuncName = Func->getName(); - const char *tmpName = createESName(PAN::getTempdataLabel(FuncName)); + const char *tmpName = ESNames::createESName(PAN::getTempdataLabel(FuncName)); // On the order of operands here: think "movwf SrcReg, tmp_slot, offset". if (RC == PIC16::GPRRegisterClass) { @@ -86,7 +86,7 @@ void PIC16InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, //MachineRegisterInfo &RI = MF.getRegInfo(); BuildMI(MBB, I, DL, get(PIC16::movwf)) .addReg(SrcReg, getKillRegState(isKill)) - .addImm(PTLI->GetTmpOffsetForFI(FI, 1)) + .addImm(PTLI->GetTmpOffsetForFI(FI, 1, *MBB.getParent())) .addExternalSymbol(tmpName) .addImm(1); // Emit banksel for it. } @@ -101,7 +101,7 @@ void PIC16InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, : PIC16::save_fsr1; BuildMI(MBB, I, DL, get(opcode)) .addReg(SrcReg, getKillRegState(isKill)) - .addImm(PTLI->GetTmpOffsetForFI(FI, 3)) + .addImm(PTLI->GetTmpOffsetForFI(FI, 3, *MBB.getParent())) .addExternalSymbol(tmpName) .addImm(1); // Emit banksel for it. } @@ -113,21 +113,21 @@ void PIC16InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, int FI, const TargetRegisterClass *RC) const { - PIC16TargetLowering *PTLI = TM.getTargetLowering(); + const PIC16TargetLowering *PTLI = TM.getTargetLowering(); DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); const Function *Func = MBB.getParent()->getFunction(); const std::string FuncName = Func->getName(); - const char *tmpName = createESName(PAN::getTempdataLabel(FuncName)); + const char *tmpName = ESNames::createESName(PAN::getTempdataLabel(FuncName)); // On the order of operands here: think "movf FrameIndex, W". if (RC == PIC16::GPRRegisterClass) { //MachineFunction &MF = *MBB.getParent(); //MachineRegisterInfo &RI = MF.getRegInfo(); BuildMI(MBB, I, DL, get(PIC16::movf), DestReg) - .addImm(PTLI->GetTmpOffsetForFI(FI, 1)) + .addImm(PTLI->GetTmpOffsetForFI(FI, 1, *MBB.getParent())) .addExternalSymbol(tmpName) .addImm(1); // Emit banksel for it. } @@ -141,7 +141,7 @@ void PIC16InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, unsigned opcode = (DestReg == PIC16::FSR0) ? PIC16::restore_fsr0 : PIC16::restore_fsr1; BuildMI(MBB, I, DL, get(opcode), DestReg) - .addImm(PTLI->GetTmpOffsetForFI(FI, 3)) + .addImm(PTLI->GetTmpOffsetForFI(FI, 3, *MBB.getParent())) .addExternalSymbol(tmpName) .addImm(1); // Emit banksel for it. } diff --git a/lib/Target/PIC16/PIC16MachineFunctionInfo.h b/lib/Target/PIC16/PIC16MachineFunctionInfo.h new file mode 100644 index 0000000000000..bdf50867f2e13 --- /dev/null +++ b/lib/Target/PIC16/PIC16MachineFunctionInfo.h @@ -0,0 +1,52 @@ +//====- PIC16MachineFuctionInfo.h - PIC16 machine function info -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares PIC16-specific per-machine-function information. +// +//===----------------------------------------------------------------------===// + +#ifndef PIC16MACHINEFUNCTIONINFO_H +#define PIC16MACHINEFUNCTIONINFO_H + +#include "llvm/CodeGen/MachineFunction.h" + +namespace llvm { + +/// PIC16MachineFunctionInfo - This class is derived from MachineFunction +/// private PIC16 target-specific information for each MachineFunction. +class PIC16MachineFunctionInfo : public MachineFunctionInfo { + // The frameindexes generated for spill/reload are stack based. + // This maps maintain zero based indexes for these FIs. + std::map<unsigned, unsigned> FiTmpOffsetMap; + unsigned TmpSize; + + // These are the frames for return value and argument passing + // These FrameIndices will be expanded to foo.frame external symbol + // and all others will be expanded to foo.tmp external symbol. + unsigned ReservedFrameCount; + +public: + PIC16MachineFunctionInfo() + : TmpSize(0), ReservedFrameCount(0) {} + + explicit PIC16MachineFunctionInfo(MachineFunction &MF) + : TmpSize(0), ReservedFrameCount(0) {} + + std::map<unsigned, unsigned> &getFiTmpOffsetMap() { return FiTmpOffsetMap; } + + unsigned getTmpSize() const { return TmpSize; } + void setTmpSize(unsigned Size) { TmpSize = Size; } + + unsigned getReservedFrameCount() const { return ReservedFrameCount; } + void setReservedFrameCount(unsigned Count) { ReservedFrameCount = Count; } +}; + +} // End llvm namespace + +#endif diff --git a/lib/Target/PIC16/PIC16Passes/PIC16Cloner.cpp b/lib/Target/PIC16/PIC16Passes/PIC16Cloner.cpp index 865da35de3c57..c282521be3241 100644 --- a/lib/Target/PIC16/PIC16Passes/PIC16Cloner.cpp +++ b/lib/Target/PIC16/PIC16Passes/PIC16Cloner.cpp @@ -172,7 +172,7 @@ void PIC16Cloner::CloneAutos(Function *F) { VarName = I->getName().str(); if (PAN::isLocalToFunc(FnName, VarName)) { // Auto variable for current function found. Clone it. - GlobalVariable *GV = I; + const GlobalVariable *GV = I; const Type *InitTy = GV->getInitializer()->getType(); GlobalVariable *ClonedGV = diff --git a/lib/Target/PIC16/PIC16SelectionDAGInfo.cpp b/lib/Target/PIC16/PIC16SelectionDAGInfo.cpp new file mode 100644 index 0000000000000..76c6c6091e5a7 --- /dev/null +++ b/lib/Target/PIC16/PIC16SelectionDAGInfo.cpp @@ -0,0 +1,22 @@ +//===-- PIC16SelectionDAGInfo.cpp - PIC16 SelectionDAG Info ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the PIC16SelectionDAGInfo class. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "pic16-selectiondag-info" +#include "PIC16SelectionDAGInfo.h" +using namespace llvm; + +PIC16SelectionDAGInfo::PIC16SelectionDAGInfo() { +} + +PIC16SelectionDAGInfo::~PIC16SelectionDAGInfo() { +} diff --git a/lib/Target/PIC16/PIC16SelectionDAGInfo.h b/lib/Target/PIC16/PIC16SelectionDAGInfo.h new file mode 100644 index 0000000000000..112480e50cc79 --- /dev/null +++ b/lib/Target/PIC16/PIC16SelectionDAGInfo.h @@ -0,0 +1,29 @@ +//===-- PIC16SelectionDAGInfo.h - PIC16 SelectionDAG Info -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the PIC16 subclass for TargetSelectionDAGInfo. +// +//===----------------------------------------------------------------------===// + +#ifndef PIC16SELECTIONDAGINFO_H +#define PIC16SELECTIONDAGINFO_H + +#include "llvm/Target/TargetSelectionDAGInfo.h" + +namespace llvm { + +class PIC16SelectionDAGInfo : public TargetSelectionDAGInfo { +public: + PIC16SelectionDAGInfo(); + ~PIC16SelectionDAGInfo(); +}; + +} + +#endif diff --git a/lib/Target/PIC16/PIC16TargetMachine.h b/lib/Target/PIC16/PIC16TargetMachine.h index b11fdd5dba503..849845afd430e 100644 --- a/lib/Target/PIC16/PIC16TargetMachine.h +++ b/lib/Target/PIC16/PIC16TargetMachine.h @@ -50,8 +50,8 @@ public: return &(InstrInfo.getRegisterInfo()); } - virtual PIC16TargetLowering *getTargetLowering() const { - return const_cast<PIC16TargetLowering*>(&TLInfo); + virtual const PIC16TargetLowering *getTargetLowering() const { + return &TLInfo; } virtual bool addInstSelector(PassManagerBase &PM, diff --git a/lib/Target/PIC16/PIC16TargetObjectFile.cpp b/lib/Target/PIC16/PIC16TargetObjectFile.cpp index b891c18c4643f..ff0f971cc3823 100644 --- a/lib/Target/PIC16/PIC16TargetObjectFile.cpp +++ b/lib/Target/PIC16/PIC16TargetObjectFile.cpp @@ -8,7 +8,6 @@ //===----------------------------------------------------------------------===// #include "PIC16TargetObjectFile.h" -#include "PIC16ISelLowering.h" #include "PIC16TargetMachine.h" #include "PIC16Section.h" #include "llvm/DerivedTypes.h" @@ -27,7 +26,7 @@ PIC16TargetObjectFile::~PIC16TargetObjectFile() { /// Find a pic16 section. Return null if not found. Do not create one. PIC16Section *PIC16TargetObjectFile:: -findPIC16Section(const std::string &Name) { +findPIC16Section(const std::string &Name) const { /// Return if we have an already existing one. PIC16Section *Entry = SectionsByName[Name]; if (Entry) @@ -134,7 +133,7 @@ void PIC16TargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &tm){ const MCSection * PIC16TargetObjectFile::allocateUDATA(const GlobalVariable *GV) const { assert(GV->hasInitializer() && "This global doesn't need space"); - Constant *C = GV->getInitializer(); + const Constant *C = GV->getInitializer(); assert(C->isNullValue() && "Unitialized globals has non-zero initializer"); // Find how much space this global needs. @@ -169,7 +168,7 @@ PIC16TargetObjectFile::allocateUDATA(const GlobalVariable *GV) const { const MCSection * PIC16TargetObjectFile::allocateIDATA(const GlobalVariable *GV) const{ assert(GV->hasInitializer() && "This global doesn't need space"); - Constant *C = GV->getInitializer(); + const Constant *C = GV->getInitializer(); assert(!C->isNullValue() && "initialized globals has zero initializer"); assert(GV->getType()->getAddressSpace() == PIC16ISD::RAM_SPACE && "can allocate initialized RAM data only"); diff --git a/lib/Target/PIC16/PIC16TargetObjectFile.h b/lib/Target/PIC16/PIC16TargetObjectFile.h index cf8bf848e45ec..b1eb9f9d854a5 100644 --- a/lib/Target/PIC16/PIC16TargetObjectFile.h +++ b/lib/Target/PIC16/PIC16TargetObjectFile.h @@ -122,7 +122,7 @@ namespace llvm { void Initialize(MCContext &Ctx, const TargetMachine &TM); /// Return the section with the given Name. Null if not found. - PIC16Section *findPIC16Section(const std::string &Name); + PIC16Section *findPIC16Section(const std::string &Name) const; /// Override section allocations for user specified sections. virtual const MCSection * diff --git a/lib/Target/PowerPC/AsmPrinter/CMakeLists.txt b/lib/Target/PowerPC/AsmPrinter/CMakeLists.txt index 236b264d8f80b..42cd4862a98fd 100644 --- a/lib/Target/PowerPC/AsmPrinter/CMakeLists.txt +++ b/lib/Target/PowerPC/AsmPrinter/CMakeLists.txt @@ -3,4 +3,4 @@ include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/ add_llvm_library(LLVMPowerPCAsmPrinter PPCAsmPrinter.cpp ) -add_dependencies(LLVMPowerPCAsmPrinter PowerPCCodeGenTable_gen)
\ No newline at end of file +add_dependencies(LLVMPowerPCAsmPrinter PowerPCCodeGenTable_gen) diff --git a/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp b/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp index 605656493c4bc..e35dc579f2cd5 100644 --- a/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp @@ -21,6 +21,7 @@ #include "PPCPredicates.h" #include "PPCTargetMachine.h" #include "PPCSubtarget.h" +#include "llvm/Analysis/DebugInfo.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/Module.h" @@ -199,8 +200,8 @@ namespace { raw_ostream &O) { const MachineOperand &MO = MI->getOperand(OpNo); if (TM.getRelocationModel() != Reloc::Static) { - if (MO.getType() == MachineOperand::MO_GlobalAddress) { - GlobalValue *GV = MO.getGlobal(); + if (MO.isGlobal()) { + const GlobalValue *GV = MO.getGlobal(); if (GV->isDeclaration() || GV->isWeakForLinker()) { // Dynamically-resolved functions need a stub for the function. MCSymbol *Sym = GetSymbolWithGlobalValueBase(GV, "$stub"); @@ -213,7 +214,7 @@ namespace { return; } } - if (MO.getType() == MachineOperand::MO_ExternalSymbol) { + if (MO.isSymbol()) { SmallString<128> TempNameStr; TempNameStr += StringRef(MO.getSymbolName()); TempNameStr += StringRef("$stub"); @@ -311,7 +312,7 @@ namespace { void printTOCEntryLabel(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) { const MachineOperand &MO = MI->getOperand(OpNo); - assert(MO.getType() == MachineOperand::MO_GlobalAddress); + assert(MO.isGlobal()); MCSymbol *Sym = Mang->getSymbol(MO.getGlobal()); // Map symbol -> label of TOC entry. @@ -405,7 +406,7 @@ void PPCAsmPrinter::printOp(const MachineOperand &MO, raw_ostream &O) { } case MachineOperand::MO_GlobalAddress: { // Computing the address of a global symbol, not calling it. - GlobalValue *GV = MO.getGlobal(); + const GlobalValue *GV = MO.getGlobal(); MCSymbol *SymToPrint; // External or weakly linked global variables need non-lazily-resolved stubs @@ -535,6 +536,23 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { SmallString<128> Str; raw_svector_ostream O(Str); + if (MI->getOpcode() == TargetOpcode::DBG_VALUE) { + unsigned NOps = MI->getNumOperands(); + assert(NOps==4); + O << '\t' << MAI->getCommentString() << "DEBUG_VALUE: "; + // cast away const; DIetc do not take const operands for some reason. + DIVariable V(const_cast<MDNode *>(MI->getOperand(NOps-1).getMetadata())); + O << V.getName(); + O << " <- "; + // Frame address. Currently handles register +- offset only. + assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm()); + O << '['; printOperand(MI, 0, O); O << '+'; printOperand(MI, 1, O); + O << ']'; + O << "+"; + printOperand(MI, NOps-2, O); + OutStreamer.EmitRawText(O.str()); + return; + } // Check for slwi/srwi mnemonics. if (MI->getOpcode() == PPC::RLWINM) { unsigned char SH = MI->getOperand(2).getImm(); @@ -649,18 +667,18 @@ void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) { // Prime text sections so they are adjacent. This reduces the likelihood a // large data or debug section causes a branch to exceed 16M limit. - TargetLoweringObjectFileMachO &TLOFMacho = - static_cast<TargetLoweringObjectFileMachO &>(getObjFileLowering()); + const TargetLoweringObjectFileMachO &TLOFMacho = + static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering()); OutStreamer.SwitchSection(TLOFMacho.getTextCoalSection()); if (TM.getRelocationModel() == Reloc::PIC_) { OutStreamer.SwitchSection( - TLOFMacho.getMachOSection("__TEXT", "__picsymbolstub1", + OutContext.getMachOSection("__TEXT", "__picsymbolstub1", MCSectionMachO::S_SYMBOL_STUBS | MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, 32, SectionKind::getText())); } else if (TM.getRelocationModel() == Reloc::DynamicNoPIC) { OutStreamer.SwitchSection( - TLOFMacho.getMachOSection("__TEXT","__symbol_stub1", + OutContext.getMachOSection("__TEXT","__symbol_stub1", MCSectionMachO::S_SYMBOL_STUBS | MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, 16, SectionKind::getText())); @@ -686,8 +704,8 @@ void PPCDarwinAsmPrinter:: EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) { bool isPPC64 = TM.getTargetData()->getPointerSizeInBits() == 64; - TargetLoweringObjectFileMachO &TLOFMacho = - static_cast<TargetLoweringObjectFileMachO &>(getObjFileLowering()); + const TargetLoweringObjectFileMachO &TLOFMacho = + static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering()); // .lazy_symbol_pointer const MCSection *LSPSection = TLOFMacho.getLazySymbolPointerSection(); @@ -695,10 +713,10 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) { // Output stubs for dynamically-linked functions if (TM.getRelocationModel() == Reloc::PIC_) { const MCSection *StubSection = - TLOFMacho.getMachOSection("__TEXT", "__picsymbolstub1", - MCSectionMachO::S_SYMBOL_STUBS | - MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, - 32, SectionKind::getText()); + OutContext.getMachOSection("__TEXT", "__picsymbolstub1", + MCSectionMachO::S_SYMBOL_STUBS | + MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, + 32, SectionKind::getText()); for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { OutStreamer.SwitchSection(StubSection); EmitAlignment(4); @@ -742,10 +760,10 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) { } const MCSection *StubSection = - TLOFMacho.getMachOSection("__TEXT","__symbol_stub1", - MCSectionMachO::S_SYMBOL_STUBS | - MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, - 16, SectionKind::getText()); + OutContext.getMachOSection("__TEXT","__symbol_stub1", + MCSectionMachO::S_SYMBOL_STUBS | + MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, + 16, SectionKind::getText()); for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { MCSymbol *Stub = Stubs[i].first; MCSymbol *RawSym = Stubs[i].second.getPointer(); @@ -782,8 +800,8 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) { bool isPPC64 = TM.getTargetData()->getPointerSizeInBits() == 64; // Darwin/PPC always uses mach-o. - TargetLoweringObjectFileMachO &TLOFMacho = - static_cast<TargetLoweringObjectFileMachO &>(getObjFileLowering()); + const TargetLoweringObjectFileMachO &TLOFMacho = + static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering()); MachineModuleInfoMachO &MMIMacho = MMI->getObjFileInfo<MachineModuleInfoMachO>(); @@ -794,8 +812,8 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) { if (MAI->doesSupportExceptionHandling() && MMI) { // Add the (possibly multiple) personalities to the set of global values. // Only referenced functions get into the Personalities list. - const std::vector<Function *> &Personalities = MMI->getPersonalities(); - for (std::vector<Function *>::const_iterator I = Personalities.begin(), + const std::vector<const Function*> &Personalities = MMI->getPersonalities(); + for (std::vector<const Function*>::const_iterator I = Personalities.begin(), E = Personalities.end(); I != E; ++I) { if (*I) { MCSymbol *NLPSym = GetSymbolWithGlobalValueBase(*I, "$non_lazy_ptr"); diff --git a/lib/Target/PowerPC/CMakeLists.txt b/lib/Target/PowerPC/CMakeLists.txt index c997c5cfc7a0d..7ffc5eb5f3118 100644 --- a/lib/Target/PowerPC/CMakeLists.txt +++ b/lib/Target/PowerPC/CMakeLists.txt @@ -24,6 +24,7 @@ add_llvm_target(PowerPCCodeGen PPCRegisterInfo.cpp PPCSubtarget.cpp PPCTargetMachine.cpp + PPCSelectionDAGInfo.cpp ) target_link_libraries (LLVMPowerPCCodeGen LLVMSelectionDAG) diff --git a/lib/Target/PowerPC/PPCCodeEmitter.cpp b/lib/Target/PowerPC/PPCCodeEmitter.cpp index f7c27d40a5b37..361fa70fb4c45 100644 --- a/lib/Target/PowerPC/PPCCodeEmitter.cpp +++ b/lib/Target/PowerPC/PPCCodeEmitter.cpp @@ -202,7 +202,7 @@ unsigned PPCCodeEmitter::getMachineOpValue(const MachineInstr &MI, MachineRelocation R; if (MO.isGlobal()) { R = MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc, - MO.getGlobal(), 0, + const_cast<GlobalValue *>(MO.getGlobal()), 0, isa<Function>(MO.getGlobal())); } else if (MO.isSymbol()) { R = MachineRelocation::getExtSym(MCE.getCurrentPCOffset(), diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 1e323849d1ef2..3d9f8aa6ee5c6 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -16,7 +16,6 @@ #include "PPC.h" #include "PPCPredicates.h" #include "PPCTargetMachine.h" -#include "PPCISelLowering.h" #include "PPCHazardRecognizers.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineFunction.h" @@ -41,8 +40,8 @@ namespace { /// instructions for SelectionDAG operations. /// class PPCDAGToDAGISel : public SelectionDAGISel { - PPCTargetMachine &TM; - PPCTargetLowering &PPCLowering; + const PPCTargetMachine &TM; + const PPCTargetLowering &PPCLowering; const PPCSubtarget &PPCSubTarget; unsigned GlobalBaseReg; public: diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 9cd01be54e42d..6f1195393bd05 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -397,7 +397,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate /// function arguments in the caller parameter area. unsigned PPCTargetLowering::getByValTypeAlignment(const Type *Ty) const { - TargetMachine &TM = getTargetMachine(); + const TargetMachine &TM = getTargetMachine(); // Darwin passes everything on 4 byte boundary. if (TM.getSubtarget<PPCSubtarget>().isDarwin()) return 4; @@ -476,7 +476,7 @@ static bool isFloatingPointZero(SDValue Op) { else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) { // Maybe this has already been legalized into the constant pool? if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1))) - if (ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal())) + if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal())) return CFP->getValueAPF().isZero(); } return false; @@ -1095,10 +1095,10 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, //===----------------------------------------------------------------------===// SDValue PPCTargetLowering::LowerConstantPool(SDValue Op, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { EVT PtrVT = Op.getValueType(); ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); - Constant *C = CP->getConstVal(); + const Constant *C = CP->getConstVal(); SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment()); SDValue Zero = DAG.getConstant(0, PtrVT); // FIXME there isn't really any debug info here @@ -1129,7 +1129,7 @@ SDValue PPCTargetLowering::LowerConstantPool(SDValue Op, return Lo; } -SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) { +SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { EVT PtrVT = Op.getValueType(); JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT); @@ -1163,16 +1163,17 @@ SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) { } SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { llvm_unreachable("TLS not implemented for PPC."); return SDValue(); // Not reached } -SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) { +SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op, + SelectionDAG &DAG) const { EVT PtrVT = Op.getValueType(); DebugLoc DL = Op.getDebugLoc(); - BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); + const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); SDValue TgtBA = DAG.getBlockAddress(BA, PtrVT, /*isTarget=*/true); SDValue Zero = DAG.getConstant(0, PtrVT); SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, TgtBA, Zero); @@ -1199,10 +1200,10 @@ SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) { } SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { EVT PtrVT = Op.getValueType(); GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op); - GlobalValue *GV = GSDN->getGlobal(); + const GlobalValue *GV = GSDN->getGlobal(); SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset()); SDValue Zero = DAG.getConstant(0, PtrVT); // FIXME there isn't really any debug info here @@ -1247,7 +1248,7 @@ SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op, false, false, 0); } -SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) { +SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); DebugLoc dl = Op.getDebugLoc(); @@ -1291,17 +1292,14 @@ SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) { } SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG, - int VarArgsFrameIndex, - int VarArgsStackOffset, - unsigned VarArgsNumGPR, - unsigned VarArgsNumFPR, - const PPCSubtarget &Subtarget) { + const PPCSubtarget &Subtarget) const { llvm_unreachable("VAARG not yet implemented for the SVR4 ABI!"); return SDValue(); // Not reached } -SDValue PPCTargetLowering::LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) { +SDValue PPCTargetLowering::LowerTRAMPOLINE(SDValue Op, + SelectionDAG &DAG) const { SDValue Chain = Op.getOperand(0); SDValue Trmp = Op.getOperand(1); // trampoline SDValue FPtr = Op.getOperand(2); // nested function @@ -1343,18 +1341,17 @@ SDValue PPCTargetLowering::LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) { } SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG, - int VarArgsFrameIndex, - int VarArgsStackOffset, - unsigned VarArgsNumGPR, - unsigned VarArgsNumFPR, - const PPCSubtarget &Subtarget) { + const PPCSubtarget &Subtarget) const { + MachineFunction &MF = DAG.getMachineFunction(); + PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); + DebugLoc dl = Op.getDebugLoc(); if (Subtarget.isDarwinABI() || Subtarget.isPPC64()) { // vastart just stores the address of the VarArgsFrameIndex slot into the // memory location argument. EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); - SDValue FR = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT); + SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0, false, false, 0); @@ -1385,14 +1382,16 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG, // } va_list[1]; - SDValue ArgGPR = DAG.getConstant(VarArgsNumGPR, MVT::i32); - SDValue ArgFPR = DAG.getConstant(VarArgsNumFPR, MVT::i32); + SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), MVT::i32); + SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), MVT::i32); EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); - SDValue StackOffsetFI = DAG.getFrameIndex(VarArgsStackOffset, PtrVT); - SDValue FR = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT); + SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(), + PtrVT); + SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), + PtrVT); uint64_t FrameOffset = PtrVT.getSizeInBits()/8; SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, PtrVT); @@ -1525,7 +1524,8 @@ PPCTargetLowering::LowerFormalArguments(SDValue Chain, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) + const { if (PPCSubTarget.isSVR4ABI() && !PPCSubTarget.isPPC64()) { return LowerFormalArguments_SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG, InVals); @@ -1542,7 +1542,7 @@ PPCTargetLowering::LowerFormalArguments_SVR4( const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { // 32-bit SVR4 ABI Stack Frame Layout: // +-----------------------------------+ @@ -1575,6 +1575,7 @@ PPCTargetLowering::LowerFormalArguments_SVR4( MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); + PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); // Potential tail calls could cause overwriting of argument stack slots. @@ -1688,24 +1689,27 @@ PPCTargetLowering::LowerFormalArguments_SVR4( }; const unsigned NumFPArgRegs = array_lengthof(FPArgRegs); - VarArgsNumGPR = CCInfo.getFirstUnallocated(GPArgRegs, NumGPArgRegs); - VarArgsNumFPR = CCInfo.getFirstUnallocated(FPArgRegs, NumFPArgRegs); + FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs, + NumGPArgRegs)); + FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs, + NumFPArgRegs)); // Make room for NumGPArgRegs and NumFPArgRegs. int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 + NumFPArgRegs * EVT(MVT::f64).getSizeInBits()/8; - VarArgsStackOffset = MFI->CreateFixedObject(PtrVT.getSizeInBits()/8, - CCInfo.getNextStackOffset(), - true, false); + FuncInfo->setVarArgsStackOffset( + MFI->CreateFixedObject(PtrVT.getSizeInBits()/8, + CCInfo.getNextStackOffset(), + true, false)); - VarArgsFrameIndex = MFI->CreateStackObject(Depth, 8, false); - SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT); + FuncInfo->setVarArgsFrameIndex(MFI->CreateStackObject(Depth, 8, false)); + SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); // The fixed integer arguments of a variadic function are // stored to the VarArgsFrameIndex on the stack. unsigned GPRIndex = 0; - for (; GPRIndex != VarArgsNumGPR; ++GPRIndex) { + for (; GPRIndex != FuncInfo->getVarArgsNumGPR(); ++GPRIndex) { SDValue Val = DAG.getRegister(GPArgRegs[GPRIndex], PtrVT); SDValue Store = DAG.getStore(Chain, dl, Val, FIN, NULL, 0, false, false, 0); @@ -1736,7 +1740,7 @@ PPCTargetLowering::LowerFormalArguments_SVR4( // The double arguments are stored to the VarArgsFrameIndex // on the stack. unsigned FPRIndex = 0; - for (FPRIndex = 0; FPRIndex != VarArgsNumFPR; ++FPRIndex) { + for (FPRIndex = 0; FPRIndex != FuncInfo->getVarArgsNumFPR(); ++FPRIndex) { SDValue Val = DAG.getRegister(FPArgRegs[FPRIndex], MVT::f64); SDValue Store = DAG.getStore(Chain, dl, Val, FIN, NULL, 0, false, false, 0); @@ -1775,11 +1779,12 @@ PPCTargetLowering::LowerFormalArguments_Darwin( const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { // TODO: add description of PPC stack frame format, or at least some docs. // MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); + PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); bool isPPC64 = PtrVT == MVT::i64; @@ -2090,9 +2095,10 @@ PPCTargetLowering::LowerFormalArguments_Darwin( if (isVarArg) { int Depth = ArgOffset; - VarArgsFrameIndex = MFI->CreateFixedObject(PtrVT.getSizeInBits()/8, - Depth, true, false); - SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT); + FuncInfo->setVarArgsFrameIndex( + MFI->CreateFixedObject(PtrVT.getSizeInBits()/8, + Depth, true, false)); + SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); // If this function is vararg, store any remaining integer argument regs // to their spots on the stack so that they may be loaded by deferencing the @@ -2359,7 +2365,7 @@ SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG, SDValue &LROpOut, SDValue &FPOpOut, bool isDarwinABI, - DebugLoc dl) { + DebugLoc dl) const { if (SPDiff) { // Load the LR and FP stack slot for later adjusting. EVT VT = PPCSubTarget.isPPC64() ? MVT::i64 : MVT::i32; @@ -2582,7 +2588,7 @@ PPCTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { SmallVector<CCValAssign, 16> RVLocs; CCState CCRetInfo(CallConv, isVarArg, getTargetMachine(), @@ -2613,7 +2619,7 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl, SDValue &Callee, int SPDiff, unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { std::vector<EVT> NodeTys; SmallVector<SDValue, 8> Ops; unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, dl, SPDiff, @@ -2701,7 +2707,7 @@ PPCTargetLowering::LowerCall(SDValue Chain, SDValue Callee, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { if (isTailCall) isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg, Ins, DAG); @@ -2724,7 +2730,7 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { // See PPCTargetLowering::LowerFormalArguments_SVR4() for a description // of the 32-bit SVR4 ABI stack frame layout. @@ -2930,7 +2936,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { unsigned NumOps = Outs.size(); @@ -3291,7 +3297,7 @@ SDValue PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, - DebugLoc dl, SelectionDAG &DAG) { + DebugLoc dl, SelectionDAG &DAG) const { SmallVector<CCValAssign, 16> RVLocs; CCState CCInfo(CallConv, isVarArg, getTargetMachine(), @@ -3323,7 +3329,7 @@ PPCTargetLowering::LowerReturn(SDValue Chain, } SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG, - const PPCSubtarget &Subtarget) { + const PPCSubtarget &Subtarget) const { // When we pop the dynamic allocation we need to restore the SP link. DebugLoc dl = Op.getDebugLoc(); @@ -3407,7 +3413,7 @@ PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const { SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG, - const PPCSubtarget &Subtarget) { + const PPCSubtarget &Subtarget) const { // Get the inputs. SDValue Chain = Op.getOperand(0); SDValue Size = Op.getOperand(1); @@ -3428,7 +3434,7 @@ SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, /// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when /// possible. -SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) { +SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { // Not FP? Not a fsel. if (!Op.getOperand(0).getValueType().isFloatingPoint() || !Op.getOperand(2).getValueType().isFloatingPoint()) @@ -3502,7 +3508,7 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) { // FIXME: Split this code up when LegalizeDAGTypes lands. SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, - DebugLoc dl) { + DebugLoc dl) const { assert(Op.getOperand(0).getValueType().isFloatingPoint()); SDValue Src = Op.getOperand(0); if (Src.getValueType() == MVT::f32) @@ -3537,7 +3543,8 @@ SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, false, false, 0); } -SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) { +SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op, + SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); // Don't handle ppc_fp128 here; let it be lowered to a libcall. if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64) @@ -3586,7 +3593,8 @@ SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) { return FP; } -SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) { +SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op, + SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); /* The rounding mode is in bits 30:31 of FPSR, and has the following @@ -3649,7 +3657,7 @@ SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) { ISD::TRUNCATE : ISD::ZERO_EXTEND), dl, VT, RetVal); } -SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) { +SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); unsigned BitWidth = VT.getSizeInBits(); DebugLoc dl = Op.getDebugLoc(); @@ -3678,7 +3686,7 @@ SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) { return DAG.getMergeValues(OutOps, 2, dl); } -SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) { +SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); unsigned BitWidth = VT.getSizeInBits(); @@ -3707,7 +3715,7 @@ SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) { return DAG.getMergeValues(OutOps, 2, dl); } -SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) { +SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); EVT VT = Op.getValueType(); unsigned BitWidth = VT.getSizeInBits(); @@ -3808,7 +3816,8 @@ static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, // selects to a single instruction, return Op. Otherwise, if we can codegen // this case more efficiently than a constant pool load, lower it to the // sequence of ops that should be used. -SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { +SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, + SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode()); assert(BVN != 0 && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR"); @@ -4050,7 +4059,7 @@ static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, /// return the code it can be lowered into. Worst case, it can always be /// lowered into a vperm. SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); SDValue V1 = Op.getOperand(0); SDValue V2 = Op.getOperand(1); @@ -4216,7 +4225,7 @@ static bool getAltivecCompareInfo(SDValue Intrin, int &CompareOpc, /// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom /// lower, do it, otherwise return null. SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { // If this is a lowered altivec predicate compare, CompareOpc is set to the // opcode number of the comparison. DebugLoc dl = Op.getDebugLoc(); @@ -4284,12 +4293,12 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, } SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); // Create a stack slot that is 16-byte aligned. MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); int FrameIdx = FrameInfo->CreateStackObject(16, 16, false); - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT PtrVT = getPointerTy(); SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); // Store the input value into Value#0 of the stack slot. @@ -4301,7 +4310,7 @@ SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, false, false, 0); } -SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) { +SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); if (Op.getValueType() == MVT::v4i32) { SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); @@ -4362,7 +4371,7 @@ SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) { /// LowerOperation - Provide custom lowering hooks for some operations. /// -SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { +SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { default: llvm_unreachable("Wasn't expecting to be able to lower this!"); case ISD::ConstantPool: return LowerConstantPool(Op, DAG); @@ -4373,12 +4382,10 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { case ISD::SETCC: return LowerSETCC(Op, DAG); case ISD::TRAMPOLINE: return LowerTRAMPOLINE(Op, DAG); case ISD::VASTART: - return LowerVASTART(Op, DAG, VarArgsFrameIndex, VarArgsStackOffset, - VarArgsNumGPR, VarArgsNumFPR, PPCSubTarget); + return LowerVASTART(Op, DAG, PPCSubTarget); case ISD::VAARG: - return LowerVAARG(Op, DAG, VarArgsFrameIndex, VarArgsStackOffset, - VarArgsNumGPR, VarArgsNumFPR, PPCSubTarget); + return LowerVAARG(Op, DAG, PPCSubTarget); case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG, PPCSubTarget); case ISD::DYNAMIC_STACKALLOC: @@ -4412,7 +4419,7 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { void PPCTargetLowering::ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { DebugLoc dl = N->getDebugLoc(); switch (N->getOpcode()) { default: @@ -4677,8 +4684,7 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI, MachineBasicBlock * PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *BB, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const { + MachineBasicBlock *BB) const { const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); // To "insert" these instructions we actually have to insert their @@ -4716,12 +4722,9 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, F->insert(It, sinkMBB); // Update machine-CFG edges by first adding all successors of the current // block to the new block which will contain the Phi node for the select. - // Also inform sdisel of the edge changes. for (MachineBasicBlock::succ_iterator I = BB->succ_begin(), - E = BB->succ_end(); I != E; ++I) { - EM->insert(std::make_pair(*I, sinkMBB)); + E = BB->succ_end(); I != E; ++I) sinkMBB->addSuccessor(*I); - } // Next, remove all successors of the current block, and add the true // and fallthrough blocks as its successors. while (!BB->succ_empty()) @@ -5032,7 +5035,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { - TargetMachine &TM = getTargetMachine(); + const TargetMachine &TM = getTargetMachine(); SelectionDAG &DAG = DCI.DAG; DebugLoc dl = N->getDebugLoc(); switch (N->getOpcode()) { @@ -5491,46 +5494,59 @@ bool PPCTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const { return false; } -SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) { +SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op, + SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); - // Depths > 0 not supported yet! - if (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() > 0) - return SDValue(); + unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + // Make sure the function does not optimize away the store of the RA to + // the stack. MachineFunction &MF = DAG.getMachineFunction(); PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); + FuncInfo->setLRStoreRequired(); + bool isPPC64 = PPCSubTarget.isPPC64(); + bool isDarwinABI = PPCSubTarget.isDarwinABI(); + + if (Depth > 0) { + SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); + SDValue Offset = + + DAG.getConstant(PPCFrameInfo::getReturnSaveOffset(isPPC64, isDarwinABI), + isPPC64? MVT::i64 : MVT::i32); + return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), + DAG.getNode(ISD::ADD, dl, getPointerTy(), + FrameAddr, Offset), + NULL, 0, false, false, 0); + } // Just load the return address off the stack. SDValue RetAddrFI = getReturnAddrFrameIndex(DAG); - - // Make sure the function really does not optimize away the store of the RA - // to the stack. - FuncInfo->setLRStoreRequired(); - return DAG.getLoad(getPointerTy(), dl, - DAG.getEntryNode(), RetAddrFI, NULL, 0, - false, false, 0); + return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), + RetAddrFI, NULL, 0, false, false, 0); } -SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) { +SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op, + SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); - // Depths > 0 not supported yet! - if (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() > 0) - return SDValue(); + unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); bool isPPC64 = PtrVT == MVT::i64; MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); - bool is31 = (NoFramePointerElim || MFI->hasVarSizedObjects()) - && MFI->getStackSize(); - - if (isPPC64) - return DAG.getCopyFromReg(DAG.getEntryNode(), dl, is31 ? PPC::X31 : PPC::X1, - MVT::i64); - else - return DAG.getCopyFromReg(DAG.getEntryNode(), dl, is31 ? PPC::R31 : PPC::R1, - MVT::i32); + MFI->setFrameAddressIsTaken(true); + bool is31 = (DisableFramePointerElim(MF) || MFI->hasVarSizedObjects()) && + MFI->getStackSize() && + !MF.getFunction()->hasFnAttr(Attribute::Naked); + unsigned FrameReg = isPPC64 ? (is31 ? PPC::X31 : PPC::X1) : + (is31 ? PPC::R31 : PPC::R1); + SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, + PtrVT); + while (Depth--) + FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(), + FrameAddr, NULL, 0, false, false, 0); + return FrameAddr; } bool @@ -5547,12 +5563,15 @@ PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { /// probably because the source does not need to be loaded. If /// 'NonScalarIntSafe' is true, that means it's safe to return a /// non-scalar-integer type, e.g. empty string source, constant, or loaded -/// from memory. It returns EVT::Other if SelectionDAG should be responsible -/// for determining it. +/// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is +/// constant so it does not need to be loaded. +/// It returns EVT::Other if the type should be determined using generic +/// target-independent logic. EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool NonScalarIntSafe, - SelectionDAG &DAG) const { + bool MemcpyStrSrc, + MachineFunction &MF) const { if (this->PPCSubTarget.isPPC64()) { return MVT::i64; } else { diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index f816bddaf5a39..1d05f3d4ea698 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -233,14 +233,8 @@ namespace llvm { } class PPCTargetLowering : public TargetLowering { - int VarArgsFrameIndex; // FrameIndex for start of varargs area. - int VarArgsStackOffset; // StackOffset for start of stack - // arguments. - unsigned VarArgsNumGPR; // Index of the first unused integer - // register for parameter passing. - unsigned VarArgsNumFPR; // Index of the first unused double - // register for parameter passing. const PPCSubtarget &PPCSubTarget; + public: explicit PPCTargetLowering(PPCTargetMachine &TM); @@ -285,13 +279,13 @@ namespace llvm { /// LowerOperation - Provide custom lowering hooks for some operations. /// - virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG); + virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; /// ReplaceNodeResults - Replace the results of node with an illegal result /// type with new values built out of custom code. /// virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results, - SelectionDAG &DAG); + SelectionDAG &DAG) const; virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; @@ -302,9 +296,9 @@ namespace llvm { const SelectionDAG &DAG, unsigned Depth = 0) const; - virtual MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *MBB, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const; + virtual MachineBasicBlock * + EmitInstrWithCustomInserter(MachineInstr *MI, + MachineBasicBlock *MBB) const; MachineBasicBlock *EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *MBB, bool is64Bit, unsigned BinOpcode) const; @@ -355,12 +349,14 @@ namespace llvm { /// probably because the source does not need to be loaded. If /// 'NonScalarIntSafe' is true, that means it's safe to return a /// non-scalar-integer type, e.g. empty string source, constant, or loaded - /// from memory. It returns EVT::Other if SelectionDAG should be responsible - /// for determining it. + /// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is + /// constant so it does not need to be loaded. + /// It returns EVT::Other if the type should be determined using generic + /// target-independent logic. virtual EVT - getOptimalMemOpType(uint64_t Size, - unsigned DstAlign, unsigned SrcAlign, - bool NonScalarIntSafe, SelectionDAG &DAG) const; + getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, + bool NonScalarIntSafe, bool MemcpyStrSrc, + MachineFunction &MF) const; /// getFunctionAlignment - Return the Log2 alignment of this function. virtual unsigned getFunctionAlignment(const Function *F) const; @@ -382,46 +378,43 @@ namespace llvm { SDValue &LROpOut, SDValue &FPOpOut, bool isDarwinABI, - DebugLoc dl); + DebugLoc dl) const; - SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG); - SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG); - SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG); - SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG); - SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG); - SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG); - SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG); - SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG); - SDValue LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG); + SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG, - int VarArgsFrameIndex, int VarArgsStackOffset, - unsigned VarArgsNumGPR, unsigned VarArgsNumFPR, - const PPCSubtarget &Subtarget); - SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG, int VarArgsFrameIndex, - int VarArgsStackOffset, unsigned VarArgsNumGPR, - unsigned VarArgsNumFPR, const PPCSubtarget &Subtarget); + const PPCSubtarget &Subtarget) const; + SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG, + const PPCSubtarget &Subtarget) const; SDValue LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG, - const PPCSubtarget &Subtarget); + const PPCSubtarget &Subtarget) const; SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG, - const PPCSubtarget &Subtarget); - SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG); - SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, DebugLoc dl); - SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG); - SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG); - SDValue LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG); - SDValue LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG); - SDValue LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG); - SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG); - SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG); - SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG); - SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG); - SDValue LowerMUL(SDValue Op, SelectionDAG &DAG); + const PPCSubtarget &Subtarget) const; + SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, DebugLoc dl) const; + SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const; SDValue LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; SDValue FinishCall(CallingConv::ID CallConv, DebugLoc dl, bool isTailCall, bool isVarArg, SelectionDAG &DAG, @@ -431,14 +424,14 @@ namespace llvm { SDValue &Callee, int SPDiff, unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerCall(SDValue Chain, SDValue Callee, @@ -446,26 +439,26 @@ namespace llvm { const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, - DebugLoc dl, SelectionDAG &DAG); + DebugLoc dl, SelectionDAG &DAG) const; SDValue LowerFormalArguments_Darwin(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; SDValue LowerFormalArguments_SVR4(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; SDValue LowerCall_Darwin(SDValue Chain, SDValue Callee, @@ -473,14 +466,14 @@ namespace llvm { const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; SDValue LowerCall_SVR4(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; }; } diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index 6b0a282af09ce..ae1fbd8220a7f 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -24,10 +24,13 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/MC/MCAsmInfo.h" -using namespace llvm; +namespace llvm { extern cl::opt<bool> EnablePPC32RS; // FIXME (64-bit): See PPCRegisterInfo.cpp. extern cl::opt<bool> EnablePPC64RS; // FIXME (64-bit): See PPCRegisterInfo.cpp. +} + +using namespace llvm; PPCInstrInfo::PPCInstrInfo(PPCTargetMachine &tm) : TargetInstrInfoImpl(PPCInsts, array_lengthof(PPCInsts)), TM(tm), @@ -642,6 +645,16 @@ PPCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, MBB.insert(MI, NewMIs[i]); } +MachineInstr* +PPCInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, + int FrameIx, uint64_t Offset, + const MDNode *MDPtr, + DebugLoc DL) const { + MachineInstrBuilder MIB = BuildMI(MF, DL, get(PPC::DBG_VALUE)); + addFrameReference(MIB, FrameIx, 0, false).addImm(Offset).addMetadata(MDPtr); + return &*MIB; +} + /// foldMemoryOperand - PowerPC (like most RISC's) can only fold spills into /// copy instructions, turning them into load/store instructions. MachineInstr *PPCInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, @@ -778,6 +791,7 @@ unsigned PPCInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { case PPC::DBG_LABEL: case PPC::EH_LABEL: case PPC::GC_LABEL: + case PPC::DBG_VALUE: return 0; default: return 4; // PowerPC instructions are all 4 bytes diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h index 57facac94354a..9fb6e7dce1f72 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.h +++ b/lib/Target/PowerPC/PPCInstrInfo.h @@ -126,6 +126,12 @@ public: unsigned DestReg, int FrameIndex, const TargetRegisterClass *RC) const; + virtual MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF, + int FrameIx, + uint64_t Offset, + const MDNode *MDPtr, + DebugLoc DL) const; + /// foldMemoryOperand - PowerPC (like most RISC's) can only fold spills into /// copy instructions, turning them into load/store instructions. virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, diff --git a/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/lib/Target/PowerPC/PPCMachineFunctionInfo.h index b359dd33bd0e6..e2649c8b380f0 100644 --- a/lib/Target/PowerPC/PPCMachineFunctionInfo.h +++ b/lib/Target/PowerPC/PPCMachineFunctionInfo.h @@ -58,6 +58,18 @@ private: /// how the caller's stack pointer should be calculated (epilog/dynamicalloc). bool HasFastCall; + /// VarArgsFrameIndex - FrameIndex for start of varargs area. + int VarArgsFrameIndex; + /// VarArgsStackOffset - StackOffset for start of stack + /// arguments. + int VarArgsStackOffset; + /// VarArgsNumGPR - Index of the first unused integer + /// register for parameter passing. + unsigned VarArgsNumGPR; + /// VarArgsNumFPR - Index of the first unused double + /// register for parameter passing. + unsigned VarArgsNumFPR; + public: explicit PPCFunctionInfo(MachineFunction &MF) : FramePointerSaveIndex(0), @@ -66,7 +78,11 @@ public: LRStoreRequired(false), MinReservedArea(0), TailCallSPDelta(0), - HasFastCall(false) {} + HasFastCall(false), + VarArgsFrameIndex(0), + VarArgsStackOffset(0), + VarArgsNumGPR(0), + VarArgsNumFPR(0) {} int getFramePointerSaveIndex() const { return FramePointerSaveIndex; } void setFramePointerSaveIndex(int Idx) { FramePointerSaveIndex = Idx; } @@ -96,6 +112,18 @@ public: void setHasFastCall() { HasFastCall = true; } bool hasFastCall() const { return HasFastCall;} + + int getVarArgsFrameIndex() const { return VarArgsFrameIndex; } + void setVarArgsFrameIndex(int Index) { VarArgsFrameIndex = Index; } + + int getVarArgsStackOffset() const { return VarArgsStackOffset; } + void setVarArgsStackOffset(int Offset) { VarArgsStackOffset = Offset; } + + unsigned getVarArgsNumGPR() const { return VarArgsNumGPR; } + void setVarArgsNumGPR(unsigned Num) { VarArgsNumGPR = Num; } + + unsigned getVarArgsNumFPR() const { return VarArgsNumFPR; } + void setVarArgsNumFPR(unsigned Num) { VarArgsNumFPR = Num; } }; } // end of namespace llvm diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index 52d87cde803f8..5f1e04eecb4a8 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -43,7 +43,6 @@ #include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" #include <cstdlib> -using namespace llvm; // FIXME This disables some code that aligns the stack to a boundary // bigger than the default (16 bytes on Darwin) when there is a stack local @@ -56,14 +55,19 @@ using namespace llvm; #define ALIGN_STACK 0 // FIXME (64-bit): Eventually enable by default. +namespace llvm { cl::opt<bool> EnablePPC32RS("enable-ppc32-regscavenger", - cl::init(false), - cl::desc("Enable PPC32 register scavenger"), - cl::Hidden); + cl::init(false), + cl::desc("Enable PPC32 register scavenger"), + cl::Hidden); cl::opt<bool> EnablePPC64RS("enable-ppc64-regscavenger", - cl::init(false), - cl::desc("Enable PPC64 register scavenger"), - cl::Hidden); + cl::init(false), + cl::desc("Enable PPC64 register scavenger"), + cl::Hidden); +} + +using namespace llvm; + #define EnableRegisterScavenging \ ((EnablePPC32RS && !Subtarget.isPPC64()) || \ (EnablePPC64RS && Subtarget.isPPC64())) @@ -405,7 +409,10 @@ PPCRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const { // static bool needsFP(const MachineFunction &MF) { const MachineFrameInfo *MFI = MF.getFrameInfo(); - return NoFramePointerElim || MFI->hasVarSizedObjects() || + // Naked functions have no stack frame pushed, so we don't have a frame pointer. + if (MF.getFunction()->hasFnAttr(Attribute::Naked)) + return false; + return DisableFramePointerElim(MF) || MFI->hasVarSizedObjects() || (GuaranteedTailCallOpt && MF.getInfo<PPCFunctionInfo>()->hasFastCall()); } @@ -790,7 +797,10 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // If we're not using a Frame Pointer that has been set to the value of the // SP before having the stack size subtracted from it, then add the stack size // to Offset to get the correct offset. - Offset += MFI->getStackSize(); + // Naked functions have stack size 0, although getStackSize may not reflect that + // because we didn't call all the pieces that compute it for naked functions. + if (!MF.getFunction()->hasFnAttr(Attribute::Naked)) + Offset += MFI->getStackSize(); // If we can, encode the offset directly into the instruction. If this is a // normal PPC "ri" instruction, any 16-bit value can be safely encoded. If diff --git a/lib/Target/PowerPC/PPCSchedule.td b/lib/Target/PowerPC/PPCSchedule.td index d589414c01540..9664f14571715 100644 --- a/lib/Target/PowerPC/PPCSchedule.td +++ b/lib/Target/PowerPC/PPCSchedule.td @@ -15,8 +15,6 @@ def SLU : FuncUnit; // Store/load unit def SRU : FuncUnit; // special register unit def IU1 : FuncUnit; // integer unit 1 (simple) def IU2 : FuncUnit; // integer unit 2 (complex) -def IU3 : FuncUnit; // integer unit 3 (7450 simple) -def IU4 : FuncUnit; // integer unit 4 (7450 simple) def FPU1 : FuncUnit; // floating point unit 1 def FPU2 : FuncUnit; // floating point unit 2 def VPU : FuncUnit; // vector permutation unit @@ -24,7 +22,6 @@ def VIU1 : FuncUnit; // vector integer unit 1 (simple) def VIU2 : FuncUnit; // vector integer unit 2 (complex) def VFPU : FuncUnit; // vector floating point unit - //===----------------------------------------------------------------------===// // Instruction Itinerary classes used for PowerPC // diff --git a/lib/Target/PowerPC/PPCScheduleG3.td b/lib/Target/PowerPC/PPCScheduleG3.td index f72194d6de0eb..73447631b2ad8 100644 --- a/lib/Target/PowerPC/PPCScheduleG3.td +++ b/lib/Target/PowerPC/PPCScheduleG3.td @@ -12,7 +12,8 @@ //===----------------------------------------------------------------------===// -def G3Itineraries : ProcessorItineraries<[ +def G3Itineraries : ProcessorItineraries< + [IU1, IU2, FPU1, BPU, SRU, SLU], [ InstrItinData<IntGeneral , [InstrStage<1, [IU1, IU2]>]>, InstrItinData<IntCompare , [InstrStage<1, [IU1, IU2]>]>, InstrItinData<IntDivW , [InstrStage<19, [IU1]>]>, diff --git a/lib/Target/PowerPC/PPCScheduleG4.td b/lib/Target/PowerPC/PPCScheduleG4.td index 92ed20f17ce5d..7efc693fa8c96 100644 --- a/lib/Target/PowerPC/PPCScheduleG4.td +++ b/lib/Target/PowerPC/PPCScheduleG4.td @@ -11,7 +11,8 @@ // //===----------------------------------------------------------------------===// -def G4Itineraries : ProcessorItineraries<[ +def G4Itineraries : ProcessorItineraries< + [IU1, IU2, SLU, SRU, BPU, FPU1, VIU1, VIU2, VPU, VFPU], [ InstrItinData<IntGeneral , [InstrStage<1, [IU1, IU2]>]>, InstrItinData<IntCompare , [InstrStage<1, [IU1, IU2]>]>, InstrItinData<IntDivW , [InstrStage<19, [IU1]>]>, diff --git a/lib/Target/PowerPC/PPCScheduleG4Plus.td b/lib/Target/PowerPC/PPCScheduleG4Plus.td index 7474ba494d100..15056c0cfe44d 100644 --- a/lib/Target/PowerPC/PPCScheduleG4Plus.td +++ b/lib/Target/PowerPC/PPCScheduleG4Plus.td @@ -11,7 +11,11 @@ // //===----------------------------------------------------------------------===// -def G4PlusItineraries : ProcessorItineraries<[ +def IU3 : FuncUnit; // integer unit 3 (7450 simple) +def IU4 : FuncUnit; // integer unit 4 (7450 simple) + +def G4PlusItineraries : ProcessorItineraries< + [IU1, IU2, IU3, IU4, BPU, SLU, FPU1, VFPU, VIU1, VIU2, VPU], [ InstrItinData<IntGeneral , [InstrStage<1, [IU1, IU2, IU3, IU4]>]>, InstrItinData<IntCompare , [InstrStage<1, [IU1, IU2, IU3, IU4]>]>, InstrItinData<IntDivW , [InstrStage<23, [IU2]>]>, diff --git a/lib/Target/PowerPC/PPCScheduleG5.td b/lib/Target/PowerPC/PPCScheduleG5.td index d28214715a766..2dffc48b238ff 100644 --- a/lib/Target/PowerPC/PPCScheduleG5.td +++ b/lib/Target/PowerPC/PPCScheduleG5.td @@ -11,7 +11,8 @@ // //===----------------------------------------------------------------------===// -def G5Itineraries : ProcessorItineraries<[ +def G5Itineraries : ProcessorItineraries< + [IU1, IU2, SLU, BPU, FPU1, FPU2, VFPU, VIU1, VIU2, VPU], [ InstrItinData<IntGeneral , [InstrStage<2, [IU1, IU2]>]>, InstrItinData<IntCompare , [InstrStage<3, [IU1, IU2]>]>, InstrItinData<IntDivD , [InstrStage<68, [IU1]>]>, diff --git a/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp b/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp new file mode 100644 index 0000000000000..c0004a9b4197d --- /dev/null +++ b/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp @@ -0,0 +1,22 @@ +//===-- PPCSelectionDAGInfo.cpp - PowerPC SelectionDAG Info ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the PPCSelectionDAGInfo class. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "powerpc-selectiondag-info" +#include "PPCSelectionDAGInfo.h" +using namespace llvm; + +PPCSelectionDAGInfo::PPCSelectionDAGInfo() { +} + +PPCSelectionDAGInfo::~PPCSelectionDAGInfo() { +} diff --git a/lib/Target/PowerPC/PPCSelectionDAGInfo.h b/lib/Target/PowerPC/PPCSelectionDAGInfo.h new file mode 100644 index 0000000000000..3ad3418440825 --- /dev/null +++ b/lib/Target/PowerPC/PPCSelectionDAGInfo.h @@ -0,0 +1,29 @@ +//===-- PPCSelectionDAGInfo.h - PowerPC SelectionDAG Info -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the PowerPC subclass for TargetSelectionDAGInfo. +// +//===----------------------------------------------------------------------===// + +#ifndef POWERPCCSELECTIONDAGINFO_H +#define POWERPCCSELECTIONDAGINFO_H + +#include "llvm/Target/TargetSelectionDAGInfo.h" + +namespace llvm { + +class PPCSelectionDAGInfo : public TargetSelectionDAGInfo { +public: + PPCSelectionDAGInfo(); + ~PPCSelectionDAGInfo(); +}; + +} + +#endif diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h index ac9ae2b43fd42..35e33a234c430 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.h +++ b/lib/Target/PowerPC/PPCTargetMachine.h @@ -44,8 +44,8 @@ public: virtual const PPCInstrInfo *getInstrInfo() const { return &InstrInfo; } virtual const PPCFrameInfo *getFrameInfo() const { return &FrameInfo; } virtual PPCJITInfo *getJITInfo() { return &JITInfo; } - virtual PPCTargetLowering *getTargetLowering() const { - return const_cast<PPCTargetLowering*>(&TLInfo); + virtual const PPCTargetLowering *getTargetLowering() const { + return &TLInfo; } virtual const PPCRegisterInfo *getRegisterInfo() const { return &InstrInfo.getRegisterInfo(); diff --git a/lib/Target/README.txt b/lib/Target/README.txt index 052a5756da9fa..144bf5d3e3dc6 100644 --- a/lib/Target/README.txt +++ b/lib/Target/README.txt @@ -263,19 +263,6 @@ if anyone cared enough about sincos. //===---------------------------------------------------------------------===// -Turn this into a single byte store with no load (the other 3 bytes are -unmodified): - -define void @test(i32* %P) { - %tmp = load i32* %P - %tmp14 = or i32 %tmp, 3305111552 - %tmp15 = and i32 %tmp14, 3321888767 - store i32 %tmp15, i32* %P - ret void -} - -//===---------------------------------------------------------------------===// - quantum_sigma_x in 462.libquantum contains the following loop: for(i=0; i<reg->size; i++) @@ -1843,3 +1830,21 @@ entry: //===---------------------------------------------------------------------===// +We should use DSE + llvm.lifetime.end to delete dead vtable pointer updates. +See GCC PR34949 + +//===---------------------------------------------------------------------===// + +In this code: + +long foo(long x) { + return x > 1 ? x : 1; +} + +LLVM emits a comparison with 1 instead of 0. 0 would be equivalent +and cheaper on most targets. + +LLVM prefers comparisons with zero over non-zero in general, but in this +case it choses instead to keep the max operation obvious. + +//===---------------------------------------------------------------------===// diff --git a/lib/Target/Sparc/AsmPrinter/CMakeLists.txt b/lib/Target/Sparc/AsmPrinter/CMakeLists.txt index e3ca18e3b1b16..da629f6e63fb4 100644 --- a/lib/Target/Sparc/AsmPrinter/CMakeLists.txt +++ b/lib/Target/Sparc/AsmPrinter/CMakeLists.txt @@ -3,4 +3,4 @@ include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/ add_llvm_library(LLVMSparcAsmPrinter SparcAsmPrinter.cpp ) -add_dependencies(LLVMSparcAsmPrinter SparcCodeGenTable_gen)
\ No newline at end of file +add_dependencies(LLVMSparcAsmPrinter SparcCodeGenTable_gen) diff --git a/lib/Target/Sparc/CMakeLists.txt b/lib/Target/Sparc/CMakeLists.txt index 74f320a00035f..684cadfb57f78 100644 --- a/lib/Target/Sparc/CMakeLists.txt +++ b/lib/Target/Sparc/CMakeLists.txt @@ -20,6 +20,7 @@ add_llvm_target(SparcCodeGen SparcRegisterInfo.cpp SparcSubtarget.cpp SparcTargetMachine.cpp + SparcSelectionDAGInfo.cpp ) target_link_libraries (LLVMSparcCodeGen LLVMSelectionDAG) diff --git a/lib/Target/Sparc/SparcISelDAGToDAG.cpp b/lib/Target/Sparc/SparcISelDAGToDAG.cpp index a7d1805e2a36b..698923e3c9e08 100644 --- a/lib/Target/Sparc/SparcISelDAGToDAG.cpp +++ b/lib/Target/Sparc/SparcISelDAGToDAG.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#include "SparcISelLowering.h" #include "SparcTargetMachine.h" #include "llvm/Intrinsics.h" #include "llvm/CodeGen/SelectionDAGISel.h" @@ -68,7 +67,6 @@ private: } // end anonymous namespace SDNode* SparcDAGToDAGISel::getGlobalBaseReg() { - MachineFunction *MF = BB->getParent(); unsigned GlobalBaseReg = TM.getInstrInfo()->getGlobalBaseReg(MF); return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode(); } diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp index 4e93ef05a1cc0..f47e53acbfc1e 100644 --- a/lib/Target/Sparc/SparcISelLowering.cpp +++ b/lib/Target/Sparc/SparcISelLowering.cpp @@ -14,6 +14,7 @@ #include "SparcISelLowering.h" #include "SparcTargetMachine.h" +#include "SparcMachineFunctionInfo.h" #include "llvm/Function.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -37,7 +38,7 @@ SDValue SparcTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, - DebugLoc dl, SelectionDAG &DAG) { + DebugLoc dl, SelectionDAG &DAG) const { // CCValAssign - represent the assignment of the return value to locations. SmallVector<CCValAssign, 16> RVLocs; @@ -85,10 +86,12 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) + const { MachineFunction &MF = DAG.getMachineFunction(); MachineRegisterInfo &RegInfo = MF.getRegInfo(); + SparcMachineFunctionInfo *FuncInfo = MF.getInfo<SparcMachineFunctionInfo>(); // Assign locations to all of the incoming arguments. SmallVector<CCValAssign, 16> ArgLocs; @@ -226,7 +229,7 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain, // Store remaining ArgRegs to the stack if this is a varargs function. if (isVarArg) { // Remember the vararg offset for the va_start implementation. - VarArgsFrameOffset = ArgOffset; + FuncInfo->setVarArgsFrameOffset(ArgOffset); std::vector<SDValue> OutChains; @@ -261,7 +264,7 @@ SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { // Sparc target does not yet support tail call optimization. isTailCall = false; @@ -752,8 +755,8 @@ static void LookThroughSetCC(SDValue &LHS, SDValue &RHS, } SDValue SparcTargetLowering::LowerGlobalAddress(SDValue Op, - SelectionDAG &DAG) { - GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); + SelectionDAG &DAG) const { + const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); // FIXME there isn't really any debug info here DebugLoc dl = Op.getDebugLoc(); SDValue GA = DAG.getTargetGlobalAddress(GV, MVT::i32); @@ -773,11 +776,11 @@ SDValue SparcTargetLowering::LowerGlobalAddress(SDValue Op, } SDValue SparcTargetLowering::LowerConstantPool(SDValue Op, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op); // FIXME there isn't really any debug info here DebugLoc dl = Op.getDebugLoc(); - Constant *C = N->getConstVal(); + const Constant *C = N->getConstVal(); SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment()); SDValue Hi = DAG.getNode(SPISD::Hi, dl, MVT::i32, CP); SDValue Lo = DAG.getNode(SPISD::Lo, dl, MVT::i32, CP); @@ -873,14 +876,18 @@ static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) { } static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG, - SparcTargetLowering &TLI) { + const SparcTargetLowering &TLI) { + MachineFunction &MF = DAG.getMachineFunction(); + SparcMachineFunctionInfo *FuncInfo = MF.getInfo<SparcMachineFunctionInfo>(); + // vastart just stores the address of the VarArgsFrameIndex slot into the // memory location argument. DebugLoc dl = Op.getDebugLoc(); - SDValue Offset = DAG.getNode(ISD::ADD, dl, MVT::i32, - DAG.getRegister(SP::I6, MVT::i32), - DAG.getConstant(TLI.getVarArgsFrameOffset(), - MVT::i32)); + SDValue Offset = + DAG.getNode(ISD::ADD, dl, MVT::i32, + DAG.getRegister(SP::I6, MVT::i32), + DAG.getConstant(FuncInfo->getVarArgsFrameOffset(), + MVT::i32)); const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); return DAG.getStore(Op.getOperand(0), dl, Offset, Op.getOperand(1), SV, 0, false, false, 0); @@ -939,7 +946,7 @@ static SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) { SDValue SparcTargetLowering:: -LowerOperation(SDValue Op, SelectionDAG &DAG) { +LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { default: llvm_unreachable("Should not custom lower this!"); // Frame & Return address. Currently unimplemented @@ -961,8 +968,7 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) { MachineBasicBlock * SparcTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *BB, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const { + MachineBasicBlock *BB) const { const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo(); unsigned BROpcode; unsigned CC; @@ -1006,12 +1012,9 @@ SparcTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, F->insert(It, sinkMBB); // Update machine-CFG edges by first adding all successors of the current // block to the new block which will contain the Phi node for the select. - // Also inform sdisel of the edge changes. for (MachineBasicBlock::succ_iterator I = BB->succ_begin(), - E = BB->succ_end(); I != E; ++I) { - EM->insert(std::make_pair(*I, sinkMBB)); + E = BB->succ_end(); I != E; ++I) sinkMBB->addSuccessor(*I); - } // Next, remove all successors of the current block, and add the true // and fallthrough blocks as its successors. while (!BB->succ_empty()) diff --git a/lib/Target/Sparc/SparcISelLowering.h b/lib/Target/Sparc/SparcISelLowering.h index 2ee73c1ac9095..5ebdcacba57d4 100644 --- a/lib/Target/Sparc/SparcISelLowering.h +++ b/lib/Target/Sparc/SparcISelLowering.h @@ -41,12 +41,9 @@ namespace llvm { } class SparcTargetLowering : public TargetLowering { - int VarArgsFrameOffset; // Frame offset to start of varargs area. public: SparcTargetLowering(TargetMachine &TM); - virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG); - - int getVarArgsFrameOffset() const { return VarArgsFrameOffset; } + virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; /// computeMaskedBitsForTargetNode - Determine which of the bits specified /// in Mask are known to be either zero or one and return them in the @@ -58,9 +55,9 @@ namespace llvm { const SelectionDAG &DAG, unsigned Depth = 0) const; - virtual MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *MBB, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const; + virtual MachineBasicBlock * + EmitInstrWithCustomInserter(MachineInstr *MI, + MachineBasicBlock *MBB) const; virtual const char *getTargetNodeName(unsigned Opcode) const; @@ -82,7 +79,7 @@ namespace llvm { bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerCall(SDValue Chain, SDValue Callee, @@ -91,16 +88,16 @@ namespace llvm { const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, - DebugLoc dl, SelectionDAG &DAG); + DebugLoc dl, SelectionDAG &DAG) const; - SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG); - SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG); + SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; }; } // end namespace llvm diff --git a/lib/Target/Sparc/SparcMachineFunctionInfo.h b/lib/Target/Sparc/SparcMachineFunctionInfo.h index 56d8708dbe7dc..e34c1312810c2 100644 --- a/lib/Target/Sparc/SparcMachineFunctionInfo.h +++ b/lib/Target/Sparc/SparcMachineFunctionInfo.h @@ -20,12 +20,20 @@ namespace llvm { class SparcMachineFunctionInfo : public MachineFunctionInfo { private: unsigned GlobalBaseReg; + + /// VarArgsFrameOffset - Frame offset to start of varargs area. + int VarArgsFrameOffset; + public: - SparcMachineFunctionInfo() : GlobalBaseReg(0) {} - explicit SparcMachineFunctionInfo(MachineFunction &MF) : GlobalBaseReg(0) {} + SparcMachineFunctionInfo() : GlobalBaseReg(0), VarArgsFrameOffset(0) {} + explicit SparcMachineFunctionInfo(MachineFunction &MF) + : GlobalBaseReg(0), VarArgsFrameOffset(0) {} unsigned getGlobalBaseReg() const { return GlobalBaseReg; } void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; } + + int getVarArgsFrameOffset() const { return VarArgsFrameOffset; } + void setVarArgsFrameOffset(int Offset) { VarArgsFrameOffset = Offset; } }; } diff --git a/lib/Target/Sparc/SparcSelectionDAGInfo.cpp b/lib/Target/Sparc/SparcSelectionDAGInfo.cpp new file mode 100644 index 0000000000000..4825aa9286367 --- /dev/null +++ b/lib/Target/Sparc/SparcSelectionDAGInfo.cpp @@ -0,0 +1,22 @@ +//===-- SparcSelectionDAGInfo.cpp - Sparc SelectionDAG Info ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the SparcSelectionDAGInfo class. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "sparc-selectiondag-info" +#include "SparcSelectionDAGInfo.h" +using namespace llvm; + +SparcSelectionDAGInfo::SparcSelectionDAGInfo() { +} + +SparcSelectionDAGInfo::~SparcSelectionDAGInfo() { +} diff --git a/lib/Target/Sparc/SparcSelectionDAGInfo.h b/lib/Target/Sparc/SparcSelectionDAGInfo.h new file mode 100644 index 0000000000000..bc1b561f6e86c --- /dev/null +++ b/lib/Target/Sparc/SparcSelectionDAGInfo.h @@ -0,0 +1,29 @@ +//===-- SparcSelectionDAGInfo.h - Sparc SelectionDAG Info -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the Sparc subclass for TargetSelectionDAGInfo. +// +//===----------------------------------------------------------------------===// + +#ifndef SPARCSELECTIONDAGINFO_H +#define SPARCSELECTIONDAGINFO_H + +#include "llvm/Target/TargetSelectionDAGInfo.h" + +namespace llvm { + +class SparcSelectionDAGInfo : public TargetSelectionDAGInfo { +public: + SparcSelectionDAGInfo(); + ~SparcSelectionDAGInfo(); +}; + +} + +#endif diff --git a/lib/Target/Sparc/SparcTargetMachine.h b/lib/Target/Sparc/SparcTargetMachine.h index 5834d08457d7c..1367a31129640 100644 --- a/lib/Target/Sparc/SparcTargetMachine.h +++ b/lib/Target/Sparc/SparcTargetMachine.h @@ -39,8 +39,8 @@ public: virtual const SparcRegisterInfo *getRegisterInfo() const { return &InstrInfo.getRegisterInfo(); } - virtual SparcTargetLowering* getTargetLowering() const { - return const_cast<SparcTargetLowering*>(&TLInfo); + virtual const SparcTargetLowering* getTargetLowering() const { + return &TLInfo; } virtual const TargetData *getTargetData() const { return &DataLayout; } diff --git a/lib/Target/SystemZ/CMakeLists.txt b/lib/Target/SystemZ/CMakeLists.txt index 81e51d89ad9f5..880e56f0525bb 100644 --- a/lib/Target/SystemZ/CMakeLists.txt +++ b/lib/Target/SystemZ/CMakeLists.txt @@ -18,6 +18,7 @@ add_llvm_target(SystemZCodeGen SystemZRegisterInfo.cpp SystemZSubtarget.cpp SystemZTargetMachine.cpp + SystemZSelectionDAGInfo.cpp ) target_link_libraries (LLVMSystemZCodeGen LLVMSelectionDAG) diff --git a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp index 8152e1dbe1a1c..75d563b9c4d5a 100644 --- a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp +++ b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #include "SystemZ.h" -#include "SystemZISelLowering.h" #include "SystemZTargetMachine.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" @@ -85,7 +84,7 @@ namespace { /// namespace { class SystemZDAGToDAGISel : public SelectionDAGISel { - SystemZTargetLowering &Lowering; + const SystemZTargetLowering &Lowering; const SystemZSubtarget &Subtarget; void getAddressOperandsRI(const SystemZRRIAddressMode &AM, @@ -588,7 +587,7 @@ bool SystemZDAGToDAGISel::SelectLAAddr(SDNode *Op, SDValue Addr, bool SystemZDAGToDAGISel::TryFoldLoad(SDNode *P, SDValue N, SDValue &Base, SDValue &Disp, SDValue &Index) { if (ISD::isNON_EXTLoad(N.getNode()) && - IsLegalToFold(N, P, P)) + IsLegalToFold(N, P, P, OptLevel)) return SelectAddrRRI20(P, N.getOperand(1), Base, Disp, Index); return false; } diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index 6f4b30faaf60c..e98f18b9a31e8 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -158,7 +158,8 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm) : setTruncStoreAction(MVT::f64, MVT::f32, Expand); } -SDValue SystemZTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { +SDValue SystemZTargetLowering::LowerOperation(SDValue Op, + SelectionDAG &DAG) const { switch (Op.getOpcode()) { case ISD::BR_CC: return LowerBR_CC(Op, DAG); case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); @@ -236,7 +237,8 @@ SystemZTargetLowering::LowerFormalArguments(SDValue Chain, &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) + const { switch (CallConv) { default: @@ -254,7 +256,7 @@ SystemZTargetLowering::LowerCall(SDValue Chain, SDValue Callee, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { // SystemZ target does not yet support tail call optimization. isTailCall = false; @@ -280,7 +282,8 @@ SystemZTargetLowering::LowerCCCArguments(SDValue Chain, &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) + const { MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); @@ -293,7 +296,7 @@ SystemZTargetLowering::LowerCCCArguments(SDValue Chain, CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ); if (isVarArg) - llvm_report_error("Varargs not supported yet"); + report_fatal_error("Varargs not supported yet"); for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { SDValue ArgValue; @@ -371,7 +374,7 @@ SystemZTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { MachineFunction &MF = DAG.getMachineFunction(); @@ -505,7 +508,7 @@ SystemZTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { // Assign locations to each value returned by this call. SmallVector<CCValAssign, 16> RVLocs; @@ -547,7 +550,7 @@ SDValue SystemZTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, - DebugLoc dl, SelectionDAG &DAG) { + DebugLoc dl, SelectionDAG &DAG) const { // CCValAssign - represent the assignment of the return value to a location SmallVector<CCValAssign, 16> RVLocs; @@ -600,7 +603,7 @@ SystemZTargetLowering::LowerReturn(SDValue Chain, SDValue SystemZTargetLowering::EmitCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue &SystemZCC, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { // FIXME: Emit a test if RHS is zero bool isUnsigned = false; @@ -678,7 +681,7 @@ SDValue SystemZTargetLowering::EmitCmp(SDValue LHS, SDValue RHS, } -SDValue SystemZTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) { +SDValue SystemZTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = Op.getOperand(0); ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get(); SDValue LHS = Op.getOperand(2); @@ -692,7 +695,8 @@ SDValue SystemZTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) { Chain, Dest, SystemZCC, Flag); } -SDValue SystemZTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) { +SDValue SystemZTargetLowering::LowerSELECT_CC(SDValue Op, + SelectionDAG &DAG) const { SDValue LHS = Op.getOperand(0); SDValue RHS = Op.getOperand(1); SDValue TrueV = Op.getOperand(2); @@ -714,9 +718,9 @@ SDValue SystemZTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) { } SDValue SystemZTargetLowering::LowerGlobalAddress(SDValue Op, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); - GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); + const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); int64_t Offset = cast<GlobalAddressSDNode>(Op)->getOffset(); bool IsPic = getTargetMachine().getRelocationModel() == Reloc::PIC_; @@ -753,7 +757,7 @@ SDValue SystemZTargetLowering::LowerGlobalAddress(SDValue Op, // FIXME: PIC here SDValue SystemZTargetLowering::LowerJumpTable(SDValue Op, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), getPointerTy()); @@ -765,7 +769,7 @@ SDValue SystemZTargetLowering::LowerJumpTable(SDValue Op, // FIXME: PIC here // FIXME: This is just dirty hack. We need to lower cpool properly SDValue SystemZTargetLowering::LowerConstantPool(SDValue Op, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); @@ -795,8 +799,7 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const { MachineBasicBlock* SystemZTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *BB, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const { + MachineBasicBlock *BB) const { const SystemZInstrInfo &TII = *TM.getInstrInfo(); DebugLoc dl = MI->getDebugLoc(); assert((MI->getOpcode() == SystemZ::Select32 || @@ -827,10 +830,6 @@ SystemZTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, BuildMI(BB, dl, TII.getBrCond(CC)).addMBB(copy1MBB); F->insert(I, copy0MBB); F->insert(I, copy1MBB); - // Inform sdisel of the edge changes. - for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), - SE = BB->succ_end(); SI != SE; ++SI) - EM->insert(std::make_pair(*SI, copy1MBB)); // Update machine-CFG edges by transferring all successors of the current // block to the new block which will contain the Phi node for the select. copy1MBB->transferSuccessors(BB); diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h index 36ff994d03198..94bd906175176 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.h +++ b/lib/Target/SystemZ/SystemZISelLowering.h @@ -58,7 +58,7 @@ namespace llvm { explicit SystemZTargetLowering(SystemZTargetMachine &TM); /// LowerOperation - Provide custom lowering hooks for some operations. - virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG); + virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; /// getTargetNodeName - This method returns the name of a target specific /// DAG node. @@ -74,20 +74,19 @@ namespace llvm { TargetLowering::ConstraintType getConstraintType(const std::string &Constraint) const; - SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG); - SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG); - SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG); - SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG); - SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG); + SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; SDValue EmitCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue &SystemZCC, - SelectionDAG &DAG); + SelectionDAG &DAG) const; MachineBasicBlock* EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *BB, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const; + MachineBasicBlock *BB) const; /// isFPImmLegal - Returns true if the target can instruction select the /// specified FP immediate natively. If false, the legalizer will @@ -101,7 +100,7 @@ namespace llvm { const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; SDValue LowerCCCArguments(SDValue Chain, CallingConv::ID CallConv, @@ -109,33 +108,33 @@ namespace llvm { const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; SDValue LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, - DebugLoc dl, SelectionDAG &DAG); + DebugLoc dl, SelectionDAG &DAG) const; const SystemZSubtarget &Subtarget; const SystemZTargetMachine &TM; diff --git a/lib/Target/SystemZ/SystemZInstrBuilder.h b/lib/Target/SystemZ/SystemZInstrBuilder.h index b69d2f6ce9ff8..fa87061a7df3c 100644 --- a/lib/Target/SystemZ/SystemZInstrBuilder.h +++ b/lib/Target/SystemZ/SystemZInstrBuilder.h @@ -44,7 +44,7 @@ struct SystemZAddressMode { unsigned IndexReg; int32_t Disp; - GlobalValue *GV; + const GlobalValue *GV; SystemZAddressMode() : BaseType(RegBase), IndexReg(0), Disp(0) { Base.Reg = 0; diff --git a/lib/Target/SystemZ/SystemZMCAsmInfo.cpp b/lib/Target/SystemZ/SystemZMCAsmInfo.cpp index 1a0920609b155..f9ccc47b0b944 100644 --- a/lib/Target/SystemZ/SystemZMCAsmInfo.cpp +++ b/lib/Target/SystemZ/SystemZMCAsmInfo.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "SystemZMCAsmInfo.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCSectionELF.h" using namespace llvm; @@ -21,7 +22,8 @@ SystemZMCAsmInfo::SystemZMCAsmInfo(const Target &T, const StringRef &TT) { PCSymbol = "."; } -MCSection *SystemZMCAsmInfo::getNonexecutableStackSection(MCContext &Ctx) const{ - return MCSectionELF::Create(".note.GNU-stack", MCSectionELF::SHT_PROGBITS, - 0, SectionKind::getMetadata(), false, Ctx); +const MCSection *SystemZMCAsmInfo:: +getNonexecutableStackSection(MCContext &Ctx) const{ + return Ctx.getELFSection(".note.GNU-stack", MCSectionELF::SHT_PROGBITS, + 0, SectionKind::getMetadata(), false); } diff --git a/lib/Target/SystemZ/SystemZMCAsmInfo.h b/lib/Target/SystemZ/SystemZMCAsmInfo.h index 00cb99b34c053..87908f21f722a 100644 --- a/lib/Target/SystemZ/SystemZMCAsmInfo.h +++ b/lib/Target/SystemZ/SystemZMCAsmInfo.h @@ -22,7 +22,7 @@ namespace llvm { struct SystemZMCAsmInfo : public MCAsmInfo { explicit SystemZMCAsmInfo(const Target &T, const StringRef &TT); - virtual MCSection *getNonexecutableStackSection(MCContext &Ctx) const; + virtual const MCSection *getNonexecutableStackSection(MCContext &Ctx) const; }; } // namespace llvm diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/lib/Target/SystemZ/SystemZRegisterInfo.cpp index 302c418d1ec96..638fd17c9953e 100644 --- a/lib/Target/SystemZ/SystemZRegisterInfo.cpp +++ b/lib/Target/SystemZ/SystemZRegisterInfo.cpp @@ -77,7 +77,7 @@ BitVector SystemZRegisterInfo::getReservedRegs(const MachineFunction &MF) const /// allocas or if frame pointer elimination is disabled. bool SystemZRegisterInfo::hasFP(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); - return NoFramePointerElim || MFI->hasVarSizedObjects(); + return DisableFramePointerElim(MF) || MFI->hasVarSizedObjects(); } void SystemZRegisterInfo:: @@ -200,7 +200,7 @@ void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, uint64_t ThisVal = (Offset > Chunk) ? Chunk : Offset; MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), SystemZ::R15D) - .addReg(SystemZ::R15D).addImm((isSub ? -(int64_t)ThisVal : ThisVal)); + .addReg(SystemZ::R15D).addImm(isSub ? -ThisVal : ThisVal); // The PSW implicit def is dead. MI->getOperand(3).setIsDead(); Offset -= ThisVal; diff --git a/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp b/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp new file mode 100644 index 0000000000000..87c831b494790 --- /dev/null +++ b/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp @@ -0,0 +1,22 @@ +//===-- SystemZSelectionDAGInfo.cpp - SystemZ SelectionDAG Info -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the SystemZSelectionDAGInfo class. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "systemz-selectiondag-info" +#include "SystemZSelectionDAGInfo.h" +using namespace llvm; + +SystemZSelectionDAGInfo::SystemZSelectionDAGInfo() { +} + +SystemZSelectionDAGInfo::~SystemZSelectionDAGInfo() { +} diff --git a/lib/Target/SystemZ/SystemZSelectionDAGInfo.h b/lib/Target/SystemZ/SystemZSelectionDAGInfo.h new file mode 100644 index 0000000000000..5292de91dc0a6 --- /dev/null +++ b/lib/Target/SystemZ/SystemZSelectionDAGInfo.h @@ -0,0 +1,29 @@ +//===-- SystemZSelectionDAGInfo.h - SystemZ SelectionDAG Info ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the SystemZ subclass for TargetSelectionDAGInfo. +// +//===----------------------------------------------------------------------===// + +#ifndef SYSTEMZSELECTIONDAGINFO_H +#define SYSTEMZSELECTIONDAGINFO_H + +#include "llvm/Target/TargetSelectionDAGInfo.h" + +namespace llvm { + +class SystemZSelectionDAGInfo : public TargetSelectionDAGInfo { +public: + SystemZSelectionDAGInfo(); + ~SystemZSelectionDAGInfo(); +}; + +} + +#endif diff --git a/lib/Target/SystemZ/SystemZTargetMachine.h b/lib/Target/SystemZ/SystemZTargetMachine.h index 551aeb5a3e47b..d3357cc765741 100644 --- a/lib/Target/SystemZ/SystemZTargetMachine.h +++ b/lib/Target/SystemZ/SystemZTargetMachine.h @@ -49,8 +49,8 @@ public: return &InstrInfo.getRegisterInfo(); } - virtual SystemZTargetLowering *getTargetLowering() const { - return const_cast<SystemZTargetLowering*>(&TLInfo); + virtual const SystemZTargetLowering *getTargetLowering() const { + return &TLInfo; } virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel); diff --git a/lib/Target/TargetData.cpp b/lib/Target/TargetData.cpp index 643b3977461b3..5870d8a87004d 100644 --- a/lib/Target/TargetData.cpp +++ b/lib/Target/TargetData.cpp @@ -228,7 +228,7 @@ void TargetData::init(StringRef Desc) { /// @note This has to exist, because this is a pass, but it should never be /// used. TargetData::TargetData() : ImmutablePass(&ID) { - llvm_report_error("Bad TargetData ctor used. " + report_fatal_error("Bad TargetData ctor used. " "Tool did not specify a TargetData to use?"); } @@ -269,18 +269,8 @@ unsigned TargetData::getAlignmentInfo(AlignTypeEnum AlignType, return ABIInfo ? Alignments[i].ABIAlign : Alignments[i].PrefAlign; // The best match so far depends on what we're looking for. - if (AlignType == VECTOR_ALIGN && Alignments[i].AlignType == VECTOR_ALIGN) { - // If this is a specification for a smaller vector type, we will fall back - // to it. This happens because <128 x double> can be implemented in terms - // of 64 <2 x double>. - if (Alignments[i].TypeBitWidth < BitWidth) { - // Verify that we pick the biggest of the fallbacks. - if (BestMatchIdx == -1 || - Alignments[BestMatchIdx].TypeBitWidth < Alignments[i].TypeBitWidth) - BestMatchIdx = i; - } - } else if (AlignType == INTEGER_ALIGN && - Alignments[i].AlignType == INTEGER_ALIGN) { + if (AlignType == INTEGER_ALIGN && + Alignments[i].AlignType == INTEGER_ALIGN) { // The "best match" for integers is the smallest size that is larger than // the BitWidth requested. if (Alignments[i].TypeBitWidth > BitWidth && (BestMatchIdx == -1 || @@ -303,10 +293,15 @@ unsigned TargetData::getAlignmentInfo(AlignTypeEnum AlignType, } else { assert(AlignType == VECTOR_ALIGN && "Unknown alignment type!"); - // If we didn't find a vector size that is smaller or equal to this type, - // then we will end up scalarizing this to its element type. Just return - // the alignment of the element. - return getAlignment(cast<VectorType>(Ty)->getElementType(), ABIInfo); + // By default, use natural alignment for vector types. This is consistent + // with what clang and llvm-gcc do. + unsigned Align = getTypeAllocSize(cast<VectorType>(Ty)->getElementType()); + Align *= cast<VectorType>(Ty)->getNumElements(); + // If the alignment is not a power of 2, round up to the next power of 2. + // This happens for non-power-of-2 length vectors. + if (Align & (Align-1)) + Align = llvm::NextPowerOf2(Align); + return Align; } } @@ -630,8 +625,8 @@ uint64_t TargetData::getIndexedOffset(const Type *ptrTy, Value* const* Indices, Ty = cast<SequentialType>(Ty)->getElementType(); // Get the array index and the size of each array element. - int64_t arrayIdx = cast<ConstantInt>(Indices[CurIDX])->getSExtValue(); - Result += arrayIdx * (int64_t)getTypeAllocSize(Ty); + if (int64_t arrayIdx = cast<ConstantInt>(Indices[CurIDX])->getSExtValue()) + Result += arrayIdx * (int64_t)getTypeAllocSize(Ty); } } diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp index 44722b39e3114..b9372d04bb176 100644 --- a/lib/Target/TargetLoweringObjectFile.cpp +++ b/lib/Target/TargetLoweringObjectFile.cpp @@ -310,7 +310,7 @@ getExprForDwarfReference(const MCSymbol *Sym, Mangler *Mang, switch (Encoding & 0xF0) { default: - llvm_report_error("We do not support this DWARF encoding yet!"); + report_fatal_error("We do not support this DWARF encoding yet!"); case dwarf::DW_EH_PE_absptr: // Do nothing special return Res; diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp index 88871e3580ccc..ac67c91f1706b 100644 --- a/lib/Target/TargetMachine.cpp +++ b/lib/Target/TargetMachine.cpp @@ -11,6 +11,8 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" @@ -25,6 +27,7 @@ namespace llvm { bool LessPreciseFPMADOption; bool PrintMachineCode; bool NoFramePointerElim; + bool NoFramePointerElimNonLeaf; bool NoExcessFPPrecision; bool UnsafeFPMath; bool FiniteOnlyFPMathOption; @@ -33,8 +36,7 @@ namespace llvm { FloatABI::ABIType FloatABIType; bool NoImplicitFloat; bool NoZerosInBSS; - bool DwarfExceptionHandling; - bool SjLjExceptionHandling; + bool JITExceptionHandling; bool JITEmitDebugInfo; bool JITEmitDebugInfoToDisk; bool UnwindTablesMandatory; @@ -58,6 +60,11 @@ DisableFPElim("disable-fp-elim", cl::location(NoFramePointerElim), cl::init(false)); static cl::opt<bool, true> +DisableFPElimNonLeaf("disable-non-leaf-fp-elim", + cl::desc("Disable frame pointer elimination optimization for non-leaf funcs"), + cl::location(NoFramePointerElimNonLeaf), + cl::init(false)); +static cl::opt<bool, true> DisableExcessPrecision("disable-excess-fp-precision", cl::desc("Disable optimizations that may increase FP precision"), cl::location(NoExcessFPPrecision), @@ -107,14 +114,9 @@ DontPlaceZerosInBSS("nozero-initialized-in-bss", cl::location(NoZerosInBSS), cl::init(false)); static cl::opt<bool, true> -EnableDwarfExceptionHandling("enable-eh", - cl::desc("Emit DWARF exception handling (default if target supports)"), - cl::location(DwarfExceptionHandling), - cl::init(false)); -static cl::opt<bool, true> -EnableSjLjExceptionHandling("enable-sjlj-eh", - cl::desc("Emit SJLJ exception handling (default if target supports)"), - cl::location(SjLjExceptionHandling), +EnableJITExceptionHandling("jit-enable-eh", + cl::desc("Emit exception handling information"), + cl::location(JITExceptionHandling), cl::init(false)); // In debug builds, make this default to true. #ifdef NDEBUG @@ -197,7 +199,14 @@ EnableStrongPHIElim(cl::Hidden, "strong-phi-elim", cl::desc("Use strong PHI elimination."), cl::location(StrongPHIElim), cl::init(false)); - +static cl::opt<bool> +DataSections("fdata-sections", + cl::desc("Emit data into separate sections"), + cl::init(false)); +static cl::opt<bool> +FunctionSections("ffunction-sections", + cl::desc("Emit functions into separate sections"), + cl::init(false)); //--------------------------------------------------------------------------- // TargetMachine Class // @@ -244,7 +253,35 @@ void TargetMachine::setAsmVerbosityDefault(bool V) { AsmVerbosityDefault = V; } +bool TargetMachine::getFunctionSections() { + return FunctionSections; +} + +bool TargetMachine::getDataSections() { + return DataSections; +} + +void TargetMachine::setFunctionSections(bool V) { + FunctionSections = V; +} + +void TargetMachine::setDataSections(bool V) { + DataSections = V; +} + namespace llvm { + /// DisableFramePointerElim - This returns true if frame pointer elimination + /// optimization should be disabled for the given machine function. + bool DisableFramePointerElim(const MachineFunction &MF) { + if (NoFramePointerElim) + return true; + if (NoFramePointerElimNonLeaf) { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + return MFI->hasCalls(); + } + return false; + } + /// LessPreciseFPMAD - This flag return true when -enable-fp-mad option /// is specified on the command line. When this flag is off(default), the /// code generator is not allowed to generate mad (multiply add) if the diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index 47873d14a9114..da013505dabd7 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -44,7 +44,7 @@ private: bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc); X86Operand *ParseOperand(); - X86Operand *ParseMemOperand(); + X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc); bool ParseDirectiveWord(unsigned Size, SMLoc L); @@ -368,14 +368,22 @@ bool X86ATTAsmParser::ParseRegister(unsigned &RegNo, X86Operand *X86ATTAsmParser::ParseOperand() { switch (getLexer().getKind()) { default: - return ParseMemOperand(); + // Parse a memory operand with no segment register. + return ParseMemOperand(0, Parser.getTok().getLoc()); case AsmToken::Percent: { - // FIXME: if a segment register, this could either be just the seg reg, or - // the start of a memory operand. + // Read the register. unsigned RegNo; SMLoc Start, End; if (ParseRegister(RegNo, Start, End)) return 0; - return X86Operand::CreateReg(RegNo, Start, End); + + // If this is a segment register followed by a ':', then this is the start + // of a memory reference, otherwise this is a normal register reference. + if (getLexer().isNot(AsmToken::Colon)) + return X86Operand::CreateReg(RegNo, Start, End); + + + getParser().Lex(); // Eat the colon. + return ParseMemOperand(RegNo, Start); } case AsmToken::Dollar: { // $42 -> immediate. @@ -389,13 +397,10 @@ X86Operand *X86ATTAsmParser::ParseOperand() { } } -/// ParseMemOperand: segment: disp(basereg, indexreg, scale) -X86Operand *X86ATTAsmParser::ParseMemOperand() { - SMLoc MemStart = Parser.getTok().getLoc(); - - // FIXME: If SegReg ':' (e.g. %gs:), eat and remember. - unsigned SegReg = 0; - +/// ParseMemOperand: segment: disp(basereg, indexreg, scale). The '%ds:' prefix +/// has already been parsed if present. +X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) { + // We have to disambiguate a parenthesized expression "(4+5)" from the start // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The // only way to do this without lookahead is to eat the '(' and see what is diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp index f5923969361db..8b0ed1ce34ced 100644 --- a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp +++ b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp @@ -474,9 +474,6 @@ void X86AsmPrinter::EmitStartOfAsmFile(Module &M) { void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { if (Subtarget->isTargetDarwin()) { // All darwin targets use mach-o. - TargetLoweringObjectFileMachO &TLOFMacho = - static_cast<TargetLoweringObjectFileMachO &>(getObjFileLowering()); - MachineModuleInfoMachO &MMIMacho = MMI->getObjFileInfo<MachineModuleInfoMachO>(); @@ -486,11 +483,11 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { Stubs = MMIMacho.GetFnStubList(); if (!Stubs.empty()) { const MCSection *TheSection = - TLOFMacho.getMachOSection("__IMPORT", "__jump_table", - MCSectionMachO::S_SYMBOL_STUBS | - MCSectionMachO::S_ATTR_SELF_MODIFYING_CODE | - MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, - 5, SectionKind::getMetadata()); + OutContext.getMachOSection("__IMPORT", "__jump_table", + MCSectionMachO::S_SYMBOL_STUBS | + MCSectionMachO::S_ATTR_SELF_MODIFYING_CODE | + MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, + 5, SectionKind::getMetadata()); OutStreamer.SwitchSection(TheSection); for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { @@ -512,9 +509,9 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { Stubs = MMIMacho.GetGVStubList(); if (!Stubs.empty()) { const MCSection *TheSection = - TLOFMacho.getMachOSection("__IMPORT", "__pointers", - MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS, - SectionKind::getMetadata()); + OutContext.getMachOSection("__IMPORT", "__pointers", + MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS, + SectionKind::getMetadata()); OutStreamer.SwitchSection(TheSection); for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { @@ -587,8 +584,8 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { // Necessary for dllexport support std::vector<const MCSymbol*> DLLExportedFns, DLLExportedGlobals; - TargetLoweringObjectFileCOFF &TLOFCOFF = - static_cast<TargetLoweringObjectFileCOFF&>(getObjFileLowering()); + const TargetLoweringObjectFileCOFF &TLOFCOFF = + static_cast<const TargetLoweringObjectFileCOFF&>(getObjFileLowering()); for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I) if (I->hasDLLExportLinkage()) @@ -617,8 +614,8 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { } if (Subtarget->isTargetELF()) { - TargetLoweringObjectFileELF &TLOFELF = - static_cast<TargetLoweringObjectFileELF &>(getObjFileLowering()); + const TargetLoweringObjectFileELF &TLOFELF = + static_cast<const TargetLoweringObjectFileELF &>(getObjFileLowering()); MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>(); diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.h b/lib/Target/X86/AsmPrinter/X86AsmPrinter.h index ee59289dc5727..95984b22cdc30 100644 --- a/lib/Target/X86/AsmPrinter/X86AsmPrinter.h +++ b/lib/Target/X86/AsmPrinter/X86AsmPrinter.h @@ -80,6 +80,8 @@ class VISIBILITY_HIDDEN X86AsmPrinter : public AsmPrinter { bool runOnMachineFunction(MachineFunction &F); void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS); + + MachineLocation getDebugValueLocation(const MachineInstr *MI) const; }; } // end namespace llvm diff --git a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp index a290eb0f2a5fc..effc8ed38116e 100644 --- a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp +++ b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp @@ -169,6 +169,15 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO, Expr = MCBinaryExpr::CreateSub(Expr, MCSymbolRefExpr::Create(GetPICBaseSymbol(), Ctx), Ctx); + if (MO.isJTI() && AsmPrinter.MAI->hasSetDirective()) { + // If .set directive is supported, use it to reduce the number of + // relocations the assembler will generate for differences between + // local labels. This is only safe when the symbols are in the same + // section so we are restricting it to jumptable references. + MCSymbol *Label = Ctx.CreateTempSymbol(); + AsmPrinter.OutStreamer.EmitAssignment(Label, Expr); + Expr = MCSymbolRefExpr::Create(Label, Ctx); + } break; } @@ -328,61 +337,36 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { void X86AsmPrinter::PrintDebugValueComment(const MachineInstr *MI, raw_ostream &O) { - // FIXME: if this is implemented for another target before it goes - // away completely, the common part should be moved into AsmPrinter. - O << '\t' << MAI->getCommentString() << "DEBUG_VALUE: "; + // Only the target-dependent form of DBG_VALUE should get here. + // Referencing the offset and metadata as NOps-2 and NOps-1 is + // probably portable to other targets; frame pointer location is not. unsigned NOps = MI->getNumOperands(); + assert(NOps==7); + O << '\t' << MAI->getCommentString() << "DEBUG_VALUE: "; // cast away const; DIetc do not take const operands for some reason. - DIVariable V((MDNode*)(MI->getOperand(NOps-1).getMetadata())); + DIVariable V(const_cast<MDNode *>(MI->getOperand(NOps-1).getMetadata())); + if (V.getContext().isSubprogram()) + O << DISubprogram(V.getContext().getNode()).getDisplayName() << ":"; O << V.getName(); O << " <- "; - if (NOps==3) { - // Register or immediate value. Register 0 means undef. - assert(MI->getOperand(0).isReg() || - MI->getOperand(0).isImm() || - MI->getOperand(0).isFPImm()); - if (MI->getOperand(0).isReg() && MI->getOperand(0).getReg() == 0) { - // Suppress offset in this case, it is not meaningful. - O << "undef"; - OutStreamer.AddBlankLine(); - return; - } - - if (MI->getOperand(0).isFPImm()) { - // This is more naturally done in printOperand, but since the only use - // of such an operand is in this comment and that is temporary (and it's - // ugly), we prefer to keep this localized. - // The include of Type.h may be removable when this code is. - if (MI->getOperand(0).getFPImm()->getType()->isFloatTy() || - MI->getOperand(0).getFPImm()->getType()->isDoubleTy()) - MI->getOperand(0).print(O, &TM); - else { - // There is no good way to print long double. Convert a copy to - // double. Ah well, it's only a comment. - bool ignored; - APFloat APF = APFloat(MI->getOperand(0).getFPImm()->getValueAPF()); - APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, - &ignored); - O << "(long double) " << APF.convertToDouble(); - } - } else - printOperand(MI, 0, O); - } else { - if (MI->getOperand(0).isReg() && MI->getOperand(0).getReg() == 0) { - // Suppress offset in this case, it is not meaningful. - O << "undef"; - OutStreamer.AddBlankLine(); - return; - } - // Frame address. Currently handles register +- offset only. - assert(MI->getOperand(0).isReg() && MI->getOperand(3).isImm()); - O << '['; printOperand(MI, 0, O); O << '+'; printOperand(MI, 3, O); - O << ']'; - } + // Frame address. Currently handles register +- offset only. + assert(MI->getOperand(0).isReg() && MI->getOperand(3).isImm()); + O << '['; printOperand(MI, 0, O); O << '+'; printOperand(MI, 3, O); + O << ']'; O << "+"; printOperand(MI, NOps-2, O); } +MachineLocation +X86AsmPrinter::getDebugValueLocation(const MachineInstr *MI) const { + MachineLocation Location; + assert (MI->getNumOperands() == 7 && "Invalid no. of machine operands!"); + // Frame address. Currently handles register +- offset only. + assert(MI->getOperand(0).isReg() && MI->getOperand(3).isImm()); + Location.set(MI->getOperand(0).getReg(), MI->getOperand(3).getImm()); + return Location; +} + void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { X86MCInstLower MCInstLowering(OutContext, Mang, *this); @@ -395,7 +379,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { OutStreamer.EmitRawText(StringRef(OS.str())); } return; - + case X86::MOVPC32r: { MCInst TmpInst; // This is a pseudo op for a two instruction sequence with a label, which diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt index 22285f1932347..da6bb9140d05b 100644 --- a/lib/Target/X86/CMakeLists.txt +++ b/lib/Target/X86/CMakeLists.txt @@ -13,6 +13,7 @@ tablegen(X86GenDAGISel.inc -gen-dag-isel) tablegen(X86GenFastISel.inc -gen-fast-isel) tablegen(X86GenCallingConv.inc -gen-callingconv) tablegen(X86GenSubtarget.inc -gen-subtarget) +tablegen(X86GenEDInfo.inc -gen-enhanced-disassembly-info) set(sources SSEDomainFix.cpp @@ -33,6 +34,7 @@ set(sources X86TargetMachine.cpp X86TargetObjectFile.cpp X86FastISel.cpp + X86SelectionDAGInfo.cpp ) if( CMAKE_CL_64 ) diff --git a/lib/Target/X86/Disassembler/CMakeLists.txt b/lib/Target/X86/Disassembler/CMakeLists.txt index 2a83a9c26858c..9f91060179e6d 100644 --- a/lib/Target/X86/Disassembler/CMakeLists.txt +++ b/lib/Target/X86/Disassembler/CMakeLists.txt @@ -4,4 +4,11 @@ add_llvm_library(LLVMX86Disassembler X86Disassembler.cpp X86DisassemblerDecoder.c ) +# workaround for hanging compilation on MSVC9 +if( MSVC_VERSION EQUAL 1500 ) +set_property( + SOURCE X86Disassembler.cpp + PROPERTY COMPILE_FLAGS "/Od" + ) +endif() add_dependencies(LLVMX86Disassembler X86CodeGenTable_gen) diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp index 7328dc0ba8d76..62e7357b8f34b 100644 --- a/lib/Target/X86/Disassembler/X86Disassembler.cpp +++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp @@ -17,6 +17,7 @@ #include "X86Disassembler.h" #include "X86DisassemblerDecoder.h" +#include "llvm/MC/EDInstInfo.h" #include "llvm/MC/MCDisassembler.h" #include "llvm/MC/MCDisassembler.h" #include "llvm/MC/MCInst.h" @@ -26,6 +27,7 @@ #include "llvm/Support/raw_ostream.h" #include "X86GenRegisterNames.inc" +#include "X86GenEDInfo.inc" using namespace llvm; using namespace llvm::X86Disassembler; @@ -69,6 +71,10 @@ X86GenericDisassembler::X86GenericDisassembler(DisassemblerMode mode) : X86GenericDisassembler::~X86GenericDisassembler() { } +EDInstInfo *X86GenericDisassembler::getEDInfo() const { + return instInfoX86; +} + /// regionReader - a callback function that wraps the readByte method from /// MemoryObject. /// diff --git a/lib/Target/X86/Disassembler/X86Disassembler.h b/lib/Target/X86/Disassembler/X86Disassembler.h index 0e6e0b0e519f4..9c542628d7093 100644 --- a/lib/Target/X86/Disassembler/X86Disassembler.h +++ b/lib/Target/X86/Disassembler/X86Disassembler.h @@ -94,6 +94,8 @@ namespace llvm { class MCInst; class MemoryObject; class raw_ostream; + +struct EDInstInfo; namespace X86Disassembler { @@ -115,6 +117,9 @@ public: const MemoryObject ®ion, uint64_t address, raw_ostream &vStream) const; + + /// getEDInfo - See MCDisassembler. + EDInstInfo *getEDInfo() const; private: DisassemblerMode fMode; }; diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c index db694bc2f3c58..64f6b2dc7e8b1 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c @@ -1277,6 +1277,9 @@ static int readOperands(struct InternalInstruction* insn) { case ENCODING_IB: if (readImmediate(insn, 1)) return -1; + if (insn->spec->operands[index].type == TYPE_IMM3 && + insn->immediates[insn->numImmediatesConsumed - 1] > 7) + return -1; break; case ENCODING_IW: if (readImmediate(insn, 2)) @@ -1293,6 +1296,7 @@ static int readOperands(struct InternalInstruction* insn) { case ENCODING_Iv: if (readImmediate(insn, insn->immediateSize)) return -1; + break; case ENCODING_Ia: if (readImmediate(insn, insn->addressSize)) return -1; diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h index c213f89ebc812..4a7cd57f2e23c 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h @@ -236,6 +236,7 @@ struct ContextDecision { ENUM_ENTRY(TYPE_IMM16, "2-byte") \ ENUM_ENTRY(TYPE_IMM32, "4-byte") \ ENUM_ENTRY(TYPE_IMM64, "8-byte") \ + ENUM_ENTRY(TYPE_IMM3, "1-byte immediate operand between 0 and 7") \ ENUM_ENTRY(TYPE_RM8, "1-byte register or memory operand") \ ENUM_ENTRY(TYPE_RM16, "2-byte") \ ENUM_ENTRY(TYPE_RM32, "4-byte") \ diff --git a/lib/Target/X86/SSEDomainFix.cpp b/lib/Target/X86/SSEDomainFix.cpp index 4b546768b4f15..5e808450d1cd5 100644 --- a/lib/Target/X86/SSEDomainFix.cpp +++ b/lib/Target/X86/SSEDomainFix.cpp @@ -159,7 +159,7 @@ int SSEDomainFixPass::RegIndex(unsigned reg) { // We just need them to be consecutive, ordering doesn't matter. assert(X86::XMM9 == X86::XMM0+NumRegs-1 && "Unexpected sort"); reg -= X86::XMM0; - return reg < NumRegs ? reg : -1; + return reg < NumRegs ? (int) reg : -1; } DomainValue *SSEDomainFixPass::Alloc(int domain) { @@ -216,8 +216,15 @@ void SSEDomainFixPass::Force(int rx, unsigned domain) { if (LiveRegs && (dv = LiveRegs[rx])) { if (dv->isCollapsed()) dv->addDomain(domain); - else + else if (dv->hasDomain(domain)) Collapse(dv, domain); + else { + // This is an incompatible open DomainValue. Collapse it to whatever and force + // the new value into domain. This costs a domain crossing. + Collapse(dv, dv->getFirstDomain()); + assert(LiveRegs[rx] && "Not live after collapse?"); + LiveRegs[rx]->addDomain(domain); + } } else { // Set up basic collapsed DomainValue. SetLiveReg(rx, Alloc(domain)); @@ -263,7 +270,7 @@ bool SSEDomainFixPass::Merge(DomainValue *A, DomainValue *B) { void SSEDomainFixPass::enterBasicBlock() { // Try to coalesce live-out registers from predecessors. - for (MachineBasicBlock::const_livein_iterator i = MBB->livein_begin(), + for (MachineBasicBlock::livein_iterator i = MBB->livein_begin(), e = MBB->livein_end(); i != e; ++i) { int rx = RegIndex(*i); if (rx < 0) continue; @@ -281,8 +288,9 @@ void SSEDomainFixPass::enterBasicBlock() { // We have a live DomainValue from more than one predecessor. if (LiveRegs[rx]->isCollapsed()) { // We are already collapsed, but predecessor is not. Force him. - if (!pdv->isCollapsed()) - Collapse(pdv, LiveRegs[rx]->getFirstDomain()); + unsigned domain = LiveRegs[rx]->getFirstDomain(); + if (!pdv->isCollapsed() && pdv->hasDomain(domain)) + Collapse(pdv, domain); continue; } @@ -290,7 +298,7 @@ void SSEDomainFixPass::enterBasicBlock() { if (!pdv->isCollapsed()) Merge(LiveRegs[rx], pdv); else - Collapse(LiveRegs[rx], pdv->getFirstDomain()); + Force(rx, pdv->getFirstDomain()); } } } diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h index 9be38a4b56a98..22e89a57f63d0 100644 --- a/lib/Target/X86/X86.h +++ b/lib/Target/X86/X86.h @@ -69,6 +69,12 @@ TargetAsmBackend *createX86_64AsmBackend(const Target &, const std::string &); /// FunctionPass *createEmitX86CodeToMemory(); +/// createX86MaxStackAlignmentHeuristicPass - This function returns a pass +/// which determines whether the frame pointer register should be +/// reserved in case dynamic stack alignment is later required. +/// +FunctionPass *createX86MaxStackAlignmentHeuristicPass(); + extern Target TheX86_32Target, TheX86_64Target; } // End llvm namespace diff --git a/lib/Target/X86/X86AsmBackend.cpp b/lib/Target/X86/X86AsmBackend.cpp index 8e2928c3b7134..ba9c1d0588e3d 100644 --- a/lib/Target/X86/X86AsmBackend.cpp +++ b/lib/Target/X86/X86AsmBackend.cpp @@ -111,7 +111,7 @@ void X86AsmBackend::RelaxInstruction(const MCInstFragment *IF, SmallString<256> Tmp; raw_svector_ostream OS(Tmp); IF->getInst().dump_pretty(OS); - llvm_report_error("unexpected instruction to relax: " + OS.str()); + report_fatal_error("unexpected instruction to relax: " + OS.str()); } Res = IF->getInst(); diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp index 6638e110f4f4e..8f026049cb848 100644 --- a/lib/Target/X86/X86CodeEmitter.cpp +++ b/lib/Target/X86/X86CodeEmitter.cpp @@ -79,7 +79,7 @@ namespace { private: void emitPCRelativeBlockAddress(MachineBasicBlock *MBB); - void emitGlobalAddress(GlobalValue *GV, unsigned Reloc, + void emitGlobalAddress(const GlobalValue *GV, unsigned Reloc, intptr_t Disp = 0, intptr_t PCAdj = 0, bool Indirect = false); void emitExternalSymbolAddress(const char *ES, unsigned Reloc); @@ -163,7 +163,8 @@ void Emitter<CodeEmitter>::emitPCRelativeBlockAddress(MachineBasicBlock *MBB) { /// this is part of a "take the address of a global" instruction. /// template<class CodeEmitter> -void Emitter<CodeEmitter>::emitGlobalAddress(GlobalValue *GV, unsigned Reloc, +void Emitter<CodeEmitter>::emitGlobalAddress(const GlobalValue *GV, + unsigned Reloc, intptr_t Disp /* = 0 */, intptr_t PCAdj /* = 0 */, bool Indirect /* = false */) { @@ -174,9 +175,10 @@ void Emitter<CodeEmitter>::emitGlobalAddress(GlobalValue *GV, unsigned Reloc, RelocCST = PCAdj; MachineRelocation MR = Indirect ? MachineRelocation::getIndirectSymbol(MCE.getCurrentPCOffset(), Reloc, - GV, RelocCST, false) + const_cast<GlobalValue *>(GV), + RelocCST, false) : MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc, - GV, RelocCST, false); + const_cast<GlobalValue *>(GV), RelocCST, false); MCE.addRelocation(MR); // The relocated value will be added to the displacement if (Reloc == X86::reloc_absolute_dword) @@ -378,6 +380,16 @@ void Emitter<CodeEmitter>::emitMemModRMByte(const MachineInstr &MI, const MachineOperand &IndexReg = MI.getOperand(Op+2); unsigned BaseReg = Base.getReg(); + + // Handle %rip relative addressing. + if (BaseReg == X86::RIP || + (Is64BitMode && DispForReloc)) { // [disp32+RIP] in X86-64 mode + assert(IndexReg.getReg() == 0 && Is64BitMode && + "Invalid rip-relative address"); + MCE.emitByte(ModRMByte(0, RegOpcodeField, 5)); + emitDisplacementField(DispForReloc, DispVal, PCAdj, true); + return; + } // Indicate that the displacement will use an pcrel or absolute reference // by default. MCEs able to resolve addresses on-the-fly use pcrel by default @@ -445,7 +457,7 @@ void Emitter<CodeEmitter>::emitMemModRMByte(const MachineInstr &MI, // Emit the normal disp32 encoding. MCE.emitByte(ModRMByte(2, RegOpcodeField, 4)); ForceDisp32 = true; - } else if (DispVal == 0 && getX86RegNum(BaseReg) != N86::EBP) { + } else if (DispVal == 0 && BaseRegNo != N86::EBP) { // Emit no displacement ModR/M byte MCE.emitByte(ModRMByte(0, RegOpcodeField, 4)); } else if (isDisp8(DispVal)) { @@ -600,7 +612,7 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI, // We allow inline assembler nodes with empty bodies - they can // implicitly define registers, which is ok for JIT. if (MI.getOperand(0).getSymbolName()[0]) - llvm_report_error("JIT does not support inline asm!"); + report_fatal_error("JIT does not support inline asm!"); break; case TargetOpcode::DBG_LABEL: case TargetOpcode::GC_LABEL: diff --git a/lib/Target/X86/X86ELFWriterInfo.cpp b/lib/Target/X86/X86ELFWriterInfo.cpp index 1597d2b31d222..f84995dcf3424 100644 --- a/lib/Target/X86/X86ELFWriterInfo.cpp +++ b/lib/Target/X86/X86ELFWriterInfo.cpp @@ -26,7 +26,6 @@ using namespace llvm; X86ELFWriterInfo::X86ELFWriterInfo(TargetMachine &TM) : TargetELFWriterInfo(TM) { - bool is64Bit = TM.getTargetData()->getPointerSizeInBits() == 64; EMachine = is64Bit ? EM_X86_64 : EM_386; } diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 7849b51accc45..ff9208c2c1e92 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -15,7 +15,6 @@ #include "X86.h" #include "X86InstrBuilder.h" -#include "X86ISelLowering.h" #include "X86RegisterInfo.h" #include "X86Subtarget.h" #include "X86TargetMachine.h" @@ -56,12 +55,13 @@ public: explicit X86FastISel(MachineFunction &mf, DenseMap<const Value *, unsigned> &vm, DenseMap<const BasicBlock *, MachineBasicBlock *> &bm, - DenseMap<const AllocaInst *, int> &am + DenseMap<const AllocaInst *, int> &am, + std::vector<std::pair<MachineInstr*, unsigned> > &pn #ifndef NDEBUG - , SmallSet<Instruction*, 8> &cil + , SmallSet<const Instruction *, 8> &cil #endif ) - : FastISel(mf, vm, bm, am + : FastISel(mf, vm, bm, am, pn #ifndef NDEBUG , cil #endif @@ -72,16 +72,16 @@ public: X86ScalarSSEf32 = Subtarget->hasSSE1(); } - virtual bool TargetSelectInstruction(Instruction *I); + virtual bool TargetSelectInstruction(const Instruction *I); #include "X86GenFastISel.inc" private: - bool X86FastEmitCompare(Value *LHS, Value *RHS, EVT VT); + bool X86FastEmitCompare(const Value *LHS, const Value *RHS, EVT VT); bool X86FastEmitLoad(EVT VT, const X86AddressMode &AM, unsigned &RR); - bool X86FastEmitStore(EVT VT, Value *Val, + bool X86FastEmitStore(EVT VT, const Value *Val, const X86AddressMode &AM); bool X86FastEmitStore(EVT VT, unsigned Val, const X86AddressMode &AM); @@ -89,32 +89,32 @@ private: bool X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT, unsigned &ResultReg); - bool X86SelectAddress(Value *V, X86AddressMode &AM); - bool X86SelectCallAddress(Value *V, X86AddressMode &AM); + bool X86SelectAddress(const Value *V, X86AddressMode &AM); + bool X86SelectCallAddress(const Value *V, X86AddressMode &AM); - bool X86SelectLoad(Instruction *I); + bool X86SelectLoad(const Instruction *I); - bool X86SelectStore(Instruction *I); + bool X86SelectStore(const Instruction *I); - bool X86SelectCmp(Instruction *I); + bool X86SelectCmp(const Instruction *I); - bool X86SelectZExt(Instruction *I); + bool X86SelectZExt(const Instruction *I); - bool X86SelectBranch(Instruction *I); + bool X86SelectBranch(const Instruction *I); - bool X86SelectShift(Instruction *I); + bool X86SelectShift(const Instruction *I); - bool X86SelectSelect(Instruction *I); + bool X86SelectSelect(const Instruction *I); - bool X86SelectTrunc(Instruction *I); + bool X86SelectTrunc(const Instruction *I); - bool X86SelectFPExt(Instruction *I); - bool X86SelectFPTrunc(Instruction *I); + bool X86SelectFPExt(const Instruction *I); + bool X86SelectFPTrunc(const Instruction *I); - bool X86SelectExtractValue(Instruction *I); + bool X86SelectExtractValue(const Instruction *I); - bool X86VisitIntrinsicCall(IntrinsicInst &I); - bool X86SelectCall(Instruction *I); + bool X86VisitIntrinsicCall(const IntrinsicInst &I); + bool X86SelectCall(const Instruction *I); CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool isTailCall = false); @@ -125,9 +125,9 @@ private: return static_cast<const X86TargetMachine *>(&TM); } - unsigned TargetMaterializeConstant(Constant *C); + unsigned TargetMaterializeConstant(const Constant *C); - unsigned TargetMaterializeAlloca(AllocaInst *C); + unsigned TargetMaterializeAlloca(const AllocaInst *C); /// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is /// computed in an SSE register, not on the X87 floating point stack. @@ -280,14 +280,14 @@ X86FastISel::X86FastEmitStore(EVT VT, unsigned Val, return true; } -bool X86FastISel::X86FastEmitStore(EVT VT, Value *Val, +bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val, const X86AddressMode &AM) { // Handle 'null' like i32/i64 0. if (isa<ConstantPointerNull>(Val)) Val = Constant::getNullValue(TD.getIntPtrType(Val->getContext())); // If this is a store of a simple constant, fold the constant into the store. - if (ConstantInt *CI = dyn_cast<ConstantInt>(Val)) { + if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val)) { unsigned Opc = 0; bool Signed = true; switch (VT.getSimpleVT().SimpleTy) { @@ -305,7 +305,7 @@ bool X86FastISel::X86FastEmitStore(EVT VT, Value *Val, if (Opc) { addFullAddress(BuildMI(MBB, DL, TII.get(Opc)), AM) - .addImm(Signed ? CI->getSExtValue() : + .addImm(Signed ? (uint64_t) CI->getSExtValue() : CI->getZExtValue()); return true; } @@ -335,13 +335,13 @@ bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, /// X86SelectAddress - Attempt to fill in an address from the given value. /// -bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM) { - User *U = NULL; +bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) { + const User *U = NULL; unsigned Opcode = Instruction::UserOp1; - if (Instruction *I = dyn_cast<Instruction>(V)) { + if (const Instruction *I = dyn_cast<Instruction>(V)) { Opcode = I->getOpcode(); U = I; - } else if (ConstantExpr *C = dyn_cast<ConstantExpr>(V)) { + } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) { Opcode = C->getOpcode(); U = C; } @@ -378,7 +378,7 @@ bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM) { case Instruction::Add: { // Adds of constants are common and easy enough. - if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) { + if (const ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) { uint64_t Disp = (int32_t)AM.Disp + (uint64_t)CI->getSExtValue(); // They have to fit in the 32-bit signed displacement field though. if (isInt<32>(Disp)) { @@ -399,16 +399,16 @@ bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM) { gep_type_iterator GTI = gep_type_begin(U); // Iterate through the indices, folding what we can. Constants can be // folded, and one dynamic index can be handled, if the scale is supported. - for (User::op_iterator i = U->op_begin() + 1, e = U->op_end(); + for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end(); i != e; ++i, ++GTI) { - Value *Op = *i; + const Value *Op = *i; if (const StructType *STy = dyn_cast<StructType>(*GTI)) { const StructLayout *SL = TD.getStructLayout(STy); unsigned Idx = cast<ConstantInt>(Op)->getZExtValue(); Disp += SL->getElementOffset(Idx); } else { uint64_t S = TD.getTypeAllocSize(GTI.getIndexedType()); - if (ConstantInt *CI = dyn_cast<ConstantInt>(Op)) { + if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) { // Constant-offset addressing. Disp += CI->getSExtValue() * S; } else if (IndexReg == 0 && @@ -446,7 +446,7 @@ bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM) { } // Handle constant address. - if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) { + if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) { // Can't handle alternate code models yet. if (TM.getCodeModel() != CodeModel::Small) return false; @@ -457,7 +457,7 @@ bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM) { return false; // Can't handle TLS yet. - if (GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) + if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) if (GVar->isThreadLocal()) return false; @@ -544,13 +544,13 @@ bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM) { /// X86SelectCallAddress - Attempt to fill in an address from the given value. /// -bool X86FastISel::X86SelectCallAddress(Value *V, X86AddressMode &AM) { - User *U = NULL; +bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) { + const User *U = NULL; unsigned Opcode = Instruction::UserOp1; - if (Instruction *I = dyn_cast<Instruction>(V)) { + if (const Instruction *I = dyn_cast<Instruction>(V)) { Opcode = I->getOpcode(); U = I; - } else if (ConstantExpr *C = dyn_cast<ConstantExpr>(V)) { + } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) { Opcode = C->getOpcode(); U = C; } @@ -575,7 +575,7 @@ bool X86FastISel::X86SelectCallAddress(Value *V, X86AddressMode &AM) { } // Handle constant address. - if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) { + if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) { // Can't handle alternate code models yet. if (TM.getCodeModel() != CodeModel::Small) return false; @@ -586,7 +586,7 @@ bool X86FastISel::X86SelectCallAddress(Value *V, X86AddressMode &AM) { return false; // Can't handle TLS or DLLImport. - if (GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) + if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) if (GVar->isThreadLocal() || GVar->hasDLLImportLinkage()) return false; @@ -627,7 +627,7 @@ bool X86FastISel::X86SelectCallAddress(Value *V, X86AddressMode &AM) { /// X86SelectStore - Select and emit code to implement store instructions. -bool X86FastISel::X86SelectStore(Instruction* I) { +bool X86FastISel::X86SelectStore(const Instruction *I) { EVT VT; if (!isTypeLegal(I->getOperand(0)->getType(), VT, /*AllowI1=*/true)) return false; @@ -641,7 +641,7 @@ bool X86FastISel::X86SelectStore(Instruction* I) { /// X86SelectLoad - Select and emit code to implement load instructions. /// -bool X86FastISel::X86SelectLoad(Instruction *I) { +bool X86FastISel::X86SelectLoad(const Instruction *I) { EVT VT; if (!isTypeLegal(I->getType(), VT, /*AllowI1=*/true)) return false; @@ -673,7 +673,7 @@ static unsigned X86ChooseCmpOpcode(EVT VT) { /// X86ChooseCmpImmediateOpcode - If we have a comparison with RHS as the RHS /// of the comparison, return an opcode that works for the compare (e.g. /// CMP32ri) otherwise return 0. -static unsigned X86ChooseCmpImmediateOpcode(EVT VT, ConstantInt *RHSC) { +static unsigned X86ChooseCmpImmediateOpcode(EVT VT, const ConstantInt *RHSC) { switch (VT.getSimpleVT().SimpleTy) { // Otherwise, we can't fold the immediate into this comparison. default: return 0; @@ -689,7 +689,8 @@ static unsigned X86ChooseCmpImmediateOpcode(EVT VT, ConstantInt *RHSC) { } } -bool X86FastISel::X86FastEmitCompare(Value *Op0, Value *Op1, EVT VT) { +bool X86FastISel::X86FastEmitCompare(const Value *Op0, const Value *Op1, + EVT VT) { unsigned Op0Reg = getRegForValue(Op0); if (Op0Reg == 0) return false; @@ -700,7 +701,7 @@ bool X86FastISel::X86FastEmitCompare(Value *Op0, Value *Op1, EVT VT) { // We have two options: compare with register or immediate. If the RHS of // the compare is an immediate that we can fold into this compare, use // CMPri, otherwise use CMPrr. - if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) { + if (const ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) { if (unsigned CompareImmOpc = X86ChooseCmpImmediateOpcode(VT, Op1C)) { BuildMI(MBB, DL, TII.get(CompareImmOpc)).addReg(Op0Reg) .addImm(Op1C->getSExtValue()); @@ -718,8 +719,8 @@ bool X86FastISel::X86FastEmitCompare(Value *Op0, Value *Op1, EVT VT) { return true; } -bool X86FastISel::X86SelectCmp(Instruction *I) { - CmpInst *CI = cast<CmpInst>(I); +bool X86FastISel::X86SelectCmp(const Instruction *I) { + const CmpInst *CI = cast<CmpInst>(I); EVT VT; if (!isTypeLegal(I->getOperand(0)->getType(), VT)) @@ -781,7 +782,7 @@ bool X86FastISel::X86SelectCmp(Instruction *I) { return false; } - Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1); + const Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1); if (SwapArgs) std::swap(Op0, Op1); @@ -794,7 +795,7 @@ bool X86FastISel::X86SelectCmp(Instruction *I) { return true; } -bool X86FastISel::X86SelectZExt(Instruction *I) { +bool X86FastISel::X86SelectZExt(const Instruction *I) { // Handle zero-extension from i1 to i8, which is common. if (I->getType()->isIntegerTy(8) && I->getOperand(0)->getType()->isIntegerTy(1)) { @@ -811,15 +812,15 @@ bool X86FastISel::X86SelectZExt(Instruction *I) { } -bool X86FastISel::X86SelectBranch(Instruction *I) { +bool X86FastISel::X86SelectBranch(const Instruction *I) { // Unconditional branches are selected by tablegen-generated code. // Handle a conditional branch. - BranchInst *BI = cast<BranchInst>(I); + const BranchInst *BI = cast<BranchInst>(I); MachineBasicBlock *TrueMBB = MBBMap[BI->getSuccessor(0)]; MachineBasicBlock *FalseMBB = MBBMap[BI->getSuccessor(1)]; // Fold the common case of a conditional branch with a comparison. - if (CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) { + if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) { if (CI->hasOneUse()) { EVT VT = TLI.getValueType(CI->getOperand(0)->getType()); @@ -866,7 +867,7 @@ bool X86FastISel::X86SelectBranch(Instruction *I) { return false; } - Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1); + const Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1); if (SwapArgs) std::swap(Op0, Op1); @@ -901,7 +902,8 @@ bool X86FastISel::X86SelectBranch(Instruction *I) { // looking for the SETO/SETB instruction. If an instruction modifies the // EFLAGS register before we reach the SETO/SETB instruction, then we can't // convert the branch into a JO/JB instruction. - if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(EI->getAggregateOperand())){ + if (const IntrinsicInst *CI = + dyn_cast<IntrinsicInst>(EI->getAggregateOperand())){ if (CI->getIntrinsicID() == Intrinsic::sadd_with_overflow || CI->getIntrinsicID() == Intrinsic::uadd_with_overflow) { const MachineInstr *SetMI = 0; @@ -956,7 +958,7 @@ bool X86FastISel::X86SelectBranch(Instruction *I) { return true; } -bool X86FastISel::X86SelectShift(Instruction *I) { +bool X86FastISel::X86SelectShift(const Instruction *I) { unsigned CReg = 0, OpReg = 0, OpImm = 0; const TargetRegisterClass *RC = NULL; if (I->getType()->isIntegerTy(8)) { @@ -1007,7 +1009,7 @@ bool X86FastISel::X86SelectShift(Instruction *I) { if (Op0Reg == 0) return false; // Fold immediate in shl(x,3). - if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) { + if (const ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) { unsigned ResultReg = createResultReg(RC); BuildMI(MBB, DL, TII.get(OpImm), ResultReg).addReg(Op0Reg).addImm(CI->getZExtValue() & 0xff); @@ -1032,7 +1034,7 @@ bool X86FastISel::X86SelectShift(Instruction *I) { return true; } -bool X86FastISel::X86SelectSelect(Instruction *I) { +bool X86FastISel::X86SelectSelect(const Instruction *I) { EVT VT = TLI.getValueType(I->getType(), /*HandleUnknown=*/true); if (VT == MVT::Other || !isTypeLegal(I->getType(), VT)) return false; @@ -1066,11 +1068,11 @@ bool X86FastISel::X86SelectSelect(Instruction *I) { return true; } -bool X86FastISel::X86SelectFPExt(Instruction *I) { +bool X86FastISel::X86SelectFPExt(const Instruction *I) { // fpext from float to double. if (Subtarget->hasSSE2() && I->getType()->isDoubleTy()) { - Value *V = I->getOperand(0); + const Value *V = I->getOperand(0); if (V->getType()->isFloatTy()) { unsigned OpReg = getRegForValue(V); if (OpReg == 0) return false; @@ -1084,10 +1086,10 @@ bool X86FastISel::X86SelectFPExt(Instruction *I) { return false; } -bool X86FastISel::X86SelectFPTrunc(Instruction *I) { +bool X86FastISel::X86SelectFPTrunc(const Instruction *I) { if (Subtarget->hasSSE2()) { if (I->getType()->isFloatTy()) { - Value *V = I->getOperand(0); + const Value *V = I->getOperand(0); if (V->getType()->isDoubleTy()) { unsigned OpReg = getRegForValue(V); if (OpReg == 0) return false; @@ -1102,7 +1104,7 @@ bool X86FastISel::X86SelectFPTrunc(Instruction *I) { return false; } -bool X86FastISel::X86SelectTrunc(Instruction *I) { +bool X86FastISel::X86SelectTrunc(const Instruction *I) { if (Subtarget->is64Bit()) // All other cases should be handled by the tblgen generated code. return false; @@ -1139,11 +1141,11 @@ bool X86FastISel::X86SelectTrunc(Instruction *I) { return true; } -bool X86FastISel::X86SelectExtractValue(Instruction *I) { - ExtractValueInst *EI = cast<ExtractValueInst>(I); - Value *Agg = EI->getAggregateOperand(); +bool X86FastISel::X86SelectExtractValue(const Instruction *I) { + const ExtractValueInst *EI = cast<ExtractValueInst>(I); + const Value *Agg = EI->getAggregateOperand(); - if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(Agg)) { + if (const IntrinsicInst *CI = dyn_cast<IntrinsicInst>(Agg)) { switch (CI->getIntrinsicID()) { default: break; case Intrinsic::sadd_with_overflow: @@ -1160,7 +1162,7 @@ bool X86FastISel::X86SelectExtractValue(Instruction *I) { return false; } -bool X86FastISel::X86VisitIntrinsicCall(IntrinsicInst &I) { +bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) { // FIXME: Handle more intrinsics. switch (I.getIntrinsicID()) { default: return false; @@ -1168,8 +1170,8 @@ bool X86FastISel::X86VisitIntrinsicCall(IntrinsicInst &I) { // Emit code inline code to store the stack guard onto the stack. EVT PtrTy = TLI.getPointerTy(); - Value *Op1 = I.getOperand(1); // The guard's value. - AllocaInst *Slot = cast<AllocaInst>(I.getOperand(2)); + const Value *Op1 = I.getOperand(1); // The guard's value. + const AllocaInst *Slot = cast<AllocaInst>(I.getOperand(2)); // Grab the frame index. X86AddressMode AM; @@ -1204,7 +1206,7 @@ bool X86FastISel::X86VisitIntrinsicCall(IntrinsicInst &I) { return true; } case Intrinsic::dbg_declare: { - DbgDeclareInst *DI = cast<DbgDeclareInst>(&I); + const DbgDeclareInst *DI = cast<DbgDeclareInst>(&I); X86AddressMode AM; assert(DI->getAddress() && "Null address should be checked earlier!"); if (!X86SelectAddress(DI->getAddress(), AM)) @@ -1235,8 +1237,8 @@ bool X86FastISel::X86VisitIntrinsicCall(IntrinsicInst &I) { if (!isTypeLegal(RetTy, VT)) return false; - Value *Op1 = I.getOperand(1); - Value *Op2 = I.getOperand(2); + const Value *Op1 = I.getOperand(1); + const Value *Op2 = I.getOperand(2); unsigned Reg1 = getRegForValue(Op1); unsigned Reg2 = getRegForValue(Op2); @@ -1277,20 +1279,20 @@ bool X86FastISel::X86VisitIntrinsicCall(IntrinsicInst &I) { } } -bool X86FastISel::X86SelectCall(Instruction *I) { - CallInst *CI = cast<CallInst>(I); - Value *Callee = I->getOperand(0); +bool X86FastISel::X86SelectCall(const Instruction *I) { + const CallInst *CI = cast<CallInst>(I); + const Value *Callee = I->getOperand(0); // Can't handle inline asm yet. if (isa<InlineAsm>(Callee)) return false; // Handle intrinsic calls. - if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI)) + if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI)) return X86VisitIntrinsicCall(*II); // Handle only C and fastcc calling conventions for now. - CallSite CS(CI); + ImmutableCallSite CS(CI); CallingConv::ID CC = CS.getCallingConv(); if (CC != CallingConv::C && CC != CallingConv::Fast && @@ -1322,7 +1324,7 @@ bool X86FastISel::X86SelectCall(Instruction *I) { if (!X86SelectCallAddress(Callee, CalleeAM)) return false; unsigned CalleeOp = 0; - GlobalValue *GV = 0; + const GlobalValue *GV = 0; if (CalleeAM.GV != 0) { GV = CalleeAM.GV; } else if (CalleeAM.Base.Reg != 0) { @@ -1338,7 +1340,7 @@ bool X86FastISel::X86SelectCall(Instruction *I) { } // Deal with call operands first. - SmallVector<Value*, 8> ArgVals; + SmallVector<const Value *, 8> ArgVals; SmallVector<unsigned, 8> Args; SmallVector<EVT, 8> ArgVTs; SmallVector<ISD::ArgFlagsTy, 8> ArgFlags; @@ -1346,7 +1348,7 @@ bool X86FastISel::X86SelectCall(Instruction *I) { ArgVals.reserve(CS.arg_size()); ArgVTs.reserve(CS.arg_size()); ArgFlags.reserve(CS.arg_size()); - for (CallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end(); + for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end(); i != e; ++i) { unsigned Arg = getRegForValue(*i); if (Arg == 0) @@ -1454,7 +1456,7 @@ bool X86FastISel::X86SelectCall(Instruction *I) { X86AddressMode AM; AM.Base.Reg = StackPtr; AM.Disp = LocMemOffset; - Value *ArgVal = ArgVals[VA.getValNo()]; + const Value *ArgVal = ArgVals[VA.getValNo()]; // If this is a really simple value, emit this with the Value* version of // X86FastEmitStore. If it isn't simple, we don't want to do this, as it @@ -1585,7 +1587,7 @@ bool X86FastISel::X86SelectCall(Instruction *I) { bool -X86FastISel::TargetSelectInstruction(Instruction *I) { +X86FastISel::TargetSelectInstruction(const Instruction *I) { switch (I->getOpcode()) { default: break; case Instruction::Load: @@ -1633,7 +1635,7 @@ X86FastISel::TargetSelectInstruction(Instruction *I) { return false; } -unsigned X86FastISel::TargetMaterializeConstant(Constant *C) { +unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) { EVT VT; if (!isTypeLegal(C->getType(), VT)) return false; @@ -1728,7 +1730,7 @@ unsigned X86FastISel::TargetMaterializeConstant(Constant *C) { return ResultReg; } -unsigned X86FastISel::TargetMaterializeAlloca(AllocaInst *C) { +unsigned X86FastISel::TargetMaterializeAlloca(const AllocaInst *C) { // Fail on dynamic allocas. At this point, getRegForValue has already // checked its CSE maps, so if we're here trying to handle a dynamic // alloca, we're not going to succeed. X86SelectAddress has a @@ -1753,12 +1755,13 @@ namespace llvm { llvm::FastISel *X86::createFastISel(MachineFunction &mf, DenseMap<const Value *, unsigned> &vm, DenseMap<const BasicBlock *, MachineBasicBlock *> &bm, - DenseMap<const AllocaInst *, int> &am + DenseMap<const AllocaInst *, int> &am, + std::vector<std::pair<MachineInstr*, unsigned> > &pn #ifndef NDEBUG - , SmallSet<Instruction*, 8> &cil + , SmallSet<const Instruction *, 8> &cil #endif ) { - return new X86FastISel(mf, vm, bm, am + return new X86FastISel(mf, vm, bm, am, pn #ifndef NDEBUG , cil #endif diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp index 6d6fe771d9a90..93460ef308cc5 100644 --- a/lib/Target/X86/X86FloatingPoint.cpp +++ b/lib/Target/X86/X86FloatingPoint.cpp @@ -1088,8 +1088,7 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) { // 'f' constraint. These should be turned into the current ST(x) register // in the machine instr. Also, any kills should be explicitly popped after // the inline asm. - unsigned Kills[7]; - unsigned NumKills = 0; + unsigned Kills = 0; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &Op = MI->getOperand(i); if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6) @@ -1103,7 +1102,7 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) { // asm. We just remember it for now, and pop them all off at the end in // a batch. if (Op.isKill()) - Kills[NumKills++] = FPReg; + Kills |= 1U << FPReg; } // If this asm kills any FP registers (is the last use of them) we must @@ -1114,9 +1113,11 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) { // Note: this might be a non-optimal pop sequence. We might be able to do // better by trying to pop in stack order or something. MachineBasicBlock::iterator InsertPt = MI; - while (NumKills) - freeStackSlotAfter(InsertPt, Kills[--NumKills]); - + while (Kills) { + unsigned FPReg = CountTrailingZeros_32(Kills); + freeStackSlotAfter(InsertPt, FPReg); + Kills &= ~(1U << FPReg); + } // Don't delete the inline asm! return; } diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index da45dac807f6d..fd8bb1e860f76 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -15,12 +15,10 @@ #define DEBUG_TYPE "x86-isel" #include "X86.h" #include "X86InstrBuilder.h" -#include "X86ISelLowering.h" #include "X86MachineFunctionInfo.h" #include "X86RegisterInfo.h" #include "X86Subtarget.h" #include "X86TargetMachine.h" -#include "llvm/GlobalValue.h" #include "llvm/Instructions.h" #include "llvm/Intrinsics.h" #include "llvm/Support/CFG.h" @@ -57,25 +55,24 @@ namespace { FrameIndexBase } BaseType; - struct { // This is really a union, discriminated by BaseType! - SDValue Reg; - int FrameIndex; - } Base; + // This is really a union, discriminated by BaseType! + SDValue Base_Reg; + int Base_FrameIndex; unsigned Scale; SDValue IndexReg; int32_t Disp; SDValue Segment; - GlobalValue *GV; - Constant *CP; - BlockAddress *BlockAddr; + const GlobalValue *GV; + const Constant *CP; + const BlockAddress *BlockAddr; const char *ES; int JT; unsigned Align; // CP alignment. unsigned char SymbolFlags; // X86II::MO_* X86ISelAddressMode() - : BaseType(RegBase), Scale(1), IndexReg(), Disp(0), + : BaseType(RegBase), Base_FrameIndex(0), Scale(1), IndexReg(), Disp(0), Segment(), GV(0), CP(0), BlockAddr(0), ES(0), JT(-1), Align(0), SymbolFlags(X86II::MO_NO_FLAG) { } @@ -85,7 +82,7 @@ namespace { } bool hasBaseOrIndexReg() const { - return IndexReg.getNode() != 0 || Base.Reg.getNode() != 0; + return IndexReg.getNode() != 0 || Base_Reg.getNode() != 0; } /// isRIPRelative - Return true if this addressing mode is already RIP @@ -93,24 +90,24 @@ namespace { bool isRIPRelative() const { if (BaseType != RegBase) return false; if (RegisterSDNode *RegNode = - dyn_cast_or_null<RegisterSDNode>(Base.Reg.getNode())) + dyn_cast_or_null<RegisterSDNode>(Base_Reg.getNode())) return RegNode->getReg() == X86::RIP; return false; } void setBaseReg(SDValue Reg) { BaseType = RegBase; - Base.Reg = Reg; + Base_Reg = Reg; } void dump() { dbgs() << "X86ISelAddressMode " << this << '\n'; - dbgs() << "Base.Reg "; - if (Base.Reg.getNode() != 0) - Base.Reg.getNode()->dump(); + dbgs() << "Base_Reg "; + if (Base_Reg.getNode() != 0) + Base_Reg.getNode()->dump(); else dbgs() << "nul"; - dbgs() << " Base.FrameIndex " << Base.FrameIndex << '\n' + dbgs() << " Base.FrameIndex " << Base_FrameIndex << '\n' << " Scale" << Scale << '\n' << "IndexReg "; if (IndexReg.getNode() != 0) @@ -162,7 +159,7 @@ namespace { class X86DAGToDAGISel : public SelectionDAGISel { /// X86Lowering - This object fully describes how to lower LLVM code to an /// X86-specific SelectionDAG. - X86TargetLowering &X86Lowering; + const X86TargetLowering &X86Lowering; /// Subtarget - Keep a pointer to the X86Subtarget around so that we can /// make the right decision when generating code for different targets. @@ -183,7 +180,7 @@ namespace { return "X86 DAG->DAG Instruction Selection"; } - virtual void EmitFunctionEntryCode(Function &Fn, MachineFunction &MF); + virtual void EmitFunctionEntryCode(); virtual bool IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const; @@ -235,8 +232,8 @@ namespace { SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment) { Base = (AM.BaseType == X86ISelAddressMode::FrameIndexBase) ? - CurDAG->getTargetFrameIndex(AM.Base.FrameIndex, TLI.getPointerTy()) : - AM.Base.Reg; + CurDAG->getTargetFrameIndex(AM.Base_FrameIndex, TLI.getPointerTy()) : + AM.Base_Reg; Scale = getI8Imm(AM.Scale); Index = AM.IndexReg; // These are 32-bit even in 64-bit mode since RIP relative offset @@ -546,11 +543,11 @@ void X86DAGToDAGISel::EmitSpecialCodeForMain(MachineBasicBlock *BB, TII->get(X86::CALLpcrel32)).addExternalSymbol("__main"); } -void X86DAGToDAGISel::EmitFunctionEntryCode(Function &Fn, MachineFunction &MF) { +void X86DAGToDAGISel::EmitFunctionEntryCode() { // If this is main, emit special code for main. - MachineBasicBlock *BB = MF.begin(); - if (Fn.hasExternalLinkage() && Fn.getName() == "main") - EmitSpecialCodeForMain(BB, MF.getFrameInfo()); + if (const Function *Fn = MF->getFunction()) + if (Fn->hasExternalLinkage() && Fn->getName() == "main") + EmitSpecialCodeForMain(MF->begin(), MF->getFrameInfo()); } @@ -675,8 +672,8 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM) { // a smaller encoding and avoids a scaled-index. if (AM.Scale == 2 && AM.BaseType == X86ISelAddressMode::RegBase && - AM.Base.Reg.getNode() == 0) { - AM.Base.Reg = AM.IndexReg; + AM.Base_Reg.getNode() == 0) { + AM.Base_Reg = AM.IndexReg; AM.Scale = 1; } @@ -687,15 +684,34 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM) { Subtarget->is64Bit() && AM.Scale == 1 && AM.BaseType == X86ISelAddressMode::RegBase && - AM.Base.Reg.getNode() == 0 && + AM.Base_Reg.getNode() == 0 && AM.IndexReg.getNode() == 0 && AM.SymbolFlags == X86II::MO_NO_FLAG && AM.hasSymbolicDisplacement()) - AM.Base.Reg = CurDAG->getRegister(X86::RIP, MVT::i64); + AM.Base_Reg = CurDAG->getRegister(X86::RIP, MVT::i64); return false; } +/// isLogicallyAddWithConstant - Return true if this node is semantically an +/// add of a value with a constantint. +static bool isLogicallyAddWithConstant(SDValue V, SelectionDAG *CurDAG) { + // Check for (add x, Cst) + if (V->getOpcode() == ISD::ADD) + return isa<ConstantSDNode>(V->getOperand(1)); + + // Check for (or x, Cst), where Cst & x == 0. + if (V->getOpcode() != ISD::OR || + !isa<ConstantSDNode>(V->getOperand(1))) + return false; + + // Handle "X | C" as "X + C" iff X is known to have C bits clear. + ConstantSDNode *CN = cast<ConstantSDNode>(V->getOperand(1)); + + // Check to see if the LHS & C is zero. + return CurDAG->MaskedValueIsZero(V->getOperand(0), CN->getAPIntValue()); +} + bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, X86ISelListener &DeadNodes, unsigned Depth) { @@ -762,9 +778,9 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, case ISD::FrameIndex: if (AM.BaseType == X86ISelAddressMode::RegBase - && AM.Base.Reg.getNode() == 0) { + && AM.Base_Reg.getNode() == 0) { AM.BaseType = X86ISelAddressMode::FrameIndexBase; - AM.Base.FrameIndex = cast<FrameIndexSDNode>(N)->getIndex(); + AM.Base_FrameIndex = cast<FrameIndexSDNode>(N)->getIndex(); return false; } break; @@ -787,8 +803,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, // Okay, we know that we have a scale by now. However, if the scaled // value is an add of something and a constant, we can fold the // constant into the disp field here. - if (ShVal.getNode()->getOpcode() == ISD::ADD && - isa<ConstantSDNode>(ShVal.getNode()->getOperand(1))) { + if (isLogicallyAddWithConstant(ShVal, CurDAG)) { AM.IndexReg = ShVal.getNode()->getOperand(0); ConstantSDNode *AddVal = cast<ConstantSDNode>(ShVal.getNode()->getOperand(1)); @@ -816,7 +831,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, case X86ISD::MUL_IMM: // X*[3,5,9] -> X+X*[2,4,8] if (AM.BaseType == X86ISelAddressMode::RegBase && - AM.Base.Reg.getNode() == 0 && + AM.Base_Reg.getNode() == 0 && AM.IndexReg.getNode() == 0) { if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getNode()->getOperand(1))) @@ -847,7 +862,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, Reg = N.getNode()->getOperand(0); } - AM.IndexReg = AM.Base.Reg = Reg; + AM.IndexReg = AM.Base_Reg = Reg; return false; } } @@ -892,8 +907,8 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, // If the base is a register with multiple uses, this // transformation may save a mov. if ((AM.BaseType == X86ISelAddressMode::RegBase && - AM.Base.Reg.getNode() && - !AM.Base.Reg.getNode()->hasOneUse()) || + AM.Base_Reg.getNode() && + !AM.Base_Reg.getNode()->hasOneUse()) || AM.BaseType == X86ISelAddressMode::FrameIndexBase) --Cost; // If the folded LHS was interesting, this transformation saves @@ -963,9 +978,9 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, // see if we can just put each operand into a register and fold at least // the add. if (AM.BaseType == X86ISelAddressMode::RegBase && - !AM.Base.Reg.getNode() && + !AM.Base_Reg.getNode() && !AM.IndexReg.getNode()) { - AM.Base.Reg = N.getNode()->getOperand(0); + AM.Base_Reg = N.getNode()->getOperand(0); AM.IndexReg = N.getNode()->getOperand(1); AM.Scale = 1; return false; @@ -975,14 +990,11 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, case ISD::OR: // Handle "X | C" as "X + C" iff X is known to have C bits clear. - if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) { + if (isLogicallyAddWithConstant(N, CurDAG)) { X86ISelAddressMode Backup = AM; + ConstantSDNode *CN = cast<ConstantSDNode>(N.getOperand(1)); uint64_t Offset = CN->getSExtValue(); - // Check to see if the LHS & C is zero. - if (!CurDAG->MaskedValueIsZero(N.getOperand(0), CN->getAPIntValue())) - break; - // Start with the LHS as an addr mode. if (!MatchAddressRecursively(N.getOperand(0), AM, DeadNodes, Depth+1) && // Address could not have picked a GV address for the displacement. @@ -1127,7 +1139,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, /// specified addressing mode without any further recursion. bool X86DAGToDAGISel::MatchAddressBase(SDValue N, X86ISelAddressMode &AM) { // Is the base register already occupied? - if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base.Reg.getNode()) { + if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base_Reg.getNode()) { // If so, check to see if the scale index register is set. if (AM.IndexReg.getNode() == 0) { AM.IndexReg = N; @@ -1141,7 +1153,7 @@ bool X86DAGToDAGISel::MatchAddressBase(SDValue N, X86ISelAddressMode &AM) { // Default, generate it as a register. AM.BaseType = X86ISelAddressMode::RegBase; - AM.Base.Reg = N; + AM.Base_Reg = N; return false; } @@ -1157,8 +1169,8 @@ bool X86DAGToDAGISel::SelectAddr(SDNode *Op, SDValue N, SDValue &Base, EVT VT = N.getValueType(); if (AM.BaseType == X86ISelAddressMode::RegBase) { - if (!AM.Base.Reg.getNode()) - AM.Base.Reg = CurDAG->getRegister(0, VT); + if (!AM.Base_Reg.getNode()) + AM.Base_Reg = CurDAG->getRegister(0, VT); } if (!AM.IndexReg.getNode()) @@ -1185,7 +1197,7 @@ bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Root, if (ISD::isNON_EXTLoad(PatternNodeWithChain.getNode()) && PatternNodeWithChain.hasOneUse() && IsProfitableToFold(N.getOperand(0), N.getNode(), Root) && - IsLegalToFold(N.getOperand(0), N.getNode(), Root)) { + IsLegalToFold(N.getOperand(0), N.getNode(), Root, OptLevel)) { LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain); if (!SelectAddr(Root, LD->getBasePtr(), Base, Scale, Index, Disp,Segment)) return false; @@ -1202,7 +1214,7 @@ bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Root, ISD::isNON_EXTLoad(N.getOperand(0).getOperand(0).getNode()) && N.getOperand(0).getOperand(0).hasOneUse() && IsProfitableToFold(N.getOperand(0), N.getNode(), Root) && - IsLegalToFold(N.getOperand(0), N.getNode(), Root)) { + IsLegalToFold(N.getOperand(0), N.getNode(), Root, OptLevel)) { // Okay, this is a zero extending load. Fold it. LoadSDNode *LD = cast<LoadSDNode>(N.getOperand(0).getOperand(0)); if (!SelectAddr(Root, LD->getBasePtr(), Base, Scale, Index, Disp, Segment)) @@ -1234,10 +1246,10 @@ bool X86DAGToDAGISel::SelectLEAAddr(SDNode *Op, SDValue N, EVT VT = N.getValueType(); unsigned Complexity = 0; if (AM.BaseType == X86ISelAddressMode::RegBase) - if (AM.Base.Reg.getNode()) + if (AM.Base_Reg.getNode()) Complexity = 1; else - AM.Base.Reg = CurDAG->getRegister(0, VT); + AM.Base_Reg = CurDAG->getRegister(0, VT); else if (AM.BaseType == X86ISelAddressMode::FrameIndexBase) Complexity = 4; @@ -1265,7 +1277,7 @@ bool X86DAGToDAGISel::SelectLEAAddr(SDNode *Op, SDValue N, Complexity += 2; } - if (AM.Disp && (AM.Base.Reg.getNode() || AM.IndexReg.getNode())) + if (AM.Disp && (AM.Base_Reg.getNode() || AM.IndexReg.getNode())) Complexity++; // If it isn't worth using an LEA, reject it. @@ -1287,7 +1299,7 @@ bool X86DAGToDAGISel::SelectTLSADDRAddr(SDNode *Op, SDValue N, SDValue &Base, X86ISelAddressMode AM; AM.GV = GA->getGlobal(); AM.Disp += GA->getOffset(); - AM.Base.Reg = CurDAG->getRegister(0, N.getValueType()); + AM.Base_Reg = CurDAG->getRegister(0, N.getValueType()); AM.SymbolFlags = GA->getTargetFlags(); if (N.getValueType() == MVT::i32) { @@ -1309,7 +1321,7 @@ bool X86DAGToDAGISel::TryFoldLoad(SDNode *P, SDValue N, SDValue &Segment) { if (!ISD::isNON_EXTLoad(N.getNode()) || !IsProfitableToFold(N, P, P) || - !IsLegalToFold(N, P, P)) + !IsLegalToFold(N, P, P, OptLevel)) return false; return SelectAddr(P, N.getOperand(1), Base, Scale, Index, Disp, Segment); @@ -1841,6 +1853,9 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { // Look for (X86cmp (and $op, $imm), 0) and see if we can convert it to // use a smaller encoding. + if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse()) + // Look past the truncate if CMP is the only use of it. + N0 = N0.getOperand(0); if (N0.getNode()->getOpcode() == ISD::AND && N0.getNode()->hasOneUse() && N0.getValueType() != MVT::i8 && X86::isZeroNode(N1)) { diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 64702f1d5c9f5..6ce9ab711bbc3 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -57,14 +57,6 @@ STATISTIC(NumTailCalls, "Number of tail calls"); static cl::opt<bool> DisableMMX("disable-mmx", cl::Hidden, cl::desc("Disable use of MMX")); -// Disable16Bit - 16-bit operations typically have a larger encoding than -// corresponding 32-bit instructions, and 16-bit code is slow on some -// processors. This is an experimental flag to disable 16-bit operations -// (which forces them to be Legalized to 32-bit operations). -static cl::opt<bool> -Disable16Bit("disable-16bit", cl::Hidden, - cl::desc("Disable use of 16-bit instructions")); - // Forward declarations. static SDValue getMOVL(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1, SDValue V2); @@ -120,8 +112,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) // Set up the register classes. addRegisterClass(MVT::i8, X86::GR8RegisterClass); - if (!Disable16Bit) - addRegisterClass(MVT::i16, X86::GR16RegisterClass); + addRegisterClass(MVT::i16, X86::GR16RegisterClass); addRegisterClass(MVT::i32, X86::GR32RegisterClass); if (Subtarget->is64Bit()) addRegisterClass(MVT::i64, X86::GR64RegisterClass); @@ -130,11 +121,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) // We don't accept any truncstore of integer registers. setTruncStoreAction(MVT::i64, MVT::i32, Expand); - if (!Disable16Bit) - setTruncStoreAction(MVT::i64, MVT::i16, Expand); + setTruncStoreAction(MVT::i64, MVT::i16, Expand); setTruncStoreAction(MVT::i64, MVT::i8 , Expand); - if (!Disable16Bit) - setTruncStoreAction(MVT::i32, MVT::i16, Expand); + setTruncStoreAction(MVT::i32, MVT::i16, Expand); setTruncStoreAction(MVT::i32, MVT::i8 , Expand); setTruncStoreAction(MVT::i16, MVT::i8, Expand); @@ -285,13 +274,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::CTTZ , MVT::i8 , Custom); setOperationAction(ISD::CTLZ , MVT::i8 , Custom); setOperationAction(ISD::CTPOP , MVT::i16 , Expand); - if (Disable16Bit) { - setOperationAction(ISD::CTTZ , MVT::i16 , Expand); - setOperationAction(ISD::CTLZ , MVT::i16 , Expand); - } else { - setOperationAction(ISD::CTTZ , MVT::i16 , Custom); - setOperationAction(ISD::CTLZ , MVT::i16 , Custom); - } + setOperationAction(ISD::CTTZ , MVT::i16 , Custom); + setOperationAction(ISD::CTLZ , MVT::i16 , Custom); setOperationAction(ISD::CTPOP , MVT::i32 , Expand); setOperationAction(ISD::CTTZ , MVT::i32 , Custom); setOperationAction(ISD::CTLZ , MVT::i32 , Custom); @@ -308,19 +292,13 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::SELECT , MVT::i1 , Promote); // X86 wants to expand cmov itself. setOperationAction(ISD::SELECT , MVT::i8 , Custom); - if (Disable16Bit) - setOperationAction(ISD::SELECT , MVT::i16 , Expand); - else - setOperationAction(ISD::SELECT , MVT::i16 , Custom); + setOperationAction(ISD::SELECT , MVT::i16 , Custom); setOperationAction(ISD::SELECT , MVT::i32 , Custom); setOperationAction(ISD::SELECT , MVT::f32 , Custom); setOperationAction(ISD::SELECT , MVT::f64 , Custom); setOperationAction(ISD::SELECT , MVT::f80 , Custom); setOperationAction(ISD::SETCC , MVT::i8 , Custom); - if (Disable16Bit) - setOperationAction(ISD::SETCC , MVT::i16 , Expand); - else - setOperationAction(ISD::SETCC , MVT::i16 , Custom); + setOperationAction(ISD::SETCC , MVT::i16 , Custom); setOperationAction(ISD::SETCC , MVT::i32 , Custom); setOperationAction(ISD::SETCC , MVT::f32 , Custom); setOperationAction(ISD::SETCC , MVT::f64 , Custom); @@ -623,11 +601,11 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) // FIXME: In order to prevent SSE instructions being expanded to MMX ones // with -msoft-float, disable use of MMX as well. if (!UseSoftFloat && !DisableMMX && Subtarget->hasMMX()) { - addRegisterClass(MVT::v8i8, X86::VR64RegisterClass); - addRegisterClass(MVT::v4i16, X86::VR64RegisterClass); - addRegisterClass(MVT::v2i32, X86::VR64RegisterClass); - addRegisterClass(MVT::v2f32, X86::VR64RegisterClass); - addRegisterClass(MVT::v1i64, X86::VR64RegisterClass); + addRegisterClass(MVT::v8i8, X86::VR64RegisterClass, false); + addRegisterClass(MVT::v4i16, X86::VR64RegisterClass, false); + addRegisterClass(MVT::v2i32, X86::VR64RegisterClass, false); + addRegisterClass(MVT::v2f32, X86::VR64RegisterClass, false); + addRegisterClass(MVT::v1i64, X86::VR64RegisterClass, false); setOperationAction(ISD::ADD, MVT::v8i8, Legal); setOperationAction(ISD::ADD, MVT::v4i16, Legal); @@ -1067,23 +1045,27 @@ unsigned X86TargetLowering::getByValTypeAlignment(const Type *Ty) const { } /// getOptimalMemOpType - Returns the target specific optimal type for load -/// and store operations as a result of memset, memcpy, and memmove lowering. -/// If DstAlign is zero that means it's safe to destination alignment can -/// satisfy any constraint. Similarly if SrcAlign is zero it means there -/// isn't a need to check it against alignment requirement, probably because -/// the source does not need to be loaded. If 'NonScalarIntSafe' is true, that -/// means it's safe to return a non-scalar-integer type, e.g. constant string -/// source or loaded from memory. It returns EVT::Other if SelectionDAG should -/// be responsible for determining it. +/// and store operations as a result of memset, memcpy, and memmove +/// lowering. If DstAlign is zero that means it's safe to destination +/// alignment can satisfy any constraint. Similarly if SrcAlign is zero it +/// means there isn't a need to check it against alignment requirement, +/// probably because the source does not need to be loaded. If +/// 'NonScalarIntSafe' is true, that means it's safe to return a +/// non-scalar-integer type, e.g. empty string source, constant, or loaded +/// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is +/// constant so it does not need to be loaded. +/// It returns EVT::Other if the type should be determined using generic +/// target-independent logic. EVT X86TargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool NonScalarIntSafe, - SelectionDAG &DAG) const { + bool MemcpyStrSrc, + MachineFunction &MF) const { // FIXME: This turns off use of xmm stores for memset/memcpy on targets like // linux. This is because the stack realignment code can't handle certain // cases like PR2962. This should be removed when PR2962 is fixed. - const Function *F = DAG.getMachineFunction().getFunction(); + const Function *F = MF.getFunction(); if (NonScalarIntSafe && !F->hasFnAttr(Attribute::NoImplicitFloat)) { if (Size >= 16 && @@ -1095,11 +1077,14 @@ X86TargetLowering::getOptimalMemOpType(uint64_t Size, return MVT::v4i32; if (Subtarget->hasSSE1()) return MVT::v4f32; - } else if (Size >= 8 && + } else if (!MemcpyStrSrc && Size >= 8 && !Subtarget->is64Bit() && Subtarget->getStackAlignment() >= 8 && - Subtarget->hasSSE2()) + Subtarget->hasSSE2()) { + // Do not use f64 to lower memcpy if source is string constant. It's + // better to use i32 to avoid the loads. return MVT::f64; + } } if (Subtarget->is64Bit() && Size >= 8) return MVT::i64; @@ -1182,7 +1167,7 @@ bool X86TargetLowering::CanLowerReturn(CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<EVT> &OutTys, const SmallVectorImpl<ISD::ArgFlagsTy> &ArgsFlags, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { SmallVector<CCValAssign, 16> RVLocs; CCState CCInfo(CallConv, isVarArg, getTargetMachine(), RVLocs, *DAG.getContext()); @@ -1193,7 +1178,9 @@ SDValue X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, - DebugLoc dl, SelectionDAG &DAG) { + DebugLoc dl, SelectionDAG &DAG) const { + MachineFunction &MF = DAG.getMachineFunction(); + X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); SmallVector<CCValAssign, 16> RVLocs; CCState CCInfo(CallConv, isVarArg, getTargetMachine(), @@ -1211,7 +1198,8 @@ X86TargetLowering::LowerReturn(SDValue Chain, SmallVector<SDValue, 6> RetOps; RetOps.push_back(Chain); // Operand #0 = Chain (updated below) // Operand #1 = Bytes To Pop - RetOps.push_back(DAG.getTargetConstant(getBytesToPopOnReturn(), MVT::i16)); + RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(), + MVT::i16)); // Copy the result values into the output registers. for (unsigned i = 0; i != RVLocs.size(); ++i) { @@ -1287,7 +1275,7 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { // Assign locations to each value returned by this call. SmallVector<CCValAssign, 16> RVLocs; @@ -1304,7 +1292,7 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, // If this is x86-64, and we disabled SSE, we can't return FP values if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) && ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasSSE1())) { - llvm_report_error("SSE register return with SSE disabled"); + report_fatal_error("SSE register return with SSE disabled"); } // If this is a call to a function that returns an fp value on the floating @@ -1384,7 +1372,8 @@ ArgsAreStructReturn(const SmallVectorImpl<ISD::InputArg> &Ins) { /// IsCalleePop - Determines whether the callee is required to pop its /// own arguments. Callee pop is necessary to support tail calls. -bool X86TargetLowering::IsCalleePop(bool IsVarArg, CallingConv::ID CallingConv){ +bool X86TargetLowering::IsCalleePop(bool IsVarArg, + CallingConv::ID CallingConv) const { if (IsVarArg) return false; @@ -1457,7 +1446,7 @@ X86TargetLowering::LowerMemArgument(SDValue Chain, DebugLoc dl, SelectionDAG &DAG, const CCValAssign &VA, MachineFrameInfo *MFI, - unsigned i) { + unsigned i) const { // Create the nodes corresponding to a load from this parameter slot. ISD::ArgFlagsTy Flags = Ins[i].Flags; bool AlwaysUseMutable = FuncIsMadeTailCallSafe(CallConv); @@ -1496,7 +1485,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) + const { MachineFunction &MF = DAG.getMachineFunction(); X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); @@ -1607,7 +1597,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, // the start of the first vararg value... for expansion of llvm.va_start. if (isVarArg) { if (Is64Bit || CallConv != CallingConv::X86_FastCall) { - VarArgsFrameIndex = MFI->CreateFixedObject(1, StackSize, true, false); + FuncInfo->setVarArgsFrameIndex(MFI->CreateFixedObject(1, StackSize, + true, false)); } if (Is64Bit) { unsigned TotalNumIntRegs = 0, TotalNumXMMRegs = 0; @@ -1655,16 +1646,17 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, // For X86-64, if there are vararg parameters that are passed via // registers, then we must store them to their spots on the stack so they // may be loaded by deferencing the result of va_next. - VarArgsGPOffset = NumIntRegs * 8; - VarArgsFPOffset = TotalNumIntRegs * 8 + NumXMMRegs * 16; - RegSaveFrameIndex = MFI->CreateStackObject(TotalNumIntRegs * 8 + - TotalNumXMMRegs * 16, 16, - false); + FuncInfo->setVarArgsGPOffset(NumIntRegs * 8); + FuncInfo->setVarArgsFPOffset(TotalNumIntRegs * 8 + NumXMMRegs * 16); + FuncInfo->setRegSaveFrameIndex( + MFI->CreateStackObject(TotalNumIntRegs * 8 + TotalNumXMMRegs * 16, 16, + false)); // Store the integer parameter registers. SmallVector<SDValue, 8> MemOps; - SDValue RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy()); - unsigned Offset = VarArgsGPOffset; + SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), + getPointerTy()); + unsigned Offset = FuncInfo->getVarArgsGPOffset(); for (; NumIntRegs != TotalNumIntRegs; ++NumIntRegs) { SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), RSFIN, DAG.getIntPtrConstant(Offset)); @@ -1673,7 +1665,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, - PseudoSourceValue::getFixedStack(RegSaveFrameIndex), + PseudoSourceValue::getFixedStack( + FuncInfo->getRegSaveFrameIndex()), Offset, false, false, 0); MemOps.push_back(Store); Offset += 8; @@ -1688,8 +1681,10 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, SDValue ALVal = DAG.getCopyFromReg(DAG.getEntryNode(), dl, AL, MVT::i8); SaveXMMOps.push_back(ALVal); - SaveXMMOps.push_back(DAG.getIntPtrConstant(RegSaveFrameIndex)); - SaveXMMOps.push_back(DAG.getIntPtrConstant(VarArgsFPOffset)); + SaveXMMOps.push_back(DAG.getIntPtrConstant( + FuncInfo->getRegSaveFrameIndex())); + SaveXMMOps.push_back(DAG.getIntPtrConstant( + FuncInfo->getVarArgsFPOffset())); for (; NumXMMRegs != TotalNumXMMRegs; ++NumXMMRegs) { unsigned VReg = MF.addLiveIn(XMMArgRegs[NumXMMRegs], @@ -1710,22 +1705,22 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, // Some CCs need callee pop. if (IsCalleePop(isVarArg, CallConv)) { - BytesToPopOnReturn = StackSize; // Callee pops everything. + FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything. } else { - BytesToPopOnReturn = 0; // Callee pops nothing. + FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing. // If this is an sret function, the return should pop the hidden pointer. if (!Is64Bit && !IsTailCallConvention(CallConv) && ArgsAreStructReturn(Ins)) - BytesToPopOnReturn = 4; + FuncInfo->setBytesToPopOnReturn(4); } if (!Is64Bit) { - RegSaveFrameIndex = 0xAAAAAAA; // RegSaveFrameIndex is X86-64 only. + // RegSaveFrameIndex is X86-64 only. + FuncInfo->setRegSaveFrameIndex(0xAAAAAAA); if (CallConv == CallingConv::X86_FastCall) - VarArgsFrameIndex = 0xAAAAAAA; // fastcc functions can't have varargs. + // fastcc functions can't have varargs. + FuncInfo->setVarArgsFrameIndex(0xAAAAAAA); } - FuncInfo->setBytesToPopOnReturn(BytesToPopOnReturn); - return Chain; } @@ -1734,7 +1729,7 @@ X86TargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg, DebugLoc dl, SelectionDAG &DAG, const CCValAssign &VA, - ISD::ArgFlagsTy Flags) { + ISD::ArgFlagsTy Flags) const { const unsigned FirstStackArgOffset = (Subtarget->isTargetWin64() ? 32 : 0); unsigned LocMemOffset = FirstStackArgOffset + VA.getLocMemOffset(); SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset); @@ -1753,7 +1748,7 @@ SDValue X86TargetLowering::EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr, SDValue Chain, bool IsTailCall, bool Is64Bit, - int FPDiff, DebugLoc dl) { + int FPDiff, DebugLoc dl) const { // Adjust the Return address stack slot. EVT VT = getPointerTy(); OutRetAddr = getReturnAddressFrameIndex(DAG); @@ -1790,7 +1785,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { MachineFunction &MF = DAG.getMachineFunction(); bool Is64Bit = Subtarget->is64Bit(); bool IsStructRet = CallIsStructReturn(Outs); @@ -2060,7 +2055,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, // We should use extra load for direct calls to dllimported functions in // non-JIT mode. - GlobalValue *GV = G->getGlobal(); + const GlobalValue *GV = G->getGlobal(); if (!GV->hasDLLImportLinkage()) { unsigned char OpFlags = 0; @@ -2219,8 +2214,9 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, /// GetAlignedArgumentStackSize - Make the stack size align e.g 16n + 12 aligned /// for a 16 byte align requirement. -unsigned X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize, - SelectionDAG& DAG) { +unsigned +X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize, + SelectionDAG& DAG) const { MachineFunction &MF = DAG.getMachineFunction(); const TargetMachine &TM = MF.getTarget(); const TargetFrameInfo &TFI = *TM.getFrameInfo(); @@ -2308,9 +2304,11 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, // If -tailcallopt is specified, make fastcc functions tail-callable. const MachineFunction &MF = DAG.getMachineFunction(); const Function *CallerF = DAG.getMachineFunction().getFunction(); + CallingConv::ID CallerCC = CallerF->getCallingConv(); + bool CCMatch = CallerCC == CalleeCC; + if (GuaranteedTailCallOpt) { - if (IsTailCallConvention(CalleeCC) && - CallerF->getCallingConv() == CalleeCC) + if (IsTailCallConvention(CalleeCC) && CCMatch) return true; return false; } @@ -2348,13 +2346,43 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, CCState CCInfo(CalleeCC, false, getTargetMachine(), RVLocs, *DAG.getContext()); CCInfo.AnalyzeCallResult(Ins, RetCC_X86); - for (unsigned i = 0; i != RVLocs.size(); ++i) { + for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) { CCValAssign &VA = RVLocs[i]; if (VA.getLocReg() == X86::ST0 || VA.getLocReg() == X86::ST1) return false; } } + // If the calling conventions do not match, then we'd better make sure the + // results are returned in the same way as what the caller expects. + if (!CCMatch) { + SmallVector<CCValAssign, 16> RVLocs1; + CCState CCInfo1(CalleeCC, false, getTargetMachine(), + RVLocs1, *DAG.getContext()); + CCInfo1.AnalyzeCallResult(Ins, RetCC_X86); + + SmallVector<CCValAssign, 16> RVLocs2; + CCState CCInfo2(CallerCC, false, getTargetMachine(), + RVLocs2, *DAG.getContext()); + CCInfo2.AnalyzeCallResult(Ins, RetCC_X86); + + if (RVLocs1.size() != RVLocs2.size()) + return false; + for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) { + if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc()) + return false; + if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo()) + return false; + if (RVLocs1[i].isRegLoc()) { + if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg()) + return false; + } else { + if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset()) + return false; + } + } + } + // If the callee takes no arguments then go on to check the results of the // call. if (!Outs.empty()) { @@ -2401,12 +2429,13 @@ FastISel * X86TargetLowering::createFastISel(MachineFunction &mf, DenseMap<const Value *, unsigned> &vm, DenseMap<const BasicBlock*, MachineBasicBlock*> &bm, - DenseMap<const AllocaInst *, int> &am + DenseMap<const AllocaInst *, int> &am, + std::vector<std::pair<MachineInstr*, unsigned> > &pn #ifndef NDEBUG - , SmallSet<Instruction*, 8> &cil + , SmallSet<const Instruction *, 8> &cil #endif - ) { - return X86::createFastISel(mf, vm, bm, am + ) const { + return X86::createFastISel(mf, vm, bm, am, pn #ifndef NDEBUG , cil #endif @@ -2419,7 +2448,7 @@ X86TargetLowering::createFastISel(MachineFunction &mf, //===----------------------------------------------------------------------===// -SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) { +SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); int ReturnAddrIndex = FuncInfo->getRAIndex(); @@ -3440,7 +3469,7 @@ unsigned getNumOfConsecutiveZeros(ShuffleVectorSDNode *SVOp, int NumElems, /// FIXME: split into pslldqi, psrldqi, palignr variants. static bool isVectorShift(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG, bool &isLeft, SDValue &ShVal, unsigned &ShAmt) { - int NumElems = SVOp->getValueType(0).getVectorNumElements(); + unsigned NumElems = SVOp->getValueType(0).getVectorNumElements(); isLeft = true; unsigned NumZeros = getNumOfConsecutiveZeros(SVOp, NumElems, true, DAG); @@ -3452,11 +3481,12 @@ static bool isVectorShift(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG, } bool SeenV1 = false; bool SeenV2 = false; - for (int i = NumZeros; i < NumElems; ++i) { - int Val = isLeft ? (i - NumZeros) : i; - int Idx = SVOp->getMaskElt(isLeft ? i : (i - NumZeros)); - if (Idx < 0) + for (unsigned i = NumZeros; i < NumElems; ++i) { + unsigned Val = isLeft ? (i - NumZeros) : i; + int Idx_ = SVOp->getMaskElt(isLeft ? i : (i - NumZeros)); + if (Idx_ < 0) continue; + unsigned Idx = (unsigned) Idx_; if (Idx < NumElems) SeenV1 = true; else { @@ -3479,7 +3509,8 @@ static bool isVectorShift(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG, /// static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros, unsigned NumNonZero, unsigned NumZero, - SelectionDAG &DAG, TargetLowering &TLI) { + SelectionDAG &DAG, + const TargetLowering &TLI) { if (NumNonZero > 8) return SDValue(); @@ -3524,8 +3555,9 @@ static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros, /// LowerBuildVectorv8i16 - Custom lower build_vector of v8i16. /// static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros, - unsigned NumNonZero, unsigned NumZero, - SelectionDAG &DAG, TargetLowering &TLI) { + unsigned NumNonZero, unsigned NumZero, + SelectionDAG &DAG, + const TargetLowering &TLI) { if (NumNonZero > 4) return SDValue(); @@ -3567,7 +3599,7 @@ static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp, SDValue X86TargetLowering::LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { // Check if the scalar load can be widened into a vector load. And if // the address is "base + cst" see if the cst can be "absorbed" into @@ -3699,7 +3731,7 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts, } SDValue -X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { +X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); // All zero's are handled with pxor, all one's are handled with pcmpeqd. if (ISD::isBuildVectorAllZeros(Op.getNode()) @@ -3965,7 +3997,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { } SDValue -X86TargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { +X86TargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const { // We support concatenate two MMX registers and place them in a MMX // register. This is better than doing a stack convert. DebugLoc dl = Op.getDebugLoc(); @@ -3998,7 +4030,8 @@ X86TargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { // 4. [all] mov + pshuflw + pshufhw + N x (pextrw + pinsrw) static SDValue LowerVECTOR_SHUFFLEv8i16(ShuffleVectorSDNode *SVOp, - SelectionDAG &DAG, X86TargetLowering &TLI) { + SelectionDAG &DAG, + const X86TargetLowering &TLI) { SDValue V1 = SVOp->getOperand(0); SDValue V2 = SVOp->getOperand(1); DebugLoc dl = SVOp->getDebugLoc(); @@ -4241,7 +4274,8 @@ SDValue LowerVECTOR_SHUFFLEv8i16(ShuffleVectorSDNode *SVOp, // 3. [all] v8i16 shuffle + N x pextrw + rotate + pinsrw static SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp, - SelectionDAG &DAG, X86TargetLowering &TLI) { + SelectionDAG &DAG, + const X86TargetLowering &TLI) { SDValue V1 = SVOp->getOperand(0); SDValue V2 = SVOp->getOperand(1); DebugLoc dl = SVOp->getDebugLoc(); @@ -4387,7 +4421,7 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp, static SDValue RewriteAsNarrowerShuffle(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG, - TargetLowering &TLI, DebugLoc dl) { + const TargetLowering &TLI, DebugLoc dl) { EVT VT = SVOp->getValueType(0); SDValue V1 = SVOp->getOperand(0); SDValue V2 = SVOp->getOperand(1); @@ -4619,7 +4653,7 @@ LowerVECTOR_SHUFFLE_4wide(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) { } SDValue -X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { +X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op); SDValue V1 = Op.getOperand(0); SDValue V2 = Op.getOperand(1); @@ -4806,7 +4840,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { SDValue X86TargetLowering::LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { EVT VT = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); if (VT.getSizeInBits() == 8) { @@ -4860,7 +4894,8 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SDValue -X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { +X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, + SelectionDAG &DAG) const { if (!isa<ConstantSDNode>(Op.getOperand(1))) return SDValue(); @@ -4924,7 +4959,8 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { } SDValue -X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG){ +X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, + SelectionDAG &DAG) const { EVT VT = Op.getValueType(); EVT EltVT = VT.getVectorElementType(); DebugLoc dl = Op.getDebugLoc(); @@ -4973,7 +5009,7 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG){ } SDValue -X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { +X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); EVT EltVT = VT.getVectorElementType(); @@ -5002,7 +5038,7 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { } SDValue -X86TargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) { +X86TargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); if (Op.getValueType() == MVT::v2f32) return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f32, @@ -5033,7 +5069,7 @@ X86TargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) { // be used to form addressing mode. These wrapped nodes will be selected // into MOV32ri. SDValue -X86TargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) { +X86TargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const { ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); // In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the @@ -5066,7 +5102,7 @@ X86TargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) { return Result; } -SDValue X86TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) { +SDValue X86TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); // In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the @@ -5100,7 +5136,7 @@ SDValue X86TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) { } SDValue -X86TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) { +X86TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const { const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol(); // In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the @@ -5136,12 +5172,12 @@ X86TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) { } SDValue -X86TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) { +X86TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const { // Create the TargetBlockAddressAddress node. unsigned char OpFlags = Subtarget->ClassifyBlockAddressReference(); CodeModel::Model M = getTargetMachine().getCodeModel(); - BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); + const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); DebugLoc dl = Op.getDebugLoc(); SDValue Result = DAG.getBlockAddress(BA, getPointerTy(), /*isTarget=*/true, OpFlags); @@ -5210,7 +5246,7 @@ X86TargetLowering::LowerGlobalAddress(const GlobalValue *GV, DebugLoc dl, } SDValue -X86TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) { +X86TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); int64_t Offset = cast<GlobalAddressSDNode>(Op)->getOffset(); return LowerGlobalAddress(GV, Op.getDebugLoc(), Offset, DAG); @@ -5310,7 +5346,7 @@ static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG, } SDValue -X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) { +X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { // TODO: implement the "local dynamic" model // TODO: implement the "initial exec"model for pic executables assert(Subtarget->isTargetELF() && @@ -5346,7 +5382,7 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) { /// LowerShift - Lower SRA_PARTS and friends, which return two i32 values and /// take a 2 x i32 value to shift plus a shift amount. -SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) { +SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const { assert(Op.getNumOperands() == 3 && "Not a double-shift!"); EVT VT = Op.getValueType(); unsigned VTBits = VT.getSizeInBits(); @@ -5390,7 +5426,8 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) { return DAG.getMergeValues(Ops, 2, dl); } -SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) { +SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, + SelectionDAG &DAG) const { EVT SrcVT = Op.getOperand(0).getValueType(); if (SrcVT.isVector()) { @@ -5426,7 +5463,7 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) { SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue StackSlot, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { // Build the FILD DebugLoc dl = Op.getDebugLoc(); SDVTList Tys; @@ -5463,7 +5500,8 @@ SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, } // LowerUINT_TO_FP_i64 - 64-bit unsigned integer to double expansion. -SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) { +SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op, + SelectionDAG &DAG) const { // This algorithm is not obvious. Here it is in C code, more or less: /* double uint64_to_double( uint32_t hi, uint32_t lo ) { @@ -5547,7 +5585,8 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) { } // LowerUINT_TO_FP_i32 - 32-bit unsigned integer to float expansion. -SDValue X86TargetLowering::LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG) { +SDValue X86TargetLowering::LowerUINT_TO_FP_i32(SDValue Op, + SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); // FP constant to bias correct the final result. SDValue Bias = DAG.getConstantFP(BitsToDouble(0x4330000000000000ULL), @@ -5592,7 +5631,8 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG) { return Sub; } -SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) { +SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, + SelectionDAG &DAG) const { SDValue N0 = Op.getOperand(0); DebugLoc dl = Op.getDebugLoc(); @@ -5628,7 +5668,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) { } std::pair<SDValue,SDValue> X86TargetLowering:: -FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned) { +FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned) const { DebugLoc dl = Op.getDebugLoc(); EVT DstTy = Op.getValueType(); @@ -5690,7 +5730,8 @@ FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned) { return std::make_pair(FIST, StackSlot); } -SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) { +SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op, + SelectionDAG &DAG) const { if (Op.getValueType().isVector()) { if (Op.getValueType() == MVT::v2i32 && Op.getOperand(0).getValueType() == MVT::v2f64) { @@ -5709,7 +5750,8 @@ SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) { FIST, StackSlot, NULL, 0, false, false, 0); } -SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) { +SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op, + SelectionDAG &DAG) const { std::pair<SDValue,SDValue> Vals = FP_TO_INTHelper(Op, DAG, false); SDValue FIST = Vals.first, StackSlot = Vals.second; assert(FIST.getNode() && "Unexpected failure"); @@ -5719,7 +5761,8 @@ SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) { FIST, StackSlot, NULL, 0, false, false, 0); } -SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) { +SDValue X86TargetLowering::LowerFABS(SDValue Op, + SelectionDAG &DAG) const { LLVMContext *Context = DAG.getContext(); DebugLoc dl = Op.getDebugLoc(); EVT VT = Op.getValueType(); @@ -5746,7 +5789,7 @@ SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) { return DAG.getNode(X86ISD::FAND, dl, VT, Op.getOperand(0), Mask); } -SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) { +SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) const { LLVMContext *Context = DAG.getContext(); DebugLoc dl = Op.getDebugLoc(); EVT VT = Op.getValueType(); @@ -5781,7 +5824,7 @@ SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) { } } -SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) { +SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { LLVMContext *Context = DAG.getContext(); SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); @@ -5857,7 +5900,7 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) { /// Emit nodes that will be selected as "test Op0,Op0", or something /// equivalent. SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); // CF and OF aren't always set the way we want. Determine which @@ -5885,15 +5928,20 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, unsigned NumOperands = 0; switch (Op.getNode()->getOpcode()) { case ISD::ADD: - // Due to an isel shortcoming, be conservative if this add is likely to - // be selected as part of a load-modify-store instruction. When the root - // node in a match is a store, isel doesn't know how to remap non-chain - // non-flag uses of other nodes in the match, such as the ADD in this - // case. This leads to the ADD being left around and reselected, with - // the result being two adds in the output. + // Due to an isel shortcoming, be conservative if this add is + // likely to be selected as part of a load-modify-store + // instruction. When the root node in a match is a store, isel + // doesn't know how to remap non-chain non-flag uses of other + // nodes in the match, such as the ADD in this case. This leads + // to the ADD being left around and reselected, with the result + // being two adds in the output. Alas, even if none our users + // are stores, that doesn't prove we're O.K. Ergo, if we have + // any parents that aren't CopyToReg or SETCC, eschew INC/DEC. + // A better fix seems to require climbing the DAG back to the + // root, and it doesn't seem to be worth the effort. for (SDNode::use_iterator UI = Op.getNode()->use_begin(), - UE = Op.getNode()->use_end(); UI != UE; ++UI) - if (UI->getOpcode() == ISD::STORE) + UE = Op.getNode()->use_end(); UI != UE; ++UI) + if (UI->getOpcode() != ISD::CopyToReg && UI->getOpcode() != ISD::SETCC) goto default_case; if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getNode()->getOperand(1))) { @@ -5988,7 +6036,7 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, /// Emit nodes that will be selected as "cmp Op0,Op1", or something /// equivalent. SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op1)) if (C->getAPIntValue() == 0) return EmitTest(Op0, X86CC, DAG); @@ -5999,8 +6047,8 @@ SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, /// LowerToBT - Result of 'and' is compared against zero. Turn it into a BT node /// if it's possible. -static SDValue LowerToBT(SDValue And, ISD::CondCode CC, - DebugLoc dl, SelectionDAG &DAG) { +SDValue X86TargetLowering::LowerToBT(SDValue And, ISD::CondCode CC, + DebugLoc dl, SelectionDAG &DAG) const { SDValue Op0 = And.getOperand(0); SDValue Op1 = And.getOperand(1); if (Op0.getOpcode() == ISD::TRUNCATE) @@ -6031,11 +6079,13 @@ static SDValue LowerToBT(SDValue And, ISD::CondCode CC, } if (LHS.getNode()) { - // If LHS is i8, promote it to i16 with any_extend. There is no i8 BT + // If LHS is i8, promote it to i32 with any_extend. There is no i8 BT // instruction. Since the shift amount is in-range-or-undefined, we know - // that doing a bittest on the i16 value is ok. We extend to i32 because + // that doing a bittest on the i32 value is ok. We extend to i32 because // the encoding for the i16 version is larger than the i32 version. - if (LHS.getValueType() == MVT::i8) + // Also promote i16 to i32 for performance / code size reason. + if (LHS.getValueType() == MVT::i8 || + LHS.getValueType() == MVT::i16) LHS = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, LHS); // If the operand types disagree, extend the shift amount to match. Since @@ -6052,7 +6102,7 @@ static SDValue LowerToBT(SDValue And, ISD::CondCode CC, return SDValue(); } -SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) { +SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer"); SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); @@ -6088,7 +6138,7 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) { DAG.getConstant(CCode, MVT::i8), Op0.getOperand(1)); } - bool isFP = Op.getOperand(1).getValueType().isFloatingPoint(); + bool isFP = Op1.getValueType().isFloatingPoint(); unsigned X86CC = TranslateX86CC(CC, isFP, Op0, Op1, DAG); if (X86CC == X86::COND_INVALID) return SDValue(); @@ -6106,7 +6156,7 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) { DAG.getConstant(X86CC, MVT::i8), Cond); } -SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) { +SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const { SDValue Cond; SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); @@ -6242,7 +6292,7 @@ static bool isX86LogicalCmp(SDValue Op) { return false; } -SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) { +SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { bool addTest = true; SDValue Cond = Op.getOperand(0); DebugLoc dl = Op.getDebugLoc(); @@ -6362,7 +6412,7 @@ static bool isXor1OfSetCC(SDValue Op) { return false; } -SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) { +SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { bool addTest = true; SDValue Chain = Op.getOperand(0); SDValue Cond = Op.getOperand(1); @@ -6514,7 +6564,7 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) { // correct sequence. SDValue X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { assert(Subtarget->isTargetCygMing() && "This should be used only on Cygwin/Mingw targets"); DebugLoc dl = Op.getDebugLoc(); @@ -6550,7 +6600,7 @@ X86TargetLowering::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, SDValue Size, unsigned Align, bool isVolatile, const Value *DstSV, - uint64_t DstSVOff) { + uint64_t DstSVOff) const { ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); // If not DWORD aligned or size is more than the threshold, call the library. @@ -6689,8 +6739,10 @@ X86TargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline, - const Value *DstSV, uint64_t DstSVOff, - const Value *SrcSV, uint64_t SrcSVOff) { + const Value *DstSV, + uint64_t DstSVOff, + const Value *SrcSV, + uint64_t SrcSVOff) const { // This requires the copy size to be a constant, preferrably // within a subtarget-specific limit. ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); @@ -6720,7 +6772,7 @@ X86TargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, Count, InFlag); InFlag = Chain.getValue(1); Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RDI : - X86::EDI, + X86::EDI, Dst, InFlag); InFlag = Chain.getValue(1); Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RSI : @@ -6756,14 +6808,18 @@ X86TargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, &Results[0], Results.size()); } -SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) { +SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { + MachineFunction &MF = DAG.getMachineFunction(); + X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); + const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); DebugLoc dl = Op.getDebugLoc(); if (!Subtarget->is64Bit()) { // vastart just stores the address of the VarArgsFrameIndex slot into the // memory location argument. - SDValue FR = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy()); + SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), + getPointerTy()); return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0, false, false, 0); } @@ -6777,7 +6833,8 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) { SDValue FIN = Op.getOperand(1); // Store gp_offset SDValue Store = DAG.getStore(Op.getOperand(0), dl, - DAG.getConstant(VarArgsGPOffset, MVT::i32), + DAG.getConstant(FuncInfo->getVarArgsGPOffset(), + MVT::i32), FIN, SV, 0, false, false, 0); MemOps.push_back(Store); @@ -6785,14 +6842,16 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) { FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN, DAG.getIntPtrConstant(4)); Store = DAG.getStore(Op.getOperand(0), dl, - DAG.getConstant(VarArgsFPOffset, MVT::i32), + DAG.getConstant(FuncInfo->getVarArgsFPOffset(), + MVT::i32), FIN, SV, 0, false, false, 0); MemOps.push_back(Store); // Store ptr to overflow_arg_area FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN, DAG.getIntPtrConstant(4)); - SDValue OVFIN = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy()); + SDValue OVFIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), + getPointerTy()); Store = DAG.getStore(Op.getOperand(0), dl, OVFIN, FIN, SV, 0, false, false, 0); MemOps.push_back(Store); @@ -6800,7 +6859,8 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) { // Store ptr to reg_save_area. FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN, DAG.getIntPtrConstant(8)); - SDValue RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy()); + SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), + getPointerTy()); Store = DAG.getStore(Op.getOperand(0), dl, RSFIN, FIN, SV, 0, false, false, 0); MemOps.push_back(Store); @@ -6808,18 +6868,18 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) { &MemOps[0], MemOps.size()); } -SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) { +SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { // X86-64 va_list is a struct { i32, i32, i8*, i8* }. assert(Subtarget->is64Bit() && "This code only handles 64-bit va_arg!"); SDValue Chain = Op.getOperand(0); SDValue SrcPtr = Op.getOperand(1); SDValue SrcSV = Op.getOperand(2); - llvm_report_error("VAArgInst is not yet implemented for x86-64!"); + report_fatal_error("VAArgInst is not yet implemented for x86-64!"); return SDValue(); } -SDValue X86TargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) { +SDValue X86TargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const { // X86-64 va_list is a struct { i32, i32, i8*, i8* }. assert(Subtarget->is64Bit() && "This code only handles 64-bit va_copy!"); SDValue Chain = Op.getOperand(0); @@ -6835,7 +6895,7 @@ SDValue X86TargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) { } SDValue -X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) { +X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); switch (IntNo) { @@ -7076,7 +7136,8 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) { } } -SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) { +SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op, + SelectionDAG &DAG) const { unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); DebugLoc dl = Op.getDebugLoc(); @@ -7097,7 +7158,7 @@ SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) { RetAddrFI, NULL, 0, false, false, 0); } -SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) { +SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); MFI->setFrameAddressIsTaken(true); EVT VT = Op.getValueType(); @@ -7112,12 +7173,11 @@ SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) { } SDValue X86TargetLowering::LowerFRAME_TO_ARGS_OFFSET(SDValue Op, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { return DAG.getIntPtrConstant(2*TD->getPointerSize()); } -SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) -{ +SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); SDValue Chain = Op.getOperand(0); SDValue Offset = Op.getOperand(1); @@ -7141,7 +7201,7 @@ SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) } SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { SDValue Root = Op.getOperand(0); SDValue Trmp = Op.getOperand(1); // trampoline SDValue FPtr = Op.getOperand(2); // nested function @@ -7232,7 +7292,7 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op, InRegCount += (TD->getTypeSizeInBits(*I) + 31) / 32; if (InRegCount > 2) { - llvm_report_error("Nest register in use - reduce number of inreg parameters!"); + report_fatal_error("Nest register in use - reduce number of inreg parameters!"); } } break; @@ -7281,7 +7341,8 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op, } } -SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) { +SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op, + SelectionDAG &DAG) const { /* The rounding mode is in bits 11:10 of FPSR, and has the following settings: @@ -7343,7 +7404,7 @@ SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) { ISD::TRUNCATE : ISD::ZERO_EXTEND), dl, VT, RetVal); } -SDValue X86TargetLowering::LowerCTLZ(SDValue Op, SelectionDAG &DAG) { +SDValue X86TargetLowering::LowerCTLZ(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); EVT OpVT = VT; unsigned NumBits = VT.getSizeInBits(); @@ -7377,7 +7438,7 @@ SDValue X86TargetLowering::LowerCTLZ(SDValue Op, SelectionDAG &DAG) { return Op; } -SDValue X86TargetLowering::LowerCTTZ(SDValue Op, SelectionDAG &DAG) { +SDValue X86TargetLowering::LowerCTTZ(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); EVT OpVT = VT; unsigned NumBits = VT.getSizeInBits(); @@ -7407,7 +7468,7 @@ SDValue X86TargetLowering::LowerCTTZ(SDValue Op, SelectionDAG &DAG) { return Op; } -SDValue X86TargetLowering::LowerMUL_V2I64(SDValue Op, SelectionDAG &DAG) { +SDValue X86TargetLowering::LowerMUL_V2I64(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); assert(VT == MVT::v2i64 && "Only know how to lower V2I64 multiply"); DebugLoc dl = Op.getDebugLoc(); @@ -7452,7 +7513,7 @@ SDValue X86TargetLowering::LowerMUL_V2I64(SDValue Op, SelectionDAG &DAG) { } -SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) { +SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const { // Lower the "add/sub/mul with overflow" instruction into a regular ins plus // a "setcc" instruction that checks the overflow flag. The "brcond" lowering // looks for this combo and may remove the "setcc" instruction if the "setcc" @@ -7520,7 +7581,7 @@ SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) { return Sum; } -SDValue X86TargetLowering::LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG) { +SDValue X86TargetLowering::LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG) const { EVT T = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); unsigned Reg = 0; @@ -7551,7 +7612,7 @@ SDValue X86TargetLowering::LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG) { } SDValue X86TargetLowering::LowerREADCYCLECOUNTER(SDValue Op, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { assert(Subtarget->is64Bit() && "Result not type legalized?"); SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); SDValue TheChain = Op.getOperand(0); @@ -7569,7 +7630,7 @@ SDValue X86TargetLowering::LowerREADCYCLECOUNTER(SDValue Op, return DAG.getMergeValues(Ops, 2, dl); } -SDValue X86TargetLowering::LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) { +SDValue X86TargetLowering::LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) const { SDNode *Node = Op.getNode(); DebugLoc dl = Node->getDebugLoc(); EVT T = Node->getValueType(0); @@ -7585,7 +7646,7 @@ SDValue X86TargetLowering::LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) { /// LowerOperation - Provide custom lowering hooks for some operations. /// -SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { +SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { default: llvm_unreachable("Should not custom lower this!"); case ISD::ATOMIC_CMP_SWAP: return LowerCMP_SWAP(Op,DAG); @@ -7643,7 +7704,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { void X86TargetLowering:: ReplaceATOMIC_BINARY_64(SDNode *Node, SmallVectorImpl<SDValue>&Results, - SelectionDAG &DAG, unsigned NewOp) { + SelectionDAG &DAG, unsigned NewOp) const { EVT T = Node->getValueType(0); DebugLoc dl = Node->getDebugLoc(); assert (T == MVT::i64 && "Only know how to expand i64 atomics"); @@ -7668,7 +7729,7 @@ ReplaceATOMIC_BINARY_64(SDNode *Node, SmallVectorImpl<SDValue>&Results, /// with a new node built out of custom code. void X86TargetLowering::ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { DebugLoc dl = N->getDebugLoc(); switch (N->getOpcode()) { default: @@ -7941,9 +8002,9 @@ bool X86TargetLowering::isNarrowingProfitable(EVT VT1, EVT VT2) const { bool X86TargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M, EVT VT) const { - // Only do shuffles on 128-bit vector types for now. + // Very little shuffling can be done for 64-bit vectors right now. if (VT.getSizeInBits() == 64) - return false; + return isPALIGNRMask(M, VT, Subtarget->hasSSSE3()); // FIXME: pshufb, blends, shifts. return (VT.getVectorNumElements() == 2 || @@ -8448,8 +8509,7 @@ X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter( MachineBasicBlock * X86TargetLowering::EmitLoweredSelect(MachineInstr *MI, - MachineBasicBlock *BB, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const { + MachineBasicBlock *BB) const { const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); DebugLoc DL = MI->getDebugLoc(); @@ -8478,12 +8538,9 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI, F->insert(It, sinkMBB); // Update machine-CFG edges by first adding all successors of the current // block to the new block which will contain the Phi node for the select. - // Also inform sdisel of the edge changes. for (MachineBasicBlock::succ_iterator I = BB->succ_begin(), - E = BB->succ_end(); I != E; ++I) { - EM->insert(std::make_pair(*I, sinkMBB)); + E = BB->succ_end(); I != E; ++I) sinkMBB->addSuccessor(*I); - } // Next, remove all successors of the current block, and add the true // and fallthrough blocks as its successors. while (!BB->succ_empty()) @@ -8495,27 +8552,22 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI, // copy0MBB: // %FalseValue = ... // # fallthrough to sinkMBB - BB = copy0MBB; - - // Update machine-CFG edges - BB->addSuccessor(sinkMBB); + copy0MBB->addSuccessor(sinkMBB); // sinkMBB: // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] // ... - BB = sinkMBB; - BuildMI(BB, DL, TII->get(X86::PHI), MI->getOperand(0).getReg()) + BuildMI(sinkMBB, DL, TII->get(X86::PHI), MI->getOperand(0).getReg()) .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB) .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB); F->DeleteMachineInstr(MI); // The pseudo instruction is gone now. - return BB; + return sinkMBB; } MachineBasicBlock * X86TargetLowering::EmitLoweredMingwAlloca(MachineInstr *MI, - MachineBasicBlock *BB, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const { + MachineBasicBlock *BB) const { const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); DebugLoc DL = MI->getDebugLoc(); MachineFunction *F = BB->getParent(); @@ -8538,12 +8590,11 @@ X86TargetLowering::EmitLoweredMingwAlloca(MachineInstr *MI, MachineBasicBlock * X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *BB, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const { + MachineBasicBlock *BB) const { switch (MI->getOpcode()) { default: assert(false && "Unexpected instr type to insert"); case X86::MINGW_ALLOCA: - return EmitLoweredMingwAlloca(MI, BB, EM); + return EmitLoweredMingwAlloca(MI, BB); case X86::CMOV_GR8: case X86::CMOV_V1I64: case X86::CMOV_FR32: @@ -8556,7 +8607,7 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, case X86::CMOV_RFP32: case X86::CMOV_RFP64: case X86::CMOV_RFP80: - return EmitLoweredSelect(MI, BB, EM); + return EmitLoweredSelect(MI, BB); case X86::FP32_TO_INT16_IN_MEM: case X86::FP32_TO_INT32_IN_MEM: @@ -8638,21 +8689,6 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, F->DeleteMachineInstr(MI); // The pseudo instruction is gone now. return BB; } - // DBG_VALUE. Only the frame index case is done here. - case X86::DBG_VALUE: { - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - DebugLoc DL = MI->getDebugLoc(); - X86AddressMode AM; - MachineFunction *F = BB->getParent(); - AM.BaseType = X86AddressMode::FrameIndexBase; - AM.Base.FrameIndex = MI->getOperand(0).getImm(); - addFullAddress(BuildMI(BB, DL, TII->get(X86::DBG_VALUE)), AM). - addImm(MI->getOperand(1).getImm()). - addMetadata(MI->getOperand(2).getMetadata()); - F->DeleteMachineInstr(MI); // Remove pseudo. - return BB; - } - // String/text processing lowering. case X86::PCMPISTRM128REG: return EmitPCMP(MI, BB, 3, false /* in-mem */); @@ -8874,7 +8910,8 @@ void X86TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, /// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the /// node is a GlobalAddress + offset. bool X86TargetLowering::isGAPlusOffset(SDNode *N, - GlobalValue* &GA, int64_t &Offset) const{ + const GlobalValue* &GA, + int64_t &Offset) const { if (N->getOpcode() == X86ISD::Wrapper) { if (isa<GlobalAddressSDNode>(N->getOperand(0))) { GA = cast<GlobalAddressSDNode>(N->getOperand(0))->getGlobal(); @@ -9558,9 +9595,13 @@ static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG, } static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget) { + if (DCI.isBeforeLegalizeOps()) + return SDValue(); + EVT VT = N->getValueType(0); - if (VT != MVT::i64 || !Subtarget->is64Bit()) + if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64) return SDValue(); // fold (or (x << c) | (y >> (64 - c))) ==> (shld64 x, y, c) @@ -9570,6 +9611,8 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, std::swap(N0, N1); if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL) return SDValue(); + if (!N0.hasOneUse() || !N1.hasOneUse()) + return SDValue(); SDValue ShAmt0 = N0.getOperand(1); if (ShAmt0.getValueType() != MVT::i8) @@ -9592,10 +9635,11 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, std::swap(ShAmt0, ShAmt1); } + unsigned Bits = VT.getSizeInBits(); if (ShAmt1.getOpcode() == ISD::SUB) { SDValue Sum = ShAmt1.getOperand(0); if (ConstantSDNode *SumC = dyn_cast<ConstantSDNode>(Sum)) { - if (SumC->getSExtValue() == 64 && + if (SumC->getSExtValue() == Bits && ShAmt1.getOperand(1) == ShAmt0) return DAG.getNode(Opc, DL, VT, Op0, Op1, @@ -9605,7 +9649,7 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, } else if (ConstantSDNode *ShAmt1C = dyn_cast<ConstantSDNode>(ShAmt1)) { ConstantSDNode *ShAmt0C = dyn_cast<ConstantSDNode>(ShAmt0); if (ShAmt0C && - ShAmt0C->getSExtValue() + ShAmt1C->getSExtValue() == 64) + ShAmt0C->getSExtValue() + ShAmt1C->getSExtValue() == Bits) return DAG.getNode(Opc, DL, VT, N0.getOperand(0), N1.getOperand(0), DAG.getNode(ISD::TRUNCATE, DL, @@ -9769,8 +9813,9 @@ static SDValue PerformBTCombine(SDNode *N, unsigned BitWidth = Op1.getValueSizeInBits(); APInt DemandedMask = APInt::getLowBitsSet(BitWidth, Log2_32(BitWidth)); APInt KnownZero, KnownOne; - TargetLowering::TargetLoweringOpt TLO(DAG); - TargetLowering &TLI = DAG.getTargetLoweringInfo(); + TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(), + !DCI.isBeforeLegalizeOps()); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (TLO.ShrinkDemandedConstant(Op1, DemandedMask) || TLI.SimplifyDemandedBits(Op1, DemandedMask, KnownZero, KnownOne, TLO)) DCI.CommitTargetLoweringOpt(TLO); @@ -9883,7 +9928,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case ISD::SHL: case ISD::SRA: case ISD::SRL: return PerformShiftCombine(N, DAG, Subtarget); - case ISD::OR: return PerformOrCombine(N, DAG, Subtarget); + case ISD::OR: return PerformOrCombine(N, DAG, DCI, Subtarget); case ISD::STORE: return PerformSTORECombine(N, DAG, Subtarget); case X86ISD::FXOR: case X86ISD::FOR: return PerformFORCombine(N, DAG); @@ -9897,6 +9942,111 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, return SDValue(); } +/// isTypeDesirableForOp - Return true if the target has native support for +/// the specified value type and it is 'desirable' to use the type for the +/// given node type. e.g. On x86 i16 is legal, but undesirable since i16 +/// instruction encodings are longer and some i16 instructions are slow. +bool X86TargetLowering::isTypeDesirableForOp(unsigned Opc, EVT VT) const { + if (!isTypeLegal(VT)) + return false; + if (VT != MVT::i16) + return true; + + switch (Opc) { + default: + return true; + case ISD::LOAD: + case ISD::SIGN_EXTEND: + case ISD::ZERO_EXTEND: + case ISD::ANY_EXTEND: + case ISD::SHL: + case ISD::SRL: + case ISD::SUB: + case ISD::ADD: + case ISD::MUL: + case ISD::AND: + case ISD::OR: + case ISD::XOR: + return false; + } +} + +static bool MayFoldLoad(SDValue Op) { + return Op.hasOneUse() && ISD::isNormalLoad(Op.getNode()); +} + +static bool MayFoldIntoStore(SDValue Op) { + return Op.hasOneUse() && ISD::isNormalStore(*Op.getNode()->use_begin()); +} + +/// IsDesirableToPromoteOp - This method query the target whether it is +/// beneficial for dag combiner to promote the specified node. If true, it +/// should return the desired promotion type by reference. +bool X86TargetLowering::IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const { + EVT VT = Op.getValueType(); + if (VT != MVT::i16) + return false; + + bool Promote = false; + bool Commute = false; + switch (Op.getOpcode()) { + default: break; + case ISD::LOAD: { + LoadSDNode *LD = cast<LoadSDNode>(Op); + // If the non-extending load has a single use and it's not live out, then it + // might be folded. + if (LD->getExtensionType() == ISD::NON_EXTLOAD /*&& + Op.hasOneUse()*/) { + for (SDNode::use_iterator UI = Op.getNode()->use_begin(), + UE = Op.getNode()->use_end(); UI != UE; ++UI) { + // The only case where we'd want to promote LOAD (rather then it being + // promoted as an operand is when it's only use is liveout. + if (UI->getOpcode() != ISD::CopyToReg) + return false; + } + } + Promote = true; + break; + } + case ISD::SIGN_EXTEND: + case ISD::ZERO_EXTEND: + case ISD::ANY_EXTEND: + Promote = true; + break; + case ISD::SHL: + case ISD::SRL: { + SDValue N0 = Op.getOperand(0); + // Look out for (store (shl (load), x)). + if (MayFoldLoad(N0) && MayFoldIntoStore(Op)) + return false; + Promote = true; + break; + } + case ISD::ADD: + case ISD::MUL: + case ISD::AND: + case ISD::OR: + case ISD::XOR: + Commute = true; + // fallthrough + case ISD::SUB: { + SDValue N0 = Op.getOperand(0); + SDValue N1 = Op.getOperand(1); + if (!Commute && MayFoldLoad(N1)) + return false; + // Avoid disabling potential load folding opportunities. + if (MayFoldLoad(N0) && (!isa<ConstantSDNode>(N1) || MayFoldIntoStore(Op))) + return false; + if (MayFoldLoad(N1) && (!isa<ConstantSDNode>(N0) || MayFoldIntoStore(Op))) + return false; + Promote = true; + } + } + + PVT = MVT::i32; + return Promote; +} + //===----------------------------------------------------------------------===// // X86 Inline Assembly Support //===----------------------------------------------------------------------===// @@ -10159,7 +10309,7 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op, return; } - GlobalValue *GV = GA->getGlobal(); + const GlobalValue *GV = GA->getGlobal(); // If we require an extra load to get this address, as in PIC mode, we // can't accept it. if (isGlobalStubReference(Subtarget->ClassifyGlobalReference(GV, diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 1026480ef0a61..440601f982766 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -374,12 +374,6 @@ namespace llvm { //===--------------------------------------------------------------------===// // X86TargetLowering - X86 Implementation of the TargetLowering interface class X86TargetLowering : public TargetLowering { - int VarArgsFrameIndex; // FrameIndex for start of varargs area. - int RegSaveFrameIndex; // X86-64 vararg func register save area. - unsigned VarArgsGPOffset; // X86-64 vararg func int reg offset. - unsigned VarArgsFPOffset; // X86-64 vararg func fp reg offset. - int BytesToPopOnReturn; // Number of arg bytes ret should pop. - public: explicit X86TargetLowering(X86TargetMachine &TM); @@ -401,11 +395,6 @@ namespace llvm { getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const; - // Return the number of bytes that a function should pop when it returns (in - // addition to the space used by the return address). - // - unsigned getBytesToPopOnReturn() const { return BytesToPopOnReturn; } - /// getStackPtrReg - Return the stack pointer register we are using: either /// ESP or RSP. unsigned getStackPtrReg() const { return X86StackPtr; } @@ -424,12 +413,14 @@ namespace llvm { /// probably because the source does not need to be loaded. If /// 'NonScalarIntSafe' is true, that means it's safe to return a /// non-scalar-integer type, e.g. empty string source, constant, or loaded - /// from memory. It returns EVT::Other if SelectionDAG should be responsible - /// for determining it. + /// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is + /// constant so it does not need to be loaded. + /// It returns EVT::Other if the type should be determined using generic + /// target-independent logic. virtual EVT - getOptimalMemOpType(uint64_t Size, - unsigned DstAlign, unsigned SrcAlign, - bool NonScalarIntSafe, SelectionDAG &DAG) const; + getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, + bool NonScalarIntSafe, bool MemcpyStrSrc, + MachineFunction &MF) const; /// allowsUnalignedMemoryAccesses - Returns true if the target allows /// unaligned memory accesses. of the specified type. @@ -439,20 +430,32 @@ namespace llvm { /// LowerOperation - Provide custom lowering hooks for some operations. /// - virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG); + virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; /// ReplaceNodeResults - Replace the results of node with an illegal result /// type with new values built out of custom code. /// virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results, - SelectionDAG &DAG); + SelectionDAG &DAG) const; virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; - virtual MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *MBB, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const; + /// isTypeDesirableForOp - Return true if the target has native support for + /// the specified value type and it is 'desirable' to use the type for the + /// given node type. e.g. On x86 i16 is legal, but undesirable since i16 + /// instruction encodings are longer and some i16 instructions are slow. + virtual bool isTypeDesirableForOp(unsigned Opc, EVT VT) const; + + /// isTypeDesirable - Return true if the target has native support for the + /// specified value type and it is 'desirable' to use the type. e.g. On x86 + /// i16 is legal, but undesirable since i16 instruction encodings are longer + /// and some i16 instructions are slow. + virtual bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const; + + virtual MachineBasicBlock * + EmitInstrWithCustomInserter(MachineInstr *MI, + MachineBasicBlock *MBB) const; /// getTargetNodeName - This method returns the name of a target specific @@ -473,9 +476,9 @@ namespace llvm { unsigned Depth = 0) const; virtual bool - isGAPlusOffset(SDNode *N, GlobalValue* &GA, int64_t &Offset) const; + isGAPlusOffset(SDNode *N, const GlobalValue* &GA, int64_t &Offset) const; - SDValue getReturnAddressFrameIndex(SelectionDAG &DAG); + SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const; virtual bool ExpandInlineAsm(CallInst *CI) const; @@ -560,7 +563,7 @@ namespace llvm { return !X86ScalarSSEf64 || VT == MVT::f80; } - virtual const X86Subtarget* getSubtarget() { + virtual const X86Subtarget* getSubtarget() const { return Subtarget; } @@ -577,11 +580,12 @@ namespace llvm { createFastISel(MachineFunction &mf, DenseMap<const Value *, unsigned> &, DenseMap<const BasicBlock *, MachineBasicBlock *> &, - DenseMap<const AllocaInst *, int> & + DenseMap<const AllocaInst *, int> &, + std::vector<std::pair<MachineInstr*, unsigned> > & #ifndef NDEBUG - , SmallSet<Instruction*, 8> & + , SmallSet<const Instruction *, 8> & #endif - ); + ) const; /// getFunctionAlignment - Return the Log2 alignment of this function. virtual unsigned getFunctionAlignment(const Function *F) const; @@ -616,17 +620,17 @@ namespace llvm { CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; SDValue LowerMemArgument(SDValue Chain, CallingConv::ID CallConv, const SmallVectorImpl<ISD::InputArg> &ArgInfo, DebugLoc dl, SelectionDAG &DAG, const CCValAssign &VA, MachineFrameInfo *MFI, - unsigned i); + unsigned i) const; SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg, DebugLoc dl, SelectionDAG &DAG, const CCValAssign &VA, - ISD::ArgFlagsTy Flags); + ISD::ArgFlagsTy Flags) const; // Call lowering helpers. @@ -641,114 +645,120 @@ namespace llvm { const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG& DAG) const; - bool IsCalleePop(bool isVarArg, CallingConv::ID CallConv); + bool IsCalleePop(bool isVarArg, CallingConv::ID CallConv) const; SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr, SDValue Chain, bool IsTailCall, bool Is64Bit, - int FPDiff, DebugLoc dl); + int FPDiff, DebugLoc dl) const; CCAssignFn *CCAssignFnForNode(CallingConv::ID CallConv) const; - unsigned GetAlignedArgumentStackSize(unsigned StackSize, SelectionDAG &DAG); + unsigned GetAlignedArgumentStackSize(unsigned StackSize, + SelectionDAG &DAG) const; std::pair<SDValue,SDValue> FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, - bool isSigned); + bool isSigned) const; SDValue LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl, - SelectionDAG &DAG); - SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG); - SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG); - SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG); - SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG); - SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG); - SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG); - SDValue LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG); - SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG); - SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG); - SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG); + SelectionDAG &DAG) const; + SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGlobalAddress(const GlobalValue *GV, DebugLoc dl, int64_t Offset, SelectionDAG &DAG) const; - SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG); - SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG); - SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG); - SDValue LowerShift(SDValue Op, SelectionDAG &DAG); + SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerShift(SDValue Op, SelectionDAG &DAG) const; SDValue BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue StackSlot, - SelectionDAG &DAG); - SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG); - SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG); - SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG); - SDValue LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG); - SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG); - SDValue LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG); - SDValue LowerFABS(SDValue Op, SelectionDAG &DAG); - SDValue LowerFNEG(SDValue Op, SelectionDAG &DAG); - SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG); - SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG); - SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG); - SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG); - SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG); - SDValue LowerMEMSET(SDValue Op, SelectionDAG &DAG); - SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG); - SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG); - SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG); - SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG); - SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG); - SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG); - SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG); - SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG); - SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG); - SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG); - SDValue LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG); - SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG); - SDValue LowerCTLZ(SDValue Op, SelectionDAG &DAG); - SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG); - SDValue LowerMUL_V2I64(SDValue Op, SelectionDAG &DAG); - SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG); + SelectionDAG &DAG) const; + SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFABS(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFNEG(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerToBT(SDValue And, ISD::CondCode CC, + DebugLoc dl, SelectionDAG &DAG) const; + SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerMEMSET(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerCTLZ(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerMUL_V2I64(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG); - SDValue LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG); - SDValue LowerREADCYCLECOUNTER(SDValue Op, SelectionDAG &DAG); + SDValue LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerREADCYCLECOUNTER(SDValue Op, SelectionDAG &DAG) const; virtual SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, - DebugLoc dl, SelectionDAG &DAG); + DebugLoc dl, SelectionDAG &DAG) const; virtual bool CanLowerReturn(CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<EVT> &OutTys, const SmallVectorImpl<ISD::ArgFlagsTy> &ArgsFlags, - SelectionDAG &DAG); + SelectionDAG &DAG) const; void ReplaceATOMIC_BINARY_64(SDNode *N, SmallVectorImpl<SDValue> &Results, - SelectionDAG &DAG, unsigned NewOp); + SelectionDAG &DAG, unsigned NewOp) const; SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVolatile, - const Value *DstSV, uint64_t DstSVOff); + const Value *DstSV, + uint64_t DstSVOff) const; SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline, - const Value *DstSV, uint64_t DstSVOff, - const Value *SrcSV, uint64_t SrcSVOff); + const Value *DstSV, + uint64_t DstSVOff, + const Value *SrcSV, + uint64_t SrcSVOff) const; /// Utility function to emit string processing sse4.2 instructions /// that return in xmm0. @@ -796,30 +806,29 @@ namespace llvm { MachineBasicBlock *BB) const; MachineBasicBlock *EmitLoweredSelect(MachineInstr *I, - MachineBasicBlock *BB, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const; + MachineBasicBlock *BB) const; MachineBasicBlock *EmitLoweredMingwAlloca(MachineInstr *MI, - MachineBasicBlock *BB, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const; + MachineBasicBlock *BB) const; /// Emit nodes that will be selected as "test Op0,Op0", or something /// equivalent, for use with the given x86 condition code. - SDValue EmitTest(SDValue Op0, unsigned X86CC, SelectionDAG &DAG); + SDValue EmitTest(SDValue Op0, unsigned X86CC, SelectionDAG &DAG) const; /// Emit nodes that will be selected as "cmp Op0,Op1", or something /// equivalent, for use with the given x86 condition code. SDValue EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, - SelectionDAG &DAG); + SelectionDAG &DAG) const; }; namespace X86 { FastISel *createFastISel(MachineFunction &mf, DenseMap<const Value *, unsigned> &, DenseMap<const BasicBlock *, MachineBasicBlock *> &, - DenseMap<const AllocaInst *, int> & + DenseMap<const AllocaInst *, int> &, + std::vector<std::pair<MachineInstr*, unsigned> > & #ifndef NDEBUG - , SmallSet<Instruction*, 8> & + , SmallSet<const Instruction*, 8> & #endif ); } diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td index eef2ca0789671..f5c3dbf2ad283 100644 --- a/lib/Target/X86/X86Instr64bit.td +++ b/lib/Target/X86/X86Instr64bit.td @@ -2086,6 +2086,11 @@ def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)), (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)), x86_subreg_8bit_hi))>, Requires<[In64BitMode]>; +def : Pat<(srl (and_su GR32:$src, 0xff00), (i8 8)), + (MOVZX32_NOREXrr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, + GR32_ABCD)), + x86_subreg_8bit_hi))>, + Requires<[In64BitMode]>; def : Pat<(srl GR16:$src, (i8 8)), (EXTRACT_SUBREG (MOVZX32_NOREXrr8 @@ -2156,21 +2161,6 @@ def : Pat<(sra GR64:$src1, (and CL, 63)), def : Pat<(store (sra (loadi64 addr:$dst), (and CL, 63)), addr:$dst), (SAR64mCL addr:$dst)>; -// Double shift patterns -def : Pat<(shrd GR64:$src1, (i8 imm:$amt1), GR64:$src2, (i8 imm)), - (SHRD64rri8 GR64:$src1, GR64:$src2, (i8 imm:$amt1))>; - -def : Pat<(store (shrd (loadi64 addr:$dst), (i8 imm:$amt1), - GR64:$src2, (i8 imm)), addr:$dst), - (SHRD64mri8 addr:$dst, GR64:$src2, (i8 imm:$amt1))>; - -def : Pat<(shld GR64:$src1, (i8 imm:$amt1), GR64:$src2, (i8 imm)), - (SHLD64rri8 GR64:$src1, GR64:$src2, (i8 imm:$amt1))>; - -def : Pat<(store (shld (loadi64 addr:$dst), (i8 imm:$amt1), - GR64:$src2, (i8 imm)), addr:$dst), - (SHLD64mri8 addr:$dst, GR64:$src2, (i8 imm:$amt1))>; - // (or x1, x2) -> (add x1, x2) if two operands are known not to share bits. let AddedComplexity = 5 in { // Try this before the selecting to OR def : Pat<(or_is_add GR64:$src1, i64immSExt8:$src2), @@ -2294,7 +2284,7 @@ def MOVPQIto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), def MOV64toSDrr : RPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src), "mov{d|q}\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (bitconvert GR64:$src))]>; -def MOV64toSDrm : RPDI<0x6E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src), +def MOV64toSDrm : S3SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src), "movq\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))]>; @@ -2347,15 +2337,3 @@ let isTwoAddress = 1 in { } defm PINSRQ : SS41I_insert64<0x22, "pinsrq">; - -// -disable-16bit support. -def : Pat<(truncstorei16 (i16 imm:$src), addr:$dst), - (MOV16mi addr:$dst, imm:$src)>; -def : Pat<(truncstorei16 GR64:$src, addr:$dst), - (MOV16mr addr:$dst, (EXTRACT_SUBREG GR64:$src, x86_subreg_16bit))>; -def : Pat<(i64 (sextloadi16 addr:$dst)), - (MOVSX64rm16 addr:$dst)>; -def : Pat<(i64 (zextloadi16 addr:$dst)), - (MOVZX64rm16 addr:$dst)>; -def : Pat<(i64 (extloadi16 addr:$dst)), - (MOVZX64rm16 addr:$dst)>; diff --git a/lib/Target/X86/X86InstrBuilder.h b/lib/Target/X86/X86InstrBuilder.h index c475b56d12f45..5a82a7b1979b8 100644 --- a/lib/Target/X86/X86InstrBuilder.h +++ b/lib/Target/X86/X86InstrBuilder.h @@ -49,7 +49,7 @@ struct X86AddressMode { unsigned Scale; unsigned IndexReg; int Disp; - GlobalValue *GV; + const GlobalValue *GV; unsigned GVOpFlags; X86AddressMode() diff --git a/lib/Target/X86/X86InstrFPStack.td b/lib/Target/X86/X86InstrFPStack.td index e6d1fee16f610..0aae4a8653b0e 100644 --- a/lib/Target/X86/X86InstrFPStack.td +++ b/lib/Target/X86/X86InstrFPStack.td @@ -327,8 +327,8 @@ def TST_F : FPI<0xE4, RawFrm, (outs), (ins), "ftst">, D9; // Versions of FP instructions that take a single memory operand. Added for the // disassembler; remove as they are included with patterns elsewhere. -def FCOM32m : FPI<0xD8, MRM2m, (outs), (ins f32mem:$src), "fcom{l}\t$src">; -def FCOMP32m : FPI<0xD8, MRM3m, (outs), (ins f32mem:$src), "fcomp{l}\t$src">; +def FCOM32m : FPI<0xD8, MRM2m, (outs), (ins f32mem:$src), "fcom{s}\t$src">; +def FCOMP32m : FPI<0xD8, MRM3m, (outs), (ins f32mem:$src), "fcomp{s}\t$src">; def FLDENVm : FPI<0xD9, MRM4m, (outs), (ins f32mem:$src), "fldenv\t$src">; def FSTENVm : FPI<0xD9, MRM6m, (outs f32mem:$dst), (ins), "fnstenv\t$dst">; @@ -336,15 +336,15 @@ def FSTENVm : FPI<0xD9, MRM6m, (outs f32mem:$dst), (ins), "fnstenv\t$dst">; def FICOM32m : FPI<0xDA, MRM2m, (outs), (ins i32mem:$src), "ficom{l}\t$src">; def FICOMP32m: FPI<0xDA, MRM3m, (outs), (ins i32mem:$src), "ficomp{l}\t$src">; -def FCOM64m : FPI<0xDC, MRM2m, (outs), (ins f64mem:$src), "fcom{ll}\t$src">; -def FCOMP64m : FPI<0xDC, MRM3m, (outs), (ins f64mem:$src), "fcomp{ll}\t$src">; +def FCOM64m : FPI<0xDC, MRM2m, (outs), (ins f64mem:$src), "fcom{l}\t$src">; +def FCOMP64m : FPI<0xDC, MRM3m, (outs), (ins f64mem:$src), "fcomp{l}\t$src">; def FRSTORm : FPI<0xDD, MRM4m, (outs f32mem:$dst), (ins), "frstor\t$dst">; def FSAVEm : FPI<0xDD, MRM6m, (outs f32mem:$dst), (ins), "fnsave\t$dst">; def FNSTSWm : FPI<0xDD, MRM7m, (outs f32mem:$dst), (ins), "fnstsw\t$dst">; -def FICOM16m : FPI<0xDE, MRM2m, (outs), (ins i16mem:$src), "ficom{w}\t$src">; -def FICOMP16m: FPI<0xDE, MRM3m, (outs), (ins i16mem:$src), "ficomp{w}\t$src">; +def FICOM16m : FPI<0xDE, MRM2m, (outs), (ins i16mem:$src), "ficom{s}\t$src">; +def FICOMP16m: FPI<0xDE, MRM3m, (outs), (ins i16mem:$src), "ficomp{s}\t$src">; def FBLDm : FPI<0xDF, MRM4m, (outs), (ins f32mem:$src), "fbld\t$src">; def FBSTPm : FPI<0xDF, MRM6m, (outs f32mem:$dst), (ins), "fbstp\t$dst">; diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index ccb7b055b0795..a21bfb9ea9d02 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -27,6 +27,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/MC/MCInst.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -1684,6 +1685,7 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, // Start from the bottom of the block and work up, examining the // terminator instructions. MachineBasicBlock::iterator I = MBB.end(); + MachineBasicBlock::iterator UnCondBrIter = MBB.end(); while (I != MBB.begin()) { --I; if (I->isDebugValue()) @@ -1701,6 +1703,8 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, // Handle unconditional branches. if (I->getOpcode() == X86::JMP_4) { + UnCondBrIter = I; + if (!AllowModify) { TBB = I->getOperand(0).getMBB(); continue; @@ -1718,10 +1722,11 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, TBB = 0; I->eraseFromParent(); I = MBB.end(); + UnCondBrIter = MBB.end(); continue; } - // TBB is used to indicate the unconditinal destination. + // TBB is used to indicate the unconditional destination. TBB = I->getOperand(0).getMBB(); continue; } @@ -1733,6 +1738,45 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, // Working from the bottom, handle the first conditional branch. if (Cond.empty()) { + MachineBasicBlock *TargetBB = I->getOperand(0).getMBB(); + if (AllowModify && UnCondBrIter != MBB.end() && + MBB.isLayoutSuccessor(TargetBB)) { + // If we can modify the code and it ends in something like: + // + // jCC L1 + // jmp L2 + // L1: + // ... + // L2: + // + // Then we can change this to: + // + // jnCC L2 + // L1: + // ... + // L2: + // + // Which is a bit more efficient. + // We conditionally jump to the fall-through block. + BranchCode = GetOppositeBranchCondition(BranchCode); + unsigned JNCC = GetCondBranchFromCond(BranchCode); + MachineBasicBlock::iterator OldInst = I; + + BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(JNCC)) + .addMBB(UnCondBrIter->getOperand(0).getMBB()); + BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(X86::JMP_4)) + .addMBB(TargetBB); + MBB.addSuccessor(TargetBB); + + OldInst->eraseFromParent(); + UnCondBrIter->eraseFromParent(); + + // Restart the analysis. + UnCondBrIter = MBB.end(); + I = MBB.end(); + continue; + } + FBB = TBB; TBB = I->getOperand(0).getMBB(); Cond.push_back(MachineOperand::CreateImm(BranchCode)); @@ -2276,6 +2320,19 @@ bool X86InstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, return true; } +MachineInstr* +X86InstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, + int FrameIx, uint64_t Offset, + const MDNode *MDPtr, + DebugLoc DL) const { + X86AddressMode AM; + AM.BaseType = X86AddressMode::FrameIndexBase; + AM.Base.FrameIndex = FrameIx; + MachineInstrBuilder MIB = BuildMI(MF, DL, get(X86::DBG_VALUE)); + addFullAddress(MIB, AM).addImm(Offset).addMetadata(MDPtr); + return &*MIB; +} + static MachineInstr *FuseTwoAddrInst(MachineFunction &MF, unsigned Opcode, const SmallVectorImpl<MachineOperand> &MOs, MachineInstr *MI, @@ -2586,7 +2643,7 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, Ty = Type::getDoubleTy(MF.getFunction()->getContext()); else Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()), 4); - Constant *C = LoadMI->getOpcode() == X86::V_SETALLONES ? + const Constant *C = LoadMI->getOpcode() == X86::V_SETALLONES ? Constant::getAllOnesValue(Ty) : Constant::getNullValue(Ty); unsigned CPI = MCP.getConstantPoolIndex(C, Alignment); @@ -3406,6 +3463,7 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI, } case TargetOpcode::DBG_LABEL: case TargetOpcode::EH_LABEL: + case TargetOpcode::DBG_VALUE: break; case TargetOpcode::IMPLICIT_DEF: case TargetOpcode::KILL: @@ -3603,7 +3661,7 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI, std::string msg; raw_string_ostream Msg(msg); Msg << "Cannot determine size: " << MI; - llvm_report_error(Msg.str()); + report_fatal_error(Msg.str()); } @@ -3709,3 +3767,9 @@ void X86InstrInfo::SetSSEDomain(MachineInstr *MI, unsigned Domain) const { assert(table && "Cannot change domain"); MI->setDesc(get(table[Domain-1])); } + +/// getNoopForMachoTarget - Return the noop instruction to use for a noop. +void X86InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const { + NopInst.setOpcode(X86::NOOP); +} + diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h index f0bdd06cce882..df99c7fd63c4e 100644 --- a/lib/Target/X86/X86InstrInfo.h +++ b/lib/Target/X86/X86InstrInfo.h @@ -623,6 +623,12 @@ public: MachineBasicBlock::iterator MI, const std::vector<CalleeSavedInfo> &CSI) const; + virtual + MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF, + int FrameIx, uint64_t Offset, + const MDNode *MDPtr, + DebugLoc DL) const; + /// foldMemoryOperand - If this target supports it, fold a load or store of /// the specified stack slot into the specified machine instruction for the /// specified operand(s). If this is possible, the target should perform the @@ -687,6 +693,8 @@ public: int64_t Offset1, int64_t Offset2, unsigned NumLoads) const; + virtual void getNoopForMachoTarget(MCInst &NopInst) const; + virtual bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const; diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 940b439d22a9b..a2754eac2154b 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -487,34 +487,6 @@ def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{ return (~KnownZero0 & ~KnownZero1) == 0; }]>; -// 'shld' and 'shrd' instruction patterns. Note that even though these have -// the srl and shl in their patterns, the C++ code must still check for them, -// because predicates are tested before children nodes are explored. - -def shrd : PatFrag<(ops node:$src1, node:$amt1, node:$src2, node:$amt2), - (or (srl node:$src1, node:$amt1), - (shl node:$src2, node:$amt2)), [{ - assert(N->getOpcode() == ISD::OR); - return N->getOperand(0).getOpcode() == ISD::SRL && - N->getOperand(1).getOpcode() == ISD::SHL && - isa<ConstantSDNode>(N->getOperand(0).getOperand(1)) && - isa<ConstantSDNode>(N->getOperand(1).getOperand(1)) && - N->getOperand(0).getConstantOperandVal(1) == - N->getValueSizeInBits(0) - N->getOperand(1).getConstantOperandVal(1); -}]>; - -def shld : PatFrag<(ops node:$src1, node:$amt1, node:$src2, node:$amt2), - (or (shl node:$src1, node:$amt1), - (srl node:$src2, node:$amt2)), [{ - assert(N->getOpcode() == ISD::OR); - return N->getOperand(0).getOpcode() == ISD::SHL && - N->getOperand(1).getOpcode() == ISD::SRL && - isa<ConstantSDNode>(N->getOperand(0).getOperand(1)) && - isa<ConstantSDNode>(N->getOperand(1).getOperand(1)) && - N->getOperand(0).getConstantOperandVal(1) == - N->getValueSizeInBits(0) - N->getOperand(1).getConstantOperandVal(1); -}]>; - //===----------------------------------------------------------------------===// // Instruction list... // @@ -781,11 +753,11 @@ def PUSH32rmm: I<0xFF, MRM6m, (outs), (ins i32mem:$src), "push{l}\t$src",[]>; } let Defs = [ESP], Uses = [ESP], neverHasSideEffects = 1, mayStore = 1 in { -def PUSH32i8 : Ii8<0x6a, RawFrm, (outs), (ins i8imm:$imm), - "push{l}\t$imm", []>; -def PUSH32i16 : Ii16<0x68, RawFrm, (outs), (ins i16imm:$imm), +def PUSHi8 : Ii8<0x6a, RawFrm, (outs), (ins i32i8imm:$imm), "push{l}\t$imm", []>; -def PUSH32i32 : Ii32<0x68, RawFrm, (outs), (ins i32imm:$imm), +def PUSHi16 : Ii16<0x68, RawFrm, (outs), (ins i16imm:$imm), + "push{w}\t$imm", []>, OpSize; +def PUSHi32 : Ii32<0x68, RawFrm, (outs), (ins i32imm:$imm), "push{l}\t$imm", []>; } @@ -809,10 +781,11 @@ let isTwoAddress = 1 in // GR32 = bswap GR32 let Defs = [EFLAGS] in { def BSF16rr : I<0xBC, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), "bsf{w}\t{$src, $dst|$dst, $src}", - [(set GR16:$dst, EFLAGS, (X86bsf GR16:$src))]>, TB; + [(set GR16:$dst, EFLAGS, (X86bsf GR16:$src))]>, TB, OpSize; def BSF16rm : I<0xBC, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), "bsf{w}\t{$src, $dst|$dst, $src}", - [(set GR16:$dst, EFLAGS, (X86bsf (loadi16 addr:$src)))]>, TB; + [(set GR16:$dst, EFLAGS, (X86bsf (loadi16 addr:$src)))]>, TB, + OpSize; def BSF32rr : I<0xBC, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), "bsf{l}\t{$src, $dst|$dst, $src}", [(set GR32:$dst, EFLAGS, (X86bsf GR32:$src))]>, TB; @@ -822,10 +795,11 @@ def BSF32rm : I<0xBC, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), def BSR16rr : I<0xBD, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), "bsr{w}\t{$src, $dst|$dst, $src}", - [(set GR16:$dst, EFLAGS, (X86bsr GR16:$src))]>, TB; + [(set GR16:$dst, EFLAGS, (X86bsr GR16:$src))]>, TB, OpSize; def BSR16rm : I<0xBD, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), "bsr{w}\t{$src, $dst|$dst, $src}", - [(set GR16:$dst, EFLAGS, (X86bsr (loadi16 addr:$src)))]>, TB; + [(set GR16:$dst, EFLAGS, (X86bsr (loadi16 addr:$src)))]>, TB, + OpSize; def BSR32rr : I<0xBD, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), "bsr{l}\t{$src, $dst|$dst, $src}", [(set GR32:$dst, EFLAGS, (X86bsr GR32:$src))]>, TB; @@ -4476,7 +4450,11 @@ def : Pat<(extloadi32i16 addr:$src), (MOVZX32rm16 addr:$src)>; // avoid partial-register updates. def : Pat<(i16 (anyext GR8 :$src)), (MOVZX16rr8 GR8 :$src)>; def : Pat<(i32 (anyext GR8 :$src)), (MOVZX32rr8 GR8 :$src)>; -def : Pat<(i32 (anyext GR16:$src)), (MOVZX32rr16 GR16:$src)>; + +// Except for i16 -> i32 since isel expect i16 ops to be promoted to i32. +def : Pat<(i32 (anyext GR16:$src)), + (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR16:$src, x86_subreg_16bit)>; + //===----------------------------------------------------------------------===// // Some peepholes @@ -4537,11 +4515,11 @@ def : Pat<(i8 (trunc GR16:$src)), // h-register tricks def : Pat<(i8 (trunc (srl_su GR16:$src, (i8 8)))), - (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), + (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), x86_subreg_8bit_hi)>, Requires<[In32BitMode]>; def : Pat<(i8 (trunc (srl_su GR32:$src, (i8 8)))), - (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)), + (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)), x86_subreg_8bit_hi)>, Requires<[In32BitMode]>; def : Pat<(srl GR16:$src, (i8 8)), @@ -4566,6 +4544,11 @@ def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)), GR32_ABCD)), x86_subreg_8bit_hi))>, Requires<[In32BitMode]>; +def : Pat<(srl (and_su GR32:$src, 0xff00), (i8 8)), + (MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, + GR32_ABCD)), + x86_subreg_8bit_hi))>, + Requires<[In32BitMode]>; // (shl x, 1) ==> (add x, x) def : Pat<(shl GR8 :$src1, (i8 1)), (ADD8rr GR8 :$src1, GR8 :$src1)>; @@ -4612,111 +4595,13 @@ def : Pat<(store (sra (loadi16 addr:$dst), (and CL, 31)), addr:$dst), def : Pat<(store (sra (loadi32 addr:$dst), (and CL, 31)), addr:$dst), (SAR32mCL addr:$dst)>; -// (or (x >> c) | (y << (32 - c))) ==> (shrd32 x, y, c) -def : Pat<(or (srl GR32:$src1, CL:$amt), - (shl GR32:$src2, (sub 32, CL:$amt))), - (SHRD32rrCL GR32:$src1, GR32:$src2)>; - -def : Pat<(store (or (srl (loadi32 addr:$dst), CL:$amt), - (shl GR32:$src2, (sub 32, CL:$amt))), addr:$dst), - (SHRD32mrCL addr:$dst, GR32:$src2)>; - -def : Pat<(or (srl GR32:$src1, (i8 (trunc ECX:$amt))), - (shl GR32:$src2, (i8 (trunc (sub 32, ECX:$amt))))), - (SHRD32rrCL GR32:$src1, GR32:$src2)>; - -def : Pat<(store (or (srl (loadi32 addr:$dst), (i8 (trunc ECX:$amt))), - (shl GR32:$src2, (i8 (trunc (sub 32, ECX:$amt))))), - addr:$dst), - (SHRD32mrCL addr:$dst, GR32:$src2)>; - -def : Pat<(shrd GR32:$src1, (i8 imm:$amt1), GR32:$src2, (i8 imm/*:$amt2*/)), - (SHRD32rri8 GR32:$src1, GR32:$src2, (i8 imm:$amt1))>; - -def : Pat<(store (shrd (loadi32 addr:$dst), (i8 imm:$amt1), - GR32:$src2, (i8 imm/*:$amt2*/)), addr:$dst), - (SHRD32mri8 addr:$dst, GR32:$src2, (i8 imm:$amt1))>; - -// (or (x << c) | (y >> (32 - c))) ==> (shld32 x, y, c) -def : Pat<(or (shl GR32:$src1, CL:$amt), - (srl GR32:$src2, (sub 32, CL:$amt))), - (SHLD32rrCL GR32:$src1, GR32:$src2)>; - -def : Pat<(store (or (shl (loadi32 addr:$dst), CL:$amt), - (srl GR32:$src2, (sub 32, CL:$amt))), addr:$dst), - (SHLD32mrCL addr:$dst, GR32:$src2)>; - -def : Pat<(or (shl GR32:$src1, (i8 (trunc ECX:$amt))), - (srl GR32:$src2, (i8 (trunc (sub 32, ECX:$amt))))), - (SHLD32rrCL GR32:$src1, GR32:$src2)>; - -def : Pat<(store (or (shl (loadi32 addr:$dst), (i8 (trunc ECX:$amt))), - (srl GR32:$src2, (i8 (trunc (sub 32, ECX:$amt))))), - addr:$dst), - (SHLD32mrCL addr:$dst, GR32:$src2)>; - -def : Pat<(shld GR32:$src1, (i8 imm:$amt1), GR32:$src2, (i8 imm/*:$amt2*/)), - (SHLD32rri8 GR32:$src1, GR32:$src2, (i8 imm:$amt1))>; - -def : Pat<(store (shld (loadi32 addr:$dst), (i8 imm:$amt1), - GR32:$src2, (i8 imm/*:$amt2*/)), addr:$dst), - (SHLD32mri8 addr:$dst, GR32:$src2, (i8 imm:$amt1))>; - -// (or (x >> c) | (y << (16 - c))) ==> (shrd16 x, y, c) -def : Pat<(or (srl GR16:$src1, CL:$amt), - (shl GR16:$src2, (sub 16, CL:$amt))), - (SHRD16rrCL GR16:$src1, GR16:$src2)>; - -def : Pat<(store (or (srl (loadi16 addr:$dst), CL:$amt), - (shl GR16:$src2, (sub 16, CL:$amt))), addr:$dst), - (SHRD16mrCL addr:$dst, GR16:$src2)>; - -def : Pat<(or (srl GR16:$src1, (i8 (trunc CX:$amt))), - (shl GR16:$src2, (i8 (trunc (sub 16, CX:$amt))))), - (SHRD16rrCL GR16:$src1, GR16:$src2)>; - -def : Pat<(store (or (srl (loadi16 addr:$dst), (i8 (trunc CX:$amt))), - (shl GR16:$src2, (i8 (trunc (sub 16, CX:$amt))))), - addr:$dst), - (SHRD16mrCL addr:$dst, GR16:$src2)>; - -def : Pat<(shrd GR16:$src1, (i8 imm:$amt1), GR16:$src2, (i8 imm/*:$amt2*/)), - (SHRD16rri8 GR16:$src1, GR16:$src2, (i8 imm:$amt1))>; - -def : Pat<(store (shrd (loadi16 addr:$dst), (i8 imm:$amt1), - GR16:$src2, (i8 imm/*:$amt2*/)), addr:$dst), - (SHRD16mri8 addr:$dst, GR16:$src2, (i8 imm:$amt1))>; - -// (or (x << c) | (y >> (16 - c))) ==> (shld16 x, y, c) -def : Pat<(or (shl GR16:$src1, CL:$amt), - (srl GR16:$src2, (sub 16, CL:$amt))), - (SHLD16rrCL GR16:$src1, GR16:$src2)>; - -def : Pat<(store (or (shl (loadi16 addr:$dst), CL:$amt), - (srl GR16:$src2, (sub 16, CL:$amt))), addr:$dst), - (SHLD16mrCL addr:$dst, GR16:$src2)>; - -def : Pat<(or (shl GR16:$src1, (i8 (trunc CX:$amt))), - (srl GR16:$src2, (i8 (trunc (sub 16, CX:$amt))))), - (SHLD16rrCL GR16:$src1, GR16:$src2)>; - -def : Pat<(store (or (shl (loadi16 addr:$dst), (i8 (trunc CX:$amt))), - (srl GR16:$src2, (i8 (trunc (sub 16, CX:$amt))))), - addr:$dst), - (SHLD16mrCL addr:$dst, GR16:$src2)>; - -def : Pat<(shld GR16:$src1, (i8 imm:$amt1), GR16:$src2, (i8 imm/*:$amt2*/)), - (SHLD16rri8 GR16:$src1, GR16:$src2, (i8 imm:$amt1))>; - -def : Pat<(store (shld (loadi16 addr:$dst), (i8 imm:$amt1), - GR16:$src2, (i8 imm/*:$amt2*/)), addr:$dst), - (SHLD16mri8 addr:$dst, GR16:$src2, (i8 imm:$amt1))>; - // (anyext (setcc_carry)) -> (setcc_carry) def : Pat<(i16 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))), (SETB_C16r)>; def : Pat<(i32 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))), (SETB_C32r)>; +def : Pat<(i32 (anyext (i16 (X86setcc_c X86_COND_B, EFLAGS)))), + (SETB_C32r)>; // (or x1, x2) -> (add x1, x2) if two operands are known not to share bits. let AddedComplexity = 5 in { // Try this before the selecting to OR @@ -4907,18 +4792,6 @@ def : Pat<(and GR16:$src1, i16immSExt8:$src2), def : Pat<(and GR32:$src1, i32immSExt8:$src2), (AND32ri8 GR32:$src1, i32immSExt8:$src2)>; -// -disable-16bit support. -def : Pat<(truncstorei16 (i16 imm:$src), addr:$dst), - (MOV16mi addr:$dst, imm:$src)>; -def : Pat<(truncstorei16 GR32:$src, addr:$dst), - (MOV16mr addr:$dst, (EXTRACT_SUBREG GR32:$src, x86_subreg_16bit))>; -def : Pat<(i32 (sextloadi16 addr:$dst)), - (MOVSX32rm16 addr:$dst)>; -def : Pat<(i32 (zextloadi16 addr:$dst)), - (MOVZX32rm16 addr:$dst)>; -def : Pat<(i32 (extloadi16 addr:$dst)), - (MOVZX32rm16 addr:$dst)>; - //===----------------------------------------------------------------------===// // Floating Point Stack Support //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td index 1c81c5e981ad4..744af50e3e454 100644 --- a/lib/Target/X86/X86InstrMMX.td +++ b/lib/Target/X86/X86InstrMMX.td @@ -117,7 +117,7 @@ def MMX_MOVD64mr : MMXI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR64:$src), "movd\t{$src, $dst|$dst, $src}", []>; def MMX_MOVD64grr : MMXI<0x7E, MRMDestReg, (outs), (ins GR32:$dst, VR64:$src), "movd\t{$src, $dst|$dst, $src}", []>; -def MMX_MOVQ64gmr : MMXRI<0x7E, MRMDestMem, (outs), +def MMX_MOVQ64gmr : MMXRI<0x7F, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src), "movq\t{$src, $dst|$dst, $src}", []>; @@ -167,6 +167,9 @@ let neverHasSideEffects = 1 in def MMX_MOVQ2FR64rr: SSDIi8<0xD6, MRMSrcReg, (outs FR64:$dst), (ins VR64:$src), "movq2dq\t{$src, $dst|$dst, $src}", []>; +def MMX_MOVFR642Qrr: SSDIi8<0xD6, MRMSrcReg, (outs VR64:$dst), (ins FR64:$src), + "movdq2q\t{$src, $dst|$dst, $src}", []>; + def MMX_MOVNTQmr : MMXI<0xE7, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src), "movntq\t{$src, $dst|$dst, $src}", [(int_x86_mmx_movnt_dq addr:$dst, VR64:$src)]>; @@ -569,6 +572,14 @@ def : Pat<(f64 (bitconvert (v4i16 VR64:$src))), (MMX_MOVQ2FR64rr VR64:$src)>; def : Pat<(f64 (bitconvert (v8i8 VR64:$src))), (MMX_MOVQ2FR64rr VR64:$src)>; +def : Pat<(v1i64 (bitconvert (f64 FR64:$src))), + (MMX_MOVFR642Qrr FR64:$src)>; +def : Pat<(v2i32 (bitconvert (f64 FR64:$src))), + (MMX_MOVFR642Qrr FR64:$src)>; +def : Pat<(v4i16 (bitconvert (f64 FR64:$src))), + (MMX_MOVFR642Qrr FR64:$src)>; +def : Pat<(v8i8 (bitconvert (f64 FR64:$src))), + (MMX_MOVFR642Qrr FR64:$src)>; let AddedComplexity = 20 in { def : Pat<(v2i32 (X86vzmovl (bc_v2i32 (load_mmx addr:$src)))), diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 11f7e27c4fc3a..212958025bde2 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -2878,6 +2878,7 @@ let Constraints = "$src1 = $dst" in { } } +let ImmT = NoImm in { // None of these have i8 immediate fields. defm PHADDW : SS3I_binop_rm_int_16<0x01, "phaddw", int_x86_ssse3_phadd_w, int_x86_ssse3_phadd_w_128>; @@ -2902,6 +2903,7 @@ defm PMADDUBSW : SS3I_binop_rm_int_8 <0x04, "pmaddubsw", defm PMULHRSW : SS3I_binop_rm_int_16<0x0B, "pmulhrsw", int_x86_ssse3_pmul_hr_sw, int_x86_ssse3_pmul_hr_sw_128, 1>; + defm PSHUFB : SS3I_binop_rm_int_8 <0x00, "pshufb", int_x86_ssse3_pshuf_b, int_x86_ssse3_pshuf_b_128>; @@ -2914,7 +2916,9 @@ defm PSIGNW : SS3I_binop_rm_int_16<0x09, "psignw", defm PSIGND : SS3I_binop_rm_int_32<0x0A, "psignd", int_x86_ssse3_psign_d, int_x86_ssse3_psign_d_128>; +} +// palignr patterns. let Constraints = "$src1 = $dst" in { def PALIGNR64rr : SS3AI<0x0F, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src1, VR64:$src2, i8imm:$src3), @@ -2935,26 +2939,29 @@ let Constraints = "$src1 = $dst" in { []>, OpSize; } -// palignr patterns. -def : Pat<(int_x86_ssse3_palign_r VR64:$src1, VR64:$src2, (i8 imm:$src3)), - (PALIGNR64rr VR64:$src1, VR64:$src2, (BYTE_imm imm:$src3))>, +let AddedComplexity = 5 in { + +def : Pat<(v1i64 (palign:$src3 VR64:$src1, VR64:$src2)), + (PALIGNR64rr VR64:$src2, VR64:$src1, + (SHUFFLE_get_palign_imm VR64:$src3))>, Requires<[HasSSSE3]>; -def : Pat<(int_x86_ssse3_palign_r VR64:$src1, - (memop64 addr:$src2), - (i8 imm:$src3)), - (PALIGNR64rm VR64:$src1, addr:$src2, (BYTE_imm imm:$src3))>, +def : Pat<(v2i32 (palign:$src3 VR64:$src1, VR64:$src2)), + (PALIGNR64rr VR64:$src2, VR64:$src1, + (SHUFFLE_get_palign_imm VR64:$src3))>, Requires<[HasSSSE3]>; - -def : Pat<(int_x86_ssse3_palign_r_128 VR128:$src1, VR128:$src2, (i8 imm:$src3)), - (PALIGNR128rr VR128:$src1, VR128:$src2, (BYTE_imm imm:$src3))>, +def : Pat<(v2f32 (palign:$src3 VR64:$src1, VR64:$src2)), + (PALIGNR64rr VR64:$src2, VR64:$src1, + (SHUFFLE_get_palign_imm VR64:$src3))>, Requires<[HasSSSE3]>; -def : Pat<(int_x86_ssse3_palign_r_128 VR128:$src1, - (memopv2i64 addr:$src2), - (i8 imm:$src3)), - (PALIGNR128rm VR128:$src1, addr:$src2, (BYTE_imm imm:$src3))>, +def : Pat<(v4i16 (palign:$src3 VR64:$src1, VR64:$src2)), + (PALIGNR64rr VR64:$src2, VR64:$src1, + (SHUFFLE_get_palign_imm VR64:$src3))>, + Requires<[HasSSSE3]>; +def : Pat<(v8i8 (palign:$src3 VR64:$src1, VR64:$src2)), + (PALIGNR64rr VR64:$src2, VR64:$src1, + (SHUFFLE_get_palign_imm VR64:$src3))>, Requires<[HasSSSE3]>; -let AddedComplexity = 5 in { def : Pat<(v4i32 (palign:$src3 VR128:$src1, VR128:$src2)), (PALIGNR128rr VR128:$src2, VR128:$src1, (SHUFFLE_get_palign_imm VR128:$src3))>, @@ -3510,7 +3517,7 @@ defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", defm DPPD : SS41I_binop_rmi_int<0x41, "dppd", int_x86_sse41_dppd, 1>; defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw", - int_x86_sse41_mpsadbw, 1>; + int_x86_sse41_mpsadbw, 0>; /// SS41I_ternary_int - SSE 4.1 ternary operator diff --git a/lib/Target/X86/X86MCAsmInfo.cpp b/lib/Target/X86/X86MCAsmInfo.cpp index d257ee38c5c78..2b8720bac3438 100644 --- a/lib/Target/X86/X86MCAsmInfo.cpp +++ b/lib/Target/X86/X86MCAsmInfo.cpp @@ -14,6 +14,7 @@ #include "X86MCAsmInfo.h" #include "X86TargetMachine.h" #include "llvm/ADT/Triple.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/Support/CommandLine.h" using namespace llvm; @@ -95,9 +96,10 @@ X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &T) { Data64bitsDirective = 0; } -MCSection *X86ELFMCAsmInfo::getNonexecutableStackSection(MCContext &Ctx) const { - return MCSectionELF::Create(".note.GNU-stack", MCSectionELF::SHT_PROGBITS, - 0, SectionKind::getMetadata(), false, Ctx); +const MCSection *X86ELFMCAsmInfo:: +getNonexecutableStackSection(MCContext &Ctx) const { + return Ctx.getELFSection(".note.GNU-stack", MCSectionELF::SHT_PROGBITS, + 0, SectionKind::getMetadata(), false); } X86MCAsmInfoCOFF::X86MCAsmInfoCOFF(const Triple &Triple) { diff --git a/lib/Target/X86/X86MCAsmInfo.h b/lib/Target/X86/X86MCAsmInfo.h index 69716bfc07f60..581522567d094 100644 --- a/lib/Target/X86/X86MCAsmInfo.h +++ b/lib/Target/X86/X86MCAsmInfo.h @@ -27,7 +27,7 @@ namespace llvm { struct X86ELFMCAsmInfo : public MCAsmInfo { explicit X86ELFMCAsmInfo(const Triple &Triple); - virtual MCSection *getNonexecutableStackSection(MCContext &Ctx) const; + virtual const MCSection *getNonexecutableStackSection(MCContext &Ctx) const; }; struct X86MCAsmInfoCOFF : public MCAsmInfoCOFF { diff --git a/lib/Target/X86/X86MachineFunctionInfo.h b/lib/Target/X86/X86MachineFunctionInfo.h index 4b2529bccf01d..06043ecd3f30e 100644 --- a/lib/Target/X86/X86MachineFunctionInfo.h +++ b/lib/Target/X86/X86MachineFunctionInfo.h @@ -31,7 +31,8 @@ class X86MachineFunctionInfo : public MachineFunctionInfo { /// stack frame in bytes. unsigned CalleeSavedFrameSize; - /// BytesToPopOnReturn - Number of bytes function pops on return. + /// BytesToPopOnReturn - Number of bytes function pops on return (in addition + /// to the space used by the return address). /// Used on windows platform for stdcall & fastcall name decoration unsigned BytesToPopOnReturn; @@ -52,6 +53,19 @@ class X86MachineFunctionInfo : public MachineFunctionInfo { /// relocation models. unsigned GlobalBaseReg; + /// ReserveFP - whether the function should reserve the frame pointer + /// when allocating, even if there may not actually be a frame pointer used. + bool ReserveFP; + + /// VarArgsFrameIndex - FrameIndex for start of varargs area. + int VarArgsFrameIndex; + /// RegSaveFrameIndex - X86-64 vararg func register save area. + int RegSaveFrameIndex; + /// VarArgsGPOffset - X86-64 vararg func int reg offset. + unsigned VarArgsGPOffset; + /// VarArgsFPOffset - X86-64 vararg func fp reg offset. + unsigned VarArgsFPOffset; + public: X86MachineFunctionInfo() : ForceFramePointer(false), CalleeSavedFrameSize(0), @@ -59,7 +73,11 @@ public: ReturnAddrIndex(0), TailCallReturnAddrDelta(0), SRetReturnReg(0), - GlobalBaseReg(0) {} + GlobalBaseReg(0), + VarArgsFrameIndex(0), + RegSaveFrameIndex(0), + VarArgsGPOffset(0), + VarArgsFPOffset(0) {} explicit X86MachineFunctionInfo(MachineFunction &MF) : ForceFramePointer(false), @@ -68,7 +86,12 @@ public: ReturnAddrIndex(0), TailCallReturnAddrDelta(0), SRetReturnReg(0), - GlobalBaseReg(0) {} + GlobalBaseReg(0), + ReserveFP(false), + VarArgsFrameIndex(0), + RegSaveFrameIndex(0), + VarArgsGPOffset(0), + VarArgsFPOffset(0) {} bool getForceFramePointer() const { return ForceFramePointer;} void setForceFramePointer(bool forceFP) { ForceFramePointer = forceFP; } @@ -90,6 +113,21 @@ public: unsigned getGlobalBaseReg() const { return GlobalBaseReg; } void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; } + + bool getReserveFP() const { return ReserveFP; } + void setReserveFP(bool reserveFP) { ReserveFP = reserveFP; } + + int getVarArgsFrameIndex() const { return VarArgsFrameIndex; } + void setVarArgsFrameIndex(int Idx) { VarArgsFrameIndex = Idx; } + + int getRegSaveFrameIndex() const { return RegSaveFrameIndex; } + void setRegSaveFrameIndex(int Idx) { RegSaveFrameIndex = Idx; } + + unsigned getVarArgsGPOffset() const { return VarArgsGPOffset; } + void setVarArgsGPOffset(unsigned Offset) { VarArgsGPOffset = Offset; } + + unsigned getVarArgsFPOffset() const { return VarArgsFPOffset; } + void setVarArgsFPOffset(unsigned Offset) { VarArgsFPOffset = Offset; } }; } // End llvm namespace diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index 32f28a5200742..a3e04b0986006 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -439,7 +439,7 @@ bool X86RegisterInfo::hasFP(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); const MachineModuleInfo &MMI = MF.getMMI(); - return (NoFramePointerElim || + return (DisableFramePointerElim(MF) || needsStackRealignment(MF) || MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken() || @@ -464,7 +464,7 @@ bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const { // variable-sized allocas. // FIXME: Temporary disable the error - it seems to be too conservative. if (0 && requiresRealignment && MFI->hasVarSizedObjects()) - llvm_report_error( + report_fatal_error( "Stack realignment in presense of dynamic allocas is not supported"); return (requiresRealignment && !MFI->hasVarSizedObjects()); @@ -608,8 +608,12 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int FrameIndex = MI.getOperand(i).getIndex(); unsigned BasePtr; + unsigned Opc = MI.getOpcode(); + bool AfterFPPop = Opc == X86::TAILJMPm64 || Opc == X86::TAILJMPm; if (needsStackRealignment(MF)) BasePtr = (FrameIndex < 0 ? FramePtr : StackPtr); + else if (AfterFPPop) + BasePtr = StackPtr; else BasePtr = (hasFP(MF) ? FramePtr : StackPtr); @@ -618,16 +622,22 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MI.getOperand(i).ChangeToRegister(BasePtr, false); // Now add the frame object offset to the offset from EBP. + int FIOffset; + if (AfterFPPop) { + // Tail call jmp happens after FP is popped. + const TargetFrameInfo &TFI = *MF.getTarget().getFrameInfo(); + const MachineFrameInfo *MFI = MF.getFrameInfo(); + FIOffset = MFI->getObjectOffset(FrameIndex) - TFI.getOffsetOfLocalArea(); + } else + FIOffset = getFrameIndexOffset(MF, FrameIndex); + if (MI.getOperand(i+3).isImm()) { // Offset is a 32-bit integer. - int Offset = getFrameIndexOffset(MF, FrameIndex) + - (int)(MI.getOperand(i + 3).getImm()); - + int Offset = FIOffset + (int)(MI.getOperand(i + 3).getImm()); MI.getOperand(i + 3).ChangeToImmediate(Offset); } else { // Offset is symbolic. This is extremely rare. - uint64_t Offset = getFrameIndexOffset(MF, FrameIndex) + - (uint64_t)MI.getOperand(i+3).getOffset(); + uint64_t Offset = FIOffset + (uint64_t)MI.getOperand(i+3).getOffset(); MI.getOperand(i+3).setOffset(Offset); } return 0; @@ -1487,3 +1497,46 @@ unsigned getX86SubSuperRegister(unsigned Reg, EVT VT, bool High) { } #include "X86GenRegisterInfo.inc" + +namespace { + struct MSAH : public MachineFunctionPass { + static char ID; + MSAH() : MachineFunctionPass(&ID) {} + + virtual bool runOnMachineFunction(MachineFunction &MF) { + const X86TargetMachine *TM = + static_cast<const X86TargetMachine *>(&MF.getTarget()); + const X86RegisterInfo *X86RI = TM->getRegisterInfo(); + MachineRegisterInfo &RI = MF.getRegInfo(); + X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); + unsigned StackAlignment = X86RI->getStackAlignment(); + + // Be over-conservative: scan over all vreg defs and find whether vector + // registers are used. If yes, there is a possibility that vector register + // will be spilled and thus require dynamic stack realignment. + for (unsigned RegNum = TargetRegisterInfo::FirstVirtualRegister; + RegNum < RI.getLastVirtReg(); ++RegNum) + if (RI.getRegClass(RegNum)->getAlignment() > StackAlignment) { + FuncInfo->setReserveFP(true); + return true; + } + + // Nothing to do + return false; + } + + virtual const char *getPassName() const { + return "X86 Maximal Stack Alignment Check"; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } + }; + + char MSAH::ID = 0; +} + +FunctionPass* +llvm::createX86MaxStackAlignmentHeuristicPass() { return new MSAH(); } diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td index 76b8f7a953c9c..49a6ca0928145 100644 --- a/lib/Target/X86/X86RegisterInfo.td +++ b/lib/Target/X86/X86RegisterInfo.td @@ -352,11 +352,12 @@ def GR8 : RegisterClass<"X86", [i8], 8, const TargetMachine &TM = MF.getTarget(); const TargetRegisterInfo *RI = TM.getRegisterInfo(); const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>(); + const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>(); // Does the function dedicate RBP / EBP to being a frame ptr? if (!Subtarget.is64Bit()) // In 32-mode, none of the 8-bit registers aliases EBP or ESP. return begin() + 8; - else if (RI->hasFP(MF)) + else if (RI->hasFP(MF) || MFI->getReserveFP()) // If so, don't allocate SPL or BPL. return array_endof(X86_GR8_AO_64) - 1; else @@ -396,9 +397,10 @@ def GR16 : RegisterClass<"X86", [i16], 16, const TargetMachine &TM = MF.getTarget(); const TargetRegisterInfo *RI = TM.getRegisterInfo(); const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>(); + const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>(); if (Subtarget.is64Bit()) { // Does the function dedicate RBP to being a frame ptr? - if (RI->hasFP(MF)) + if (RI->hasFP(MF) || MFI->getReserveFP()) // If so, don't allocate SP or BP. return array_endof(X86_GR16_AO_64) - 1; else @@ -406,7 +408,7 @@ def GR16 : RegisterClass<"X86", [i16], 16, return array_endof(X86_GR16_AO_64); } else { // Does the function dedicate EBP to being a frame ptr? - if (RI->hasFP(MF)) + if (RI->hasFP(MF) || MFI->getReserveFP()) // If so, don't allocate SP or BP. return begin() + 6; else @@ -447,9 +449,10 @@ def GR32 : RegisterClass<"X86", [i32], 32, const TargetMachine &TM = MF.getTarget(); const TargetRegisterInfo *RI = TM.getRegisterInfo(); const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>(); + const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>(); if (Subtarget.is64Bit()) { // Does the function dedicate RBP to being a frame ptr? - if (RI->hasFP(MF)) + if (RI->hasFP(MF) || MFI->getReserveFP()) // If so, don't allocate ESP or EBP. return array_endof(X86_GR32_AO_64) - 1; else @@ -457,7 +460,7 @@ def GR32 : RegisterClass<"X86", [i32], 32, return array_endof(X86_GR32_AO_64); } else { // Does the function dedicate EBP to being a frame ptr? - if (RI->hasFP(MF)) + if (RI->hasFP(MF) || MFI->getReserveFP()) // If so, don't allocate ESP or EBP. return begin() + 6; else @@ -484,9 +487,11 @@ def GR64 : RegisterClass<"X86", [i64], 64, const TargetMachine &TM = MF.getTarget(); const TargetRegisterInfo *RI = TM.getRegisterInfo(); const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>(); + const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>(); if (!Subtarget.is64Bit()) return begin(); // None of these are allocatable in 32-bit. - if (RI->hasFP(MF)) // Does the function dedicate RBP to being a frame ptr? + // Does the function dedicate RBP to being a frame ptr? + if (RI->hasFP(MF) || MFI->getReserveFP()) return end()-3; // If so, don't allocate RIP, RSP or RBP else return end()-2; // If not, just don't allocate RIP or RSP @@ -589,8 +594,9 @@ def GR16_NOREX : RegisterClass<"X86", [i16], 16, GR16_NOREXClass::allocation_order_end(const MachineFunction &MF) const { const TargetMachine &TM = MF.getTarget(); const TargetRegisterInfo *RI = TM.getRegisterInfo(); + const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>(); // Does the function dedicate RBP / EBP to being a frame ptr? - if (RI->hasFP(MF)) + if (RI->hasFP(MF) || MFI->getReserveFP()) // If so, don't allocate SP or BP. return end() - 2; else @@ -611,8 +617,9 @@ def GR32_NOREX : RegisterClass<"X86", [i32], 32, GR32_NOREXClass::allocation_order_end(const MachineFunction &MF) const { const TargetMachine &TM = MF.getTarget(); const TargetRegisterInfo *RI = TM.getRegisterInfo(); + const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>(); // Does the function dedicate RBP / EBP to being a frame ptr? - if (RI->hasFP(MF)) + if (RI->hasFP(MF) || MFI->getReserveFP()) // If so, don't allocate ESP or EBP. return end() - 2; else @@ -633,8 +640,9 @@ def GR64_NOREX : RegisterClass<"X86", [i64], 64, GR64_NOREXClass::allocation_order_end(const MachineFunction &MF) const { const TargetMachine &TM = MF.getTarget(); const TargetRegisterInfo *RI = TM.getRegisterInfo(); + const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>(); // Does the function dedicate RBP to being a frame ptr? - if (RI->hasFP(MF)) + if (RI->hasFP(MF) || MFI->getReserveFP()) // If so, don't allocate RIP, RSP or RBP. return end() - 3; else @@ -675,9 +683,10 @@ def GR32_NOSP : RegisterClass<"X86", [i32], 32, const TargetMachine &TM = MF.getTarget(); const TargetRegisterInfo *RI = TM.getRegisterInfo(); const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>(); + const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>(); if (Subtarget.is64Bit()) { // Does the function dedicate RBP to being a frame ptr? - if (RI->hasFP(MF)) + if (RI->hasFP(MF) || MFI->getReserveFP()) // If so, don't allocate EBP. return array_endof(X86_GR32_NOSP_AO_64) - 1; else @@ -685,7 +694,7 @@ def GR32_NOSP : RegisterClass<"X86", [i32], 32, return array_endof(X86_GR32_NOSP_AO_64); } else { // Does the function dedicate EBP to being a frame ptr? - if (RI->hasFP(MF)) + if (RI->hasFP(MF) || MFI->getReserveFP()) // If so, don't allocate EBP. return begin() + 6; else @@ -710,9 +719,11 @@ def GR64_NOSP : RegisterClass<"X86", [i64], 64, const TargetMachine &TM = MF.getTarget(); const TargetRegisterInfo *RI = TM.getRegisterInfo(); const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>(); + const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>(); if (!Subtarget.is64Bit()) return begin(); // None of these are allocatable in 32-bit. - if (RI->hasFP(MF)) // Does the function dedicate RBP to being a frame ptr? + // Does the function dedicate RBP to being a frame ptr? + if (RI->hasFP(MF) || MFI->getReserveFP()) return end()-1; // If so, don't allocate RBP else return end(); // If not, any reg in this class is ok. @@ -733,8 +744,9 @@ def GR64_NOREX_NOSP : RegisterClass<"X86", [i64], 64, { const TargetMachine &TM = MF.getTarget(); const TargetRegisterInfo *RI = TM.getRegisterInfo(); + const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>(); // Does the function dedicate RBP to being a frame ptr? - if (RI->hasFP(MF)) + if (RI->hasFP(MF) || MFI->getReserveFP()) // If so, don't allocate RBP. return end() - 1; else diff --git a/lib/Target/X86/X86SelectionDAGInfo.cpp b/lib/Target/X86/X86SelectionDAGInfo.cpp new file mode 100644 index 0000000000000..cd87b82d31a63 --- /dev/null +++ b/lib/Target/X86/X86SelectionDAGInfo.cpp @@ -0,0 +1,22 @@ +//===-- X86SelectionDAGInfo.cpp - X86 SelectionDAG Info -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the X86SelectionDAGInfo class. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "x86-selectiondag-info" +#include "X86SelectionDAGInfo.h" +using namespace llvm; + +X86SelectionDAGInfo::X86SelectionDAGInfo() { +} + +X86SelectionDAGInfo::~X86SelectionDAGInfo() { +} diff --git a/lib/Target/X86/X86SelectionDAGInfo.h b/lib/Target/X86/X86SelectionDAGInfo.h new file mode 100644 index 0000000000000..983475461f5e3 --- /dev/null +++ b/lib/Target/X86/X86SelectionDAGInfo.h @@ -0,0 +1,29 @@ +//===-- X86SelectionDAGInfo.h - X86 SelectionDAG Info -----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the X86 subclass for TargetSelectionDAGInfo. +// +//===----------------------------------------------------------------------===// + +#ifndef X86SELECTIONDAGINFO_H +#define X86SELECTIONDAGINFO_H + +#include "llvm/Target/TargetSelectionDAGInfo.h" + +namespace llvm { + +class X86SelectionDAGInfo : public TargetSelectionDAGInfo { +public: + X86SelectionDAGInfo(); + ~X86SelectionDAGInfo(); +}; + +} + +#endif diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 8a873f04df4ee..646af91517fe7 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -85,8 +85,7 @@ protected: bool IsUAMemFast; /// HasVectorUAMem - True if SIMD operations can have unaligned memory - /// operands. This may require setting a feature bit in the - /// processor. + /// operands. This may require setting a feature bit in the processor. bool HasVectorUAMem; /// DarwinVers - Nonzero if this is a darwin platform: the numeric diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index c608e56c8fbf2..f39904ef7eb70 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -17,12 +17,17 @@ #include "llvm/PassManager.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/CommandLine.h" + using namespace llvm; +static cl::opt<bool> DisableSSEDomain("disable-sse-domain", + cl::init(false), cl::Hidden, + cl::desc("Disable SSE Domain Fixing")); + static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) { Triple TheTriple(TT); switch (TheTriple.getOS()) { @@ -161,6 +166,7 @@ bool X86TargetMachine::addInstSelector(PassManagerBase &PM, bool X86TargetMachine::addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel) { + PM.add(createX86MaxStackAlignmentHeuristicPass()); return false; // -print-machineinstr shouldn't print after this. } @@ -172,7 +178,8 @@ bool X86TargetMachine::addPostRegAlloc(PassManagerBase &PM, bool X86TargetMachine::addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel) { - if (OptLevel != CodeGenOpt::None && Subtarget.hasSSE2()) { + if (OptLevel != CodeGenOpt::None && Subtarget.hasSSE2() && + !DisableSSEDomain) { PM.add(createSSEDomainFixPass()); return true; } diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h index ae7b5b29af145..dc4234c4a90a4 100644 --- a/lib/Target/X86/X86TargetMachine.h +++ b/lib/Target/X86/X86TargetMachine.h @@ -51,8 +51,8 @@ public: virtual const TargetFrameInfo *getFrameInfo() const { return &FrameInfo; } virtual X86JITInfo *getJITInfo() { return &JITInfo; } virtual const X86Subtarget *getSubtargetImpl() const{ return &Subtarget; } - virtual X86TargetLowering *getTargetLowering() const { - return const_cast<X86TargetLowering*>(&TLInfo); + virtual const X86TargetLowering *getTargetLowering() const { + return &TLInfo; } virtual const X86RegisterInfo *getRegisterInfo() const { return &InstrInfo.getRegisterInfo(); diff --git a/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp b/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp index 5801b40b7e900..c100c590135eb 100644 --- a/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp +++ b/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp @@ -123,7 +123,7 @@ void XCoreAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { switch (GV->getLinkage()) { case GlobalValue::AppendingLinkage: - llvm_report_error("AppendingLinkage is not supported by this target!"); + report_fatal_error("AppendingLinkage is not supported by this target!"); case GlobalValue::LinkOnceAnyLinkage: case GlobalValue::LinkOnceODRLinkage: case GlobalValue::WeakAnyLinkage: @@ -148,7 +148,7 @@ void XCoreAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { llvm_unreachable("Unknown linkage type!"); } - EmitAlignment(Align, GV, 2); + EmitAlignment(Align > 2 ? Align : 2, GV); unsigned Size = TD->getTypeAllocSize(C->getType()); if (GV->isThreadLocal()) { diff --git a/lib/Target/XCore/CMakeLists.txt b/lib/Target/XCore/CMakeLists.txt index 0965323b998ad..1b8e7edfc7ca9 100644 --- a/lib/Target/XCore/CMakeLists.txt +++ b/lib/Target/XCore/CMakeLists.txt @@ -11,7 +11,6 @@ tablegen(XCoreGenCallingConv.inc -gen-callingconv) tablegen(XCoreGenSubtarget.inc -gen-subtarget) add_llvm_target(XCore - MCSectionXCore.cpp XCoreFrameInfo.cpp XCoreInstrInfo.cpp XCoreISelDAGToDAG.cpp @@ -21,4 +20,5 @@ add_llvm_target(XCore XCoreSubtarget.cpp XCoreTargetMachine.cpp XCoreTargetObjectFile.cpp + XCoreSelectionDAGInfo.cpp ) diff --git a/lib/Target/XCore/MCSectionXCore.cpp b/lib/Target/XCore/MCSectionXCore.cpp deleted file mode 100644 index 5acceafe9ea3b..0000000000000 --- a/lib/Target/XCore/MCSectionXCore.cpp +++ /dev/null @@ -1,35 +0,0 @@ -//===- MCSectionXCore.cpp - XCore-specific section representation ---------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the MCSectionXCore class. -// -//===----------------------------------------------------------------------===// - -#include "MCSectionXCore.h" -#include "llvm/MC/MCContext.h" -#include "llvm/Support/raw_ostream.h" -using namespace llvm; - -MCSectionXCore * -MCSectionXCore::Create(const StringRef &Section, unsigned Type, - unsigned Flags, SectionKind K, - bool isExplicit, MCContext &Ctx) { - return new (Ctx) MCSectionXCore(Section, Type, Flags, K, isExplicit); -} - - -/// PrintTargetSpecificSectionFlags - This handles the XCore-specific cp/dp -/// section flags. -void MCSectionXCore::PrintTargetSpecificSectionFlags(const MCAsmInfo &MAI, - raw_ostream &OS) const { - if (getFlags() & MCSectionXCore::SHF_CP_SECTION) - OS << 'c'; - if (getFlags() & MCSectionXCore::SHF_DP_SECTION) - OS << 'd'; -} diff --git a/lib/Target/XCore/MCSectionXCore.h b/lib/Target/XCore/MCSectionXCore.h deleted file mode 100644 index 02f8f95572c8b..0000000000000 --- a/lib/Target/XCore/MCSectionXCore.h +++ /dev/null @@ -1,54 +0,0 @@ -//===- MCSectionXCore.h - XCore-specific section representation -*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file declares the MCSectionXCore class. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_MCSECTION_XCORE_H -#define LLVM_MCSECTION_XCORE_H - -#include "llvm/MC/MCSectionELF.h" - -namespace llvm { - -class MCSectionXCore : public MCSectionELF { - MCSectionXCore(const StringRef &Section, unsigned Type, unsigned Flags, - SectionKind K, bool isExplicit) - : MCSectionELF(Section, Type, Flags, K, isExplicit) {} - -public: - - enum { - /// SHF_CP_SECTION - All sections with the "c" flag are grouped together - /// by the linker to form the constant pool and the cp register is set to - /// the start of the constant pool by the boot code. - SHF_CP_SECTION = FIRST_TARGET_DEP_FLAG, - - /// SHF_DP_SECTION - All sections with the "d" flag are grouped together - /// by the linker to form the data section and the dp register is set to - /// the start of the section by the boot code. - SHF_DP_SECTION = FIRST_TARGET_DEP_FLAG << 1 - }; - - static MCSectionXCore *Create(const StringRef &Section, unsigned Type, - unsigned Flags, SectionKind K, - bool isExplicit, MCContext &Ctx); - - - /// PrintTargetSpecificSectionFlags - This handles the XCore-specific cp/dp - /// section flags. - virtual void PrintTargetSpecificSectionFlags(const MCAsmInfo &MAI, - raw_ostream &OS) const; - -}; - -} // end namespace llvm - -#endif diff --git a/lib/Target/XCore/XCoreISelDAGToDAG.cpp b/lib/Target/XCore/XCoreISelDAGToDAG.cpp index 1615547b4152f..5564ddf133eaf 100644 --- a/lib/Target/XCore/XCoreISelDAGToDAG.cpp +++ b/lib/Target/XCore/XCoreISelDAGToDAG.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #include "XCore.h" -#include "XCoreISelLowering.h" #include "XCoreTargetMachine.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" @@ -40,7 +39,7 @@ using namespace llvm; /// namespace { class XCoreDAGToDAGISel : public SelectionDAGISel { - XCoreTargetLowering &Lowering; + const XCoreTargetLowering &Lowering; const XCoreSubtarget &Subtarget; public: diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp index 27e5233246643..3990b8b3c012d 100644 --- a/lib/Target/XCore/XCoreISelLowering.cpp +++ b/lib/Target/XCore/XCoreISelLowering.cpp @@ -158,7 +158,7 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM) } SDValue XCoreTargetLowering:: -LowerOperation(SDValue Op, SelectionDAG &DAG) { +LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); @@ -187,7 +187,7 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) { /// type with new values built out of custom code. void XCoreTargetLowering::ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { switch (N->getOpcode()) { default: llvm_unreachable("Don't know how to custom expand this!"); @@ -210,7 +210,7 @@ getFunctionAlignment(const Function *) const { //===----------------------------------------------------------------------===// SDValue XCoreTargetLowering:: -LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) +LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); SDValue Cond = DAG.getNode(ISD::SETCC, dl, MVT::i32, Op.getOperand(2), @@ -220,7 +220,8 @@ LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) } SDValue XCoreTargetLowering:: -getGlobalAddressWrapper(SDValue GA, GlobalValue *GV, SelectionDAG &DAG) +getGlobalAddressWrapper(SDValue GA, const GlobalValue *GV, + SelectionDAG &DAG) const { // FIXME there is no actual debug info here DebugLoc dl = GA.getDebugLoc(); @@ -241,9 +242,9 @@ getGlobalAddressWrapper(SDValue GA, GlobalValue *GV, SelectionDAG &DAG) } SDValue XCoreTargetLowering:: -LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) +LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { - GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); + const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); SDValue GA = DAG.getTargetGlobalAddress(GV, MVT::i32); // If it's a debug information descriptor, don't mess with it. if (DAG.isVerifiedDebugInfoDesc(Op)) @@ -262,12 +263,12 @@ static inline bool isZeroLengthArray(const Type *Ty) { } SDValue XCoreTargetLowering:: -LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) +LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { // FIXME there isn't really debug info here DebugLoc dl = Op.getDebugLoc(); // transform to label + getid() * size - GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); + const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); SDValue GA = DAG.getTargetGlobalAddress(GV, MVT::i32); const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV); if (!GVar) { @@ -296,18 +297,18 @@ LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) } SDValue XCoreTargetLowering:: -LowerBlockAddress(SDValue Op, SelectionDAG &DAG) +LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const { DebugLoc DL = Op.getDebugLoc(); - BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); + const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); SDValue Result = DAG.getBlockAddress(BA, getPointerTy(), /*isTarget=*/true); return DAG.getNode(XCoreISD::PCRelativeWrapper, DL, getPointerTy(), Result); } SDValue XCoreTargetLowering:: -LowerConstantPool(SDValue Op, SelectionDAG &DAG) +LowerConstantPool(SDValue Op, SelectionDAG &DAG) const { ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); // FIXME there isn't really debug info here @@ -329,7 +330,7 @@ unsigned XCoreTargetLowering::getJumpTableEncoding() const { } SDValue XCoreTargetLowering:: -LowerBR_JT(SDValue Op, SelectionDAG &DAG) +LowerBR_JT(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = Op.getOperand(0); SDValue Table = Op.getOperand(1); @@ -391,7 +392,7 @@ IsWordAlignedBasePlusConstantOffset(SDValue Addr, SDValue &AlignedBase, } SDValue XCoreTargetLowering:: -LowerLOAD(SDValue Op, SelectionDAG &DAG) +LowerLOAD(SDValue Op, SelectionDAG &DAG) const { LoadSDNode *LD = cast<LoadSDNode>(Op); assert(LD->getExtensionType() == ISD::NON_EXTLOAD && @@ -494,7 +495,7 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) } SDValue XCoreTargetLowering:: -LowerSTORE(SDValue Op, SelectionDAG &DAG) +LowerSTORE(SDValue Op, SelectionDAG &DAG) const { StoreSDNode *ST = cast<StoreSDNode>(Op); assert(!ST->isTruncatingStore() && "Unexpected store type"); @@ -554,7 +555,7 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG) } SDValue XCoreTargetLowering:: -LowerSMUL_LOHI(SDValue Op, SelectionDAG &DAG) +LowerSMUL_LOHI(SDValue Op, SelectionDAG &DAG) const { assert(Op.getValueType() == MVT::i32 && Op.getOpcode() == ISD::SMUL_LOHI && "Unexpected operand to lower!"); @@ -571,7 +572,7 @@ LowerSMUL_LOHI(SDValue Op, SelectionDAG &DAG) } SDValue XCoreTargetLowering:: -LowerUMUL_LOHI(SDValue Op, SelectionDAG &DAG) +LowerUMUL_LOHI(SDValue Op, SelectionDAG &DAG) const { assert(Op.getValueType() == MVT::i32 && Op.getOpcode() == ISD::UMUL_LOHI && "Unexpected operand to lower!"); @@ -647,7 +648,7 @@ isADDADDMUL(SDValue Op, SDValue &Mul0, SDValue &Mul1, SDValue &Addend0, } SDValue XCoreTargetLowering:: -TryExpandADDWithMul(SDNode *N, SelectionDAG &DAG) +TryExpandADDWithMul(SDNode *N, SelectionDAG &DAG) const { SDValue Mul; SDValue Other; @@ -707,7 +708,7 @@ TryExpandADDWithMul(SDNode *N, SelectionDAG &DAG) } SDValue XCoreTargetLowering:: -ExpandADDSUB(SDNode *N, SelectionDAG &DAG) +ExpandADDSUB(SDNode *N, SelectionDAG &DAG) const { assert(N->getValueType(0) == MVT::i64 && (N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) && @@ -747,7 +748,7 @@ ExpandADDSUB(SDNode *N, SelectionDAG &DAG) } SDValue XCoreTargetLowering:: -LowerVAARG(SDValue Op, SelectionDAG &DAG) +LowerVAARG(SDValue Op, SelectionDAG &DAG) const { llvm_unreachable("unimplemented"); // FIX Arguments passed by reference need a extra dereference. @@ -769,7 +770,7 @@ LowerVAARG(SDValue Op, SelectionDAG &DAG) } SDValue XCoreTargetLowering:: -LowerVASTART(SDValue Op, SelectionDAG &DAG) +LowerVASTART(SDValue Op, SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); // vastart stores the address of the VarArgsFrameIndex slot into the @@ -782,7 +783,8 @@ LowerVASTART(SDValue Op, SelectionDAG &DAG) false, false, 0); } -SDValue XCoreTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) { +SDValue XCoreTargetLowering::LowerFRAMEADDR(SDValue Op, + SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); // Depths > 0 not supported yet! if (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() > 0) @@ -812,7 +814,7 @@ XCoreTargetLowering::LowerCall(SDValue Chain, SDValue Callee, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { // XCore target does not yet support tail call optimization. isTailCall = false; @@ -839,7 +841,7 @@ XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { // Analyze operands of the call, assigning locations to each operand. SmallVector<CCValAssign, 16> ArgLocs; @@ -962,7 +964,7 @@ XCoreTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { // Assign locations to each value returned by this call. SmallVector<CCValAssign, 16> RVLocs; @@ -994,7 +996,8 @@ XCoreTargetLowering::LowerFormalArguments(SDValue Chain, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) + const { switch (CallConv) { default: @@ -1018,7 +1021,7 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain, &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) { + SmallVectorImpl<SDValue> &InVals) const { MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); MachineRegisterInfo &RegInfo = MF.getRegInfo(); @@ -1132,7 +1135,7 @@ bool XCoreTargetLowering:: CanLowerReturn(CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<EVT> &OutTys, const SmallVectorImpl<ISD::ArgFlagsTy> &ArgsFlags, - SelectionDAG &DAG) { + SelectionDAG &DAG) const { SmallVector<CCValAssign, 16> RVLocs; CCState CCInfo(CallConv, isVarArg, getTargetMachine(), RVLocs, *DAG.getContext()); @@ -1143,7 +1146,7 @@ SDValue XCoreTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, - DebugLoc dl, SelectionDAG &DAG) { + DebugLoc dl, SelectionDAG &DAG) const { // CCValAssign - represent the assignment of // the return value to a location @@ -1194,8 +1197,7 @@ XCoreTargetLowering::LowerReturn(SDValue Chain, MachineBasicBlock * XCoreTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *BB, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const { + MachineBasicBlock *BB) const { const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo(); DebugLoc dl = MI->getDebugLoc(); assert((MI->getOpcode() == XCore::SELECT_CC) && @@ -1225,12 +1227,9 @@ XCoreTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, F->insert(It, sinkMBB); // Update machine-CFG edges by first adding all successors of the current // block to the new block which will contain the Phi node for the select. - // Also inform sdisel of the edge changes. for (MachineBasicBlock::succ_iterator I = BB->succ_begin(), - E = BB->succ_end(); I != E; ++I) { - EM->insert(std::make_pair(*I, sinkMBB)); + E = BB->succ_end(); I != E; ++I) sinkMBB->addSuccessor(*I); - } // Next, remove all successors of the current block, and add the true // and fallthrough blocks as its successors. while (!BB->succ_empty()) diff --git a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h index 3ccdeec141bbc..d8d2a3aa73156 100644 --- a/lib/Target/XCore/XCoreISelLowering.h +++ b/lib/Target/XCore/XCoreISelLowering.h @@ -83,21 +83,21 @@ namespace llvm { virtual unsigned getJumpTableEncoding() const; /// LowerOperation - Provide custom lowering hooks for some operations. - virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG); + virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; /// ReplaceNodeResults - Replace the results of node with an illegal result /// type with new values built out of custom code. /// virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results, - SelectionDAG &DAG); + SelectionDAG &DAG) const; /// getTargetNodeName - This method returns the name of a target specific // DAG node. virtual const char *getTargetNodeName(unsigned Opcode) const; - virtual MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *MBB, - DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const; + virtual MachineBasicBlock * + EmitInstrWithCustomInserter(MachineInstr *MI, + MachineBasicBlock *MBB) const; virtual bool isLegalAddressingMode(const AddrMode &AM, const Type *Ty) const; @@ -115,37 +115,37 @@ namespace llvm { bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; SDValue LowerCCCCallTo(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; SDValue LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); - SDValue getReturnAddressFrameIndex(SelectionDAG &DAG); - SDValue getGlobalAddressWrapper(SDValue GA, GlobalValue *GV, - SelectionDAG &DAG); + SmallVectorImpl<SDValue> &InVals) const; + SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const; + SDValue getGlobalAddressWrapper(SDValue GA, const GlobalValue *GV, + SelectionDAG &DAG) const; // Lower Operand specifics - SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG); - SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG); - SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG); - SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG); - SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG); - SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG); - SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG); - SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG); - SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG); - SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG); - SDValue LowerUMUL_LOHI(SDValue Op, SelectionDAG &DAG); - SDValue LowerSMUL_LOHI(SDValue Op, SelectionDAG &DAG); - SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG); + SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerUMUL_LOHI(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSMUL_LOHI(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; // Inline asm support std::vector<unsigned> @@ -153,8 +153,8 @@ namespace llvm { EVT VT) const; // Expand specifics - SDValue TryExpandADDWithMul(SDNode *Op, SelectionDAG &DAG); - SDValue ExpandADDSUB(SDNode *Op, SelectionDAG &DAG); + SDValue TryExpandADDWithMul(SDNode *Op, SelectionDAG &DAG) const; + SDValue ExpandADDSUB(SDNode *Op, SelectionDAG &DAG) const; virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; @@ -171,7 +171,7 @@ namespace llvm { bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerCall(SDValue Chain, SDValue Callee, @@ -180,19 +180,19 @@ namespace llvm { const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals); + SmallVectorImpl<SDValue> &InVals) const; virtual SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, - DebugLoc dl, SelectionDAG &DAG); + DebugLoc dl, SelectionDAG &DAG) const; virtual bool CanLowerReturn(CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<EVT> &OutTys, const SmallVectorImpl<ISD::ArgFlagsTy> &ArgsFlags, - SelectionDAG &DAG); + SelectionDAG &DAG) const; }; } diff --git a/lib/Target/XCore/XCoreRegisterInfo.cpp b/lib/Target/XCore/XCoreRegisterInfo.cpp index ab71d05354448..0cfb358617e82 100644 --- a/lib/Target/XCore/XCoreRegisterInfo.cpp +++ b/lib/Target/XCore/XCoreRegisterInfo.cpp @@ -113,7 +113,7 @@ XCoreRegisterInfo::requiresRegisterScavenging(const MachineFunction &MF) const { } bool XCoreRegisterInfo::hasFP(const MachineFunction &MF) const { - return NoFramePointerElim || MF.getFrameInfo()->hasVarSizedObjects(); + return DisableFramePointerElim(MF) || MF.getFrameInfo()->hasVarSizedObjects(); } // This function eliminates ADJCALLSTACKDOWN, @@ -225,12 +225,9 @@ XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, unsigned FramePtr = XCore::R10; if (!isUs) { - if (!RS) { - std::string msg; - raw_string_ostream Msg(msg); - Msg << "eliminateFrameIndex Frame size too big: " << Offset; - llvm_report_error(Msg.str()); - } + if (!RS) + report_fatal_error("eliminateFrameIndex Frame size too big: " + + Twine(Offset)); unsigned ScratchReg = RS->scavengeRegister(XCore::GRRegsRegisterClass, II, SPAdj); loadConstant(MBB, II, ScratchReg, Offset, dl); @@ -278,12 +275,9 @@ XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, } } else { bool isU6 = isImmU6(Offset); - if (!isU6 && !isImmU16(Offset)) { - std::string msg; - raw_string_ostream Msg(msg); - Msg << "eliminateFrameIndex Frame size too big: " << Offset; - llvm_report_error(Msg.str()); - } + if (!isU6 && !isImmU16(Offset)) + report_fatal_error("eliminateFrameIndex Frame size too big: " + + Twine(Offset)); switch (MI.getOpcode()) { int NewOpcode; @@ -360,10 +354,7 @@ loadConstant(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, // TODO use mkmsk if possible. if (!isImmU16(Value)) { // TODO use constant pool. - std::string msg; - raw_string_ostream Msg(msg); - Msg << "loadConstant value too big " << Value; - llvm_report_error(Msg.str()); + report_fatal_error("loadConstant value too big " + Twine(Value)); } int Opcode = isImmU6(Value) ? XCore::LDC_ru6 : XCore::LDC_lru6; BuildMI(MBB, I, dl, TII.get(Opcode), DstReg).addImm(Value); @@ -375,12 +366,8 @@ storeToStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, assert(Offset%4 == 0 && "Misaligned stack offset"); Offset/=4; bool isU6 = isImmU6(Offset); - if (!isU6 && !isImmU16(Offset)) { - std::string msg; - raw_string_ostream Msg(msg); - Msg << "storeToStack offset too big " << Offset; - llvm_report_error(Msg.str()); - } + if (!isU6 && !isImmU16(Offset)) + report_fatal_error("storeToStack offset too big " + Twine(Offset)); int Opcode = isU6 ? XCore::STWSP_ru6 : XCore::STWSP_lru6; BuildMI(MBB, I, dl, TII.get(Opcode)) .addReg(SrcReg) @@ -393,12 +380,8 @@ loadFromStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, assert(Offset%4 == 0 && "Misaligned stack offset"); Offset/=4; bool isU6 = isImmU6(Offset); - if (!isU6 && !isImmU16(Offset)) { - std::string msg; - raw_string_ostream Msg(msg); - Msg << "loadFromStack offset too big " << Offset; - llvm_report_error(Msg.str()); - } + if (!isU6 && !isImmU16(Offset)) + report_fatal_error("loadFromStack offset too big " + Twine(Offset)); int Opcode = isU6 ? XCore::LDWSP_ru6 : XCore::LDWSP_lru6; BuildMI(MBB, I, dl, TII.get(Opcode), DstReg) .addImm(Offset); @@ -425,10 +408,7 @@ void XCoreRegisterInfo::emitPrologue(MachineFunction &MF) const { if (!isU6 && !isImmU16(FrameSize)) { // FIXME could emit multiple instructions. - std::string msg; - raw_string_ostream Msg(msg); - Msg << "emitPrologue Frame size too big: " << FrameSize; - llvm_report_error(Msg.str()); + report_fatal_error("emitPrologue Frame size too big: " + Twine(FrameSize)); } bool emitFrameMoves = needsFrameMoves(MF); @@ -549,10 +529,7 @@ void XCoreRegisterInfo::emitEpilogue(MachineFunction &MF, if (!isU6 && !isImmU16(FrameSize)) { // FIXME could emit multiple instructions. - std::string msg; - raw_string_ostream Msg(msg); - Msg << "emitEpilogue Frame size too big: " << FrameSize; - llvm_report_error(Msg.str()); + report_fatal_error("emitEpilogue Frame size too big: " + Twine(FrameSize)); } if (FrameSize) { diff --git a/lib/Target/XCore/XCoreSelectionDAGInfo.cpp b/lib/Target/XCore/XCoreSelectionDAGInfo.cpp new file mode 100644 index 0000000000000..6aac237531016 --- /dev/null +++ b/lib/Target/XCore/XCoreSelectionDAGInfo.cpp @@ -0,0 +1,22 @@ +//===-- XCoreSelectionDAGInfo.cpp - XCore SelectionDAG Info ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the XCoreSelectionDAGInfo class. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "xcore-selectiondag-info" +#include "XCoreSelectionDAGInfo.h" +using namespace llvm; + +XCoreSelectionDAGInfo::XCoreSelectionDAGInfo() { +} + +XCoreSelectionDAGInfo::~XCoreSelectionDAGInfo() { +} diff --git a/lib/Target/XCore/XCoreSelectionDAGInfo.h b/lib/Target/XCore/XCoreSelectionDAGInfo.h new file mode 100644 index 0000000000000..fd9671681fc6d --- /dev/null +++ b/lib/Target/XCore/XCoreSelectionDAGInfo.h @@ -0,0 +1,29 @@ +//===-- XCoreSelectionDAGInfo.h - XCore SelectionDAG Info -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the XCore subclass for TargetSelectionDAGInfo. +// +//===----------------------------------------------------------------------===// + +#ifndef XCORESELECTIONDAGINFO_H +#define XCORESELECTIONDAGINFO_H + +#include "llvm/Target/TargetSelectionDAGInfo.h" + +namespace llvm { + +class XCoreSelectionDAGInfo : public TargetSelectionDAGInfo { +public: + XCoreSelectionDAGInfo(); + ~XCoreSelectionDAGInfo(); +}; + +} + +#endif diff --git a/lib/Target/XCore/XCoreTargetMachine.h b/lib/Target/XCore/XCoreTargetMachine.h index b0b1464dbe0c8..701a6f1dfafcf 100644 --- a/lib/Target/XCore/XCoreTargetMachine.h +++ b/lib/Target/XCore/XCoreTargetMachine.h @@ -36,8 +36,8 @@ public: virtual const XCoreInstrInfo *getInstrInfo() const { return &InstrInfo; } virtual const XCoreFrameInfo *getFrameInfo() const { return &FrameInfo; } virtual const XCoreSubtarget *getSubtargetImpl() const { return &Subtarget; } - virtual XCoreTargetLowering *getTargetLowering() const { - return const_cast<XCoreTargetLowering*>(&TLInfo); + virtual const XCoreTargetLowering *getTargetLowering() const { + return &TLInfo; } virtual const TargetRegisterInfo *getRegisterInfo() const { diff --git a/lib/Target/XCore/XCoreTargetObjectFile.cpp b/lib/Target/XCore/XCoreTargetObjectFile.cpp index 7de3b55d38f64..cdf5a5371e22f 100644 --- a/lib/Target/XCore/XCoreTargetObjectFile.cpp +++ b/lib/Target/XCore/XCoreTargetObjectFile.cpp @@ -9,7 +9,8 @@ #include "XCoreTargetObjectFile.h" #include "XCoreSubtarget.h" -#include "MCSectionXCore.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCSectionELF.h" #include "llvm/Target/TargetMachine.h" using namespace llvm; @@ -18,34 +19,31 @@ void XCoreTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM){ TargetLoweringObjectFileELF::Initialize(Ctx, TM); DataSection = - MCSectionXCore::Create(".dp.data", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE | - MCSectionXCore::SHF_DP_SECTION, - SectionKind::getDataRel(), false, getContext()); + Ctx.getELFSection(".dp.data", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE | + MCSectionELF::XCORE_SHF_DP_SECTION, + SectionKind::getDataRel(), false); BSSSection = - MCSectionXCore::Create(".dp.bss", MCSectionELF::SHT_NOBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE | - MCSectionXCore::SHF_DP_SECTION, - SectionKind::getBSS(), false, getContext()); + Ctx.getELFSection(".dp.bss", MCSectionELF::SHT_NOBITS, + MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_WRITE | + MCSectionELF::XCORE_SHF_DP_SECTION, + SectionKind::getBSS(), false); MergeableConst4Section = - MCSectionXCore::Create(".cp.rodata.cst4", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_MERGE | - MCSectionXCore::SHF_CP_SECTION, - SectionKind::getMergeableConst4(), false, - getContext()); + Ctx.getELFSection(".cp.rodata.cst4", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_MERGE | + MCSectionELF::XCORE_SHF_CP_SECTION, + SectionKind::getMergeableConst4(), false); MergeableConst8Section = - MCSectionXCore::Create(".cp.rodata.cst8", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_MERGE | - MCSectionXCore::SHF_CP_SECTION, - SectionKind::getMergeableConst8(), false, - getContext()); + Ctx.getELFSection(".cp.rodata.cst8", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_MERGE | + MCSectionELF::XCORE_SHF_CP_SECTION, + SectionKind::getMergeableConst8(), false); MergeableConst16Section = - MCSectionXCore::Create(".cp.rodata.cst16", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_MERGE | - MCSectionXCore::SHF_CP_SECTION, - SectionKind::getMergeableConst16(), false, - getContext()); + Ctx.getELFSection(".cp.rodata.cst16", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_MERGE | + MCSectionELF::XCORE_SHF_CP_SECTION, + SectionKind::getMergeableConst16(), false); // TLS globals are lowered in the backend to arrays indexed by the current // thread id. After lowering they require no special handling by the linker @@ -54,11 +52,10 @@ void XCoreTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM){ TLSBSSSection = BSSSection; ReadOnlySection = - MCSectionXCore::Create(".cp.rodata", MCSectionELF::SHT_PROGBITS, - MCSectionELF::SHF_ALLOC | - MCSectionXCore::SHF_CP_SECTION, - SectionKind::getReadOnlyWithRel(), false, - getContext()); + Ctx.getELFSection(".cp.rodata", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_ALLOC | + MCSectionELF::XCORE_SHF_CP_SECTION, + SectionKind::getReadOnlyWithRel(), false); // Dynamic linking is not supported. Data with relocations is placed in the // same section as data without relocations. |
