diff options
Diffstat (limited to 'lib/Target/ARM')
43 files changed, 1544 insertions, 831 deletions
diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h index 487ce1dd434b6..76cc06e963859 100644 --- a/lib/Target/ARM/ARM.h +++ b/lib/Target/ARM/ARM.h @@ -105,6 +105,7 @@ FunctionPass *createARMObjectCodeEmitterPass(ARMBaseTargetMachine &TM, FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc = false); FunctionPass *createARMConstantIslandPass(); FunctionPass *createNEONPreAllocPass(); +FunctionPass *createNEONMoveFixPass(); FunctionPass *createThumb2ITBlockPass(); FunctionPass *createThumb2SizeReductionPass(); diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index 8851fbbf24815..cb9bd6a0948eb 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -125,12 +125,16 @@ def ARMInstrInfo : InstrInfo { "SizeFlag", "IndexModeBits", "Form", - "isUnaryDataProc"]; + "isUnaryDataProc", + "canXformTo16Bit", + "Dom"]; let TSFlagsShifts = [0, 4, 7, 9, - 15]; + 15, + 16, + 17]; } //===----------------------------------------------------------------------===// diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index ecdf5a0be6436..7c5b0f0bd83da 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -16,6 +16,7 @@ #include "ARMAddressingModes.h" #include "ARMGenInstrInfo.inc" #include "ARMMachineFunctionInfo.h" +#include "ARMRegisterInfo.h" #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -25,6 +26,7 @@ #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" using namespace llvm; @@ -32,8 +34,9 @@ static cl::opt<bool> EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden, cl::desc("Enable ARM 2-addr to 3-addr conv")); -ARMBaseInstrInfo::ARMBaseInstrInfo() - : TargetInstrInfoImpl(ARMInsts, array_lengthof(ARMInsts)) { +ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget& STI) + : TargetInstrInfoImpl(ARMInsts, array_lengthof(ARMInsts)), + Subtarget(STI) { } MachineInstr * @@ -249,7 +252,8 @@ ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB, // ...likewise if it ends with a branch table followed by an unconditional // branch. The branch folder can create these, and we must get rid of them for // correctness of Thumb constant islands. - if (isJumpTableBranchOpcode(SecondLastOpc) && + if ((isJumpTableBranchOpcode(SecondLastOpc) || + isIndirectBranchOpcode(SecondLastOpc)) && isUncondBranchOpcode(LastOpc)) { I = LastInst; if (AllowModify) @@ -444,7 +448,7 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { case ARM::Int_eh_sjlj_setjmp: return 24; case ARM::t2Int_eh_sjlj_setjmp: - return 20; + return 22; case ARM::BR_JTr: case ARM::BR_JTm: case ARM::BR_JTadd: @@ -503,7 +507,7 @@ ARMBaseInstrInfo::isMoveInstr(const MachineInstr &MI, case ARM::FCPYS: case ARM::FCPYD: case ARM::VMOVD: - case ARM::VMOVQ: { + case ARM::VMOVQ: { SrcReg = MI.getOperand(1).getReg(); DstReg = MI.getOperand(0).getReg(); return true; @@ -615,28 +619,12 @@ ARMBaseInstrInfo::copyRegToReg(MachineBasicBlock &MBB, if (I != MBB.end()) DL = I->getDebugLoc(); if (DestRC != SrcRC) { - // Allow DPR / DPR_VFP2 / DPR_8 cross-class copies - // Allow QPR / QPR_VFP2 cross-class copies - if (DestRC == ARM::DPRRegisterClass) { - if (SrcRC == ARM::DPR_VFP2RegisterClass || - SrcRC == ARM::DPR_8RegisterClass) { - } else - return false; - } else if (DestRC == ARM::DPR_VFP2RegisterClass) { - if (SrcRC == ARM::DPRRegisterClass || - SrcRC == ARM::DPR_8RegisterClass) { - } else - return false; - } else if (DestRC == ARM::DPR_8RegisterClass) { - if (SrcRC == ARM::DPRRegisterClass || - SrcRC == ARM::DPR_VFP2RegisterClass) { - } else - return false; - } else if ((DestRC == ARM::QPRRegisterClass && - SrcRC == ARM::QPR_VFP2RegisterClass) || - (DestRC == ARM::QPR_VFP2RegisterClass && - SrcRC == ARM::QPRRegisterClass)) { - } else + if (DestRC->getSize() != SrcRC->getSize()) + return false; + + // Allow DPR / DPR_VFP2 / DPR_8 cross-class copies. + // Allow QPR / QPR_VFP2 / QPR_8 cross-class copies. + if (DestRC->getSize() != 8 && DestRC->getSize() != 16) return false; } @@ -646,13 +634,18 @@ ARMBaseInstrInfo::copyRegToReg(MachineBasicBlock &MBB, } else if (DestRC == ARM::SPRRegisterClass) { AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FCPYS), DestReg) .addReg(SrcReg)); - } else if ((DestRC == ARM::DPRRegisterClass) || - (DestRC == ARM::DPR_VFP2RegisterClass) || - (DestRC == ARM::DPR_8RegisterClass)) { + } else if (DestRC == ARM::DPRRegisterClass) { AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FCPYD), DestReg) .addReg(SrcReg)); + } else if (DestRC == ARM::DPR_VFP2RegisterClass || + DestRC == ARM::DPR_8RegisterClass || + SrcRC == ARM::DPR_VFP2RegisterClass || + SrcRC == ARM::DPR_8RegisterClass) { + // Always use neon reg-reg move if source or dest is NEON-only regclass. + BuildMI(MBB, I, DL, get(ARM::VMOVD), DestReg).addReg(SrcReg); } else if (DestRC == ARM::QPRRegisterClass || - DestRC == ARM::QPR_VFP2RegisterClass) { + DestRC == ARM::QPR_VFP2RegisterClass || + DestRC == ARM::QPR_8RegisterClass) { BuildMI(MBB, I, DL, get(ARM::VMOVQ), DestReg).addReg(SrcReg); } else { return false; @@ -727,9 +720,11 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); } else { assert((RC == ARM::QPRRegisterClass || - RC == ARM::QPR_VFP2RegisterClass) && "Unknown regclass!"); + RC == ARM::QPR_VFP2RegisterClass || + RC == ARM::QPR_8RegisterClass) && "Unknown regclass!"); // FIXME: Neon instructions should support predicates - BuildMI(MBB, I, DL, get(ARM::VLDRQ), DestReg).addFrameIndex(FI).addImm(0).addMemOperand(MMO); + BuildMI(MBB, I, DL, get(ARM::VLDRQ), DestReg).addFrameIndex(FI).addImm(0). + addMemOperand(MMO); } } @@ -749,18 +744,24 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, unsigned PredReg = MI->getOperand(3).getReg(); if (OpNum == 0) { // move -> store unsigned SrcReg = MI->getOperand(1).getReg(); + unsigned SrcSubReg = MI->getOperand(1).getSubReg(); bool isKill = MI->getOperand(1).isKill(); bool isUndef = MI->getOperand(1).isUndef(); if (Opc == ARM::MOVr) NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::STR)) - .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef)) + .addReg(SrcReg, + getKillRegState(isKill) | getUndefRegState(isUndef), + SrcSubReg) .addFrameIndex(FI).addReg(0).addImm(0).addImm(Pred).addReg(PredReg); else // ARM::t2MOVr NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::t2STRi12)) - .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef)) + .addReg(SrcReg, + getKillRegState(isKill) | getUndefRegState(isUndef), + SrcSubReg) .addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg); } else { // move -> load unsigned DstReg = MI->getOperand(0).getReg(); + unsigned DstSubReg = MI->getOperand(0).getSubReg(); bool isDead = MI->getOperand(0).isDead(); bool isUndef = MI->getOperand(0).isUndef(); if (Opc == ARM::MOVr) @@ -768,14 +769,14 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, .addReg(DstReg, RegState::Define | getDeadRegState(isDead) | - getUndefRegState(isUndef)) + getUndefRegState(isUndef), DstSubReg) .addFrameIndex(FI).addReg(0).addImm(0).addImm(Pred).addReg(PredReg); else // ARM::t2MOVr NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::t2LDRi12)) .addReg(DstReg, RegState::Define | getDeadRegState(isDead) | - getUndefRegState(isUndef)) + getUndefRegState(isUndef), DstSubReg) .addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg); } } else if (Opc == ARM::tMOVgpr2gpr || @@ -783,20 +784,25 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, Opc == ARM::tMOVgpr2tgpr) { if (OpNum == 0) { // move -> store unsigned SrcReg = MI->getOperand(1).getReg(); + unsigned SrcSubReg = MI->getOperand(1).getSubReg(); bool isKill = MI->getOperand(1).isKill(); bool isUndef = MI->getOperand(1).isUndef(); NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::t2STRi12)) - .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef)) + .addReg(SrcReg, + getKillRegState(isKill) | getUndefRegState(isUndef), + SrcSubReg) .addFrameIndex(FI).addImm(0).addImm(ARMCC::AL).addReg(0); } else { // move -> load unsigned DstReg = MI->getOperand(0).getReg(); + unsigned DstSubReg = MI->getOperand(0).getSubReg(); bool isDead = MI->getOperand(0).isDead(); bool isUndef = MI->getOperand(0).isUndef(); NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::t2LDRi12)) .addReg(DstReg, RegState::Define | getDeadRegState(isDead) | - getUndefRegState(isUndef)) + getUndefRegState(isUndef), + DstSubReg) .addFrameIndex(FI).addImm(0).addImm(ARMCC::AL).addReg(0); } } else if (Opc == ARM::FCPYS) { @@ -804,21 +810,25 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, unsigned PredReg = MI->getOperand(3).getReg(); if (OpNum == 0) { // move -> store unsigned SrcReg = MI->getOperand(1).getReg(); + unsigned SrcSubReg = MI->getOperand(1).getSubReg(); bool isKill = MI->getOperand(1).isKill(); bool isUndef = MI->getOperand(1).isUndef(); NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::FSTS)) - .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef)) + .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef), + SrcSubReg) .addFrameIndex(FI) .addImm(0).addImm(Pred).addReg(PredReg); } else { // move -> load unsigned DstReg = MI->getOperand(0).getReg(); + unsigned DstSubReg = MI->getOperand(0).getSubReg(); bool isDead = MI->getOperand(0).isDead(); bool isUndef = MI->getOperand(0).isUndef(); NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::FLDS)) .addReg(DstReg, RegState::Define | getDeadRegState(isDead) | - getUndefRegState(isUndef)) + getUndefRegState(isUndef), + DstSubReg) .addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg); } } @@ -827,20 +837,25 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, unsigned PredReg = MI->getOperand(3).getReg(); if (OpNum == 0) { // move -> store unsigned SrcReg = MI->getOperand(1).getReg(); + unsigned SrcSubReg = MI->getOperand(1).getSubReg(); bool isKill = MI->getOperand(1).isKill(); bool isUndef = MI->getOperand(1).isUndef(); NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::FSTD)) - .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef)) + .addReg(SrcReg, + getKillRegState(isKill) | getUndefRegState(isUndef), + SrcSubReg) .addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg); } else { // move -> load unsigned DstReg = MI->getOperand(0).getReg(); + unsigned DstSubReg = MI->getOperand(0).getSubReg(); bool isDead = MI->getOperand(0).isDead(); bool isUndef = MI->getOperand(0).isUndef(); NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::FLDD)) .addReg(DstReg, RegState::Define | getDeadRegState(isDead) | - getUndefRegState(isUndef)) + getUndefRegState(isUndef), + DstSubReg) .addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg); } } diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h index a13155b9fd0d1..2ba377474e9ef 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/lib/Target/ARM/ARMBaseInstrInfo.h @@ -131,6 +131,14 @@ namespace ARMII { Xform16Bit = 1 << 16, //===------------------------------------------------------------------===// + // Code domain. + DomainShift = 17, + DomainMask = 3 << DomainShift, + DomainGeneral = 0 << DomainShift, + DomainVFP = 1 << DomainShift, + DomainNEON = 2 << DomainShift, + + //===------------------------------------------------------------------===// // Field shifts - such shifts are used to set field while generating // machine instructions. M_BitShift = 5, @@ -157,9 +165,10 @@ namespace ARMII { } class ARMBaseInstrInfo : public TargetInstrInfoImpl { + const ARMSubtarget& Subtarget; protected: // Can be only subclassed. - explicit ARMBaseInstrInfo(); + explicit ARMBaseInstrInfo(const ARMSubtarget &STI); public: // Return the non-pre/post incrementing version of 'Opc'. Return 0 // if there is not such an opcode. @@ -173,6 +182,7 @@ public: LiveVariables *LV) const; virtual const ARMBaseRegisterInfo &getRegisterInfo() const =0; + const ARMSubtarget &getSubtarget() const { return Subtarget; } // Branch analysis. virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, @@ -293,6 +303,11 @@ bool isJumpTableBranchOpcode(int Opc) { Opc == ARM::tBR_JTr || Opc == ARM::t2BR_JT; } +static inline +bool isIndirectBranchOpcode(int Opc) { + return Opc == ARM::BRIND || Opc == ARM::tBRIND; +} + /// getInstrPredicate - If instruction is predicated, returns its predicate /// condition, otherwise returns AL. It also returns the condition code /// register by reference. diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index 00e75310b6fbf..c1c531c9376bb 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -29,6 +29,7 @@ #include "llvm/CodeGen/MachineLocation.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetFrameInfo.h" @@ -40,13 +41,12 @@ using namespace llvm; static cl::opt<bool> -ScavengeFrameIndexVals("arm-virtual-frame-index-vals", cl::Hidden, - cl::init(false), - cl::desc("Resolve frame index values via scavenging in PEI")); +ReuseFrameIndexVals("arm-reuse-frame-index-vals", cl::Hidden, cl::init(true), + cl::desc("Reuse repeated frame index values")); static cl::opt<bool> -ReuseFrameIndexVals("arm-reuse-frame-index-vals", cl::Hidden, cl::init(false), - cl::desc("Reuse repeated frame index values")); +ARMDynamicStackAlign("arm-dynamic-stack-alignment", cl::Hidden, cl::init(false), + cl::desc("Dynamically re-align the stack as needed")); unsigned ARMBaseRegisterInfo::getRegisterNumbering(unsigned RegEnum, bool *isSPVFP) { @@ -254,6 +254,42 @@ bool ARMBaseRegisterInfo::isReservedReg(const MachineFunction &MF, } const TargetRegisterClass * +ARMBaseRegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A, + const TargetRegisterClass *B, + unsigned SubIdx) const { + switch (SubIdx) { + default: return 0; + case 1: + case 2: + case 3: + case 4: + // S sub-registers. + if (A->getSize() == 8) { + if (B == &ARM::SPR_8RegClass) + return &ARM::DPR_8RegClass; + assert(B == &ARM::SPRRegClass && "Expecting SPR register class!"); + if (A == &ARM::DPR_8RegClass) + return A; + return &ARM::DPR_VFP2RegClass; + } + + assert(A->getSize() == 16 && "Expecting a Q register class!"); + if (B == &ARM::SPR_8RegClass) + return &ARM::QPR_8RegClass; + return &ARM::QPR_VFP2RegClass; + case 5: + case 6: + // D sub-registers. + if (B == &ARM::DPR_VFP2RegClass) + return &ARM::QPR_VFP2RegClass; + if (B == &ARM::DPR_8RegClass) + return &ARM::QPR_8RegClass; + return A; + } + return 0; +} + +const TargetRegisterClass * ARMBaseRegisterInfo::getPointerRegClass(unsigned Kind) const { return ARM::GPRRegisterClass; } @@ -439,6 +475,21 @@ ARMBaseRegisterInfo::UpdateRegAllocHint(unsigned Reg, unsigned NewReg, } } +static unsigned calculateMaxStackAlignment(const MachineFrameInfo *FFI) { + unsigned MaxAlign = 0; + + for (int i = FFI->getObjectIndexBegin(), + e = FFI->getObjectIndexEnd(); i != e; ++i) { + if (FFI->isDeadObjectIndex(i)) + continue; + + unsigned Align = FFI->getObjectAlignment(i); + MaxAlign = std::max(MaxAlign, Align); + } + + return MaxAlign; +} + /// hasFP - Return true if the specified function should have a dedicated frame /// pointer register. This is true if the function has variable sized allocas /// or if frame pointer elimination is disabled. @@ -446,10 +497,27 @@ ARMBaseRegisterInfo::UpdateRegAllocHint(unsigned Reg, unsigned NewReg, bool ARMBaseRegisterInfo::hasFP(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); return (NoFramePointerElim || + needsStackRealignment(MF) || MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken()); } +bool ARMBaseRegisterInfo:: +needsStackRealignment(const MachineFunction &MF) const { + // Only do this for ARM if explicitly enabled + // FIXME: Once it's passing all the tests, enable by default + if (!ARMDynamicStackAlign) + return false; + + const MachineFrameInfo *MFI = MF.getFrameInfo(); + const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + unsigned StackAlign = MF.getTarget().getFrameInfo()->getStackAlignment(); + return (RealignStack && + !AFI->isThumb1OnlyFunction() && + (MFI->getMaxAlignment() > StackAlign) && + !MFI->hasVarSizedObjects()); +} + bool ARMBaseRegisterInfo::cannotEliminateFrame(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); if (NoFramePointerElim && MFI->hasCalls()) @@ -525,6 +593,16 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, SmallVector<unsigned, 4> UnspilledCS2GPRs; ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + + // Calculate and set max stack object alignment early, so we can decide + // whether we will need stack realignment (and thus FP). + if (ARMDynamicStackAlign) { + unsigned MaxAlign = std::max(MFI->getMaxAlignment(), + calculateMaxStackAlignment(MFI)); + MFI->setMaxAlignment(MaxAlign); + } + // Don't spill FP if the frame can be eliminated. This is determined // by scanning the callee-save registers to see if any is used. const unsigned *CSRegs = getCalleeSavedRegs(); @@ -947,7 +1025,7 @@ requiresRegisterScavenging(const MachineFunction &MF) const { bool ARMBaseRegisterInfo:: requiresFrameIndexScavenging(const MachineFunction &MF) const { - return ScavengeFrameIndexVals; + return true; } // hasReservedCallFrame - Under normal circumstances, when a frame pointer is @@ -1025,17 +1103,6 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MBB.erase(I); } -/// findScratchRegister - Find a 'free' ARM register. If register scavenger -/// is not being used, R12 is available. Otherwise, try for a call-clobbered -/// register first and then a spilled callee-saved register if that fails. -static -unsigned findScratchRegister(RegScavenger *RS, const TargetRegisterClass *RC, - ARMFunctionInfo *AFI) { - unsigned Reg = RS ? RS->FindUnusedReg(RC) : (unsigned) ARM::R12; - assert(!AFI->isThumb1OnlyFunction()); - return Reg; -} - unsigned ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, int *Value, @@ -1057,22 +1124,44 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, unsigned FrameReg = ARM::SP; int FrameIndex = MI.getOperand(i).getIndex(); int Offset = MFI->getObjectOffset(FrameIndex) + MFI->getStackSize() + SPAdj; + bool isFixed = MFI->isFixedObjectIndex(FrameIndex); + // When doing dynamic stack realignment, all of these need to change(?) if (AFI->isGPRCalleeSavedArea1Frame(FrameIndex)) Offset -= AFI->getGPRCalleeSavedArea1Offset(); else if (AFI->isGPRCalleeSavedArea2Frame(FrameIndex)) Offset -= AFI->getGPRCalleeSavedArea2Offset(); else if (AFI->isDPRCalleeSavedAreaFrame(FrameIndex)) Offset -= AFI->getDPRCalleeSavedAreaOffset(); - else if (hasFP(MF) && AFI->hasStackFrame()) { + else if (needsStackRealignment(MF)) { + // When dynamically realigning the stack, use the frame pointer for + // parameters, and the stack pointer for locals. + assert (hasFP(MF) && "dynamic stack realignment without a FP!"); + if (isFixed) { + FrameReg = getFrameRegister(MF); + Offset -= AFI->getFramePtrSpillOffset(); + // When referencing from the frame pointer, stack pointer adjustments + // don't matter. + SPAdj = 0; + } + } else if (hasFP(MF) && AFI->hasStackFrame()) { assert(SPAdj == 0 && "Unexpected stack offset!"); - // Use frame pointer to reference fixed objects unless this is a - // frameless function, - FrameReg = getFrameRegister(MF); - Offset -= AFI->getFramePtrSpillOffset(); + if (isFixed || MFI->hasVarSizedObjects()) { + // Use frame pointer to reference fixed objects unless this is a + // frameless function. + FrameReg = getFrameRegister(MF); + Offset -= AFI->getFramePtrSpillOffset(); + } else if (AFI->isThumb2Function()) { + // In Thumb2 mode, the negative offset is very limited. + int FPOffset = Offset - AFI->getFramePtrSpillOffset(); + if (FPOffset >= -255 && FPOffset < 0) { + FrameReg = getFrameRegister(MF); + Offset = FPOffset; + } + } } - // modify MI as necessary to handle as much of 'Offset' as possible + // Modify MI as necessary to handle as much of 'Offset' as possible bool Done = false; if (!AFI->isThumbFunction()) Done = rewriteARMFrameIndex(MI, i, FrameReg, Offset, TII); @@ -1099,19 +1188,8 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // Must be addrmode4. MI.getOperand(i).ChangeToRegister(FrameReg, false, false, false); else { - if (!ScavengeFrameIndexVals) { - // Insert a set of r12 with the full address: r12 = sp + offset - // If the offset we have is too large to fit into the instruction, we need - // to form it with a series of ADDri's. Do this by taking 8-bit chunks - // out of 'Offset'. - ScratchReg = findScratchRegister(RS, ARM::GPRRegisterClass, AFI); - if (ScratchReg == 0) - // No register is "free". Scavenge a register. - ScratchReg = RS->scavengeRegister(ARM::GPRRegisterClass, II, SPAdj); - } else { - ScratchReg = MF.getRegInfo().createVirtualRegister(ARM::GPRRegisterClass); - *Value = Offset; - } + ScratchReg = MF.getRegInfo().createVirtualRegister(ARM::GPRRegisterClass); + if (Value) *Value = Offset; if (!AFI->isThumbFunction()) emitARMRegPlusImmediate(MBB, II, MI.getDebugLoc(), ScratchReg, FrameReg, Offset, Pred, PredReg, TII); @@ -1121,7 +1199,7 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, Offset, Pred, PredReg, TII); } MI.getOperand(i).ChangeToRegister(ScratchReg, false, false, true); - if (!ReuseFrameIndexVals || !ScavengeFrameIndexVals) + if (!ReuseFrameIndexVals) ScratchReg = 0; } return ScratchReg; @@ -1276,6 +1354,18 @@ emitPrologue(MachineFunction &MF) const { AFI->setGPRCalleeSavedArea1Size(GPRCS1Size); AFI->setGPRCalleeSavedArea2Size(GPRCS2Size); AFI->setDPRCalleeSavedAreaSize(DPRCSSize); + + // If we need dynamic stack realignment, do it here. + if (needsStackRealignment(MF)) { + unsigned Opc; + unsigned MaxAlign = MFI->getMaxAlignment(); + assert (!AFI->isThumb1OnlyFunction()); + Opc = AFI->isThumbFunction() ? ARM::t2BICri : ARM::BICri; + + AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(Opc), ARM::SP) + .addReg(ARM::SP, RegState::Kill) + .addImm(MaxAlign-1))); + } } static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) { diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h index f7d38e540def6..029e468d42567 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.h +++ b/lib/Target/ARM/ARMBaseRegisterInfo.h @@ -74,6 +74,13 @@ public: BitVector getReservedRegs(const MachineFunction &MF) const; + /// getMatchingSuperRegClass - Return a subclass of the specified register + /// class A so that each register in it has a sub-register of the + /// specified sub-register index which is in the specified register class B. + virtual const TargetRegisterClass * + getMatchingSuperRegClass(const TargetRegisterClass *A, + const TargetRegisterClass *B, unsigned Idx) const; + const TargetRegisterClass *getPointerRegClass(unsigned Kind = 0) const; std::pair<TargetRegisterClass::iterator,TargetRegisterClass::iterator> @@ -89,6 +96,8 @@ public: bool hasFP(const MachineFunction &MF) const; + bool needsStackRealignment(const MachineFunction &MF) const; + bool cannotEliminateFrame(const MachineFunction &MF) const; void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td index 716163958d9c5..8fdb07f81626a 100644 --- a/lib/Target/ARM/ARMCallingConv.td +++ b/lib/Target/ARM/ARMCallingConv.td @@ -68,6 +68,7 @@ def CC_ARM_AAPCS_Common : CallingConv<[ "ArgFlags.getOrigAlign() != 8", CCAssignToReg<[R0, R1, R2, R3]>>>, + CCIfType<[i32], CCIfAlign<"8", CCAssignToStack<4, 8>>>, CCIfType<[i32, f32], CCAssignToStack<4, 4>>, CCIfType<[f64], CCAssignToStack<8, 8>>, CCIfType<[v2f64], CCAssignToStack<16, 8>> diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp index 6f1c624cbf524..13cf6765c2764 100644 --- a/lib/Target/ARM/ARMCodeEmitter.cpp +++ b/lib/Target/ARM/ARMCodeEmitter.cpp @@ -34,7 +34,6 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -56,8 +55,7 @@ namespace { }; template<class CodeEmitter> - class VISIBILITY_HIDDEN Emitter : public MachineFunctionPass, - public ARMCodeEmitter { + class Emitter : public MachineFunctionPass, public ARMCodeEmitter { ARMJITInfo *JTI; const ARMInstrInfo *II; const TargetData *TD; @@ -430,6 +428,7 @@ void Emitter<CodeEmitter>::emitConstPoolInstruction(const MachineInstr &MI) { DEBUG(errs() << " ** ARM constant pool #" << CPI << " @ " << (void*)MCE.getCurrentPCValue() << " " << *ACPV << '\n'); + assert(ACPV->isGlobalValue() && "unsupported constant pool value"); GlobalValue *GV = ACPV->getGV(); if (GV) { Reloc::Model RelocM = TM.getRelocationModel(); diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp index c995ff2d9906c..981962522db5e 100644 --- a/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -24,7 +24,6 @@ #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -42,6 +41,7 @@ STATISTIC(NumUBrFixed, "Number of uncond branches fixed"); STATISTIC(NumTBs, "Number of table branches generated"); STATISTIC(NumT2CPShrunk, "Number of Thumb2 constantpool instructions shrunk"); STATISTIC(NumT2BrShrunk, "Number of Thumb2 immediate branches shrunk"); +STATISTIC(NumCBZ, "Number of CBZ / CBNZ formed"); namespace { /// ARMConstantIslands - Due to limited PC-relative displacements, ARM @@ -55,7 +55,7 @@ namespace { /// Water - Potential places where an island could be formed. /// CPE - A constant pool entry that has been placed somewhere, which /// tracks a list of users. - class VISIBILITY_HIDDEN ARMConstantIslands : public MachineFunctionPass { + class ARMConstantIslands : public MachineFunctionPass { /// BBSizes - The size of each MachineBasicBlock in bytes of code, indexed /// by MBB Number. The two-byte pads required for Thumb alignment are /// counted as part of the following block (i.e., the offset and size for @@ -1487,24 +1487,65 @@ bool ARMConstantIslands::OptimizeThumb2Branches(MachineFunction &MF) { Bits = 11; Scale = 2; break; - case ARM::t2Bcc: + case ARM::t2Bcc: { NewOpc = ARM::tBcc; Bits = 8; - Scale = 2; + Scale = 2; break; } - if (!NewOpc) + } + if (NewOpc) { + unsigned MaxOffs = ((1 << (Bits-1))-1) * Scale; + MachineBasicBlock *DestBB = Br.MI->getOperand(0).getMBB(); + if (BBIsInRange(Br.MI, DestBB, MaxOffs)) { + Br.MI->setDesc(TII->get(NewOpc)); + MachineBasicBlock *MBB = Br.MI->getParent(); + BBSizes[MBB->getNumber()] -= 2; + AdjustBBOffsetsAfter(MBB, -2); + ++NumT2BrShrunk; + MadeChange = true; + } + } + + Opcode = Br.MI->getOpcode(); + if (Opcode != ARM::tBcc) continue; - unsigned MaxOffs = ((1 << (Bits-1))-1) * Scale; + NewOpc = 0; + unsigned PredReg = 0; + ARMCC::CondCodes Pred = llvm::getInstrPredicate(Br.MI, PredReg); + if (Pred == ARMCC::EQ) + NewOpc = ARM::tCBZ; + else if (Pred == ARMCC::NE) + NewOpc = ARM::tCBNZ; + if (!NewOpc) + continue; MachineBasicBlock *DestBB = Br.MI->getOperand(0).getMBB(); - if (BBIsInRange(Br.MI, DestBB, MaxOffs)) { - Br.MI->setDesc(TII->get(NewOpc)); - MachineBasicBlock *MBB = Br.MI->getParent(); - BBSizes[MBB->getNumber()] -= 2; - AdjustBBOffsetsAfter(MBB, -2); - ++NumT2BrShrunk; - MadeChange = true; + // Check if the distance is within 126. Subtract starting offset by 2 + // because the cmp will be eliminated. + unsigned BrOffset = GetOffsetOf(Br.MI) + 4 - 2; + unsigned DestOffset = BBOffsets[DestBB->getNumber()]; + if (BrOffset < DestOffset && (DestOffset - BrOffset) <= 126) { + MachineBasicBlock::iterator CmpMI = Br.MI; --CmpMI; + if (CmpMI->getOpcode() == ARM::tCMPzi8) { + unsigned Reg = CmpMI->getOperand(0).getReg(); + Pred = llvm::getInstrPredicate(CmpMI, PredReg); + if (Pred == ARMCC::AL && + CmpMI->getOperand(1).getImm() == 0 && + isARMLowRegister(Reg)) { + MachineBasicBlock *MBB = Br.MI->getParent(); + MachineInstr *NewBR = + BuildMI(*MBB, CmpMI, Br.MI->getDebugLoc(), TII->get(NewOpc)) + .addReg(Reg).addMBB(DestBB, Br.MI->getOperand(0).getTargetFlags()); + CmpMI->eraseFromParent(); + Br.MI->eraseFromParent(); + Br.MI = NewBR; + BBSizes[MBB->getNumber()] -= 2; + AdjustBBOffsetsAfter(MBB, -2); + ++NumCBZ; + MadeChange = true; + } + } } } diff --git a/lib/Target/ARM/ARMConstantPoolValue.cpp b/lib/Target/ARM/ARMConstantPoolValue.cpp index 71700893a3e8e..efa941a677c9c 100644 --- a/lib/Target/ARM/ARMConstantPoolValue.cpp +++ b/lib/Target/ARM/ARMConstantPoolValue.cpp @@ -13,19 +13,21 @@ #include "ARMConstantPoolValue.h" #include "llvm/ADT/FoldingSet.h" +#include "llvm/Constant.h" +#include "llvm/Constants.h" #include "llvm/GlobalValue.h" #include "llvm/Type.h" #include "llvm/Support/raw_ostream.h" #include <cstdlib> using namespace llvm; -ARMConstantPoolValue::ARMConstantPoolValue(GlobalValue *gv, unsigned id, +ARMConstantPoolValue::ARMConstantPoolValue(Constant *cval, unsigned id, ARMCP::ARMCPKind K, unsigned char PCAdj, const char *Modif, bool AddCA) - : MachineConstantPoolValue((const Type*)gv->getType()), - GV(gv), S(NULL), LabelId(id), Kind(K), PCAdjust(PCAdj), + : MachineConstantPoolValue((const Type*)cval->getType()), + CVal(cval), S(NULL), LabelId(id), Kind(K), PCAdjust(PCAdj), Modifier(Modif), AddCurrentAddress(AddCA) {} ARMConstantPoolValue::ARMConstantPoolValue(LLVMContext &C, @@ -34,14 +36,22 @@ ARMConstantPoolValue::ARMConstantPoolValue(LLVMContext &C, const char *Modif, bool AddCA) : MachineConstantPoolValue((const Type*)Type::getInt32Ty(C)), - GV(NULL), S(strdup(s)), LabelId(id), Kind(ARMCP::CPValue), PCAdjust(PCAdj), - Modifier(Modif), AddCurrentAddress(AddCA) {} + CVal(NULL), S(strdup(s)), LabelId(id), Kind(ARMCP::CPExtSymbol), + PCAdjust(PCAdj), Modifier(Modif), AddCurrentAddress(AddCA) {} ARMConstantPoolValue::ARMConstantPoolValue(GlobalValue *gv, const char *Modif) : MachineConstantPoolValue((const Type*)Type::getInt32Ty(gv->getContext())), - GV(gv), S(NULL), LabelId(0), Kind(ARMCP::CPValue), PCAdjust(0), + CVal(gv), S(NULL), LabelId(0), Kind(ARMCP::CPValue), PCAdjust(0), Modifier(Modif) {} +GlobalValue *ARMConstantPoolValue::getGV() const { + return dyn_cast_or_null<GlobalValue>(CVal); +} + +BlockAddress *ARMConstantPoolValue::getBlockAddress() const { + return dyn_cast_or_null<BlockAddress>(CVal); +} + int ARMConstantPoolValue::getExistingMachineCPValue(MachineConstantPool *CP, unsigned Alignment) { unsigned AlignMask = Alignment - 1; @@ -51,7 +61,7 @@ int ARMConstantPoolValue::getExistingMachineCPValue(MachineConstantPool *CP, (Constants[i].getAlignment() & AlignMask) == 0) { ARMConstantPoolValue *CPV = (ARMConstantPoolValue *)Constants[i].Val.MachineCPVal; - if (CPV->GV == GV && + if (CPV->CVal == CVal && CPV->S == S && CPV->LabelId == LabelId && CPV->PCAdjust == PCAdjust) @@ -68,7 +78,7 @@ ARMConstantPoolValue::~ARMConstantPoolValue() { void ARMConstantPoolValue::AddSelectionDAGCSEId(FoldingSetNodeID &ID) { - ID.AddPointer(GV); + ID.AddPointer(CVal); ID.AddPointer(S); ID.AddInteger(LabelId); ID.AddInteger(PCAdjust); @@ -80,8 +90,8 @@ void ARMConstantPoolValue::dump() const { void ARMConstantPoolValue::print(raw_ostream &O) const { - if (GV) - O << GV->getName(); + if (CVal) + O << CVal->getName(); else O << S; if (Modifier) O << "(" << Modifier << ")"; diff --git a/lib/Target/ARM/ARMConstantPoolValue.h b/lib/Target/ARM/ARMConstantPoolValue.h index 00c48086aef66..8fb3f9245ecf3 100644 --- a/lib/Target/ARM/ARMConstantPoolValue.h +++ b/lib/Target/ARM/ARMConstantPoolValue.h @@ -18,31 +18,35 @@ namespace llvm { +class Constant; +class BlockAddress; class GlobalValue; class LLVMContext; namespace ARMCP { enum ARMCPKind { CPValue, + CPExtSymbol, + CPBlockAddress, CPLSDA }; } /// ARMConstantPoolValue - ARM specific constantpool value. This is used to -/// represent PC relative displacement between the address of the load -/// instruction and the global value being loaded, i.e. (&GV-(LPIC+8)). +/// represent PC-relative displacement between the address of the load +/// instruction and the constant being loaded, i.e. (&GV-(LPIC+8)). class ARMConstantPoolValue : public MachineConstantPoolValue { - GlobalValue *GV; // GlobalValue being loaded. + Constant *CVal; // Constant being loaded. const char *S; // ExtSymbol being loaded. unsigned LabelId; // Label id of the load. - ARMCP::ARMCPKind Kind; // Value or LSDA? - unsigned char PCAdjust; // Extra adjustment if constantpool is pc relative. + ARMCP::ARMCPKind Kind; // Kind of constant. + unsigned char PCAdjust; // Extra adjustment if constantpool is pc-relative. // 8 for ARM, 4 for Thumb. const char *Modifier; // GV modifier i.e. (&GV(modifier)-(LPIC+8)) bool AddCurrentAddress; public: - ARMConstantPoolValue(GlobalValue *gv, unsigned id, + ARMConstantPoolValue(Constant *cval, unsigned id, ARMCP::ARMCPKind Kind = ARMCP::CPValue, unsigned char PCAdj = 0, const char *Modifier = NULL, bool AddCurrentAddress = false); @@ -53,14 +57,17 @@ public: ARMConstantPoolValue(); ~ARMConstantPoolValue(); - - GlobalValue *getGV() const { return GV; } + GlobalValue *getGV() const; const char *getSymbol() const { return S; } + BlockAddress *getBlockAddress() const; const char *getModifier() const { return Modifier; } bool hasModifier() const { return Modifier != NULL; } bool mustAddCurrentAddress() const { return AddCurrentAddress; } unsigned getLabelId() const { return LabelId; } unsigned char getPCAdjustment() const { return PCAdjust; } + bool isGlobalValue() const { return Kind == ARMCP::CPValue; } + bool isExtSymbol() const { return Kind == ARMCP::CPExtSymbol; } + bool isBlockAddress() { return Kind == ARMCP::CPBlockAddress; } bool isLSDA() { return Kind == ARMCP::CPLSDA; } virtual unsigned getRelocationInfo() const { @@ -69,7 +76,6 @@ public: return 2; } - virtual int getExistingMachineCPValue(MachineConstantPool *CP, unsigned Alignment); @@ -80,7 +86,6 @@ public: void dump() const; }; - inline raw_ostream &operator<<(raw_ostream &O, const ARMConstantPoolValue &V) { V.print(O); return O; diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index 5c1835b46a224..1489cab6f16e6 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -13,7 +13,6 @@ #include "ARM.h" #include "ARMAddressingModes.h" -#include "ARMConstantPoolValue.h" #include "ARMISelLowering.h" #include "ARMTargetMachine.h" #include "llvm/CallingConv.h" @@ -284,7 +283,7 @@ bool ARMDAGToDAGISel::SelectAddrMode2(SDValue Op, SDValue N, } } - // Otherwise this is R +/- [possibly shifted] R + // Otherwise this is R +/- [possibly shifted] R. ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::ADD ? ARM_AM::add:ARM_AM::sub; ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(1)); unsigned ShAmt = 0; diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 6a264fdfc44f5..b6ce5ddb7035f 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -330,9 +330,9 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) if (!Subtarget->hasV6Ops()) setOperationAction(ISD::MULHS, MVT::i32, Expand); } - setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand); - setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand); - setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand); + setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom); + setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom); + setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom); setOperationAction(ISD::SRL, MVT::i64, Custom); setOperationAction(ISD::SRA, MVT::i64, Custom); @@ -363,6 +363,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::ConstantPool, MVT::i32, Custom); setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom); setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom); + setOperationAction(ISD::BlockAddress, MVT::i32, Custom); // Use the default implementation. setOperationAction(ISD::VASTART, MVT::Other, Custom); @@ -495,6 +496,9 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::FMRRD: return "ARMISD::FMRRD"; case ARMISD::FMDRR: return "ARMISD::FMDRR"; + case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP"; + case ARMISD::EH_SJLJ_LONGJMP:return "ARMISD::EH_SJLJ_LONGJMP"; + case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER"; case ARMISD::DYN_ALLOC: return "ARMISD::DYN_ALLOC"; @@ -1017,7 +1021,8 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4); CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); Callee = DAG.getLoad(getPointerTy(), dl, - DAG.getEntryNode(), CPAddr, NULL, 0); + DAG.getEntryNode(), CPAddr, + PseudoSourceValue::getConstantPool(), 0); SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32); Callee = DAG.getNode(ARMISD::PIC_ADD, dl, getPointerTy(), Callee, PICLabel); @@ -1036,7 +1041,8 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4); CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); Callee = DAG.getLoad(getPointerTy(), dl, - DAG.getEntryNode(), CPAddr, NULL, 0); + DAG.getEntryNode(), CPAddr, + PseudoSourceValue::getConstantPool(), 0); SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32); Callee = DAG.getNode(ARMISD::PIC_ADD, dl, getPointerTy(), Callee, PICLabel); @@ -1201,6 +1207,30 @@ static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) { return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res); } +SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) { + DebugLoc DL = Op.getDebugLoc(); + EVT PtrVT = getPointerTy(); + BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); + Reloc::Model RelocM = getTargetMachine().getRelocationModel(); + SDValue CPAddr; + if (RelocM == Reloc::Static) { + CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4); + } else { + unsigned PCAdj = Subtarget->isThumb() ? 4 : 8; + ARMConstantPoolValue *CPV = new ARMConstantPoolValue(BA, ARMPCLabelIndex, + ARMCP::CPBlockAddress, + PCAdj); + CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); + } + CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr); + SDValue Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), CPAddr, + PseudoSourceValue::getConstantPool(), 0); + if (RelocM == Reloc::Static) + return Result; + SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32); + return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel); +} + // Lower ISD::GlobalTLSAddress using the "general dynamic" model SDValue ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, @@ -1213,7 +1243,8 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, ARMCP::CPValue, PCAdj, "tlsgd", true); SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4); Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument); - Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument, NULL, 0); + Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument, + PseudoSourceValue::getConstantPool(), 0); SDValue Chain = Argument.getValue(1); SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32); @@ -1255,19 +1286,22 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA, ARMCP::CPValue, PCAdj, "gottpoff", true); Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4); Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset); - Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, NULL, 0); + Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, + PseudoSourceValue::getConstantPool(), 0); Chain = Offset.getValue(1); SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32); Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel); - Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, NULL, 0); + Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, + PseudoSourceValue::getConstantPool(), 0); } else { // local exec model ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, "tpoff"); Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4); Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset); - Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, NULL, 0); + Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, + PseudoSourceValue::getConstantPool(), 0); } // The address of the thread local variable is the add of the thread @@ -1336,7 +1370,8 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, } CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); - SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, NULL, 0); + SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, + PseudoSourceValue::getConstantPool(), 0); SDValue Chain = Result.getValue(1); if (RelocM == Reloc::PIC_) { @@ -1345,7 +1380,8 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, } if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) - Result = DAG.getLoad(PtrVT, dl, Chain, Result, NULL, 0); + Result = DAG.getLoad(PtrVT, dl, Chain, Result, + PseudoSourceValue::getGOT(), 0); return Result; } @@ -1392,7 +1428,8 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) { CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); SDValue Result = - DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, NULL, 0); + DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, + PseudoSourceValue::getConstantPool(), 0); SDValue Chain = Result.getValue(1); if (RelocM == Reloc::PIC_) { @@ -1489,7 +1526,8 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, // Create load node to retrieve arguments from the stack. SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); - ArgValue2 = DAG.getLoad(MVT::i32, dl, Root, FIN, NULL, 0); + ArgValue2 = DAG.getLoad(MVT::i32, dl, Root, FIN, + PseudoSourceValue::getFixedStack(FI), 0); } else { Reg = MF.addLiveIn(NextVA.getLocReg(), RC); ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32); @@ -1602,7 +1640,8 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, // Create load nodes to retrieve arguments from the stack. SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); - InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, NULL, 0)); + InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, + PseudoSourceValue::getFixedStack(FI), 0)); } } @@ -1618,13 +1657,12 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment(); unsigned VARegSize = (4 - NumGPRs) * 4; unsigned VARegSaveSize = (VARegSize + Align - 1) & ~(Align - 1); - unsigned ArgOffset = 0; + unsigned ArgOffset = CCInfo.getNextStackOffset(); if (VARegSaveSize) { // If this function is vararg, store any remaining integer argument regs // to their spots on the stack so that they may be loaded by deferencing // the result of va_next. AFI->setVarArgsRegSaveSize(VARegSaveSize); - ArgOffset = CCInfo.getNextStackOffset(); VarArgsFrameIndex = MFI->CreateFixedObject(VARegSaveSize, ArgOffset + VARegSaveSize - VARegSize); SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy()); @@ -1639,7 +1677,8 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, unsigned VReg = MF.addLiveIn(GPRArgRegs[NumGPRs], RC); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); - SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0); + SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, + PseudoSourceValue::getFixedStack(VarArgsFrameIndex), 0); MemOps.push_back(Store); FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN, DAG.getConstant(4, getPointerTy())); @@ -1839,12 +1878,14 @@ SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) { Addr, Op.getOperand(2), JTI, UId); } if (getTargetMachine().getRelocationModel() == Reloc::PIC_) { - Addr = DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr, NULL, 0); + Addr = DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr, + PseudoSourceValue::getJumpTable(), 0); Chain = Addr.getValue(1); Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table); return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId); } else { - Addr = DAG.getLoad(PTy, dl, Chain, Addr, NULL, 0); + Addr = DAG.getLoad(PTy, dl, Chain, Addr, + PseudoSourceValue::getJumpTable(), 0); Chain = Addr.getValue(1); return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId); } @@ -2055,7 +2096,7 @@ static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) { static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) { assert(VT.isVector() && "Expected a vector type"); - // Always build ones vectors as <16 x i32> or <8 x i32> bitcasted to their + // Always build ones vectors as <16 x i8> or <8 x i8> bitcasted to their // dest type. This ensures they get CSE'd. SDValue Vec; SDValue Cst = DAG.getTargetConstant(0xFF, MVT::i8); @@ -2072,6 +2113,76 @@ static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) { return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vec); } +/// LowerShiftRightParts - Lower SRA_PARTS, which returns two +/// i32 values and take a 2 x i32 value to shift plus a shift amount. +static SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG, + const ARMSubtarget *ST) { + assert(Op.getNumOperands() == 3 && "Not a double-shift!"); + EVT VT = Op.getValueType(); + unsigned VTBits = VT.getSizeInBits(); + DebugLoc dl = Op.getDebugLoc(); + SDValue ShOpLo = Op.getOperand(0); + SDValue ShOpHi = Op.getOperand(1); + SDValue ShAmt = Op.getOperand(2); + SDValue ARMCC; + unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL; + + assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS); + + SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, + DAG.getConstant(VTBits, MVT::i32), ShAmt); + SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt); + SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt, + DAG.getConstant(VTBits, MVT::i32)); + SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt); + SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); + SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt); + + SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); + SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE, + ARMCC, DAG, ST->isThumb1Only(), dl); + SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt); + SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMCC, + CCR, Cmp); + + SDValue Ops[2] = { Lo, Hi }; + return DAG.getMergeValues(Ops, 2, dl); +} + +/// LowerShiftLeftParts - Lower SHL_PARTS, which returns two +/// i32 values and take a 2 x i32 value to shift plus a shift amount. +static SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG, + const ARMSubtarget *ST) { + assert(Op.getNumOperands() == 3 && "Not a double-shift!"); + EVT VT = Op.getValueType(); + unsigned VTBits = VT.getSizeInBits(); + DebugLoc dl = Op.getDebugLoc(); + SDValue ShOpLo = Op.getOperand(0); + SDValue ShOpHi = Op.getOperand(1); + SDValue ShAmt = Op.getOperand(2); + SDValue ARMCC; + + assert(Op.getOpcode() == ISD::SHL_PARTS); + SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, + DAG.getConstant(VTBits, MVT::i32), ShAmt); + SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt); + SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt, + DAG.getConstant(VTBits, MVT::i32)); + SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt); + SDValue Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt); + + SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); + SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); + SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE, + ARMCC, DAG, ST->isThumb1Only(), dl); + SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt); + SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, Tmp3, ARMCC, + CCR, Cmp); + + SDValue Ops[2] = { Lo, Hi }; + return DAG.getMergeValues(Ops, 2, dl); +} + static SDValue LowerShift(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST) { EVT VT = N->getValueType(0); @@ -2641,6 +2752,9 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) { int Lane = SVN->getSplatIndex(); + // If this is undef splat, generate it via "just" vdup, if possible. + if (Lane == -1) Lane = 0; + if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) { return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0)); } @@ -2741,6 +2855,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { switch (Op.getOpcode()) { default: llvm_unreachable("Don't know how to custom lower this!"); case ISD::ConstantPool: return LowerConstantPool(Op, DAG); + case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); case ISD::GlobalAddress: return Subtarget->isTargetDarwin() ? LowerGlobalAddressDarwin(Op, DAG) : LowerGlobalAddressELF(Op, DAG); @@ -2763,6 +2878,9 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { case ISD::SHL: case ISD::SRL: case ISD::SRA: return LowerShift(Op.getNode(), DAG, Subtarget); + case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG, Subtarget); + case ISD::SRL_PARTS: + case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG, Subtarget); case ISD::VSETCC: return LowerVSETCC(Op, DAG); case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); @@ -3990,3 +4108,60 @@ ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { // The ARM target isn't yet aware of offsets. return false; } + +int ARM::getVFPf32Imm(const APFloat &FPImm) { + APInt Imm = FPImm.bitcastToAPInt(); + uint32_t Sign = Imm.lshr(31).getZExtValue() & 1; + int32_t Exp = (Imm.lshr(23).getSExtValue() & 0xff) - 127; // -126 to 127 + int64_t Mantissa = Imm.getZExtValue() & 0x7fffff; // 23 bits + + // We can handle 4 bits of mantissa. + // mantissa = (16+UInt(e:f:g:h))/16. + if (Mantissa & 0x7ffff) + return -1; + Mantissa >>= 19; + if ((Mantissa & 0xf) != Mantissa) + return -1; + + // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3 + if (Exp < -3 || Exp > 4) + return -1; + Exp = ((Exp+3) & 0x7) ^ 4; + + return ((int)Sign << 7) | (Exp << 4) | Mantissa; +} + +int ARM::getVFPf64Imm(const APFloat &FPImm) { + APInt Imm = FPImm.bitcastToAPInt(); + uint64_t Sign = Imm.lshr(63).getZExtValue() & 1; + int64_t Exp = (Imm.lshr(52).getSExtValue() & 0x7ff) - 1023; // -1022 to 1023 + uint64_t Mantissa = Imm.getZExtValue() & 0xfffffffffffffLL; + + // We can handle 4 bits of mantissa. + // mantissa = (16+UInt(e:f:g:h))/16. + if (Mantissa & 0xffffffffffffLL) + return -1; + Mantissa >>= 48; + if ((Mantissa & 0xf) != Mantissa) + return -1; + + // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3 + if (Exp < -3 || Exp > 4) + return -1; + Exp = ((Exp+3) & 0x7) ^ 4; + + return ((int)Sign << 7) | (Exp << 4) | Mantissa; +} + +/// isFPImmLegal - Returns true if the target can instruction select the +/// specified FP immediate natively. If false, the legalizer will +/// materialize the FP immediate as a load from a constant pool. +bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { + if (!Subtarget->hasVFP3()) + return false; + if (VT == MVT::f32) + return ARM::getVFPf32Imm(Imm) != -1; + if (VT == MVT::f64) + return ARM::getVFPf64Imm(Imm) != -1; + return false; +} diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index 7d85f458d8e9f..9c7a91d68d2f4 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -137,6 +137,13 @@ namespace llvm { /// return the constant being splatted. The ByteSize field indicates the /// number of bytes of each element [1248]. SDValue getVMOVImm(SDNode *N, unsigned ByteSize, SelectionDAG &DAG); + + /// getVFPf32Imm / getVFPf64Imm - If the given fp immediate can be + /// materialized with a VMOV.f32 / VMOV.f64 (i.e. fconsts / fconstd) + /// instruction, returns its 8-bit integer representation. Otherwise, + /// returns -1. + int getVFPf32Imm(const APFloat &FPImm); + int getVFPf64Imm(const APFloat &FPImm); } //===--------------------------------------------------------------------===// @@ -224,6 +231,12 @@ namespace llvm { bool isShuffleMaskLegal(const SmallVectorImpl<int> &M, EVT VT) const; bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const; + + /// isFPImmLegal - Returns true if the target can instruction select the + /// specified FP immediate natively. If false, the legalizer will + /// materialize the FP immediate as a load from a constant pool. + virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const; + private: /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can /// make the right decision when generating code for different targets. @@ -255,6 +268,7 @@ namespace llvm { ISD::ArgFlagsTy Flags); SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG); SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG); + SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG); SDValue LowerGlobalAddressDarwin(SDValue Op, SelectionDAG &DAG); SDValue LowerGlobalAddressELF(SDValue Op, SelectionDAG &DAG); SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG); diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index 8225fd741bb8b..83b5cb4cac970 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -108,6 +108,15 @@ def IndexModeNone : IndexMode<0>; def IndexModePre : IndexMode<1>; def IndexModePost : IndexMode<2>; +// Instruction execution domain. +class Domain<bits<2> val> { + bits<2> Value = val; +} +def GenericDomain : Domain<0>; +def VFPDomain : Domain<1>; // Instructions in VFP domain only +def NeonDomain : Domain<2>; // Instructions in Neon domain only +def VFPNeonDomain : Domain<3>; // Instructions in both VFP & Neon domains + //===----------------------------------------------------------------------===// // ARM special operands. @@ -136,7 +145,7 @@ def s_cc_out : OptionalDefOperand<OtherVT, (ops CCR), (ops (i32 CPSR))> { // class InstARM<AddrMode am, SizeFlagVal sz, IndexMode im, - Format f, string cstr, InstrItinClass itin> + Format f, Domain d, string cstr, InstrItinClass itin> : Instruction { field bits<32> Inst; @@ -155,6 +164,9 @@ class InstARM<AddrMode am, SizeFlagVal sz, IndexMode im, Format F = f; bits<5> Form = F.Value; + Domain D = d; + bits<2> Dom = D.Value; + // // Attributes specific to ARM instructions... // @@ -167,7 +179,8 @@ class InstARM<AddrMode am, SizeFlagVal sz, IndexMode im, class PseudoInst<dag oops, dag iops, InstrItinClass itin, string asm, list<dag> pattern> - : InstARM<AddrModeNone, SizeSpecial, IndexModeNone, Pseudo, "", itin> { + : InstARM<AddrModeNone, SizeSpecial, IndexModeNone, Pseudo, GenericDomain, + "", itin> { let OutOperandList = oops; let InOperandList = iops; let AsmString = asm; @@ -179,7 +192,7 @@ class I<dag oops, dag iops, AddrMode am, SizeFlagVal sz, IndexMode im, Format f, InstrItinClass itin, string opc, string asm, string cstr, list<dag> pattern> - : InstARM<am, sz, im, f, cstr, itin> { + : InstARM<am, sz, im, f, GenericDomain, cstr, itin> { let OutOperandList = oops; let InOperandList = !con(iops, (ops pred:$p)); let AsmString = !strconcat(opc, !strconcat("${p}", asm)); @@ -194,7 +207,7 @@ class sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz, IndexMode im, Format f, InstrItinClass itin, string opc, string asm, string cstr, list<dag> pattern> - : InstARM<am, sz, im, f, cstr, itin> { + : InstARM<am, sz, im, f, GenericDomain, cstr, itin> { let OutOperandList = oops; let InOperandList = !con(iops, (ops pred:$p, cc_out:$s)); let AsmString = !strconcat(opc, !strconcat("${p}${s}", asm)); @@ -206,7 +219,7 @@ class sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz, class XI<dag oops, dag iops, AddrMode am, SizeFlagVal sz, IndexMode im, Format f, InstrItinClass itin, string asm, string cstr, list<dag> pattern> - : InstARM<am, sz, im, f, cstr, itin> { + : InstARM<am, sz, im, f, GenericDomain, cstr, itin> { let OutOperandList = oops; let InOperandList = iops; let AsmString = asm; @@ -807,7 +820,7 @@ class ARMV6Pat<dag pattern, dag result> : Pat<pattern, result> { class ThumbI<dag oops, dag iops, AddrMode am, SizeFlagVal sz, InstrItinClass itin, string asm, string cstr, list<dag> pattern> - : InstARM<am, sz, IndexModeNone, ThumbFrm, cstr, itin> { + : InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> { let OutOperandList = oops; let InOperandList = iops; let AsmString = asm; @@ -833,7 +846,7 @@ class TJTI<dag oops, dag iops, InstrItinClass itin, string asm, list<dag> patter // Thumb1 only class Thumb1I<dag oops, dag iops, AddrMode am, SizeFlagVal sz, InstrItinClass itin, string asm, string cstr, list<dag> pattern> - : InstARM<am, sz, IndexModeNone, ThumbFrm, cstr, itin> { + : InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> { let OutOperandList = oops; let InOperandList = iops; let AsmString = asm; @@ -861,7 +874,7 @@ class T1It<dag oops, dag iops, InstrItinClass itin, class Thumb1sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz, InstrItinClass itin, string opc, string asm, string cstr, list<dag> pattern> - : InstARM<am, sz, IndexModeNone, ThumbFrm, cstr, itin> { + : InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> { let OutOperandList = !con(oops, (ops s_cc_out:$s)); let InOperandList = !con(iops, (ops pred:$p)); let AsmString = !strconcat(opc, !strconcat("${s}${p}", asm)); @@ -883,7 +896,7 @@ class T1sIt<dag oops, dag iops, InstrItinClass itin, class Thumb1pI<dag oops, dag iops, AddrMode am, SizeFlagVal sz, InstrItinClass itin, string opc, string asm, string cstr, list<dag> pattern> - : InstARM<am, sz, IndexModeNone, ThumbFrm, cstr, itin> { + : InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> { let OutOperandList = oops; let InOperandList = !con(iops, (ops pred:$p)); let AsmString = !strconcat(opc, !strconcat("${p}", asm)); @@ -918,7 +931,7 @@ class T1pIs<dag oops, dag iops, class Thumb2I<dag oops, dag iops, AddrMode am, SizeFlagVal sz, InstrItinClass itin, string opc, string asm, string cstr, list<dag> pattern> - : InstARM<am, sz, IndexModeNone, ThumbFrm, cstr, itin> { + : InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> { let OutOperandList = oops; let InOperandList = !con(iops, (ops pred:$p)); let AsmString = !strconcat(opc, !strconcat("${p}", asm)); @@ -934,7 +947,7 @@ class Thumb2I<dag oops, dag iops, AddrMode am, SizeFlagVal sz, class Thumb2sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz, InstrItinClass itin, string opc, string asm, string cstr, list<dag> pattern> - : InstARM<am, sz, IndexModeNone, ThumbFrm, cstr, itin> { + : InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> { let OutOperandList = oops; let InOperandList = !con(iops, (ops pred:$p, cc_out:$s)); let AsmString = !strconcat(opc, !strconcat("${s}${p}", asm)); @@ -946,7 +959,7 @@ class Thumb2sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz, class Thumb2XI<dag oops, dag iops, AddrMode am, SizeFlagVal sz, InstrItinClass itin, string asm, string cstr, list<dag> pattern> - : InstARM<am, sz, IndexModeNone, ThumbFrm, cstr, itin> { + : InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> { let OutOperandList = oops; let InOperandList = iops; let AsmString = asm; @@ -993,7 +1006,7 @@ class T2Ix2<dag oops, dag iops, InstrItinClass itin, class T2Iidxldst<dag oops, dag iops, AddrMode am, IndexMode im, InstrItinClass itin, string opc, string asm, string cstr, list<dag> pattern> - : InstARM<am, Size4Bytes, im, ThumbFrm, cstr, itin> { + : InstARM<am, Size4Bytes, im, ThumbFrm, GenericDomain, cstr, itin> { let OutOperandList = oops; let InOperandList = !con(iops, (ops pred:$p)); let AsmString = !strconcat(opc, !strconcat("${p}", asm)); @@ -1026,7 +1039,7 @@ class T2Pat<dag pattern, dag result> : Pat<pattern, result> { class VFPI<dag oops, dag iops, AddrMode am, SizeFlagVal sz, IndexMode im, Format f, InstrItinClass itin, string opc, string asm, string cstr, list<dag> pattern> - : InstARM<am, sz, im, f, cstr, itin> { + : InstARM<am, sz, im, f, VFPDomain, cstr, itin> { let OutOperandList = oops; let InOperandList = !con(iops, (ops pred:$p)); let AsmString = !strconcat(opc, !strconcat("${p}", asm)); @@ -1038,7 +1051,7 @@ class VFPI<dag oops, dag iops, AddrMode am, SizeFlagVal sz, class VFPXI<dag oops, dag iops, AddrMode am, SizeFlagVal sz, IndexMode im, Format f, InstrItinClass itin, string asm, string cstr, list<dag> pattern> - : InstARM<am, sz, im, f, cstr, itin> { + : InstARM<am, sz, im, f, VFPDomain, cstr, itin> { let OutOperandList = oops; let InOperandList = iops; let AsmString = asm; @@ -1061,6 +1074,9 @@ class ADI5<bits<4> opcod1, bits<2> opcod2, dag oops, dag iops, let Inst{27-24} = opcod1; let Inst{21-20} = opcod2; let Inst{11-8} = 0b1011; + + // 64-bit loads & stores operate on both NEON and VFP pipelines. + let Dom = VFPNeonDomain.Value; } class ASI5<bits<4> opcod1, bits<2> opcod2, dag oops, dag iops, @@ -1082,6 +1098,9 @@ class AXDI5<dag oops, dag iops, InstrItinClass itin, // TODO: Mark the instructions with the appropriate subtarget info. let Inst{27-25} = 0b110; let Inst{11-8} = 0b1011; + + // 64-bit loads & stores operate on both NEON and VFP pipelines. + let Dom = VFPNeonDomain.Value; } class AXSI5<dag oops, dag iops, InstrItinClass itin, @@ -1125,8 +1144,8 @@ class ASuI<bits<8> opcod1, bits<4> opcod2, bits<4> opcod3, dag oops, dag iops, // Single precision unary, if no NEON // Same as ASuI except not available if NEON is enabled class ASuIn<bits<8> opcod1, bits<4> opcod2, bits<4> opcod3, dag oops, dag iops, - InstrItinClass itin, string opc, string asm, list<dag> pattern> - : ASuI<opcod1, opcod2, opcod2, oops, iops, itin, opc, asm, pattern> { + InstrItinClass itin, string opc, string asm, list<dag> pattern> + : ASuI<opcod1, opcod2, opcod3, oops, iops, itin, opc, asm, pattern> { list<Predicate> Predicates = [HasVFP2,DontUseNEONForFP]; } @@ -1199,7 +1218,7 @@ class AVConv5I<bits<8> opcod1, bits<4> opcod2, dag oops, dag iops, class NeonI<dag oops, dag iops, AddrMode am, IndexMode im, InstrItinClass itin, string asm, string cstr, list<dag> pattern> - : InstARM<am, Size4Bytes, im, NEONFrm, cstr, itin> { + : InstARM<am, Size4Bytes, im, NEONFrm, NeonDomain, cstr, itin> { let OutOperandList = oops; let InOperandList = iops; let AsmString = asm; diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp index dd4123bfa0a3e..86bbe2a4c61f2 100644 --- a/lib/Target/ARM/ARMInstrInfo.cpp +++ b/lib/Target/ARM/ARMInstrInfo.cpp @@ -25,7 +25,7 @@ using namespace llvm; ARMInstrInfo::ARMInstrInfo(const ARMSubtarget &STI) - : RI(*this, STI), Subtarget(STI) { + : ARMBaseInstrInfo(STI), RI(*this, STI) { } unsigned ARMInstrInfo::getUnindexedOpcode(unsigned Opc) const { @@ -67,6 +67,7 @@ bool ARMInstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB) const { case ARM::BX_RET: // Return. case ARM::LDM_RET: case ARM::B: + case ARM::BRIND: case ARM::BR_JTr: // Jumptable branch. case ARM::BR_JTm: // Jumptable branch through mem. case ARM::BR_JTadd: // Jumptable branch add to pc. diff --git a/lib/Target/ARM/ARMInstrInfo.h b/lib/Target/ARM/ARMInstrInfo.h index c616949e37903..5d1678d68544b 100644 --- a/lib/Target/ARM/ARMInstrInfo.h +++ b/lib/Target/ARM/ARMInstrInfo.h @@ -25,7 +25,6 @@ namespace llvm { class ARMInstrInfo : public ARMBaseInstrInfo { ARMRegisterInfo RI; - const ARMSubtarget &Subtarget; public: explicit ARMInstrInfo(const ARMSubtarget &STI); diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 384b98cf540c0..cbe80b4800c25 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -370,19 +370,19 @@ include "ARMInstrFormats.td" multiclass AsI1_bin_irs<bits<4> opcod, string opc, PatFrag opnode, bit Commutable = 0> { def ri : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm, - IIC_iALUi, opc, " $dst, $a, $b", + IIC_iALUi, opc, "\t$dst, $a, $b", [(set GPR:$dst, (opnode GPR:$a, so_imm:$b))]> { let Inst{25} = 1; } def rr : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm, - IIC_iALUr, opc, " $dst, $a, $b", + IIC_iALUr, opc, "\t$dst, $a, $b", [(set GPR:$dst, (opnode GPR:$a, GPR:$b))]> { let Inst{4} = 0; let Inst{25} = 0; let isCommutable = Commutable; } def rs : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm, - IIC_iALUsr, opc, " $dst, $a, $b", + IIC_iALUsr, opc, "\t$dst, $a, $b", [(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]> { let Inst{4} = 1; let Inst{7} = 0; @@ -396,22 +396,25 @@ let Defs = [CPSR] in { multiclass AI1_bin_s_irs<bits<4> opcod, string opc, PatFrag opnode, bit Commutable = 0> { def ri : AI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm, - IIC_iALUi, opc, "s $dst, $a, $b", + IIC_iALUi, opc, "s\t$dst, $a, $b", [(set GPR:$dst, (opnode GPR:$a, so_imm:$b))]> { + let Inst{20} = 1; let Inst{25} = 1; } def rr : AI1<opcod, (outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm, - IIC_iALUr, opc, "s $dst, $a, $b", + IIC_iALUr, opc, "s\t$dst, $a, $b", [(set GPR:$dst, (opnode GPR:$a, GPR:$b))]> { let isCommutable = Commutable; let Inst{4} = 0; + let Inst{20} = 1; let Inst{25} = 0; } def rs : AI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm, - IIC_iALUsr, opc, "s $dst, $a, $b", + IIC_iALUsr, opc, "s\t$dst, $a, $b", [(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]> { let Inst{4} = 1; let Inst{7} = 0; + let Inst{20} = 1; let Inst{25} = 0; } } @@ -424,13 +427,13 @@ let Defs = [CPSR] in { multiclass AI1_cmp_irs<bits<4> opcod, string opc, PatFrag opnode, bit Commutable = 0> { def ri : AI1<opcod, (outs), (ins GPR:$a, so_imm:$b), DPFrm, IIC_iCMPi, - opc, " $a, $b", + opc, "\t$a, $b", [(opnode GPR:$a, so_imm:$b)]> { let Inst{20} = 1; let Inst{25} = 1; } def rr : AI1<opcod, (outs), (ins GPR:$a, GPR:$b), DPFrm, IIC_iCMPr, - opc, " $a, $b", + opc, "\t$a, $b", [(opnode GPR:$a, GPR:$b)]> { let Inst{4} = 0; let Inst{20} = 1; @@ -438,7 +441,7 @@ multiclass AI1_cmp_irs<bits<4> opcod, string opc, PatFrag opnode, let isCommutable = Commutable; } def rs : AI1<opcod, (outs), (ins GPR:$a, so_reg:$b), DPSoRegFrm, IIC_iCMPsr, - opc, " $a, $b", + opc, "\t$a, $b", [(opnode GPR:$a, so_reg:$b)]> { let Inst{4} = 1; let Inst{7} = 0; @@ -453,28 +456,31 @@ multiclass AI1_cmp_irs<bits<4> opcod, string opc, PatFrag opnode, /// FIXME: Remove the 'r' variant. Its rot_imm is zero. multiclass AI_unary_rrot<bits<8> opcod, string opc, PatFrag opnode> { def r : AExtI<opcod, (outs GPR:$dst), (ins GPR:$src), - IIC_iUNAr, opc, " $dst, $src", + IIC_iUNAr, opc, "\t$dst, $src", [(set GPR:$dst, (opnode GPR:$src))]>, Requires<[IsARM, HasV6]> { - let Inst{19-16} = 0b1111; - } + let Inst{11-10} = 0b00; + let Inst{19-16} = 0b1111; + } def r_rot : AExtI<opcod, (outs GPR:$dst), (ins GPR:$src, i32imm:$rot), - IIC_iUNAsi, opc, " $dst, $src, ror $rot", + IIC_iUNAsi, opc, "\t$dst, $src, ror $rot", [(set GPR:$dst, (opnode (rotr GPR:$src, rot_imm:$rot)))]>, Requires<[IsARM, HasV6]> { - let Inst{19-16} = 0b1111; - } + let Inst{19-16} = 0b1111; + } } /// AI_bin_rrot - A binary operation with two forms: one whose operand is a /// register and one whose operand is a register rotated by 8/16/24. multiclass AI_bin_rrot<bits<8> opcod, string opc, PatFrag opnode> { def rr : AExtI<opcod, (outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS), - IIC_iALUr, opc, " $dst, $LHS, $RHS", + IIC_iALUr, opc, "\t$dst, $LHS, $RHS", [(set GPR:$dst, (opnode GPR:$LHS, GPR:$RHS))]>, - Requires<[IsARM, HasV6]>; + Requires<[IsARM, HasV6]> { + let Inst{11-10} = 0b00; + } def rr_rot : AExtI<opcod, (outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS, i32imm:$rot), - IIC_iALUsi, opc, " $dst, $LHS, $RHS, ror $rot", + IIC_iALUsi, opc, "\t$dst, $LHS, $RHS, ror $rot", [(set GPR:$dst, (opnode GPR:$LHS, (rotr GPR:$RHS, rot_imm:$rot)))]>, Requires<[IsARM, HasV6]>; @@ -485,13 +491,13 @@ let Uses = [CPSR] in { multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, bit Commutable = 0> { def ri : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), - DPFrm, IIC_iALUi, opc, " $dst, $a, $b", + DPFrm, IIC_iALUi, opc, "\t$dst, $a, $b", [(set GPR:$dst, (opnode GPR:$a, so_imm:$b))]>, Requires<[IsARM, CarryDefIsUnused]> { let Inst{25} = 1; } def rr : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, GPR:$b), - DPFrm, IIC_iALUr, opc, " $dst, $a, $b", + DPFrm, IIC_iALUr, opc, "\t$dst, $a, $b", [(set GPR:$dst, (opnode GPR:$a, GPR:$b))]>, Requires<[IsARM, CarryDefIsUnused]> { let isCommutable = Commutable; @@ -499,7 +505,7 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, let Inst{25} = 0; } def rs : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), - DPSoRegFrm, IIC_iALUsr, opc, " $dst, $a, $b", + DPSoRegFrm, IIC_iALUsr, opc, "\t$dst, $a, $b", [(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]>, Requires<[IsARM, CarryDefIsUnused]> { let Inst{4} = 1; @@ -508,27 +514,30 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, } // Carry setting variants def Sri : AXI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), - DPFrm, IIC_iALUi, !strconcat(opc, "s $dst, $a, $b"), + DPFrm, IIC_iALUi, !strconcat(opc, "s\t$dst, $a, $b"), [(set GPR:$dst, (opnode GPR:$a, so_imm:$b))]>, Requires<[IsARM, CarryDefIsUsed]> { let Defs = [CPSR]; + let Inst{20} = 1; let Inst{25} = 1; } def Srr : AXI1<opcod, (outs GPR:$dst), (ins GPR:$a, GPR:$b), - DPFrm, IIC_iALUr, !strconcat(opc, "s $dst, $a, $b"), + DPFrm, IIC_iALUr, !strconcat(opc, "s\t$dst, $a, $b"), [(set GPR:$dst, (opnode GPR:$a, GPR:$b))]>, Requires<[IsARM, CarryDefIsUsed]> { let Defs = [CPSR]; let Inst{4} = 0; + let Inst{20} = 1; let Inst{25} = 0; } def Srs : AXI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), - DPSoRegFrm, IIC_iALUsr, !strconcat(opc, "s $dst, $a, $b"), + DPSoRegFrm, IIC_iALUsr, !strconcat(opc, "s\t$dst, $a, $b"), [(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]>, Requires<[IsARM, CarryDefIsUsed]> { let Defs = [CPSR]; let Inst{4} = 1; let Inst{7} = 0; + let Inst{20} = 1; let Inst{25} = 0; } } @@ -573,42 +582,42 @@ PseudoInst<(outs), (ins i32imm:$line, i32imm:$col, i32imm:$file), NoItinerary, // Address computation and loads and stores in PIC mode. let isNotDuplicable = 1 in { def PICADD : AXI1<0b0100, (outs GPR:$dst), (ins GPR:$a, pclabel:$cp, pred:$p), - Pseudo, IIC_iALUr, "\n$cp:\n\tadd$p $dst, pc, $a", + Pseudo, IIC_iALUr, "\n$cp:\n\tadd$p\t$dst, pc, $a", [(set GPR:$dst, (ARMpic_add GPR:$a, imm:$cp))]>; let AddedComplexity = 10 in { let canFoldAsLoad = 1 in def PICLDR : AXI2ldw<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p), - Pseudo, IIC_iLoadr, "\n${addr:label}:\n\tldr$p $dst, $addr", + Pseudo, IIC_iLoadr, "\n${addr:label}:\n\tldr$p\t$dst, $addr", [(set GPR:$dst, (load addrmodepc:$addr))]>; def PICLDRH : AXI3ldh<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p), - Pseudo, IIC_iLoadr, "\n${addr:label}:\n\tldr${p}h $dst, $addr", + Pseudo, IIC_iLoadr, "\n${addr:label}:\n\tldr${p}h\t$dst, $addr", [(set GPR:$dst, (zextloadi16 addrmodepc:$addr))]>; def PICLDRB : AXI2ldb<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p), - Pseudo, IIC_iLoadr, "\n${addr:label}:\n\tldr${p}b $dst, $addr", + Pseudo, IIC_iLoadr, "\n${addr:label}:\n\tldr${p}b\t$dst, $addr", [(set GPR:$dst, (zextloadi8 addrmodepc:$addr))]>; def PICLDRSH : AXI3ldsh<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p), - Pseudo, IIC_iLoadr, "\n${addr:label}:\n\tldr${p}sh $dst, $addr", + Pseudo, IIC_iLoadr, "\n${addr:label}:\n\tldr${p}sh\t$dst, $addr", [(set GPR:$dst, (sextloadi16 addrmodepc:$addr))]>; def PICLDRSB : AXI3ldsb<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p), - Pseudo, IIC_iLoadr, "\n${addr:label}:\n\tldr${p}sb $dst, $addr", + Pseudo, IIC_iLoadr, "\n${addr:label}:\n\tldr${p}sb\t$dst, $addr", [(set GPR:$dst, (sextloadi8 addrmodepc:$addr))]>; } let AddedComplexity = 10 in { def PICSTR : AXI2stw<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p), - Pseudo, IIC_iStorer, "\n${addr:label}:\n\tstr$p $src, $addr", + Pseudo, IIC_iStorer, "\n${addr:label}:\n\tstr$p\t$src, $addr", [(store GPR:$src, addrmodepc:$addr)]>; def PICSTRH : AXI3sth<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p), - Pseudo, IIC_iStorer, "\n${addr:label}:\n\tstr${p}h $src, $addr", + Pseudo, IIC_iStorer, "\n${addr:label}:\n\tstr${p}h\t$src, $addr", [(truncstorei16 GPR:$src, addrmodepc:$addr)]>; def PICSTRB : AXI2stb<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p), - Pseudo, IIC_iStorer, "\n${addr:label}:\n\tstr${p}b $src, $addr", + Pseudo, IIC_iStorer, "\n${addr:label}:\n\tstr${p}b\t$src, $addr", [(truncstorei8 GPR:$src, addrmodepc:$addr)]>; } } // isNotDuplicable = 1 @@ -618,10 +627,10 @@ def PICSTRB : AXI2stb<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p), // assembler. def LEApcrel : AXI1<0x0, (outs GPR:$dst), (ins i32imm:$label, pred:$p), Pseudo, IIC_iALUi, - !strconcat(!strconcat(".set ${:private}PCRELV${:uid}, ($label-(", - "${:private}PCRELL${:uid}+8))\n"), - !strconcat("${:private}PCRELL${:uid}:\n\t", - "add$p $dst, pc, #${:private}PCRELV${:uid}")), + !strconcat(!strconcat(".set ${:private}PCRELV${:uid}, ($label-(", + "${:private}PCRELL${:uid}+8))\n"), + !strconcat("${:private}PCRELL${:uid}:\n\t", + "add$p\t$dst, pc, #${:private}PCRELV${:uid}")), []>; def LEApcrelJT : AXI1<0x0, (outs GPR:$dst), @@ -631,7 +640,7 @@ def LEApcrelJT : AXI1<0x0, (outs GPR:$dst), "(${label}_${id}-(", "${:private}PCRELL${:uid}+8))\n"), !strconcat("${:private}PCRELL${:uid}:\n\t", - "add$p $dst, pc, #${:private}PCRELV${:uid}")), + "add$p\t$dst, pc, #${:private}PCRELV${:uid}")), []> { let Inst{25} = 1; } @@ -642,19 +651,29 @@ def LEApcrelJT : AXI1<0x0, (outs GPR:$dst), let isReturn = 1, isTerminator = 1, isBarrier = 1 in def BX_RET : AI<(outs), (ins), BrMiscFrm, IIC_Br, - "bx", " lr", [(ARMretflag)]> { + "bx", "\tlr", [(ARMretflag)]> { let Inst{7-4} = 0b0001; let Inst{19-8} = 0b111111111111; let Inst{27-20} = 0b00010010; } +// Indirect branches +let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { + def BRIND : AXI<(outs), (ins GPR:$dst), BrMiscFrm, IIC_Br, "bx\t$dst", + [(brind GPR:$dst)]> { + let Inst{7-4} = 0b0001; + let Inst{19-8} = 0b111111111111; + let Inst{27-20} = 0b00010010; + } +} + // FIXME: remove when we have a way to marking a MI with these properties. // FIXME: Should pc be an implicit operand like PICADD, etc? let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1, hasExtraDefRegAllocReq = 1 in def LDM_RET : AXI4ld<(outs), (ins addrmode4:$addr, pred:$p, reglist:$wb, variable_ops), - LdStMulFrm, IIC_Br, "ldm${p}${addr:submode} $addr, $wb", + LdStMulFrm, IIC_Br, "ldm${p}${addr:submode}\t$addr, $wb", []>; // On non-Darwin platforms R9 is callee-saved. @@ -664,18 +683,20 @@ let isCall = 1, D16, D17, D18, D19, D20, D21, D22, D23, D24, D25, D26, D27, D28, D29, D30, D31, CPSR, FPSCR] in { def BL : ABXI<0b1011, (outs), (ins i32imm:$func, variable_ops), - IIC_Br, "bl ${func:call}", + IIC_Br, "bl\t${func:call}", [(ARMcall tglobaladdr:$func)]>, - Requires<[IsARM, IsNotDarwin]>; + Requires<[IsARM, IsNotDarwin]> { + let Inst{31-28} = 0b1110; + } def BL_pred : ABI<0b1011, (outs), (ins i32imm:$func, variable_ops), - IIC_Br, "bl", " ${func:call}", + IIC_Br, "bl", "\t${func:call}", [(ARMcall_pred tglobaladdr:$func)]>, Requires<[IsARM, IsNotDarwin]>; // ARMv5T and above def BLX : AXI<(outs), (ins GPR:$func, variable_ops), BrMiscFrm, - IIC_Br, "blx $func", + IIC_Br, "blx\t$func", [(ARMcall GPR:$func)]>, Requires<[IsARM, HasV5T, IsNotDarwin]> { let Inst{7-4} = 0b0011; @@ -685,7 +706,7 @@ let isCall = 1, // ARMv4T def BX : ABXIx2<(outs), (ins GPR:$func, variable_ops), - IIC_Br, "mov lr, pc\n\tbx $func", + IIC_Br, "mov\tlr, pc\n\tbx\t$func", [(ARMcall_nolink GPR:$func)]>, Requires<[IsARM, IsNotDarwin]> { let Inst{7-4} = 0b0001; @@ -701,17 +722,19 @@ let isCall = 1, D16, D17, D18, D19, D20, D21, D22, D23, D24, D25, D26, D27, D28, D29, D30, D31, CPSR, FPSCR] in { def BLr9 : ABXI<0b1011, (outs), (ins i32imm:$func, variable_ops), - IIC_Br, "bl ${func:call}", - [(ARMcall tglobaladdr:$func)]>, Requires<[IsARM, IsDarwin]>; + IIC_Br, "bl\t${func:call}", + [(ARMcall tglobaladdr:$func)]>, Requires<[IsARM, IsDarwin]> { + let Inst{31-28} = 0b1110; + } def BLr9_pred : ABI<0b1011, (outs), (ins i32imm:$func, variable_ops), - IIC_Br, "bl", " ${func:call}", + IIC_Br, "bl", "\t${func:call}", [(ARMcall_pred tglobaladdr:$func)]>, Requires<[IsARM, IsDarwin]>; // ARMv5T and above def BLXr9 : AXI<(outs), (ins GPR:$func, variable_ops), BrMiscFrm, - IIC_Br, "blx $func", + IIC_Br, "blx\t$func", [(ARMcall GPR:$func)]>, Requires<[IsARM, HasV5T, IsDarwin]> { let Inst{7-4} = 0b0011; let Inst{19-8} = 0b111111111111; @@ -720,7 +743,7 @@ let isCall = 1, // ARMv4T def BXr9 : ABXIx2<(outs), (ins GPR:$func, variable_ops), - IIC_Br, "mov lr, pc\n\tbx $func", + IIC_Br, "mov\tlr, pc\n\tbx\t$func", [(ARMcall_nolink GPR:$func)]>, Requires<[IsARM, IsDarwin]> { let Inst{7-4} = 0b0001; let Inst{19-8} = 0b111111111111; @@ -733,11 +756,11 @@ let isBranch = 1, isTerminator = 1 in { let isBarrier = 1 in { let isPredicable = 1 in def B : ABXI<0b1010, (outs), (ins brtarget:$target), IIC_Br, - "b $target", [(br bb:$target)]>; + "b\t$target", [(br bb:$target)]>; let isNotDuplicable = 1, isIndirectBranch = 1 in { def BR_JTr : JTI<(outs), (ins GPR:$target, jtblock_operand:$jt, i32imm:$id), - IIC_Br, "mov pc, $target \n$jt", + IIC_Br, "mov\tpc, $target \n$jt", [(ARMbrjt GPR:$target, tjumptable:$jt, imm:$id)]> { let Inst{20} = 0; // S Bit let Inst{24-21} = 0b1101; @@ -745,7 +768,7 @@ let isBranch = 1, isTerminator = 1 in { } def BR_JTm : JTI<(outs), (ins addrmode2:$target, jtblock_operand:$jt, i32imm:$id), - IIC_Br, "ldr pc, $target \n$jt", + IIC_Br, "ldr\tpc, $target \n$jt", [(ARMbrjt (i32 (load addrmode2:$target)), tjumptable:$jt, imm:$id)]> { let Inst{20} = 1; // L bit @@ -756,7 +779,7 @@ let isBranch = 1, isTerminator = 1 in { } def BR_JTadd : JTI<(outs), (ins GPR:$target, GPR:$idx, jtblock_operand:$jt, i32imm:$id), - IIC_Br, "add pc, $target, $idx \n$jt", + IIC_Br, "add\tpc, $target, $idx \n$jt", [(ARMbrjt (add GPR:$target, GPR:$idx), tjumptable:$jt, imm:$id)]> { let Inst{20} = 0; // S bit @@ -769,7 +792,7 @@ let isBranch = 1, isTerminator = 1 in { // FIXME: should be able to write a pattern for ARMBrcond, but can't use // a two-value operand where a dag node expects two operands. :( def Bcc : ABI<0b1010, (outs), (ins brtarget:$target), - IIC_Br, "b", " $target", + IIC_Br, "b", "\t$target", [/*(ARMbrcond bb:$target, imm:$cc, CCR:$ccr)*/]>; } @@ -780,140 +803,140 @@ let isBranch = 1, isTerminator = 1 in { // Load let canFoldAsLoad = 1, isReMaterializable = 1 in def LDR : AI2ldw<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm, IIC_iLoadr, - "ldr", " $dst, $addr", + "ldr", "\t$dst, $addr", [(set GPR:$dst, (load addrmode2:$addr))]>; // Special LDR for loads from non-pc-relative constpools. let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1 in def LDRcp : AI2ldw<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm, IIC_iLoadr, - "ldr", " $dst, $addr", []>; + "ldr", "\t$dst, $addr", []>; // Loads with zero extension def LDRH : AI3ldh<(outs GPR:$dst), (ins addrmode3:$addr), LdMiscFrm, - IIC_iLoadr, "ldr", "h $dst, $addr", + IIC_iLoadr, "ldr", "h\t$dst, $addr", [(set GPR:$dst, (zextloadi16 addrmode3:$addr))]>; def LDRB : AI2ldb<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm, - IIC_iLoadr, "ldr", "b $dst, $addr", + IIC_iLoadr, "ldr", "b\t$dst, $addr", [(set GPR:$dst, (zextloadi8 addrmode2:$addr))]>; // Loads with sign extension def LDRSH : AI3ldsh<(outs GPR:$dst), (ins addrmode3:$addr), LdMiscFrm, - IIC_iLoadr, "ldr", "sh $dst, $addr", + IIC_iLoadr, "ldr", "sh\t$dst, $addr", [(set GPR:$dst, (sextloadi16 addrmode3:$addr))]>; def LDRSB : AI3ldsb<(outs GPR:$dst), (ins addrmode3:$addr), LdMiscFrm, - IIC_iLoadr, "ldr", "sb $dst, $addr", + IIC_iLoadr, "ldr", "sb\t$dst, $addr", [(set GPR:$dst, (sextloadi8 addrmode3:$addr))]>; let mayLoad = 1, hasExtraDefRegAllocReq = 1 in { // Load doubleword def LDRD : AI3ldd<(outs GPR:$dst1, GPR:$dst2), (ins addrmode3:$addr), LdMiscFrm, - IIC_iLoadr, "ldr", "d $dst1, $addr", + IIC_iLoadr, "ldr", "d\t$dst1, $addr", []>, Requires<[IsARM, HasV5TE]>; // Indexed loads def LDR_PRE : AI2ldwpr<(outs GPR:$dst, GPR:$base_wb), (ins addrmode2:$addr), LdFrm, IIC_iLoadru, - "ldr", " $dst, $addr!", "$addr.base = $base_wb", []>; + "ldr", "\t$dst, $addr!", "$addr.base = $base_wb", []>; def LDR_POST : AI2ldwpo<(outs GPR:$dst, GPR:$base_wb), (ins GPR:$base, am2offset:$offset), LdFrm, IIC_iLoadru, - "ldr", " $dst, [$base], $offset", "$base = $base_wb", []>; + "ldr", "\t$dst, [$base], $offset", "$base = $base_wb", []>; def LDRH_PRE : AI3ldhpr<(outs GPR:$dst, GPR:$base_wb), (ins addrmode3:$addr), LdMiscFrm, IIC_iLoadru, - "ldr", "h $dst, $addr!", "$addr.base = $base_wb", []>; + "ldr", "h\t$dst, $addr!", "$addr.base = $base_wb", []>; def LDRH_POST : AI3ldhpo<(outs GPR:$dst, GPR:$base_wb), (ins GPR:$base,am3offset:$offset), LdMiscFrm, IIC_iLoadru, - "ldr", "h $dst, [$base], $offset", "$base = $base_wb", []>; + "ldr", "h\t$dst, [$base], $offset", "$base = $base_wb", []>; def LDRB_PRE : AI2ldbpr<(outs GPR:$dst, GPR:$base_wb), (ins addrmode2:$addr), LdFrm, IIC_iLoadru, - "ldr", "b $dst, $addr!", "$addr.base = $base_wb", []>; + "ldr", "b\t$dst, $addr!", "$addr.base = $base_wb", []>; def LDRB_POST : AI2ldbpo<(outs GPR:$dst, GPR:$base_wb), (ins GPR:$base,am2offset:$offset), LdFrm, IIC_iLoadru, - "ldr", "b $dst, [$base], $offset", "$base = $base_wb", []>; + "ldr", "b\t$dst, [$base], $offset", "$base = $base_wb", []>; def LDRSH_PRE : AI3ldshpr<(outs GPR:$dst, GPR:$base_wb), (ins addrmode3:$addr), LdMiscFrm, IIC_iLoadru, - "ldr", "sh $dst, $addr!", "$addr.base = $base_wb", []>; + "ldr", "sh\t$dst, $addr!", "$addr.base = $base_wb", []>; def LDRSH_POST: AI3ldshpo<(outs GPR:$dst, GPR:$base_wb), (ins GPR:$base,am3offset:$offset), LdMiscFrm, IIC_iLoadru, - "ldr", "sh $dst, [$base], $offset", "$base = $base_wb", []>; + "ldr", "sh\t$dst, [$base], $offset", "$base = $base_wb", []>; def LDRSB_PRE : AI3ldsbpr<(outs GPR:$dst, GPR:$base_wb), (ins addrmode3:$addr), LdMiscFrm, IIC_iLoadru, - "ldr", "sb $dst, $addr!", "$addr.base = $base_wb", []>; + "ldr", "sb\t$dst, $addr!", "$addr.base = $base_wb", []>; def LDRSB_POST: AI3ldsbpo<(outs GPR:$dst, GPR:$base_wb), (ins GPR:$base,am3offset:$offset), LdMiscFrm, IIC_iLoadru, - "ldr", "sb $dst, [$base], $offset", "$base = $base_wb", []>; + "ldr", "sb\t$dst, [$base], $offset", "$base = $base_wb", []>; } // Store def STR : AI2stw<(outs), (ins GPR:$src, addrmode2:$addr), StFrm, IIC_iStorer, - "str", " $src, $addr", + "str", "\t$src, $addr", [(store GPR:$src, addrmode2:$addr)]>; // Stores with truncate def STRH : AI3sth<(outs), (ins GPR:$src, addrmode3:$addr), StMiscFrm, IIC_iStorer, - "str", "h $src, $addr", + "str", "h\t$src, $addr", [(truncstorei16 GPR:$src, addrmode3:$addr)]>; def STRB : AI2stb<(outs), (ins GPR:$src, addrmode2:$addr), StFrm, IIC_iStorer, - "str", "b $src, $addr", + "str", "b\t$src, $addr", [(truncstorei8 GPR:$src, addrmode2:$addr)]>; // Store doubleword let mayStore = 1, hasExtraSrcRegAllocReq = 1 in def STRD : AI3std<(outs), (ins GPR:$src1, GPR:$src2, addrmode3:$addr), StMiscFrm, IIC_iStorer, - "str", "d $src1, $addr", []>, Requires<[IsARM, HasV5TE]>; + "str", "d\t$src1, $addr", []>, Requires<[IsARM, HasV5TE]>; // Indexed stores def STR_PRE : AI2stwpr<(outs GPR:$base_wb), (ins GPR:$src, GPR:$base, am2offset:$offset), StFrm, IIC_iStoreru, - "str", " $src, [$base, $offset]!", "$base = $base_wb", + "str", "\t$src, [$base, $offset]!", "$base = $base_wb", [(set GPR:$base_wb, (pre_store GPR:$src, GPR:$base, am2offset:$offset))]>; def STR_POST : AI2stwpo<(outs GPR:$base_wb), (ins GPR:$src, GPR:$base,am2offset:$offset), StFrm, IIC_iStoreru, - "str", " $src, [$base], $offset", "$base = $base_wb", + "str", "\t$src, [$base], $offset", "$base = $base_wb", [(set GPR:$base_wb, (post_store GPR:$src, GPR:$base, am2offset:$offset))]>; def STRH_PRE : AI3sthpr<(outs GPR:$base_wb), (ins GPR:$src, GPR:$base,am3offset:$offset), StMiscFrm, IIC_iStoreru, - "str", "h $src, [$base, $offset]!", "$base = $base_wb", + "str", "h\t$src, [$base, $offset]!", "$base = $base_wb", [(set GPR:$base_wb, (pre_truncsti16 GPR:$src, GPR:$base,am3offset:$offset))]>; def STRH_POST: AI3sthpo<(outs GPR:$base_wb), (ins GPR:$src, GPR:$base,am3offset:$offset), StMiscFrm, IIC_iStoreru, - "str", "h $src, [$base], $offset", "$base = $base_wb", + "str", "h\t$src, [$base], $offset", "$base = $base_wb", [(set GPR:$base_wb, (post_truncsti16 GPR:$src, GPR:$base, am3offset:$offset))]>; def STRB_PRE : AI2stbpr<(outs GPR:$base_wb), (ins GPR:$src, GPR:$base,am2offset:$offset), StFrm, IIC_iStoreru, - "str", "b $src, [$base, $offset]!", "$base = $base_wb", + "str", "b\t$src, [$base, $offset]!", "$base = $base_wb", [(set GPR:$base_wb, (pre_truncsti8 GPR:$src, GPR:$base, am2offset:$offset))]>; def STRB_POST: AI2stbpo<(outs GPR:$base_wb), (ins GPR:$src, GPR:$base,am2offset:$offset), StFrm, IIC_iStoreru, - "str", "b $src, [$base], $offset", "$base = $base_wb", + "str", "b\t$src, [$base], $offset", "$base = $base_wb", [(set GPR:$base_wb, (post_truncsti8 GPR:$src, GPR:$base, am2offset:$offset))]>; @@ -924,13 +947,13 @@ def STRB_POST: AI2stbpo<(outs GPR:$base_wb), let mayLoad = 1, hasExtraDefRegAllocReq = 1 in def LDM : AXI4ld<(outs), (ins addrmode4:$addr, pred:$p, reglist:$wb, variable_ops), - LdStMulFrm, IIC_iLoadm, "ldm${p}${addr:submode} $addr, $wb", + LdStMulFrm, IIC_iLoadm, "ldm${p}${addr:submode}\t$addr, $wb", []>; let mayStore = 1, hasExtraSrcRegAllocReq = 1 in def STM : AXI4st<(outs), (ins addrmode4:$addr, pred:$p, reglist:$wb, variable_ops), - LdStMulFrm, IIC_iStorem, "stm${p}${addr:submode} $addr, $wb", + LdStMulFrm, IIC_iStorem, "stm${p}${addr:submode}\t$addr, $wb", []>; //===----------------------------------------------------------------------===// @@ -939,14 +962,14 @@ def STM : AXI4st<(outs), let neverHasSideEffects = 1 in def MOVr : AsI1<0b1101, (outs GPR:$dst), (ins GPR:$src), DPFrm, IIC_iMOVr, - "mov", " $dst, $src", []>, UnaryDP { + "mov", "\t$dst, $src", []>, UnaryDP { let Inst{4} = 0; let Inst{25} = 0; } def MOVs : AsI1<0b1101, (outs GPR:$dst), (ins so_reg:$src), DPSoRegFrm, IIC_iMOVsr, - "mov", " $dst, $src", [(set GPR:$dst, so_reg:$src)]>, UnaryDP { + "mov", "\t$dst, $src", [(set GPR:$dst, so_reg:$src)]>, UnaryDP { let Inst{4} = 1; let Inst{7} = 0; let Inst{25} = 0; @@ -954,14 +977,14 @@ def MOVs : AsI1<0b1101, (outs GPR:$dst), (ins so_reg:$src), let isReMaterializable = 1, isAsCheapAsAMove = 1 in def MOVi : AsI1<0b1101, (outs GPR:$dst), (ins so_imm:$src), DPFrm, IIC_iMOVi, - "mov", " $dst, $src", [(set GPR:$dst, so_imm:$src)]>, UnaryDP { + "mov", "\t$dst, $src", [(set GPR:$dst, so_imm:$src)]>, UnaryDP { let Inst{25} = 1; } let isReMaterializable = 1, isAsCheapAsAMove = 1 in def MOVi16 : AI1<0b1000, (outs GPR:$dst), (ins i32imm:$src), DPFrm, IIC_iMOVi, - "movw", " $dst, $src", + "movw", "\t$dst, $src", [(set GPR:$dst, imm0_65535:$src)]>, Requires<[IsARM, HasV6T2]> { let Inst{20} = 0; @@ -971,7 +994,7 @@ def MOVi16 : AI1<0b1000, (outs GPR:$dst), (ins i32imm:$src), let Constraints = "$src = $dst" in def MOVTi16 : AI1<0b1010, (outs GPR:$dst), (ins GPR:$src, i32imm:$imm), DPFrm, IIC_iMOVi, - "movt", " $dst, $imm", + "movt", "\t$dst, $imm", [(set GPR:$dst, (or (and GPR:$src, 0xffff), lo16AllZero:$imm))]>, UnaryDP, @@ -985,7 +1008,7 @@ def : ARMPat<(or GPR:$src, 0xffff0000), (MOVTi16 GPR:$src, 0xffff)>, let Uses = [CPSR] in def MOVrx : AsI1<0b1101, (outs GPR:$dst), (ins GPR:$src), Pseudo, IIC_iMOVsi, - "mov", " $dst, $src, rrx", + "mov", "\t$dst, $src, rrx", [(set GPR:$dst, (ARMrrx GPR:$src))]>, UnaryDP; // These aren't really mov instructions, but we have to define them this way @@ -993,10 +1016,10 @@ def MOVrx : AsI1<0b1101, (outs GPR:$dst), (ins GPR:$src), Pseudo, IIC_iMOVsi, let Defs = [CPSR] in { def MOVsrl_flag : AI1<0b1101, (outs GPR:$dst), (ins GPR:$src), Pseudo, - IIC_iMOVsi, "mov", "s $dst, $src, lsr #1", + IIC_iMOVsi, "mov", "s\t$dst, $src, lsr #1", [(set GPR:$dst, (ARMsrl_flag GPR:$src))]>, UnaryDP; def MOVsra_flag : AI1<0b1101, (outs GPR:$dst), (ins GPR:$src), Pseudo, - IIC_iMOVsi, "mov", "s $dst, $src, asr #1", + IIC_iMOVsi, "mov", "s\t$dst, $src, asr #1", [(set GPR:$dst, (ARMsra_flag GPR:$src))]>, UnaryDP; } @@ -1047,7 +1070,7 @@ defm UXTAH : AI_bin_rrot<0b01101111, "uxtah", def SBFX : I<(outs GPR:$dst), (ins GPR:$src, imm0_31:$lsb, imm0_31:$width), AddrMode1, Size4Bytes, IndexModeNone, DPFrm, IIC_iALUi, - "sbfx", " $dst, $src, $lsb, $width", "", []>, + "sbfx", "\t$dst, $src, $lsb, $width", "", []>, Requires<[IsARM, HasV6T2]> { let Inst{27-21} = 0b0111101; let Inst{6-4} = 0b101; @@ -1056,7 +1079,7 @@ def SBFX : I<(outs GPR:$dst), def UBFX : I<(outs GPR:$dst), (ins GPR:$src, imm0_31:$lsb, imm0_31:$width), AddrMode1, Size4Bytes, IndexModeNone, DPFrm, IIC_iALUi, - "ubfx", " $dst, $src, $lsb, $width", "", []>, + "ubfx", "\t$dst, $src, $lsb, $width", "", []>, Requires<[IsARM, HasV6T2]> { let Inst{27-21} = 0b0111111; let Inst{6-4} = 0b101; @@ -1084,52 +1107,72 @@ defm SBC : AI1_adde_sube_irs<0b0110, "sbc", // These don't define reg/reg forms, because they are handled above. def RSBri : AsI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm, - IIC_iALUi, "rsb", " $dst, $a, $b", + IIC_iALUi, "rsb", "\t$dst, $a, $b", [(set GPR:$dst, (sub so_imm:$b, GPR:$a))]> { let Inst{25} = 1; } def RSBrs : AsI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm, - IIC_iALUsr, "rsb", " $dst, $a, $b", - [(set GPR:$dst, (sub so_reg:$b, GPR:$a))]>; + IIC_iALUsr, "rsb", "\t$dst, $a, $b", + [(set GPR:$dst, (sub so_reg:$b, GPR:$a))]> { + let Inst{4} = 1; + let Inst{7} = 0; + let Inst{25} = 0; +} // RSB with 's' bit set. let Defs = [CPSR] in { def RSBSri : AI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm, - IIC_iALUi, "rsb", "s $dst, $a, $b", + IIC_iALUi, "rsb", "s\t$dst, $a, $b", [(set GPR:$dst, (subc so_imm:$b, GPR:$a))]> { + let Inst{20} = 1; let Inst{25} = 1; } def RSBSrs : AI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm, - IIC_iALUsr, "rsb", "s $dst, $a, $b", - [(set GPR:$dst, (subc so_reg:$b, GPR:$a))]>; + IIC_iALUsr, "rsb", "s\t$dst, $a, $b", + [(set GPR:$dst, (subc so_reg:$b, GPR:$a))]> { + let Inst{4} = 1; + let Inst{7} = 0; + let Inst{20} = 1; + let Inst{25} = 0; +} } let Uses = [CPSR] in { def RSCri : AsI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), - DPFrm, IIC_iALUi, "rsc", " $dst, $a, $b", + DPFrm, IIC_iALUi, "rsc", "\t$dst, $a, $b", [(set GPR:$dst, (sube so_imm:$b, GPR:$a))]>, Requires<[IsARM, CarryDefIsUnused]> { let Inst{25} = 1; } def RSCrs : AsI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), - DPSoRegFrm, IIC_iALUsr, "rsc", " $dst, $a, $b", + DPSoRegFrm, IIC_iALUsr, "rsc", "\t$dst, $a, $b", [(set GPR:$dst, (sube so_reg:$b, GPR:$a))]>, - Requires<[IsARM, CarryDefIsUnused]>; + Requires<[IsARM, CarryDefIsUnused]> { + let Inst{4} = 1; + let Inst{7} = 0; + let Inst{25} = 0; +} } // FIXME: Allow these to be predicated. let Defs = [CPSR], Uses = [CPSR] in { def RSCSri : AXI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), - DPFrm, IIC_iALUi, "rscs $dst, $a, $b", + DPFrm, IIC_iALUi, "rscs\t$dst, $a, $b", [(set GPR:$dst, (sube so_imm:$b, GPR:$a))]>, Requires<[IsARM, CarryDefIsUnused]> { + let Inst{20} = 1; let Inst{25} = 1; } def RSCSrs : AXI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), - DPSoRegFrm, IIC_iALUsr, "rscs $dst, $a, $b", + DPSoRegFrm, IIC_iALUsr, "rscs\t$dst, $a, $b", [(set GPR:$dst, (sube so_reg:$b, GPR:$a))]>, - Requires<[IsARM, CarryDefIsUnused]>; + Requires<[IsARM, CarryDefIsUnused]> { + let Inst{4} = 1; + let Inst{7} = 0; + let Inst{20} = 1; + let Inst{25} = 0; +} } // (sub X, imm) gets canonicalized to (add X, -imm). Match this form. @@ -1162,8 +1205,8 @@ defm BIC : AsI1_bin_irs<0b1110, "bic", BinOpFrag<(and node:$LHS, (not node:$RHS))>>; def BFC : I<(outs GPR:$dst), (ins GPR:$src, bf_inv_mask_imm:$imm), - AddrMode1, Size4Bytes, IndexModeNone, DPFrm, IIC_iALUi, - "bfc", " $dst, $imm", "$src = $dst", + AddrMode1, Size4Bytes, IndexModeNone, DPFrm, IIC_iUNAsi, + "bfc", "\t$dst, $imm", "$src = $dst", [(set GPR:$dst, (and GPR:$src, bf_inv_mask_imm:$imm))]>, Requires<[IsARM, HasV6T2]> { let Inst{27-21} = 0b0111110; @@ -1171,19 +1214,19 @@ def BFC : I<(outs GPR:$dst), (ins GPR:$src, bf_inv_mask_imm:$imm), } def MVNr : AsI1<0b1111, (outs GPR:$dst), (ins GPR:$src), DPFrm, IIC_iMOVr, - "mvn", " $dst, $src", + "mvn", "\t$dst, $src", [(set GPR:$dst, (not GPR:$src))]>, UnaryDP { let Inst{4} = 0; } def MVNs : AsI1<0b1111, (outs GPR:$dst), (ins so_reg:$src), DPSoRegFrm, - IIC_iMOVsr, "mvn", " $dst, $src", + IIC_iMOVsr, "mvn", "\t$dst, $src", [(set GPR:$dst, (not so_reg:$src))]>, UnaryDP { let Inst{4} = 1; let Inst{7} = 0; } let isReMaterializable = 1, isAsCheapAsAMove = 1 in def MVNi : AsI1<0b1111, (outs GPR:$dst), (ins so_imm:$imm), DPFrm, - IIC_iMOVi, "mvn", " $dst, $imm", + IIC_iMOVi, "mvn", "\t$dst, $imm", [(set GPR:$dst, so_imm_not:$imm)]>,UnaryDP { let Inst{25} = 1; } @@ -1197,15 +1240,15 @@ def : ARMPat<(and GPR:$src, so_imm_not:$imm), let isCommutable = 1 in def MUL : AsMul1I<0b0000000, (outs GPR:$dst), (ins GPR:$a, GPR:$b), - IIC_iMUL32, "mul", " $dst, $a, $b", + IIC_iMUL32, "mul", "\t$dst, $a, $b", [(set GPR:$dst, (mul GPR:$a, GPR:$b))]>; def MLA : AsMul1I<0b0000001, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), - IIC_iMAC32, "mla", " $dst, $a, $b, $c", + IIC_iMAC32, "mla", "\t$dst, $a, $b, $c", [(set GPR:$dst, (add (mul GPR:$a, GPR:$b), GPR:$c))]>; def MLS : AMul1I<0b0000011, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), - IIC_iMAC32, "mls", " $dst, $a, $b, $c", + IIC_iMAC32, "mls", "\t$dst, $a, $b, $c", [(set GPR:$dst, (sub GPR:$c, (mul GPR:$a, GPR:$b)))]>, Requires<[IsARM, HasV6T2]>; @@ -1214,31 +1257,31 @@ let neverHasSideEffects = 1 in { let isCommutable = 1 in { def SMULL : AsMul1I<0b0000110, (outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMUL64, - "smull", " $ldst, $hdst, $a, $b", []>; + "smull", "\t$ldst, $hdst, $a, $b", []>; def UMULL : AsMul1I<0b0000100, (outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMUL64, - "umull", " $ldst, $hdst, $a, $b", []>; + "umull", "\t$ldst, $hdst, $a, $b", []>; } // Multiply + accumulate def SMLAL : AsMul1I<0b0000111, (outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMAC64, - "smlal", " $ldst, $hdst, $a, $b", []>; + "smlal", "\t$ldst, $hdst, $a, $b", []>; def UMLAL : AsMul1I<0b0000101, (outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMAC64, - "umlal", " $ldst, $hdst, $a, $b", []>; + "umlal", "\t$ldst, $hdst, $a, $b", []>; def UMAAL : AMul1I <0b0000010, (outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMAC64, - "umaal", " $ldst, $hdst, $a, $b", []>, + "umaal", "\t$ldst, $hdst, $a, $b", []>, Requires<[IsARM, HasV6]>; } // neverHasSideEffects // Most significant word multiply def SMMUL : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b), - IIC_iMUL32, "smmul", " $dst, $a, $b", + IIC_iMUL32, "smmul", "\t$dst, $a, $b", [(set GPR:$dst, (mulhs GPR:$a, GPR:$b))]>, Requires<[IsARM, HasV6]> { let Inst{7-4} = 0b0001; @@ -1246,7 +1289,7 @@ def SMMUL : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b), } def SMMLA : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), - IIC_iMAC32, "smmla", " $dst, $a, $b, $c", + IIC_iMAC32, "smmla", "\t$dst, $a, $b, $c", [(set GPR:$dst, (add (mulhs GPR:$a, GPR:$b), GPR:$c))]>, Requires<[IsARM, HasV6]> { let Inst{7-4} = 0b0001; @@ -1254,7 +1297,7 @@ def SMMLA : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), def SMMLS : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), - IIC_iMAC32, "smmls", " $dst, $a, $b, $c", + IIC_iMAC32, "smmls", "\t$dst, $a, $b, $c", [(set GPR:$dst, (sub GPR:$c, (mulhs GPR:$a, GPR:$b)))]>, Requires<[IsARM, HasV6]> { let Inst{7-4} = 0b1101; @@ -1262,7 +1305,7 @@ def SMMLS : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), multiclass AI_smul<string opc, PatFrag opnode> { def BB : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b), - IIC_iMUL32, !strconcat(opc, "bb"), " $dst, $a, $b", + IIC_iMUL32, !strconcat(opc, "bb"), "\t$dst, $a, $b", [(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16), (sext_inreg GPR:$b, i16)))]>, Requires<[IsARM, HasV5TE]> { @@ -1271,7 +1314,7 @@ multiclass AI_smul<string opc, PatFrag opnode> { } def BT : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b), - IIC_iMUL32, !strconcat(opc, "bt"), " $dst, $a, $b", + IIC_iMUL32, !strconcat(opc, "bt"), "\t$dst, $a, $b", [(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16), (sra GPR:$b, (i32 16))))]>, Requires<[IsARM, HasV5TE]> { @@ -1280,7 +1323,7 @@ multiclass AI_smul<string opc, PatFrag opnode> { } def TB : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b), - IIC_iMUL32, !strconcat(opc, "tb"), " $dst, $a, $b", + IIC_iMUL32, !strconcat(opc, "tb"), "\t$dst, $a, $b", [(set GPR:$dst, (opnode (sra GPR:$a, (i32 16)), (sext_inreg GPR:$b, i16)))]>, Requires<[IsARM, HasV5TE]> { @@ -1289,7 +1332,7 @@ multiclass AI_smul<string opc, PatFrag opnode> { } def TT : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b), - IIC_iMUL32, !strconcat(opc, "tt"), " $dst, $a, $b", + IIC_iMUL32, !strconcat(opc, "tt"), "\t$dst, $a, $b", [(set GPR:$dst, (opnode (sra GPR:$a, (i32 16)), (sra GPR:$b, (i32 16))))]>, Requires<[IsARM, HasV5TE]> { @@ -1298,7 +1341,7 @@ multiclass AI_smul<string opc, PatFrag opnode> { } def WB : AMulxyI<0b0001001, (outs GPR:$dst), (ins GPR:$a, GPR:$b), - IIC_iMUL16, !strconcat(opc, "wb"), " $dst, $a, $b", + IIC_iMUL16, !strconcat(opc, "wb"), "\t$dst, $a, $b", [(set GPR:$dst, (sra (opnode GPR:$a, (sext_inreg GPR:$b, i16)), (i32 16)))]>, Requires<[IsARM, HasV5TE]> { @@ -1307,7 +1350,7 @@ multiclass AI_smul<string opc, PatFrag opnode> { } def WT : AMulxyI<0b0001001, (outs GPR:$dst), (ins GPR:$a, GPR:$b), - IIC_iMUL16, !strconcat(opc, "wt"), " $dst, $a, $b", + IIC_iMUL16, !strconcat(opc, "wt"), "\t$dst, $a, $b", [(set GPR:$dst, (sra (opnode GPR:$a, (sra GPR:$b, (i32 16))), (i32 16)))]>, Requires<[IsARM, HasV5TE]> { @@ -1319,7 +1362,7 @@ multiclass AI_smul<string opc, PatFrag opnode> { multiclass AI_smla<string opc, PatFrag opnode> { def BB : AMulxyI<0b0001000, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), - IIC_iMAC16, !strconcat(opc, "bb"), " $dst, $a, $b, $acc", + IIC_iMAC16, !strconcat(opc, "bb"), "\t$dst, $a, $b, $acc", [(set GPR:$dst, (add GPR:$acc, (opnode (sext_inreg GPR:$a, i16), (sext_inreg GPR:$b, i16))))]>, @@ -1329,7 +1372,7 @@ multiclass AI_smla<string opc, PatFrag opnode> { } def BT : AMulxyI<0b0001000, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), - IIC_iMAC16, !strconcat(opc, "bt"), " $dst, $a, $b, $acc", + IIC_iMAC16, !strconcat(opc, "bt"), "\t$dst, $a, $b, $acc", [(set GPR:$dst, (add GPR:$acc, (opnode (sext_inreg GPR:$a, i16), (sra GPR:$b, (i32 16)))))]>, Requires<[IsARM, HasV5TE]> { @@ -1338,7 +1381,7 @@ multiclass AI_smla<string opc, PatFrag opnode> { } def TB : AMulxyI<0b0001000, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), - IIC_iMAC16, !strconcat(opc, "tb"), " $dst, $a, $b, $acc", + IIC_iMAC16, !strconcat(opc, "tb"), "\t$dst, $a, $b, $acc", [(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, (i32 16)), (sext_inreg GPR:$b, i16))))]>, Requires<[IsARM, HasV5TE]> { @@ -1347,16 +1390,16 @@ multiclass AI_smla<string opc, PatFrag opnode> { } def TT : AMulxyI<0b0001000, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), - IIC_iMAC16, !strconcat(opc, "tt"), " $dst, $a, $b, $acc", - [(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, (i32 16)), - (sra GPR:$b, (i32 16)))))]>, + IIC_iMAC16, !strconcat(opc, "tt"), "\t$dst, $a, $b, $acc", + [(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, (i32 16)), + (sra GPR:$b, (i32 16)))))]>, Requires<[IsARM, HasV5TE]> { let Inst{5} = 1; let Inst{6} = 1; } def WB : AMulxyI<0b0001001, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), - IIC_iMAC16, !strconcat(opc, "wb"), " $dst, $a, $b, $acc", + IIC_iMAC16, !strconcat(opc, "wb"), "\t$dst, $a, $b, $acc", [(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a, (sext_inreg GPR:$b, i16)), (i32 16))))]>, Requires<[IsARM, HasV5TE]> { @@ -1365,7 +1408,7 @@ multiclass AI_smla<string opc, PatFrag opnode> { } def WT : AMulxyI<0b0001001, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), - IIC_iMAC16, !strconcat(opc, "wt"), " $dst, $a, $b, $acc", + IIC_iMAC16, !strconcat(opc, "wt"), "\t$dst, $a, $b, $acc", [(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a, (sra GPR:$b, (i32 16))), (i32 16))))]>, Requires<[IsARM, HasV5TE]> { @@ -1385,7 +1428,7 @@ defm SMLA : AI_smla<"smla", BinOpFrag<(mul node:$LHS, node:$RHS)>>; // def CLZ : AMiscA1I<0b000010110, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr, - "clz", " $dst, $src", + "clz", "\t$dst, $src", [(set GPR:$dst, (ctlz GPR:$src))]>, Requires<[IsARM, HasV5T]> { let Inst{7-4} = 0b0001; let Inst{11-8} = 0b1111; @@ -1393,7 +1436,7 @@ def CLZ : AMiscA1I<0b000010110, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr, } def REV : AMiscA1I<0b01101011, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr, - "rev", " $dst, $src", + "rev", "\t$dst, $src", [(set GPR:$dst, (bswap GPR:$src))]>, Requires<[IsARM, HasV6]> { let Inst{7-4} = 0b0011; let Inst{11-8} = 0b1111; @@ -1401,7 +1444,7 @@ def REV : AMiscA1I<0b01101011, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr, } def REV16 : AMiscA1I<0b01101011, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr, - "rev16", " $dst, $src", + "rev16", "\t$dst, $src", [(set GPR:$dst, (or (and (srl GPR:$src, (i32 8)), 0xFF), (or (and (shl GPR:$src, (i32 8)), 0xFF00), @@ -1414,7 +1457,7 @@ def REV16 : AMiscA1I<0b01101011, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr, } def REVSH : AMiscA1I<0b01101111, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr, - "revsh", " $dst, $src", + "revsh", "\t$dst, $src", [(set GPR:$dst, (sext_inreg (or (srl (and GPR:$src, 0xFF00), (i32 8)), @@ -1427,7 +1470,7 @@ def REVSH : AMiscA1I<0b01101111, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr, def PKHBT : AMiscA1I<0b01101000, (outs GPR:$dst), (ins GPR:$src1, GPR:$src2, i32imm:$shamt), - IIC_iALUsi, "pkhbt", " $dst, $src1, $src2, LSL $shamt", + IIC_iALUsi, "pkhbt", "\t$dst, $src1, $src2, LSL $shamt", [(set GPR:$dst, (or (and GPR:$src1, 0xFFFF), (and (shl GPR:$src2, (i32 imm:$shamt)), 0xFFFF0000)))]>, @@ -1444,7 +1487,7 @@ def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF), (shl GPR:$src2, imm16_31:$shamt)), def PKHTB : AMiscA1I<0b01101000, (outs GPR:$dst), (ins GPR:$src1, GPR:$src2, i32imm:$shamt), - IIC_iALUsi, "pkhtb", " $dst, $src1, $src2, ASR $shamt", + IIC_iALUsi, "pkhtb", "\t$dst, $src1, $src2, ASR $shamt", [(set GPR:$dst, (or (and GPR:$src1, 0xFFFF0000), (and (sra GPR:$src2, imm16_31:$shamt), 0xFFFF)))]>, Requires<[IsARM, HasV6]> { @@ -1490,7 +1533,7 @@ def : ARMPat<(ARMcmpZ GPR:$src, so_imm_neg:$imm), // FIXME: should be able to write a pattern for ARMcmov, but can't use // a two-value operand where a dag node expects two operands. :( def MOVCCr : AI1<0b1101, (outs GPR:$dst), (ins GPR:$false, GPR:$true), DPFrm, - IIC_iCMOVr, "mov", " $dst, $true", + IIC_iCMOVr, "mov", "\t$dst, $true", [/*(set GPR:$dst, (ARMcmov GPR:$false, GPR:$true, imm:$cc, CCR:$ccr))*/]>, RegConstraint<"$false = $dst">, UnaryDP { let Inst{4} = 0; @@ -1499,7 +1542,7 @@ def MOVCCr : AI1<0b1101, (outs GPR:$dst), (ins GPR:$false, GPR:$true), DPFrm, def MOVCCs : AI1<0b1101, (outs GPR:$dst), (ins GPR:$false, so_reg:$true), DPSoRegFrm, IIC_iCMOVsr, - "mov", " $dst, $true", + "mov", "\t$dst, $true", [/*(set GPR:$dst, (ARMcmov GPR:$false, so_reg:$true, imm:$cc, CCR:$ccr))*/]>, RegConstraint<"$false = $dst">, UnaryDP { let Inst{4} = 1; @@ -1509,7 +1552,7 @@ def MOVCCs : AI1<0b1101, (outs GPR:$dst), def MOVCCi : AI1<0b1101, (outs GPR:$dst), (ins GPR:$false, so_imm:$true), DPFrm, IIC_iCMOVi, - "mov", " $dst, $true", + "mov", "\t$dst, $true", [/*(set GPR:$dst, (ARMcmov GPR:$false, so_imm:$true, imm:$cc, CCR:$ccr))*/]>, RegConstraint<"$false = $dst">, UnaryDP { let Inst{25} = 1; @@ -1524,7 +1567,7 @@ def MOVCCi : AI1<0b1101, (outs GPR:$dst), let isCall = 1, Defs = [R0, R12, LR, CPSR] in { def TPsoft : ABXI<0b1011, (outs), (ins), IIC_Br, - "bl __aeabi_read_tp", + "bl\t__aeabi_read_tp", [(set R0, ARMthread_pointer)]>; } @@ -1548,12 +1591,12 @@ let Defs = def Int_eh_sjlj_setjmp : XI<(outs), (ins GPR:$src), AddrModeNone, SizeSpecial, IndexModeNone, Pseudo, NoItinerary, - "str sp, [$src, #+8] @ eh_setjmp begin\n\t" - "add r12, pc, #8\n\t" - "str r12, [$src, #+4]\n\t" - "mov r0, #0\n\t" - "add pc, pc, #0\n\t" - "mov r0, #1 @ eh_setjmp end", "", + "str\tsp, [$src, #+8] @ eh_setjmp begin\n\t" + "add\tr12, pc, #8\n\t" + "str\tr12, [$src, #+4]\n\t" + "mov\tr0, #0\n\t" + "add\tpc, pc, #0\n\t" + "mov\tr0, #1 @ eh_setjmp end", "", [(set R0, (ARMeh_sjlj_setjmp GPR:$src))]>; } @@ -1573,7 +1616,7 @@ def : ARMPat<(ARMWrapperJT tjumptable:$dst, imm:$id), let isReMaterializable = 1 in def MOVi2pieces : AI1x2<(outs GPR:$dst), (ins so_imm2part:$src), Pseudo, IIC_iMOVi, - "mov", " $dst, $src", + "mov", "\t$dst, $src", [(set GPR:$dst, so_imm2part:$src)]>, Requires<[IsARM, NoV6T2]>; @@ -1596,7 +1639,7 @@ def : ARMPat<(sub GPR:$LHS, so_imm2part:$RHS), // FIXME: Remove this when we can do generalized remat. let isReMaterializable = 1 in def MOVi32imm : AI1x2<(outs GPR:$dst), (ins i32imm:$src), Pseudo, IIC_iMOVi, - "movw", " $dst, ${src:lo16}\n\tmovt${p} $dst, ${src:hi16}", + "movw", "\t$dst, ${src:lo16}\n\tmovt${p} $dst, ${src:hi16}", [(set GPR:$dst, (i32 imm:$src))]>, Requires<[IsARM, HasV6T2]>; diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 822950c52836e..25c4acd095a32 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -2409,10 +2409,10 @@ def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane), (DSubReg_i32_reg imm:$lane))), (SubReg_i32_lane imm:$lane))>; def : Pat<(extractelt (v2f32 DPR:$src1), imm:$src2), - (EXTRACT_SUBREG (COPY_TO_REGCLASS DPR:$src1, DPR_VFP2), + (EXTRACT_SUBREG (v2f32 (COPY_TO_REGCLASS (v2f32 DPR:$src1), DPR_VFP2)), (SSubReg_f32_reg imm:$src2))>; def : Pat<(extractelt (v4f32 QPR:$src1), imm:$src2), - (EXTRACT_SUBREG (COPY_TO_REGCLASS QPR:$src1, QPR_VFP2), + (EXTRACT_SUBREG (v4f32 (COPY_TO_REGCLASS (v4f32 QPR:$src1), QPR_VFP2)), (SSubReg_f32_reg imm:$src2))>; //def : Pat<(extractelt (v2i64 QPR:$src1), imm:$src2), // (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>; @@ -2459,11 +2459,11 @@ def : Pat<(insertelt (v4i32 QPR:$src1), GPR:$src2, imm:$lane), (DSubReg_i32_reg imm:$lane)))>; def : Pat<(v2f32 (insertelt DPR:$src1, SPR:$src2, imm:$src3)), - (INSERT_SUBREG (COPY_TO_REGCLASS DPR:$src1, DPR_VFP2), - SPR:$src2, (SSubReg_f32_reg imm:$src3))>; + (INSERT_SUBREG (v2f32 (COPY_TO_REGCLASS DPR:$src1, DPR_VFP2)), + SPR:$src2, (SSubReg_f32_reg imm:$src3))>; def : Pat<(v4f32 (insertelt QPR:$src1, SPR:$src2, imm:$src3)), - (INSERT_SUBREG (COPY_TO_REGCLASS QPR:$src1, QPR_VFP2), - SPR:$src2, (SSubReg_f32_reg imm:$src3))>; + (INSERT_SUBREG (v4f32 (COPY_TO_REGCLASS QPR:$src1, QPR_VFP2)), + SPR:$src2, (SSubReg_f32_reg imm:$src3))>; //def : Pat<(v2i64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)), // (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>; @@ -2841,13 +2841,16 @@ def VMULfd_sfp : N3VDs<1, 0, 0b00, 0b1101, 1, "vmul.f32", v2f32, v2f32, fmul,1>; def : N3VDsPat<fmul, VMULfd_sfp>; // Vector Multiply-Accumulate/Subtract used for single-precision FP -let neverHasSideEffects = 1 in -def VMLAfd_sfp : N3VDMulOps<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla.f32", v2f32,fmul,fadd>; -def : N3VDMulOpsPat<fmul, fadd, VMLAfd_sfp>; +// vml[as].f32 can cause 4-8 cycle stalls in following ASIMD instructions, so +// we want to avoid them for now. e.g., alternating vmla/vadd instructions. -let neverHasSideEffects = 1 in -def VMLSfd_sfp : N3VDMulOps<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls.f32", v2f32,fmul,fsub>; -def : N3VDMulOpsPat<fmul, fsub, VMLSfd_sfp>; +//let neverHasSideEffects = 1 in +//def VMLAfd_sfp : N3VDMulOps<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla.f32", v2f32,fmul,fadd>; +//def : N3VDMulOpsPat<fmul, fadd, VMLAfd_sfp>; + +//let neverHasSideEffects = 1 in +//def VMLSfd_sfp : N3VDMulOps<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls.f32", v2f32,fmul,fsub>; +//def : N3VDMulOpsPat<fmul, fsub, VMLSfd_sfp>; // Vector Absolute used for single-precision FP let neverHasSideEffects = 1 in diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index 9816addf7d6ac..5d0292593ba13 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -130,61 +130,67 @@ PseudoInst<(outs), (ins i32imm:$amt), NoItinerary, // For both thumb1 and thumb2. let isNotDuplicable = 1 in def tPICADD : TIt<(outs GPR:$dst), (ins GPR:$lhs, pclabel:$cp), IIC_iALUr, - "\n$cp:\n\tadd $dst, pc", + "\n$cp:\n\tadd\t$dst, pc", [(set GPR:$dst, (ARMpic_add GPR:$lhs, imm:$cp))]>; // PC relative add. def tADDrPCi : T1I<(outs tGPR:$dst), (ins i32imm:$rhs), IIC_iALUi, - "add $dst, pc, $rhs * 4", []>; + "add\t$dst, pc, $rhs * 4", []>; // ADD rd, sp, #imm8 def tADDrSPi : T1I<(outs tGPR:$dst), (ins GPR:$sp, i32imm:$rhs), IIC_iALUi, - "add $dst, $sp, $rhs * 4", []>; + "add\t$dst, $sp, $rhs * 4", []>; // ADD sp, sp, #imm7 def tADDspi : TIt<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs), IIC_iALUi, - "add $dst, $rhs * 4", []>; + "add\t$dst, $rhs * 4", []>; // SUB sp, sp, #imm7 def tSUBspi : TIt<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs), IIC_iALUi, - "sub $dst, $rhs * 4", []>; + "sub\t$dst, $rhs * 4", []>; // ADD rm, sp def tADDrSP : TIt<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr, - "add $dst, $rhs", []>; + "add\t$dst, $rhs", []>; // ADD sp, rm def tADDspr : TIt<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr, - "add $dst, $rhs", []>; + "add\t$dst, $rhs", []>; // Pseudo instruction that will expand into a tSUBspi + a copy. -let usesCustomDAGSchedInserter = 1 in { // Expanded by the scheduler. +let usesCustomInserter = 1 in { // Expanded after instruction selection. def tSUBspi_ : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs), - NoItinerary, "@ sub $dst, $rhs * 4", []>; + NoItinerary, "@ sub\t$dst, $rhs * 4", []>; def tADDspr_ : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), - NoItinerary, "@ add $dst, $rhs", []>; + NoItinerary, "@ add\t$dst, $rhs", []>; let Defs = [CPSR] in def tANDsp : PseudoInst<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), - NoItinerary, "@ and $dst, $rhs", []>; -} // usesCustomDAGSchedInserter + NoItinerary, "@ and\t$dst, $rhs", []>; +} // usesCustomInserter //===----------------------------------------------------------------------===// // Control Flow Instructions. // let isReturn = 1, isTerminator = 1, isBarrier = 1 in { - def tBX_RET : TI<(outs), (ins), IIC_Br, "bx lr", [(ARMretflag)]>; + def tBX_RET : TI<(outs), (ins), IIC_Br, "bx\tlr", [(ARMretflag)]>; // Alternative return instruction used by vararg functions. - def tBX_RET_vararg : TI<(outs), (ins tGPR:$target), IIC_Br, "bx $target", []>; + def tBX_RET_vararg : TI<(outs), (ins tGPR:$target), IIC_Br, "bx\t$target", []>; +} + +// Indirect branches +let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { + def tBRIND : TI<(outs), (ins GPR:$dst), IIC_Br, "mov\tpc, $dst", + [(brind GPR:$dst)]>; } // FIXME: remove when we have a way to marking a MI with these properties. let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1, hasExtraDefRegAllocReq = 1 in def tPOP_RET : T1I<(outs), (ins pred:$p, reglist:$wb, variable_ops), IIC_Br, - "pop${p} $wb", []>; + "pop${p}\t$wb", []>; let isCall = 1, Defs = [R0, R1, R2, R3, R12, LR, @@ -193,25 +199,25 @@ let isCall = 1, D24, D25, D26, D27, D28, D29, D30, D31, CPSR, FPSCR] in { // Also used for Thumb2 def tBL : TIx2<(outs), (ins i32imm:$func, variable_ops), IIC_Br, - "bl ${func:call}", + "bl\t${func:call}", [(ARMtcall tglobaladdr:$func)]>, Requires<[IsThumb, IsNotDarwin]>; // ARMv5T and above, also used for Thumb2 def tBLXi : TIx2<(outs), (ins i32imm:$func, variable_ops), IIC_Br, - "blx ${func:call}", + "blx\t${func:call}", [(ARMcall tglobaladdr:$func)]>, Requires<[IsThumb, HasV5T, IsNotDarwin]>; // Also used for Thumb2 def tBLXr : TI<(outs), (ins GPR:$func, variable_ops), IIC_Br, - "blx $func", + "blx\t$func", [(ARMtcall GPR:$func)]>, Requires<[IsThumb, HasV5T, IsNotDarwin]>; // ARMv4T def tBX : TIx2<(outs), (ins tGPR:$func, variable_ops), IIC_Br, - "mov lr, pc\n\tbx $func", + "mov\tlr, pc\n\tbx\t$func", [(ARMcall_nolink tGPR:$func)]>, Requires<[IsThumb1Only, IsNotDarwin]>; } @@ -224,25 +230,25 @@ let isCall = 1, D24, D25, D26, D27, D28, D29, D30, D31, CPSR, FPSCR] in { // Also used for Thumb2 def tBLr9 : TIx2<(outs), (ins i32imm:$func, variable_ops), IIC_Br, - "bl ${func:call}", + "bl\t${func:call}", [(ARMtcall tglobaladdr:$func)]>, Requires<[IsThumb, IsDarwin]>; // ARMv5T and above, also used for Thumb2 def tBLXi_r9 : TIx2<(outs), (ins i32imm:$func, variable_ops), IIC_Br, - "blx ${func:call}", + "blx\t${func:call}", [(ARMcall tglobaladdr:$func)]>, Requires<[IsThumb, HasV5T, IsDarwin]>; // Also used for Thumb2 def tBLXr_r9 : TI<(outs), (ins GPR:$func, variable_ops), IIC_Br, - "blx $func", + "blx\t$func", [(ARMtcall GPR:$func)]>, Requires<[IsThumb, HasV5T, IsDarwin]>; // ARMv4T def tBXr9 : TIx2<(outs), (ins tGPR:$func, variable_ops), IIC_Br, - "mov lr, pc\n\tbx $func", + "mov\tlr, pc\n\tbx\t$func", [(ARMcall_nolink tGPR:$func)]>, Requires<[IsThumb1Only, IsDarwin]>; } @@ -251,16 +257,16 @@ let isBranch = 1, isTerminator = 1 in { let isBarrier = 1 in { let isPredicable = 1 in def tB : T1I<(outs), (ins brtarget:$target), IIC_Br, - "b $target", [(br bb:$target)]>; + "b\t$target", [(br bb:$target)]>; // Far jump let Defs = [LR] in def tBfar : TIx2<(outs), (ins brtarget:$target), IIC_Br, - "bl $target\t@ far jump",[]>; + "bl\t$target\t@ far jump",[]>; def tBR_JTr : T1JTI<(outs), (ins tGPR:$target, jtblock_operand:$jt, i32imm:$id), - IIC_Br, "mov pc, $target\n\t.align\t2\n$jt", + IIC_Br, "mov\tpc, $target\n\t.align\t2\n$jt", [(ARMbrjt tGPR:$target, tjumptable:$jt, imm:$id)]>; } } @@ -269,79 +275,89 @@ let isBranch = 1, isTerminator = 1 in { // a two-value operand where a dag node expects two operands. :( let isBranch = 1, isTerminator = 1 in def tBcc : T1I<(outs), (ins brtarget:$target, pred:$cc), IIC_Br, - "b$cc $target", + "b$cc\t$target", [/*(ARMbrcond bb:$target, imm:$cc)*/]>; +// Compare and branch on zero / non-zero +let isBranch = 1, isTerminator = 1 in { + def tCBZ : T1I<(outs), (ins tGPR:$cmp, brtarget:$target), IIC_Br, + "cbz\t$cmp, $target", []>; + + def tCBNZ : T1I<(outs), (ins tGPR:$cmp, brtarget:$target), IIC_Br, + "cbnz\t$cmp, $target", []>; +} + //===----------------------------------------------------------------------===// // Load Store Instructions. // let canFoldAsLoad = 1 in def tLDR : T1pI4<(outs tGPR:$dst), (ins t_addrmode_s4:$addr), IIC_iLoadr, - "ldr", " $dst, $addr", + "ldr", "\t$dst, $addr", [(set tGPR:$dst, (load t_addrmode_s4:$addr))]>; def tLDRB : T1pI1<(outs tGPR:$dst), (ins t_addrmode_s1:$addr), IIC_iLoadr, - "ldrb", " $dst, $addr", + "ldrb", "\t$dst, $addr", [(set tGPR:$dst, (zextloadi8 t_addrmode_s1:$addr))]>; def tLDRH : T1pI2<(outs tGPR:$dst), (ins t_addrmode_s2:$addr), IIC_iLoadr, - "ldrh", " $dst, $addr", + "ldrh", "\t$dst, $addr", [(set tGPR:$dst, (zextloadi16 t_addrmode_s2:$addr))]>; let AddedComplexity = 10 in def tLDRSB : T1pI1<(outs tGPR:$dst), (ins t_addrmode_rr:$addr), IIC_iLoadr, - "ldrsb", " $dst, $addr", + "ldrsb", "\t$dst, $addr", [(set tGPR:$dst, (sextloadi8 t_addrmode_rr:$addr))]>; let AddedComplexity = 10 in def tLDRSH : T1pI2<(outs tGPR:$dst), (ins t_addrmode_rr:$addr), IIC_iLoadr, - "ldrsh", " $dst, $addr", + "ldrsh", "\t$dst, $addr", [(set tGPR:$dst, (sextloadi16 t_addrmode_rr:$addr))]>; let canFoldAsLoad = 1 in def tLDRspi : T1pIs<(outs tGPR:$dst), (ins t_addrmode_sp:$addr), IIC_iLoadi, - "ldr", " $dst, $addr", + "ldr", "\t$dst, $addr", [(set tGPR:$dst, (load t_addrmode_sp:$addr))]>; // Special instruction for restore. It cannot clobber condition register // when it's expanded by eliminateCallFramePseudoInstr(). let canFoldAsLoad = 1, mayLoad = 1 in def tRestore : T1pIs<(outs tGPR:$dst), (ins t_addrmode_sp:$addr), IIC_iLoadi, - "ldr", " $dst, $addr", []>; + "ldr", "\t$dst, $addr", []>; // Load tconstpool +// FIXME: Use ldr.n to work around a Darwin assembler bug. let canFoldAsLoad = 1 in def tLDRpci : T1pIs<(outs tGPR:$dst), (ins i32imm:$addr), IIC_iLoadi, - "ldr", " $dst, $addr", + "ldr", ".n\t$dst, $addr", [(set tGPR:$dst, (load (ARMWrapper tconstpool:$addr)))]>; // Special LDR for loads from non-pc-relative constpools. let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1 in def tLDRcp : T1pIs<(outs tGPR:$dst), (ins i32imm:$addr), IIC_iLoadi, - "ldr", " $dst, $addr", []>; + "ldr", "\t$dst, $addr", []>; def tSTR : T1pI4<(outs), (ins tGPR:$src, t_addrmode_s4:$addr), IIC_iStorer, - "str", " $src, $addr", + "str", "\t$src, $addr", [(store tGPR:$src, t_addrmode_s4:$addr)]>; def tSTRB : T1pI1<(outs), (ins tGPR:$src, t_addrmode_s1:$addr), IIC_iStorer, - "strb", " $src, $addr", + "strb", "\t$src, $addr", [(truncstorei8 tGPR:$src, t_addrmode_s1:$addr)]>; def tSTRH : T1pI2<(outs), (ins tGPR:$src, t_addrmode_s2:$addr), IIC_iStorer, - "strh", " $src, $addr", + "strh", "\t$src, $addr", [(truncstorei16 tGPR:$src, t_addrmode_s2:$addr)]>; def tSTRspi : T1pIs<(outs), (ins tGPR:$src, t_addrmode_sp:$addr), IIC_iStorei, - "str", " $src, $addr", + "str", "\t$src, $addr", [(store tGPR:$src, t_addrmode_sp:$addr)]>; let mayStore = 1 in { // Special instruction for spill. It cannot clobber condition register // when it's expanded by eliminateCallFramePseudoInstr(). def tSpill : T1pIs<(outs), (ins tGPR:$src, t_addrmode_sp:$addr), IIC_iStorei, - "str", " $src, $addr", []>; + "str", "\t$src, $addr", []>; } //===----------------------------------------------------------------------===// @@ -353,21 +369,21 @@ let mayLoad = 1, hasExtraDefRegAllocReq = 1 in def tLDM : T1I<(outs), (ins addrmode4:$addr, pred:$p, reglist:$wb, variable_ops), IIC_iLoadm, - "ldm${addr:submode}${p} $addr, $wb", []>; + "ldm${addr:submode}${p}\t$addr, $wb", []>; let mayStore = 1, hasExtraSrcRegAllocReq = 1 in def tSTM : T1I<(outs), (ins addrmode4:$addr, pred:$p, reglist:$wb, variable_ops), IIC_iStorem, - "stm${addr:submode}${p} $addr, $wb", []>; + "stm${addr:submode}${p}\t$addr, $wb", []>; let mayLoad = 1, Uses = [SP], Defs = [SP], hasExtraDefRegAllocReq = 1 in def tPOP : T1I<(outs), (ins pred:$p, reglist:$wb, variable_ops), IIC_Br, - "pop${p} $wb", []>; + "pop${p}\t$wb", []>; let mayStore = 1, Uses = [SP], Defs = [SP], hasExtraSrcRegAllocReq = 1 in def tPUSH : T1I<(outs), (ins pred:$p, reglist:$wb, variable_ops), IIC_Br, - "push${p} $wb", []>; + "push${p}\t$wb", []>; //===----------------------------------------------------------------------===// // Arithmetic Instructions. @@ -376,66 +392,66 @@ def tPUSH : T1I<(outs), (ins pred:$p, reglist:$wb, variable_ops), IIC_Br, // Add with carry register let isCommutable = 1, Uses = [CPSR] in def tADC : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iALUr, - "adc", " $dst, $rhs", + "adc", "\t$dst, $rhs", [(set tGPR:$dst, (adde tGPR:$lhs, tGPR:$rhs))]>; // Add immediate def tADDi3 : T1sI<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), IIC_iALUi, - "add", " $dst, $lhs, $rhs", + "add", "\t$dst, $lhs, $rhs", [(set tGPR:$dst, (add tGPR:$lhs, imm0_7:$rhs))]>; def tADDi8 : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), IIC_iALUi, - "add", " $dst, $rhs", + "add", "\t$dst, $rhs", [(set tGPR:$dst, (add tGPR:$lhs, imm8_255:$rhs))]>; // Add register let isCommutable = 1 in def tADDrr : T1sI<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iALUr, - "add", " $dst, $lhs, $rhs", + "add", "\t$dst, $lhs, $rhs", [(set tGPR:$dst, (add tGPR:$lhs, tGPR:$rhs))]>; let neverHasSideEffects = 1 in def tADDhirr : T1pIt<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr, - "add", " $dst, $rhs", []>; + "add", "\t$dst, $rhs", []>; // And register let isCommutable = 1 in def tAND : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iALUr, - "and", " $dst, $rhs", + "and", "\t$dst, $rhs", [(set tGPR:$dst, (and tGPR:$lhs, tGPR:$rhs))]>; // ASR immediate def tASRri : T1sI<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), IIC_iMOVsi, - "asr", " $dst, $lhs, $rhs", + "asr", "\t$dst, $lhs, $rhs", [(set tGPR:$dst, (sra tGPR:$lhs, (i32 imm:$rhs)))]>; // ASR register def tASRrr : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iMOVsr, - "asr", " $dst, $rhs", + "asr", "\t$dst, $rhs", [(set tGPR:$dst, (sra tGPR:$lhs, tGPR:$rhs))]>; // BIC register def tBIC : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iALUr, - "bic", " $dst, $rhs", + "bic", "\t$dst, $rhs", [(set tGPR:$dst, (and tGPR:$lhs, (not tGPR:$rhs)))]>; // CMN register let Defs = [CPSR] in { def tCMN : T1pI<(outs), (ins tGPR:$lhs, tGPR:$rhs), IIC_iCMPr, - "cmn", " $lhs, $rhs", + "cmn", "\t$lhs, $rhs", [(ARMcmp tGPR:$lhs, (ineg tGPR:$rhs))]>; def tCMNZ : T1pI<(outs), (ins tGPR:$lhs, tGPR:$rhs), IIC_iCMPr, - "cmn", " $lhs, $rhs", + "cmn", "\t$lhs, $rhs", [(ARMcmpZ tGPR:$lhs, (ineg tGPR:$rhs))]>; } // CMP immediate let Defs = [CPSR] in { def tCMPi8 : T1pI<(outs), (ins tGPR:$lhs, i32imm:$rhs), IIC_iCMPi, - "cmp", " $lhs, $rhs", + "cmp", "\t$lhs, $rhs", [(ARMcmp tGPR:$lhs, imm0_255:$rhs)]>; def tCMPzi8 : T1pI<(outs), (ins tGPR:$lhs, i32imm:$rhs), IIC_iCMPi, - "cmp", " $lhs, $rhs", + "cmp", "\t$lhs, $rhs", [(ARMcmpZ tGPR:$lhs, imm0_255:$rhs)]>; } @@ -443,48 +459,48 @@ def tCMPzi8 : T1pI<(outs), (ins tGPR:$lhs, i32imm:$rhs), IIC_iCMPi, // CMP register let Defs = [CPSR] in { def tCMPr : T1pI<(outs), (ins tGPR:$lhs, tGPR:$rhs), IIC_iCMPr, - "cmp", " $lhs, $rhs", + "cmp", "\t$lhs, $rhs", [(ARMcmp tGPR:$lhs, tGPR:$rhs)]>; def tCMPzr : T1pI<(outs), (ins tGPR:$lhs, tGPR:$rhs), IIC_iCMPr, - "cmp", " $lhs, $rhs", + "cmp", "\t$lhs, $rhs", [(ARMcmpZ tGPR:$lhs, tGPR:$rhs)]>; def tCMPhir : T1pI<(outs), (ins GPR:$lhs, GPR:$rhs), IIC_iCMPr, - "cmp", " $lhs, $rhs", []>; + "cmp", "\t$lhs, $rhs", []>; def tCMPzhir : T1pI<(outs), (ins GPR:$lhs, GPR:$rhs), IIC_iCMPr, - "cmp", " $lhs, $rhs", []>; + "cmp", "\t$lhs, $rhs", []>; } // XOR register let isCommutable = 1 in def tEOR : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iALUr, - "eor", " $dst, $rhs", + "eor", "\t$dst, $rhs", [(set tGPR:$dst, (xor tGPR:$lhs, tGPR:$rhs))]>; // LSL immediate def tLSLri : T1sI<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), IIC_iMOVsi, - "lsl", " $dst, $lhs, $rhs", + "lsl", "\t$dst, $lhs, $rhs", [(set tGPR:$dst, (shl tGPR:$lhs, (i32 imm:$rhs)))]>; // LSL register def tLSLrr : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iMOVsr, - "lsl", " $dst, $rhs", + "lsl", "\t$dst, $rhs", [(set tGPR:$dst, (shl tGPR:$lhs, tGPR:$rhs))]>; // LSR immediate def tLSRri : T1sI<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), IIC_iMOVsi, - "lsr", " $dst, $lhs, $rhs", + "lsr", "\t$dst, $lhs, $rhs", [(set tGPR:$dst, (srl tGPR:$lhs, (i32 imm:$rhs)))]>; // LSR register def tLSRrr : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iMOVsr, - "lsr", " $dst, $rhs", + "lsr", "\t$dst, $rhs", [(set tGPR:$dst, (srl tGPR:$lhs, tGPR:$rhs))]>; // move register def tMOVi8 : T1sI<(outs tGPR:$dst), (ins i32imm:$src), IIC_iMOVi, - "mov", " $dst, $src", + "mov", "\t$dst, $src", [(set tGPR:$dst, imm0_255:$src)]>; // TODO: A7-73: MOV(2) - mov setting flag. @@ -493,45 +509,45 @@ def tMOVi8 : T1sI<(outs tGPR:$dst), (ins i32imm:$src), IIC_iMOVi, let neverHasSideEffects = 1 in { // FIXME: Make this predicable. def tMOVr : T1I<(outs tGPR:$dst), (ins tGPR:$src), IIC_iMOVr, - "mov $dst, $src", []>; + "mov\t$dst, $src", []>; let Defs = [CPSR] in def tMOVSr : T1I<(outs tGPR:$dst), (ins tGPR:$src), IIC_iMOVr, - "movs $dst, $src", []>; + "movs\t$dst, $src", []>; // FIXME: Make these predicable. def tMOVgpr2tgpr : T1I<(outs tGPR:$dst), (ins GPR:$src), IIC_iMOVr, - "mov $dst, $src", []>; + "mov\t$dst, $src", []>; def tMOVtgpr2gpr : T1I<(outs GPR:$dst), (ins tGPR:$src), IIC_iMOVr, - "mov $dst, $src", []>; + "mov\t$dst, $src", []>; def tMOVgpr2gpr : T1I<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVr, - "mov $dst, $src", []>; + "mov\t$dst, $src", []>; } // neverHasSideEffects // multiply register let isCommutable = 1 in def tMUL : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iMUL32, - "mul", " $dst, $rhs", + "mul", "\t$dst, $rhs", [(set tGPR:$dst, (mul tGPR:$lhs, tGPR:$rhs))]>; // move inverse register def tMVN : T1sI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iMOVr, - "mvn", " $dst, $src", + "mvn", "\t$dst, $src", [(set tGPR:$dst, (not tGPR:$src))]>; // bitwise or register let isCommutable = 1 in def tORR : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iALUr, - "orr", " $dst, $rhs", + "orr", "\t$dst, $rhs", [(set tGPR:$dst, (or tGPR:$lhs, tGPR:$rhs))]>; // swaps def tREV : T1pI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iUNAr, - "rev", " $dst, $src", + "rev", "\t$dst, $src", [(set tGPR:$dst, (bswap tGPR:$src))]>, Requires<[IsThumb1Only, HasV6]>; def tREV16 : T1pI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iUNAr, - "rev16", " $dst, $src", + "rev16", "\t$dst, $src", [(set tGPR:$dst, (or (and (srl tGPR:$src, (i32 8)), 0xFF), (or (and (shl tGPR:$src, (i32 8)), 0xFF00), @@ -540,7 +556,7 @@ def tREV16 : T1pI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iUNAr, Requires<[IsThumb1Only, HasV6]>; def tREVSH : T1pI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iUNAr, - "revsh", " $dst, $src", + "revsh", "\t$dst, $src", [(set tGPR:$dst, (sext_inreg (or (srl (and tGPR:$src, 0xFF00), (i32 8)), @@ -549,70 +565,70 @@ def tREVSH : T1pI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iUNAr, // rotate right register def tROR : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iMOVsr, - "ror", " $dst, $rhs", + "ror", "\t$dst, $rhs", [(set tGPR:$dst, (rotr tGPR:$lhs, tGPR:$rhs))]>; // negate register def tRSB : T1sI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iALUi, - "rsb", " $dst, $src, #0", + "rsb", "\t$dst, $src, #0", [(set tGPR:$dst, (ineg tGPR:$src))]>; // Subtract with carry register let Uses = [CPSR] in def tSBC : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iALUr, - "sbc", " $dst, $rhs", + "sbc", "\t$dst, $rhs", [(set tGPR:$dst, (sube tGPR:$lhs, tGPR:$rhs))]>; // Subtract immediate def tSUBi3 : T1sI<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), IIC_iALUi, - "sub", " $dst, $lhs, $rhs", + "sub", "\t$dst, $lhs, $rhs", [(set tGPR:$dst, (add tGPR:$lhs, imm0_7_neg:$rhs))]>; def tSUBi8 : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), IIC_iALUi, - "sub", " $dst, $rhs", + "sub", "\t$dst, $rhs", [(set tGPR:$dst, (add tGPR:$lhs, imm8_255_neg:$rhs))]>; // subtract register def tSUBrr : T1sI<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iALUr, - "sub", " $dst, $lhs, $rhs", + "sub", "\t$dst, $lhs, $rhs", [(set tGPR:$dst, (sub tGPR:$lhs, tGPR:$rhs))]>; // TODO: A7-96: STMIA - store multiple. // sign-extend byte def tSXTB : T1pI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iUNAr, - "sxtb", " $dst, $src", + "sxtb", "\t$dst, $src", [(set tGPR:$dst, (sext_inreg tGPR:$src, i8))]>, Requires<[IsThumb1Only, HasV6]>; // sign-extend short def tSXTH : T1pI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iUNAr, - "sxth", " $dst, $src", + "sxth", "\t$dst, $src", [(set tGPR:$dst, (sext_inreg tGPR:$src, i16))]>, Requires<[IsThumb1Only, HasV6]>; // test let isCommutable = 1, Defs = [CPSR] in def tTST : T1pI<(outs), (ins tGPR:$lhs, tGPR:$rhs), IIC_iCMPr, - "tst", " $lhs, $rhs", + "tst", "\t$lhs, $rhs", [(ARMcmpZ (and tGPR:$lhs, tGPR:$rhs), 0)]>; // zero-extend byte def tUXTB : T1pI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iUNAr, - "uxtb", " $dst, $src", + "uxtb", "\t$dst, $src", [(set tGPR:$dst, (and tGPR:$src, 0xFF))]>, Requires<[IsThumb1Only, HasV6]>; // zero-extend short def tUXTH : T1pI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iUNAr, - "uxth", " $dst, $src", + "uxth", "\t$dst, $src", [(set tGPR:$dst, (and tGPR:$src, 0xFFFF))]>, Requires<[IsThumb1Only, HasV6]>; // Conditional move tMOVCCr - Used to implement the Thumb SELECT_CC DAG operation. -// Expanded by the scheduler into a branch sequence. -let usesCustomDAGSchedInserter = 1 in // Expanded by the scheduler. +// Expanded after instruction selection into a branch sequence. +let usesCustomInserter = 1 in // Expanded after instruction selection. def tMOVCCr_pseudo : PseudoInst<(outs tGPR:$dst), (ins tGPR:$false, tGPR:$true, pred:$cc), NoItinerary, "@ tMOVCCr $cc", @@ -621,19 +637,19 @@ let usesCustomDAGSchedInserter = 1 in // Expanded by the scheduler. // 16-bit movcc in IT blocks for Thumb2. def tMOVCCr : T1pIt<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iCMOVr, - "mov", " $dst, $rhs", []>; + "mov", "\t$dst, $rhs", []>; def tMOVCCi : T1pIt<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs), IIC_iCMOVi, - "mov", " $dst, $rhs", []>; + "mov", "\t$dst, $rhs", []>; // tLEApcrel - Load a pc-relative address into a register without offending the // assembler. def tLEApcrel : T1I<(outs tGPR:$dst), (ins i32imm:$label, pred:$p), IIC_iALUi, - "adr$p $dst, #$label", []>; + "adr$p\t$dst, #$label", []>; def tLEApcrelJT : T1I<(outs tGPR:$dst), (ins i32imm:$label, nohash_imm:$id, pred:$p), - IIC_iALUi, "adr$p $dst, #${label}_${id}", []>; + IIC_iALUi, "adr$p\t$dst, #${label}_${id}", []>; //===----------------------------------------------------------------------===// // TLS Instructions @@ -643,7 +659,7 @@ def tLEApcrelJT : T1I<(outs tGPR:$dst), let isCall = 1, Defs = [R0, LR] in { def tTPsoft : TIx2<(outs), (ins), IIC_Br, - "bl __aeabi_read_tp", + "bl\t__aeabi_read_tp", [(set R0, ARMthread_pointer)]>; } diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 2b6fa98ed3c32..5bfda370b9337 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -153,18 +153,18 @@ def t2addrmode_so_reg : Operand<i32>, multiclass T2I_un_irs<string opc, PatFrag opnode, bit Cheap = 0, bit ReMat = 0>{ // shifted imm def i : T2sI<(outs GPR:$dst), (ins t2_so_imm:$src), IIC_iMOVi, - opc, " $dst, $src", + opc, "\t$dst, $src", [(set GPR:$dst, (opnode t2_so_imm:$src))]> { let isAsCheapAsAMove = Cheap; let isReMaterializable = ReMat; } // register def r : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVr, - opc, ".w $dst, $src", + opc, ".w\t$dst, $src", [(set GPR:$dst, (opnode GPR:$src))]>; // shifted register def s : T2I<(outs GPR:$dst), (ins t2_so_reg:$src), IIC_iMOVsi, - opc, ".w $dst, $src", + opc, ".w\t$dst, $src", [(set GPR:$dst, (opnode t2_so_reg:$src))]>; } @@ -175,17 +175,17 @@ multiclass T2I_bin_irs<string opc, PatFrag opnode, bit Commutable = 0, string wide =""> { // shifted imm def ri : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi, - opc, " $dst, $lhs, $rhs", + opc, "\t$dst, $lhs, $rhs", [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>; // register def rr : T2sI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr, - opc, !strconcat(wide, " $dst, $lhs, $rhs"), + opc, !strconcat(wide, "\t$dst, $lhs, $rhs"), [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]> { let isCommutable = Commutable; } // shifted register def rs : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi, - opc, !strconcat(wide, " $dst, $lhs, $rhs"), + opc, !strconcat(wide, "\t$dst, $lhs, $rhs"), [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>; } @@ -200,11 +200,11 @@ multiclass T2I_bin_w_irs<string opc, PatFrag opnode, bit Commutable = 0> : multiclass T2I_rbin_is<string opc, PatFrag opnode> { // shifted imm def ri : T2I<(outs GPR:$dst), (ins GPR:$rhs, t2_so_imm:$lhs), IIC_iALUi, - opc, ".w $dst, $rhs, $lhs", + opc, ".w\t$dst, $rhs, $lhs", [(set GPR:$dst, (opnode t2_so_imm:$lhs, GPR:$rhs))]>; // shifted register def rs : T2I<(outs GPR:$dst), (ins GPR:$rhs, t2_so_reg:$lhs), IIC_iALUsi, - opc, " $dst, $rhs, $lhs", + opc, "\t$dst, $rhs, $lhs", [(set GPR:$dst, (opnode t2_so_reg:$lhs, GPR:$rhs))]>; } @@ -214,17 +214,17 @@ let Defs = [CPSR] in { multiclass T2I_bin_s_irs<string opc, PatFrag opnode, bit Commutable = 0> { // shifted imm def ri : T2I<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi, - !strconcat(opc, "s"), ".w $dst, $lhs, $rhs", + !strconcat(opc, "s"), ".w\t$dst, $lhs, $rhs", [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>; // register def rr : T2I<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr, - !strconcat(opc, "s"), ".w $dst, $lhs, $rhs", + !strconcat(opc, "s"), ".w\t$dst, $lhs, $rhs", [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]> { let isCommutable = Commutable; } // shifted register def rs : T2I<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi, - !strconcat(opc, "s"), ".w $dst, $lhs, $rhs", + !strconcat(opc, "s"), ".w\t$dst, $lhs, $rhs", [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>; } } @@ -234,21 +234,21 @@ multiclass T2I_bin_s_irs<string opc, PatFrag opnode, bit Commutable = 0> { multiclass T2I_bin_ii12rs<string opc, PatFrag opnode, bit Commutable = 0> { // shifted imm def ri : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi, - opc, ".w $dst, $lhs, $rhs", + opc, ".w\t$dst, $lhs, $rhs", [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>; // 12-bit imm def ri12 : T2sI<(outs GPR:$dst), (ins GPR:$lhs, imm0_4095:$rhs), IIC_iALUi, - !strconcat(opc, "w"), " $dst, $lhs, $rhs", + !strconcat(opc, "w"), "\t$dst, $lhs, $rhs", [(set GPR:$dst, (opnode GPR:$lhs, imm0_4095:$rhs))]>; // register def rr : T2sI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr, - opc, ".w $dst, $lhs, $rhs", + opc, ".w\t$dst, $lhs, $rhs", [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]> { let isCommutable = Commutable; } // shifted register def rs : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi, - opc, ".w $dst, $lhs, $rhs", + opc, ".w\t$dst, $lhs, $rhs", [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>; } @@ -259,32 +259,32 @@ let Uses = [CPSR] in { multiclass T2I_adde_sube_irs<string opc, PatFrag opnode, bit Commutable = 0> { // shifted imm def ri : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi, - opc, " $dst, $lhs, $rhs", + opc, "\t$dst, $lhs, $rhs", [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>, Requires<[IsThumb2, CarryDefIsUnused]>; // register def rr : T2sI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr, - opc, ".w $dst, $lhs, $rhs", + opc, ".w\t$dst, $lhs, $rhs", [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]>, Requires<[IsThumb2, CarryDefIsUnused]> { let isCommutable = Commutable; } // shifted register def rs : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi, - opc, ".w $dst, $lhs, $rhs", + opc, ".w\t$dst, $lhs, $rhs", [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>, Requires<[IsThumb2, CarryDefIsUnused]>; // Carry setting variants // shifted imm def Sri : T2XI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi, - !strconcat(opc, "s $dst, $lhs, $rhs"), + !strconcat(opc, "s\t$dst, $lhs, $rhs"), [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>, Requires<[IsThumb2, CarryDefIsUsed]> { let Defs = [CPSR]; } // register def Srr : T2XI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr, - !strconcat(opc, "s.w $dst, $lhs, $rhs"), + !strconcat(opc, "s.w\t$dst, $lhs, $rhs"), [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]>, Requires<[IsThumb2, CarryDefIsUsed]> { let Defs = [CPSR]; @@ -292,7 +292,7 @@ multiclass T2I_adde_sube_irs<string opc, PatFrag opnode, bit Commutable = 0> { } // shifted register def Srs : T2XI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi, - !strconcat(opc, "s.w $dst, $lhs, $rhs"), + !strconcat(opc, "s.w\t$dst, $lhs, $rhs"), [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>, Requires<[IsThumb2, CarryDefIsUsed]> { let Defs = [CPSR]; @@ -306,12 +306,12 @@ multiclass T2I_rbin_s_is<string opc, PatFrag opnode> { // shifted imm def ri : T2XI<(outs GPR:$dst), (ins GPR:$rhs, t2_so_imm:$lhs, cc_out:$s), IIC_iALUi, - !strconcat(opc, "${s}.w $dst, $rhs, $lhs"), + !strconcat(opc, "${s}.w\t$dst, $rhs, $lhs"), [(set GPR:$dst, (opnode t2_so_imm:$lhs, GPR:$rhs))]>; // shifted register def rs : T2XI<(outs GPR:$dst), (ins GPR:$rhs, t2_so_reg:$lhs, cc_out:$s), IIC_iALUsi, - !strconcat(opc, "${s} $dst, $rhs, $lhs"), + !strconcat(opc, "${s}\t$dst, $rhs, $lhs"), [(set GPR:$dst, (opnode t2_so_reg:$lhs, GPR:$rhs))]>; } } @@ -321,11 +321,11 @@ multiclass T2I_rbin_s_is<string opc, PatFrag opnode> { multiclass T2I_sh_ir<string opc, PatFrag opnode> { // 5-bit imm def ri : T2sI<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs), IIC_iMOVsi, - opc, ".w $dst, $lhs, $rhs", + opc, ".w\t$dst, $lhs, $rhs", [(set GPR:$dst, (opnode GPR:$lhs, imm1_31:$rhs))]>; // register def rr : T2sI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iMOVsr, - opc, ".w $dst, $lhs, $rhs", + opc, ".w\t$dst, $lhs, $rhs", [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]>; } @@ -336,15 +336,15 @@ let Defs = [CPSR] in { multiclass T2I_cmp_is<string opc, PatFrag opnode> { // shifted imm def ri : T2I<(outs), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iCMPi, - opc, ".w $lhs, $rhs", + opc, ".w\t$lhs, $rhs", [(opnode GPR:$lhs, t2_so_imm:$rhs)]>; // register def rr : T2I<(outs), (ins GPR:$lhs, GPR:$rhs), IIC_iCMPr, - opc, ".w $lhs, $rhs", + opc, ".w\t$lhs, $rhs", [(opnode GPR:$lhs, GPR:$rhs)]>; // shifted register def rs : T2I<(outs), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iCMPsi, - opc, ".w $lhs, $rhs", + opc, ".w\t$lhs, $rhs", [(opnode GPR:$lhs, t2_so_reg:$rhs)]>; } } @@ -352,42 +352,44 @@ multiclass T2I_cmp_is<string opc, PatFrag opnode> { /// T2I_ld - Defines a set of (op r, {imm12|imm8|so_reg}) load patterns. multiclass T2I_ld<string opc, PatFrag opnode> { def i12 : T2Ii12<(outs GPR:$dst), (ins t2addrmode_imm12:$addr), IIC_iLoadi, - opc, ".w $dst, $addr", + opc, ".w\t$dst, $addr", [(set GPR:$dst, (opnode t2addrmode_imm12:$addr))]>; def i8 : T2Ii8 <(outs GPR:$dst), (ins t2addrmode_imm8:$addr), IIC_iLoadi, - opc, " $dst, $addr", + opc, "\t$dst, $addr", [(set GPR:$dst, (opnode t2addrmode_imm8:$addr))]>; def s : T2Iso <(outs GPR:$dst), (ins t2addrmode_so_reg:$addr), IIC_iLoadr, - opc, ".w $dst, $addr", + opc, ".w\t$dst, $addr", [(set GPR:$dst, (opnode t2addrmode_so_reg:$addr))]>; def pci : T2Ipc <(outs GPR:$dst), (ins i32imm:$addr), IIC_iLoadi, - opc, ".w $dst, $addr", - [(set GPR:$dst, (opnode (ARMWrapper tconstpool:$addr)))]>; + opc, ".w\t$dst, $addr", + [(set GPR:$dst, (opnode (ARMWrapper tconstpool:$addr)))]> { + let isReMaterializable = 1; + } } /// T2I_st - Defines a set of (op r, {imm12|imm8|so_reg}) store patterns. multiclass T2I_st<string opc, PatFrag opnode> { def i12 : T2Ii12<(outs), (ins GPR:$src, t2addrmode_imm12:$addr), IIC_iStorei, - opc, ".w $src, $addr", + opc, ".w\t$src, $addr", [(opnode GPR:$src, t2addrmode_imm12:$addr)]>; def i8 : T2Ii8 <(outs), (ins GPR:$src, t2addrmode_imm8:$addr), IIC_iStorei, - opc, " $src, $addr", + opc, "\t$src, $addr", [(opnode GPR:$src, t2addrmode_imm8:$addr)]>; def s : T2Iso <(outs), (ins GPR:$src, t2addrmode_so_reg:$addr), IIC_iStorer, - opc, ".w $src, $addr", + opc, ".w\t$src, $addr", [(opnode GPR:$src, t2addrmode_so_reg:$addr)]>; } /// T2I_picld - Defines the PIC load pattern. class T2I_picld<string opc, PatFrag opnode> : T2I<(outs GPR:$dst), (ins addrmodepc:$addr), IIC_iLoadi, - !strconcat("\n${addr:label}:\n\t", opc), " $dst, $addr", + !strconcat("\n${addr:label}:\n\t", opc), "\t$dst, $addr", [(set GPR:$dst, (opnode addrmodepc:$addr))]>; /// T2I_picst - Defines the PIC store pattern. class T2I_picst<string opc, PatFrag opnode> : T2I<(outs), (ins GPR:$src, addrmodepc:$addr), IIC_iStorer, - !strconcat("\n${addr:label}:\n\t", opc), " $src, $addr", + !strconcat("\n${addr:label}:\n\t", opc), "\t$src, $addr", [(opnode GPR:$src, addrmodepc:$addr)]>; @@ -395,10 +397,10 @@ class T2I_picst<string opc, PatFrag opnode> : /// register and one whose operand is a register rotated by 8/16/24. multiclass T2I_unary_rrot<string opc, PatFrag opnode> { def r : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iUNAr, - opc, ".w $dst, $src", + opc, ".w\t$dst, $src", [(set GPR:$dst, (opnode GPR:$src))]>; def r_rot : T2I<(outs GPR:$dst), (ins GPR:$src, i32imm:$rot), IIC_iUNAsi, - opc, ".w $dst, $src, ror $rot", + opc, ".w\t$dst, $src, ror $rot", [(set GPR:$dst, (opnode (rotr GPR:$src, rot_imm:$rot)))]>; } @@ -406,10 +408,10 @@ multiclass T2I_unary_rrot<string opc, PatFrag opnode> { /// register and one whose operand is a register rotated by 8/16/24. multiclass T2I_bin_rrot<string opc, PatFrag opnode> { def rr : T2I<(outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS), IIC_iALUr, - opc, " $dst, $LHS, $RHS", + opc, "\t$dst, $LHS, $RHS", [(set GPR:$dst, (opnode GPR:$LHS, GPR:$RHS))]>; def rr_rot : T2I<(outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS, i32imm:$rot), - IIC_iALUsr, opc, " $dst, $LHS, $RHS, ror $rot", + IIC_iALUsr, opc, "\t$dst, $LHS, $RHS, ror $rot", [(set GPR:$dst, (opnode GPR:$LHS, (rotr GPR:$RHS, rot_imm:$rot)))]>; } @@ -425,43 +427,43 @@ multiclass T2I_bin_rrot<string opc, PatFrag opnode> { // LEApcrel - Load a pc-relative address into a register without offending the // assembler. def t2LEApcrel : T2XI<(outs GPR:$dst), (ins i32imm:$label, pred:$p), IIC_iALUi, - "adr$p.w $dst, #$label", []>; + "adr$p.w\t$dst, #$label", []>; def t2LEApcrelJT : T2XI<(outs GPR:$dst), (ins i32imm:$label, nohash_imm:$id, pred:$p), IIC_iALUi, - "adr$p.w $dst, #${label}_${id}", []>; + "adr$p.w\t$dst, #${label}_${id}", []>; // ADD r, sp, {so_imm|i12} def t2ADDrSPi : T2sI<(outs GPR:$dst), (ins GPR:$sp, t2_so_imm:$imm), - IIC_iALUi, "add", ".w $dst, $sp, $imm", []>; + IIC_iALUi, "add", ".w\t$dst, $sp, $imm", []>; def t2ADDrSPi12 : T2I<(outs GPR:$dst), (ins GPR:$sp, imm0_4095:$imm), - IIC_iALUi, "addw", " $dst, $sp, $imm", []>; + IIC_iALUi, "addw", "\t$dst, $sp, $imm", []>; // ADD r, sp, so_reg def t2ADDrSPs : T2sI<(outs GPR:$dst), (ins GPR:$sp, t2_so_reg:$rhs), - IIC_iALUsi, "add", ".w $dst, $sp, $rhs", []>; + IIC_iALUsi, "add", ".w\t$dst, $sp, $rhs", []>; // SUB r, sp, {so_imm|i12} def t2SUBrSPi : T2sI<(outs GPR:$dst), (ins GPR:$sp, t2_so_imm:$imm), - IIC_iALUi, "sub", ".w $dst, $sp, $imm", []>; + IIC_iALUi, "sub", ".w\t$dst, $sp, $imm", []>; def t2SUBrSPi12 : T2I<(outs GPR:$dst), (ins GPR:$sp, imm0_4095:$imm), - IIC_iALUi, "subw", " $dst, $sp, $imm", []>; + IIC_iALUi, "subw", "\t$dst, $sp, $imm", []>; // SUB r, sp, so_reg def t2SUBrSPs : T2sI<(outs GPR:$dst), (ins GPR:$sp, t2_so_reg:$rhs), IIC_iALUsi, - "sub", " $dst, $sp, $rhs", []>; + "sub", "\t$dst, $sp, $rhs", []>; // Pseudo instruction that will expand into a t2SUBrSPi + a copy. -let usesCustomDAGSchedInserter = 1 in { // Expanded by the scheduler. +let usesCustomInserter = 1 in { // Expanded after instruction selection. def t2SUBrSPi_ : PseudoInst<(outs GPR:$dst), (ins GPR:$sp, t2_so_imm:$imm), - NoItinerary, "@ sub.w $dst, $sp, $imm", []>; + NoItinerary, "@ sub.w\t$dst, $sp, $imm", []>; def t2SUBrSPi12_ : PseudoInst<(outs GPR:$dst), (ins GPR:$sp, imm0_4095:$imm), - NoItinerary, "@ subw $dst, $sp, $imm", []>; + NoItinerary, "@ subw\t$dst, $sp, $imm", []>; def t2SUBrSPs_ : PseudoInst<(outs GPR:$dst), (ins GPR:$sp, t2_so_reg:$rhs), - NoItinerary, "@ sub $dst, $sp, $rhs", []>; -} // usesCustomDAGSchedInserter + NoItinerary, "@ sub\t$dst, $sp, $rhs", []>; +} // usesCustomInserter //===----------------------------------------------------------------------===// @@ -484,10 +486,10 @@ let mayLoad = 1, hasExtraDefRegAllocReq = 1 in { // Load doubleword def t2LDRDi8 : T2Ii8s4<(outs GPR:$dst1, GPR:$dst2), (ins t2addrmode_imm8s4:$addr), - IIC_iLoadi, "ldrd", " $dst1, $addr", []>; + IIC_iLoadi, "ldrd", "\t$dst1, $addr", []>; def t2LDRDpci : T2Ii8s4<(outs GPR:$dst1, GPR:$dst2), (ins i32imm:$addr), IIC_iLoadi, - "ldrd", " $dst1, $addr", []>; + "ldrd", "\t$dst1, $addr", []>; } // zextload i1 -> zextload i8 @@ -535,57 +537,57 @@ let mayLoad = 1 in { def t2LDR_PRE : T2Iidxldst<(outs GPR:$dst, GPR:$base_wb), (ins t2addrmode_imm8:$addr), AddrModeT2_i8, IndexModePre, IIC_iLoadiu, - "ldr", " $dst, $addr!", "$addr.base = $base_wb", + "ldr", "\t$dst, $addr!", "$addr.base = $base_wb", []>; def t2LDR_POST : T2Iidxldst<(outs GPR:$dst, GPR:$base_wb), (ins GPR:$base, t2am_imm8_offset:$offset), AddrModeT2_i8, IndexModePost, IIC_iLoadiu, - "ldr", " $dst, [$base], $offset", "$base = $base_wb", + "ldr", "\t$dst, [$base], $offset", "$base = $base_wb", []>; def t2LDRB_PRE : T2Iidxldst<(outs GPR:$dst, GPR:$base_wb), (ins t2addrmode_imm8:$addr), AddrModeT2_i8, IndexModePre, IIC_iLoadiu, - "ldrb", " $dst, $addr!", "$addr.base = $base_wb", + "ldrb", "\t$dst, $addr!", "$addr.base = $base_wb", []>; def t2LDRB_POST : T2Iidxldst<(outs GPR:$dst, GPR:$base_wb), (ins GPR:$base, t2am_imm8_offset:$offset), AddrModeT2_i8, IndexModePost, IIC_iLoadiu, - "ldrb", " $dst, [$base], $offset", "$base = $base_wb", + "ldrb", "\t$dst, [$base], $offset", "$base = $base_wb", []>; def t2LDRH_PRE : T2Iidxldst<(outs GPR:$dst, GPR:$base_wb), (ins t2addrmode_imm8:$addr), AddrModeT2_i8, IndexModePre, IIC_iLoadiu, - "ldrh", " $dst, $addr!", "$addr.base = $base_wb", + "ldrh", "\t$dst, $addr!", "$addr.base = $base_wb", []>; def t2LDRH_POST : T2Iidxldst<(outs GPR:$dst, GPR:$base_wb), (ins GPR:$base, t2am_imm8_offset:$offset), AddrModeT2_i8, IndexModePost, IIC_iLoadiu, - "ldrh", " $dst, [$base], $offset", "$base = $base_wb", + "ldrh", "\t$dst, [$base], $offset", "$base = $base_wb", []>; def t2LDRSB_PRE : T2Iidxldst<(outs GPR:$dst, GPR:$base_wb), (ins t2addrmode_imm8:$addr), AddrModeT2_i8, IndexModePre, IIC_iLoadiu, - "ldrsb", " $dst, $addr!", "$addr.base = $base_wb", + "ldrsb", "\t$dst, $addr!", "$addr.base = $base_wb", []>; def t2LDRSB_POST : T2Iidxldst<(outs GPR:$dst, GPR:$base_wb), (ins GPR:$base, t2am_imm8_offset:$offset), AddrModeT2_i8, IndexModePost, IIC_iLoadiu, - "ldrsb", " $dst, [$base], $offset", "$base = $base_wb", + "ldrsb", "\t$dst, [$base], $offset", "$base = $base_wb", []>; def t2LDRSH_PRE : T2Iidxldst<(outs GPR:$dst, GPR:$base_wb), (ins t2addrmode_imm8:$addr), AddrModeT2_i8, IndexModePre, IIC_iLoadiu, - "ldrsh", " $dst, $addr!", "$addr.base = $base_wb", + "ldrsh", "\t$dst, $addr!", "$addr.base = $base_wb", []>; def t2LDRSH_POST : T2Iidxldst<(outs GPR:$dst, GPR:$base_wb), (ins GPR:$base, t2am_imm8_offset:$offset), AddrModeT2_i8, IndexModePost, IIC_iLoadiu, - "ldrsh", " $dst, [$base], $offset", "$base = $base_wb", + "ldrsh", "\t$dst, [$base], $offset", "$base = $base_wb", []>; } @@ -598,48 +600,48 @@ defm t2STRH : T2I_st<"strh", BinOpFrag<(truncstorei16 node:$LHS, node:$RHS)>>; let mayLoad = 1, hasExtraSrcRegAllocReq = 1 in def t2STRDi8 : T2Ii8s4<(outs), (ins GPR:$src1, GPR:$src2, t2addrmode_imm8s4:$addr), - IIC_iStorer, "strd", " $src1, $addr", []>; + IIC_iStorer, "strd", "\t$src1, $addr", []>; // Indexed stores def t2STR_PRE : T2Iidxldst<(outs GPR:$base_wb), (ins GPR:$src, GPR:$base, t2am_imm8_offset:$offset), AddrModeT2_i8, IndexModePre, IIC_iStoreiu, - "str", " $src, [$base, $offset]!", "$base = $base_wb", + "str", "\t$src, [$base, $offset]!", "$base = $base_wb", [(set GPR:$base_wb, (pre_store GPR:$src, GPR:$base, t2am_imm8_offset:$offset))]>; def t2STR_POST : T2Iidxldst<(outs GPR:$base_wb), (ins GPR:$src, GPR:$base, t2am_imm8_offset:$offset), AddrModeT2_i8, IndexModePost, IIC_iStoreiu, - "str", " $src, [$base], $offset", "$base = $base_wb", + "str", "\t$src, [$base], $offset", "$base = $base_wb", [(set GPR:$base_wb, (post_store GPR:$src, GPR:$base, t2am_imm8_offset:$offset))]>; def t2STRH_PRE : T2Iidxldst<(outs GPR:$base_wb), (ins GPR:$src, GPR:$base, t2am_imm8_offset:$offset), AddrModeT2_i8, IndexModePre, IIC_iStoreiu, - "strh", " $src, [$base, $offset]!", "$base = $base_wb", + "strh", "\t$src, [$base, $offset]!", "$base = $base_wb", [(set GPR:$base_wb, (pre_truncsti16 GPR:$src, GPR:$base, t2am_imm8_offset:$offset))]>; def t2STRH_POST : T2Iidxldst<(outs GPR:$base_wb), (ins GPR:$src, GPR:$base, t2am_imm8_offset:$offset), AddrModeT2_i8, IndexModePost, IIC_iStoreiu, - "strh", " $src, [$base], $offset", "$base = $base_wb", + "strh", "\t$src, [$base], $offset", "$base = $base_wb", [(set GPR:$base_wb, (post_truncsti16 GPR:$src, GPR:$base, t2am_imm8_offset:$offset))]>; def t2STRB_PRE : T2Iidxldst<(outs GPR:$base_wb), (ins GPR:$src, GPR:$base, t2am_imm8_offset:$offset), AddrModeT2_i8, IndexModePre, IIC_iStoreiu, - "strb", " $src, [$base, $offset]!", "$base = $base_wb", + "strb", "\t$src, [$base, $offset]!", "$base = $base_wb", [(set GPR:$base_wb, (pre_truncsti8 GPR:$src, GPR:$base, t2am_imm8_offset:$offset))]>; def t2STRB_POST : T2Iidxldst<(outs GPR:$base_wb), (ins GPR:$src, GPR:$base, t2am_imm8_offset:$offset), AddrModeT2_i8, IndexModePost, IIC_iStoreiu, - "strb", " $src, [$base], $offset", "$base = $base_wb", + "strb", "\t$src, [$base], $offset", "$base = $base_wb", [(set GPR:$base_wb, (post_truncsti8 GPR:$src, GPR:$base, t2am_imm8_offset:$offset))]>; @@ -653,12 +655,12 @@ def t2STRB_POST : T2Iidxldst<(outs GPR:$base_wb), let mayLoad = 1, hasExtraDefRegAllocReq = 1 in def t2LDM : T2XI<(outs), (ins addrmode4:$addr, pred:$p, reglist:$wb, variable_ops), - IIC_iLoadm, "ldm${addr:submode}${p}${addr:wide} $addr, $wb", []>; + IIC_iLoadm, "ldm${addr:submode}${p}${addr:wide}\t$addr, $wb", []>; let mayStore = 1, hasExtraSrcRegAllocReq = 1 in def t2STM : T2XI<(outs), (ins addrmode4:$addr, pred:$p, reglist:$wb, variable_ops), - IIC_iStorem, "stm${addr:submode}${p}${addr:wide} $addr, $wb", []>; + IIC_iStorem, "stm${addr:submode}${p}${addr:wide}\t$addr, $wb", []>; //===----------------------------------------------------------------------===// // Move Instructions. @@ -666,22 +668,22 @@ def t2STM : T2XI<(outs), let neverHasSideEffects = 1 in def t2MOVr : T2sI<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVr, - "mov", ".w $dst, $src", []>; + "mov", ".w\t$dst, $src", []>; // AddedComplexity to ensure isel tries t2MOVi before t2MOVi16. let isReMaterializable = 1, isAsCheapAsAMove = 1, AddedComplexity = 1 in def t2MOVi : T2sI<(outs GPR:$dst), (ins t2_so_imm:$src), IIC_iMOVi, - "mov", ".w $dst, $src", + "mov", ".w\t$dst, $src", [(set GPR:$dst, t2_so_imm:$src)]>; let isReMaterializable = 1, isAsCheapAsAMove = 1 in def t2MOVi16 : T2I<(outs GPR:$dst), (ins i32imm:$src), IIC_iMOVi, - "movw", " $dst, $src", + "movw", "\t$dst, $src", [(set GPR:$dst, imm0_65535:$src)]>; let Constraints = "$src = $dst" in def t2MOVTi16 : T2I<(outs GPR:$dst), (ins GPR:$src, i32imm:$imm), IIC_iMOVi, - "movt", " $dst, $imm", + "movt", "\t$dst, $imm", [(set GPR:$dst, (or (and GPR:$src, 0xffff), lo16AllZero:$imm))]>; @@ -760,16 +762,16 @@ defm t2ROR : T2I_sh_ir<"ror", BinOpFrag<(rotr node:$LHS, node:$RHS)>>; let Uses = [CPSR] in { def t2MOVrx : T2sI<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi, - "rrx", " $dst, $src", + "rrx", "\t$dst, $src", [(set GPR:$dst, (ARMrrx GPR:$src))]>; } let Defs = [CPSR] in { def t2MOVsrl_flag : T2XI<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi, - "lsrs.w $dst, $src, #1", + "lsrs.w\t$dst, $src, #1", [(set GPR:$dst, (ARMsrl_flag GPR:$src))]>; def t2MOVsra_flag : T2XI<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi, - "asrs.w $dst, $src, #1", + "asrs.w\t$dst, $src, #1", [(set GPR:$dst, (ARMsra_flag GPR:$src))]>; } @@ -785,14 +787,14 @@ defm t2BIC : T2I_bin_w_irs<"bic", BinOpFrag<(and node:$LHS, (not node:$RHS))>>; let Constraints = "$src = $dst" in def t2BFC : T2I<(outs GPR:$dst), (ins GPR:$src, bf_inv_mask_imm:$imm), - IIC_iALUi, "bfc", " $dst, $imm", + IIC_iUNAsi, "bfc", "\t$dst, $imm", [(set GPR:$dst, (and GPR:$src, bf_inv_mask_imm:$imm))]>; def t2SBFX : T2I<(outs GPR:$dst), (ins GPR:$src, imm0_31:$lsb, imm0_31:$width), - IIC_iALUi, "sbfx", " $dst, $src, $lsb, $width", []>; + IIC_iALUi, "sbfx", "\t$dst, $src, $lsb, $width", []>; def t2UBFX : T2I<(outs GPR:$dst), (ins GPR:$src, imm0_31:$lsb, imm0_31:$width), - IIC_iALUi, "ubfx", " $dst, $src, $lsb, $width", []>; + IIC_iALUi, "ubfx", "\t$dst, $src, $lsb, $width", []>; // FIXME: A8.6.18 BFI - Bitfield insert (Encoding T1) @@ -819,80 +821,80 @@ def : T2Pat<(t2_so_imm_not:$src), // let isCommutable = 1 in def t2MUL: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32, - "mul", " $dst, $a, $b", + "mul", "\t$dst, $a, $b", [(set GPR:$dst, (mul GPR:$a, GPR:$b))]>; def t2MLA: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32, - "mla", " $dst, $a, $b, $c", + "mla", "\t$dst, $a, $b, $c", [(set GPR:$dst, (add (mul GPR:$a, GPR:$b), GPR:$c))]>; def t2MLS: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32, - "mls", " $dst, $a, $b, $c", + "mls", "\t$dst, $a, $b, $c", [(set GPR:$dst, (sub GPR:$c, (mul GPR:$a, GPR:$b)))]>; // Extra precision multiplies with low / high results let neverHasSideEffects = 1 in { let isCommutable = 1 in { def t2SMULL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMUL64, - "smull", " $ldst, $hdst, $a, $b", []>; + "smull", "\t$ldst, $hdst, $a, $b", []>; def t2UMULL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMUL64, - "umull", " $ldst, $hdst, $a, $b", []>; + "umull", "\t$ldst, $hdst, $a, $b", []>; } // Multiply + accumulate def t2SMLAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMAC64, - "smlal", " $ldst, $hdst, $a, $b", []>; + "smlal", "\t$ldst, $hdst, $a, $b", []>; def t2UMLAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMAC64, - "umlal", " $ldst, $hdst, $a, $b", []>; + "umlal", "\t$ldst, $hdst, $a, $b", []>; def t2UMAAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMAC64, - "umaal", " $ldst, $hdst, $a, $b", []>; + "umaal", "\t$ldst, $hdst, $a, $b", []>; } // neverHasSideEffects // Most significant word multiply def t2SMMUL : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32, - "smmul", " $dst, $a, $b", + "smmul", "\t$dst, $a, $b", [(set GPR:$dst, (mulhs GPR:$a, GPR:$b))]>; def t2SMMLA : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32, - "smmla", " $dst, $a, $b, $c", + "smmla", "\t$dst, $a, $b, $c", [(set GPR:$dst, (add (mulhs GPR:$a, GPR:$b), GPR:$c))]>; def t2SMMLS : T2I <(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32, - "smmls", " $dst, $a, $b, $c", + "smmls", "\t$dst, $a, $b, $c", [(set GPR:$dst, (sub GPR:$c, (mulhs GPR:$a, GPR:$b)))]>; multiclass T2I_smul<string opc, PatFrag opnode> { def BB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32, - !strconcat(opc, "bb"), " $dst, $a, $b", + !strconcat(opc, "bb"), "\t$dst, $a, $b", [(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16), (sext_inreg GPR:$b, i16)))]>; def BT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32, - !strconcat(opc, "bt"), " $dst, $a, $b", + !strconcat(opc, "bt"), "\t$dst, $a, $b", [(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16), (sra GPR:$b, (i32 16))))]>; def TB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32, - !strconcat(opc, "tb"), " $dst, $a, $b", + !strconcat(opc, "tb"), "\t$dst, $a, $b", [(set GPR:$dst, (opnode (sra GPR:$a, (i32 16)), (sext_inreg GPR:$b, i16)))]>; def TT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32, - !strconcat(opc, "tt"), " $dst, $a, $b", + !strconcat(opc, "tt"), "\t$dst, $a, $b", [(set GPR:$dst, (opnode (sra GPR:$a, (i32 16)), (sra GPR:$b, (i32 16))))]>; def WB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL16, - !strconcat(opc, "wb"), " $dst, $a, $b", + !strconcat(opc, "wb"), "\t$dst, $a, $b", [(set GPR:$dst, (sra (opnode GPR:$a, (sext_inreg GPR:$b, i16)), (i32 16)))]>; def WT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL16, - !strconcat(opc, "wt"), " $dst, $a, $b", + !strconcat(opc, "wt"), "\t$dst, $a, $b", [(set GPR:$dst, (sra (opnode GPR:$a, (sra GPR:$b, (i32 16))), (i32 16)))]>; } @@ -900,33 +902,33 @@ multiclass T2I_smul<string opc, PatFrag opnode> { multiclass T2I_smla<string opc, PatFrag opnode> { def BB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC16, - !strconcat(opc, "bb"), " $dst, $a, $b, $acc", + !strconcat(opc, "bb"), "\t$dst, $a, $b, $acc", [(set GPR:$dst, (add GPR:$acc, (opnode (sext_inreg GPR:$a, i16), (sext_inreg GPR:$b, i16))))]>; def BT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC16, - !strconcat(opc, "bt"), " $dst, $a, $b, $acc", + !strconcat(opc, "bt"), "\t$dst, $a, $b, $acc", [(set GPR:$dst, (add GPR:$acc, (opnode (sext_inreg GPR:$a, i16), (sra GPR:$b, (i32 16)))))]>; def TB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC16, - !strconcat(opc, "tb"), " $dst, $a, $b, $acc", + !strconcat(opc, "tb"), "\t$dst, $a, $b, $acc", [(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, (i32 16)), (sext_inreg GPR:$b, i16))))]>; def TT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC16, - !strconcat(opc, "tt"), " $dst, $a, $b, $acc", + !strconcat(opc, "tt"), "\t$dst, $a, $b, $acc", [(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, (i32 16)), (sra GPR:$b, (i32 16)))))]>; def WB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC16, - !strconcat(opc, "wb"), " $dst, $a, $b, $acc", + !strconcat(opc, "wb"), "\t$dst, $a, $b, $acc", [(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a, (sext_inreg GPR:$b, i16)), (i32 16))))]>; def WT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC16, - !strconcat(opc, "wt"), " $dst, $a, $b, $acc", + !strconcat(opc, "wt"), "\t$dst, $a, $b, $acc", [(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a, (sra GPR:$b, (i32 16))), (i32 16))))]>; } @@ -943,15 +945,15 @@ defm t2SMLA : T2I_smla<"smla", BinOpFrag<(mul node:$LHS, node:$RHS)>>; // def t2CLZ : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iUNAr, - "clz", " $dst, $src", + "clz", "\t$dst, $src", [(set GPR:$dst, (ctlz GPR:$src))]>; def t2REV : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iUNAr, - "rev", ".w $dst, $src", + "rev", ".w\t$dst, $src", [(set GPR:$dst, (bswap GPR:$src))]>; def t2REV16 : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iUNAr, - "rev16", ".w $dst, $src", + "rev16", ".w\t$dst, $src", [(set GPR:$dst, (or (and (srl GPR:$src, (i32 8)), 0xFF), (or (and (shl GPR:$src, (i32 8)), 0xFF00), @@ -959,14 +961,14 @@ def t2REV16 : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iUNAr, (and (shl GPR:$src, (i32 8)), 0xFF000000)))))]>; def t2REVSH : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iUNAr, - "revsh", ".w $dst, $src", + "revsh", ".w\t$dst, $src", [(set GPR:$dst, (sext_inreg (or (srl (and GPR:$src, 0xFF00), (i32 8)), (shl GPR:$src, (i32 8))), i16))]>; def t2PKHBT : T2I<(outs GPR:$dst), (ins GPR:$src1, GPR:$src2, i32imm:$shamt), - IIC_iALUsi, "pkhbt", " $dst, $src1, $src2, LSL $shamt", + IIC_iALUsi, "pkhbt", "\t$dst, $src1, $src2, LSL $shamt", [(set GPR:$dst, (or (and GPR:$src1, 0xFFFF), (and (shl GPR:$src2, (i32 imm:$shamt)), 0xFFFF0000)))]>; @@ -978,7 +980,7 @@ def : T2Pat<(or (and GPR:$src1, 0xFFFF), (shl GPR:$src2, imm16_31:$shamt)), (t2PKHBT GPR:$src1, GPR:$src2, imm16_31:$shamt)>; def t2PKHTB : T2I<(outs GPR:$dst), (ins GPR:$src1, GPR:$src2, i32imm:$shamt), - IIC_iALUsi, "pkhtb", " $dst, $src1, $src2, ASR $shamt", + IIC_iALUsi, "pkhtb", "\t$dst, $src1, $src2, ASR $shamt", [(set GPR:$dst, (or (and GPR:$src1, 0xFFFF0000), (and (sra GPR:$src2, imm16_31:$shamt), 0xFFFF)))]>; @@ -1025,26 +1027,26 @@ defm t2TEQ : T2I_cmp_is<"teq", // FIXME: should be able to write a pattern for ARMcmov, but can't use // a two-value operand where a dag node expects two operands. :( def t2MOVCCr : T2I<(outs GPR:$dst), (ins GPR:$false, GPR:$true), IIC_iCMOVr, - "mov", ".w $dst, $true", + "mov", ".w\t$dst, $true", [/*(set GPR:$dst, (ARMcmov GPR:$false, GPR:$true, imm:$cc, CCR:$ccr))*/]>, RegConstraint<"$false = $dst">; def t2MOVCCi : T2I<(outs GPR:$dst), (ins GPR:$false, t2_so_imm:$true), - IIC_iCMOVi, "mov", ".w $dst, $true", + IIC_iCMOVi, "mov", ".w\t$dst, $true", [/*(set GPR:$dst, (ARMcmov GPR:$false, t2_so_imm:$true, imm:$cc, CCR:$ccr))*/]>, RegConstraint<"$false = $dst">; def t2MOVCClsl : T2I<(outs GPR:$dst), (ins GPR:$false, GPR:$true, i32imm:$rhs), - IIC_iCMOVsi, "lsl", ".w $dst, $true, $rhs", []>, + IIC_iCMOVsi, "lsl", ".w\t$dst, $true, $rhs", []>, RegConstraint<"$false = $dst">; def t2MOVCClsr : T2I<(outs GPR:$dst), (ins GPR:$false, GPR:$true, i32imm:$rhs), - IIC_iCMOVsi, "lsr", ".w $dst, $true, $rhs", []>, + IIC_iCMOVsi, "lsr", ".w\t$dst, $true, $rhs", []>, RegConstraint<"$false = $dst">; def t2MOVCCasr : T2I<(outs GPR:$dst), (ins GPR:$false, GPR:$true, i32imm:$rhs), - IIC_iCMOVsi, "asr", ".w $dst, $true, $rhs", []>, + IIC_iCMOVsi, "asr", ".w\t$dst, $true, $rhs", []>, RegConstraint<"$false = $dst">; def t2MOVCCror : T2I<(outs GPR:$dst), (ins GPR:$false, GPR:$true, i32imm:$rhs), - IIC_iCMOVsi, "ror", ".w $dst, $true, $rhs", []>, + IIC_iCMOVsi, "ror", ".w\t$dst, $true, $rhs", []>, RegConstraint<"$false = $dst">; //===----------------------------------------------------------------------===// @@ -1055,7 +1057,7 @@ def t2MOVCCror : T2I<(outs GPR:$dst), (ins GPR:$false, GPR:$true, i32imm:$rhs), let isCall = 1, Defs = [R0, R12, LR, CPSR] in { def t2TPsoft : T2XI<(outs), (ins), IIC_Br, - "bl __aeabi_read_tp", + "bl\t__aeabi_read_tp", [(set R0, ARMthread_pointer)]>; } @@ -1078,13 +1080,13 @@ let Defs = D31 ] in { def t2Int_eh_sjlj_setjmp : Thumb2XI<(outs), (ins GPR:$src), AddrModeNone, SizeSpecial, NoItinerary, - "str.w sp, [$src, #+8] @ eh_setjmp begin\n" - "\tadr r12, 0f\n" - "\torr r12, #1\n" - "\tstr.w r12, [$src, #+4]\n" - "\tmovs r0, #0\n" - "\tb 1f\n" - "0:\tmovs r0, #1 @ eh_setjmp end\n" + "str.w\tsp, [$src, #+8] @ eh_setjmp begin\n" + "\tadr\tr12, 0f\n" + "\torr.w\tr12, r12, #1\n" + "\tstr.w\tr12, [$src, #+4]\n" + "\tmovs\tr0, #0\n" + "\tb\t1f\n" + "0:\tmovs\tr0, #1 @ eh_setjmp end\n" "1:", "", [(set R0, (ARMeh_sjlj_setjmp GPR:$src))]>; } @@ -1103,32 +1105,32 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1, hasExtraDefRegAllocReq = 1 in def t2LDM_RET : T2XI<(outs), (ins addrmode4:$addr, pred:$p, reglist:$wb, variable_ops), - IIC_Br, "ldm${addr:submode}${p}${addr:wide} $addr, $wb", + IIC_Br, "ldm${addr:submode}${p}${addr:wide}\t$addr, $wb", []>; let isBranch = 1, isTerminator = 1, isBarrier = 1 in { let isPredicable = 1 in def t2B : T2XI<(outs), (ins brtarget:$target), IIC_Br, - "b.w $target", + "b.w\t$target", [(br bb:$target)]>; let isNotDuplicable = 1, isIndirectBranch = 1 in { def t2BR_JT : T2JTI<(outs), (ins GPR:$target, GPR:$index, jt2block_operand:$jt, i32imm:$id), - IIC_Br, "mov pc, $target\n$jt", + IIC_Br, "mov\tpc, $target\n$jt", [(ARMbr2jt GPR:$target, GPR:$index, tjumptable:$jt, imm:$id)]>; // FIXME: Add a non-pc based case that can be predicated. def t2TBB : T2JTI<(outs), (ins tb_addrmode:$index, jt2block_operand:$jt, i32imm:$id), - IIC_Br, "tbb $index\n$jt", []>; + IIC_Br, "tbb\t$index\n$jt", []>; def t2TBH : T2JTI<(outs), (ins tb_addrmode:$index, jt2block_operand:$jt, i32imm:$id), - IIC_Br, "tbh $index\n$jt", []>; + IIC_Br, "tbh\t$index\n$jt", []>; } // isNotDuplicable, isIndirectBranch } // isBranch, isTerminator, isBarrier @@ -1137,14 +1139,14 @@ def t2TBH : // a two-value operand where a dag node expects two operands. :( let isBranch = 1, isTerminator = 1 in def t2Bcc : T2I<(outs), (ins brtarget:$target), IIC_Br, - "b", ".w $target", + "b", ".w\t$target", [/*(ARMbrcond bb:$target, imm:$cc)*/]>; // IT block def t2IT : Thumb2XI<(outs), (ins it_pred:$cc, it_mask:$mask), AddrModeNone, Size2Bytes, IIC_iALUx, - "it$mask $cc", "", []>; + "it$mask\t$cc", "", []>; //===----------------------------------------------------------------------===// // Non-Instruction Patterns @@ -1175,5 +1177,5 @@ def : T2Pat<(ARMWrapperJT tjumptable:$dst, imm:$id), // when we can do generalized remat. let isReMaterializable = 1 in def t2MOVi32imm : T2Ix2<(outs GPR:$dst), (ins i32imm:$src), IIC_iMOVi, - "movw", " $dst, ${src:lo16}\n\tmovt${p} $dst, ${src:hi16}", + "movw", "\t$dst, ${src:lo16}\n\tmovt${p}\t$dst, ${src:hi16}", [(set GPR:$dst, (i32 imm:$src))]>; diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index 56336d131abc3..455c33b7959aa 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -31,25 +31,45 @@ def arm_cmpfp0 : SDNode<"ARMISD::CMPFPw0",SDT_CMPFP0, [SDNPOutFlag]>; def arm_fmdrr : SDNode<"ARMISD::FMDRR", SDT_FMDRR>; //===----------------------------------------------------------------------===// +// Operand Definitions. +// + + +def vfp_f32imm : Operand<f32>, + PatLeaf<(f32 fpimm), [{ + return ARM::getVFPf32Imm(N->getValueAPF()) != -1; + }]> { + let PrintMethod = "printVFPf32ImmOperand"; +} + +def vfp_f64imm : Operand<f64>, + PatLeaf<(f64 fpimm), [{ + return ARM::getVFPf64Imm(N->getValueAPF()) != -1; + }]> { + let PrintMethod = "printVFPf64ImmOperand"; +} + + +//===----------------------------------------------------------------------===// // Load / store Instructions. // let canFoldAsLoad = 1 in { def FLDD : ADI5<0b1101, 0b01, (outs DPR:$dst), (ins addrmode5:$addr), - IIC_fpLoad64, "fldd", " $dst, $addr", + IIC_fpLoad64, "fldd", "\t$dst, $addr", [(set DPR:$dst, (load addrmode5:$addr))]>; def FLDS : ASI5<0b1101, 0b01, (outs SPR:$dst), (ins addrmode5:$addr), - IIC_fpLoad32, "flds", " $dst, $addr", + IIC_fpLoad32, "flds", "\t$dst, $addr", [(set SPR:$dst, (load addrmode5:$addr))]>; } // canFoldAsLoad def FSTD : ADI5<0b1101, 0b00, (outs), (ins DPR:$src, addrmode5:$addr), - IIC_fpStore64, "fstd", " $src, $addr", + IIC_fpStore64, "fstd", "\t$src, $addr", [(store DPR:$src, addrmode5:$addr)]>; def FSTS : ASI5<0b1101, 0b00, (outs), (ins SPR:$src, addrmode5:$addr), - IIC_fpStore32, "fsts", " $src, $addr", + IIC_fpStore32, "fsts", "\t$src, $addr", [(store SPR:$src, addrmode5:$addr)]>; //===----------------------------------------------------------------------===// @@ -59,14 +79,14 @@ def FSTS : ASI5<0b1101, 0b00, (outs), (ins SPR:$src, addrmode5:$addr), let mayLoad = 1, hasExtraDefRegAllocReq = 1 in { def FLDMD : AXDI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$wb, variable_ops), IIC_fpLoadm, - "fldm${addr:submode}d${p} ${addr:base}, $wb", + "fldm${addr:submode}d${p}\t${addr:base}, $wb", []> { let Inst{20} = 1; } def FLDMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$wb, variable_ops), IIC_fpLoadm, - "fldm${addr:submode}s${p} ${addr:base}, $wb", + "fldm${addr:submode}s${p}\t${addr:base}, $wb", []> { let Inst{20} = 1; } @@ -75,14 +95,14 @@ def FLDMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$wb, let mayStore = 1, hasExtraSrcRegAllocReq = 1 in { def FSTMD : AXDI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$wb, variable_ops), IIC_fpStorem, - "fstm${addr:submode}d${p} ${addr:base}, $wb", + "fstm${addr:submode}d${p}\t${addr:base}, $wb", []> { let Inst{20} = 0; } def FSTMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$wb, variable_ops), IIC_fpStorem, - "fstm${addr:submode}s${p} ${addr:base}, $wb", + "fstm${addr:submode}s${p}\t${addr:base}, $wb", []> { let Inst{20} = 0; } @@ -95,48 +115,48 @@ def FSTMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$wb, // def FADDD : ADbI<0b11100011, (outs DPR:$dst), (ins DPR:$a, DPR:$b), - IIC_fpALU64, "faddd", " $dst, $a, $b", + IIC_fpALU64, "faddd", "\t$dst, $a, $b", [(set DPR:$dst, (fadd DPR:$a, DPR:$b))]>; def FADDS : ASbIn<0b11100011, (outs SPR:$dst), (ins SPR:$a, SPR:$b), - IIC_fpALU32, "fadds", " $dst, $a, $b", + IIC_fpALU32, "fadds", "\t$dst, $a, $b", [(set SPR:$dst, (fadd SPR:$a, SPR:$b))]>; // These are encoded as unary instructions. let Defs = [FPSCR] in { def FCMPED : ADuI<0b11101011, 0b0100, 0b1100, (outs), (ins DPR:$a, DPR:$b), - IIC_fpCMP64, "fcmped", " $a, $b", + IIC_fpCMP64, "fcmped", "\t$a, $b", [(arm_cmpfp DPR:$a, DPR:$b)]>; def FCMPES : ASuI<0b11101011, 0b0100, 0b1100, (outs), (ins SPR:$a, SPR:$b), - IIC_fpCMP32, "fcmpes", " $a, $b", + IIC_fpCMP32, "fcmpes", "\t$a, $b", [(arm_cmpfp SPR:$a, SPR:$b)]>; } def FDIVD : ADbI<0b11101000, (outs DPR:$dst), (ins DPR:$a, DPR:$b), - IIC_fpDIV64, "fdivd", " $dst, $a, $b", + IIC_fpDIV64, "fdivd", "\t$dst, $a, $b", [(set DPR:$dst, (fdiv DPR:$a, DPR:$b))]>; def FDIVS : ASbI<0b11101000, (outs SPR:$dst), (ins SPR:$a, SPR:$b), - IIC_fpDIV32, "fdivs", " $dst, $a, $b", + IIC_fpDIV32, "fdivs", "\t$dst, $a, $b", [(set SPR:$dst, (fdiv SPR:$a, SPR:$b))]>; def FMULD : ADbI<0b11100010, (outs DPR:$dst), (ins DPR:$a, DPR:$b), - IIC_fpMUL64, "fmuld", " $dst, $a, $b", + IIC_fpMUL64, "fmuld", "\t$dst, $a, $b", [(set DPR:$dst, (fmul DPR:$a, DPR:$b))]>; def FMULS : ASbIn<0b11100010, (outs SPR:$dst), (ins SPR:$a, SPR:$b), - IIC_fpMUL32, "fmuls", " $dst, $a, $b", + IIC_fpMUL32, "fmuls", "\t$dst, $a, $b", [(set SPR:$dst, (fmul SPR:$a, SPR:$b))]>; def FNMULD : ADbI<0b11100010, (outs DPR:$dst), (ins DPR:$a, DPR:$b), - IIC_fpMUL64, "fnmuld", " $dst, $a, $b", + IIC_fpMUL64, "fnmuld", "\t$dst, $a, $b", [(set DPR:$dst, (fneg (fmul DPR:$a, DPR:$b)))]> { let Inst{6} = 1; } def FNMULS : ASbI<0b11100010, (outs SPR:$dst), (ins SPR:$a, SPR:$b), - IIC_fpMUL32, "fnmuls", " $dst, $a, $b", + IIC_fpMUL32, "fnmuls", "\t$dst, $a, $b", [(set SPR:$dst, (fneg (fmul SPR:$a, SPR:$b)))]> { let Inst{6} = 1; } @@ -149,13 +169,13 @@ def : Pat<(fmul (fneg SPR:$a), SPR:$b), def FSUBD : ADbI<0b11100011, (outs DPR:$dst), (ins DPR:$a, DPR:$b), - IIC_fpALU64, "fsubd", " $dst, $a, $b", + IIC_fpALU64, "fsubd", "\t$dst, $a, $b", [(set DPR:$dst, (fsub DPR:$a, DPR:$b))]> { let Inst{6} = 1; } def FSUBS : ASbIn<0b11100011, (outs SPR:$dst), (ins SPR:$a, SPR:$b), - IIC_fpALU32, "fsubs", " $dst, $a, $b", + IIC_fpALU32, "fsubs", "\t$dst, $a, $b", [(set SPR:$dst, (fsub SPR:$a, SPR:$b))]> { let Inst{6} = 1; } @@ -165,30 +185,30 @@ def FSUBS : ASbIn<0b11100011, (outs SPR:$dst), (ins SPR:$a, SPR:$b), // def FABSD : ADuI<0b11101011, 0b0000, 0b1100, (outs DPR:$dst), (ins DPR:$a), - IIC_fpUNA64, "fabsd", " $dst, $a", + IIC_fpUNA64, "fabsd", "\t$dst, $a", [(set DPR:$dst, (fabs DPR:$a))]>; def FABSS : ASuIn<0b11101011, 0b0000, 0b1100, (outs SPR:$dst), (ins SPR:$a), - IIC_fpUNA32, "fabss", " $dst, $a", + IIC_fpUNA32, "fabss", "\t$dst, $a", [(set SPR:$dst, (fabs SPR:$a))]>; let Defs = [FPSCR] in { def FCMPEZD : ADuI<0b11101011, 0b0101, 0b1100, (outs), (ins DPR:$a), - IIC_fpCMP64, "fcmpezd", " $a", + IIC_fpCMP64, "fcmpezd", "\t$a", [(arm_cmpfp0 DPR:$a)]>; def FCMPEZS : ASuI<0b11101011, 0b0101, 0b1100, (outs), (ins SPR:$a), - IIC_fpCMP32, "fcmpezs", " $a", + IIC_fpCMP32, "fcmpezs", "\t$a", [(arm_cmpfp0 SPR:$a)]>; } def FCVTDS : ASuI<0b11101011, 0b0111, 0b1100, (outs DPR:$dst), (ins SPR:$a), - IIC_fpCVTDS, "fcvtds", " $dst, $a", + IIC_fpCVTDS, "fcvtds", "\t$dst, $a", [(set DPR:$dst, (fextend SPR:$a))]>; // Special case encoding: bits 11-8 is 0b1011. def FCVTSD : VFPAI<(outs SPR:$dst), (ins DPR:$a), VFPUnaryFrm, - IIC_fpCVTSD, "fcvtsd", " $dst, $a", + IIC_fpCVTSD, "fcvtsd", "\t$dst, $a", [(set SPR:$dst, (fround DPR:$a))]> { let Inst{27-23} = 0b11101; let Inst{21-16} = 0b110111; @@ -198,26 +218,26 @@ def FCVTSD : VFPAI<(outs SPR:$dst), (ins DPR:$a), VFPUnaryFrm, let neverHasSideEffects = 1 in { def FCPYD : ADuI<0b11101011, 0b0000, 0b0100, (outs DPR:$dst), (ins DPR:$a), - IIC_fpUNA64, "fcpyd", " $dst, $a", []>; + IIC_fpUNA64, "fcpyd", "\t$dst, $a", []>; def FCPYS : ASuI<0b11101011, 0b0000, 0b0100, (outs SPR:$dst), (ins SPR:$a), - IIC_fpUNA32, "fcpys", " $dst, $a", []>; + IIC_fpUNA32, "fcpys", "\t$dst, $a", []>; } // neverHasSideEffects def FNEGD : ADuI<0b11101011, 0b0001, 0b0100, (outs DPR:$dst), (ins DPR:$a), - IIC_fpUNA64, "fnegd", " $dst, $a", + IIC_fpUNA64, "fnegd", "\t$dst, $a", [(set DPR:$dst, (fneg DPR:$a))]>; def FNEGS : ASuIn<0b11101011, 0b0001, 0b0100, (outs SPR:$dst), (ins SPR:$a), - IIC_fpUNA32, "fnegs", " $dst, $a", + IIC_fpUNA32, "fnegs", "\t$dst, $a", [(set SPR:$dst, (fneg SPR:$a))]>; def FSQRTD : ADuI<0b11101011, 0b0001, 0b1100, (outs DPR:$dst), (ins DPR:$a), - IIC_fpSQRT64, "fsqrtd", " $dst, $a", + IIC_fpSQRT64, "fsqrtd", "\t$dst, $a", [(set DPR:$dst, (fsqrt DPR:$a))]>; def FSQRTS : ASuI<0b11101011, 0b0001, 0b1100, (outs SPR:$dst), (ins SPR:$a), - IIC_fpSQRT32, "fsqrts", " $dst, $a", + IIC_fpSQRT32, "fsqrts", "\t$dst, $a", [(set SPR:$dst, (fsqrt SPR:$a))]>; //===----------------------------------------------------------------------===// @@ -225,16 +245,16 @@ def FSQRTS : ASuI<0b11101011, 0b0001, 0b1100, (outs SPR:$dst), (ins SPR:$a), // def FMRS : AVConv2I<0b11100001, 0b1010, (outs GPR:$dst), (ins SPR:$src), - IIC_VMOVSI, "fmrs", " $dst, $src", + IIC_VMOVSI, "fmrs", "\t$dst, $src", [(set GPR:$dst, (bitconvert SPR:$src))]>; def FMSR : AVConv4I<0b11100000, 0b1010, (outs SPR:$dst), (ins GPR:$src), - IIC_VMOVIS, "fmsr", " $dst, $src", + IIC_VMOVIS, "fmsr", "\t$dst, $src", [(set SPR:$dst, (bitconvert GPR:$src))]>; def FMRRD : AVConv3I<0b11000101, 0b1011, (outs GPR:$wb, GPR:$dst2), (ins DPR:$src), - IIC_VMOVDI, "fmrrd", " $wb, $dst2, $src", + IIC_VMOVDI, "fmrrd", "\t$wb, $dst2, $src", [/* FIXME: Can't write pattern for multiple result instr*/]>; // FMDHR: GPR -> SPR @@ -242,7 +262,7 @@ def FMRRD : AVConv3I<0b11000101, 0b1011, def FMDRR : AVConv5I<0b11000100, 0b1011, (outs DPR:$dst), (ins GPR:$src1, GPR:$src2), - IIC_VMOVID, "fmdrr", " $dst, $src1, $src2", + IIC_VMOVID, "fmdrr", "\t$dst, $src1, $src2", [(set DPR:$dst, (arm_fmdrr GPR:$src1, GPR:$src2))]>; // FMRDH: SPR -> GPR @@ -258,23 +278,23 @@ def FMDRR : AVConv5I<0b11000100, 0b1011, // Int to FP: def FSITOD : AVConv1I<0b11101011, 0b1000, 0b1011, (outs DPR:$dst), (ins SPR:$a), - IIC_fpCVTID, "fsitod", " $dst, $a", + IIC_fpCVTID, "fsitod", "\t$dst, $a", [(set DPR:$dst, (arm_sitof SPR:$a))]> { let Inst{7} = 1; } def FSITOS : AVConv1In<0b11101011, 0b1000, 0b1010, (outs SPR:$dst),(ins SPR:$a), - IIC_fpCVTIS, "fsitos", " $dst, $a", + IIC_fpCVTIS, "fsitos", "\t$dst, $a", [(set SPR:$dst, (arm_sitof SPR:$a))]> { let Inst{7} = 1; } def FUITOD : AVConv1I<0b11101011, 0b1000, 0b1011, (outs DPR:$dst), (ins SPR:$a), - IIC_fpCVTID, "fuitod", " $dst, $a", + IIC_fpCVTID, "fuitod", "\t$dst, $a", [(set DPR:$dst, (arm_uitof SPR:$a))]>; def FUITOS : AVConv1In<0b11101011, 0b1000, 0b1010, (outs SPR:$dst),(ins SPR:$a), - IIC_fpCVTIS, "fuitos", " $dst, $a", + IIC_fpCVTIS, "fuitos", "\t$dst, $a", [(set SPR:$dst, (arm_uitof SPR:$a))]>; // FP to Int: @@ -282,28 +302,28 @@ def FUITOS : AVConv1In<0b11101011, 0b1000, 0b1010, (outs SPR:$dst),(ins SPR:$a), def FTOSIZD : AVConv1I<0b11101011, 0b1101, 0b1011, (outs SPR:$dst), (ins DPR:$a), - IIC_fpCVTDI, "ftosizd", " $dst, $a", + IIC_fpCVTDI, "ftosizd", "\t$dst, $a", [(set SPR:$dst, (arm_ftosi DPR:$a))]> { let Inst{7} = 1; // Z bit } def FTOSIZS : AVConv1In<0b11101011, 0b1101, 0b1010, (outs SPR:$dst), (ins SPR:$a), - IIC_fpCVTSI, "ftosizs", " $dst, $a", + IIC_fpCVTSI, "ftosizs", "\t$dst, $a", [(set SPR:$dst, (arm_ftosi SPR:$a))]> { let Inst{7} = 1; // Z bit } def FTOUIZD : AVConv1I<0b11101011, 0b1100, 0b1011, (outs SPR:$dst), (ins DPR:$a), - IIC_fpCVTDI, "ftouizd", " $dst, $a", + IIC_fpCVTDI, "ftouizd", "\t$dst, $a", [(set SPR:$dst, (arm_ftoui DPR:$a))]> { let Inst{7} = 1; // Z bit } def FTOUIZS : AVConv1In<0b11101011, 0b1100, 0b1010, (outs SPR:$dst), (ins SPR:$a), - IIC_fpCVTSI, "ftouizs", " $dst, $a", + IIC_fpCVTSI, "ftouizs", "\t$dst, $a", [(set SPR:$dst, (arm_ftoui SPR:$a))]> { let Inst{7} = 1; // Z bit } @@ -313,34 +333,34 @@ def FTOUIZS : AVConv1In<0b11101011, 0b1100, 0b1010, // def FMACD : ADbI<0b11100000, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b), - IIC_fpMAC64, "fmacd", " $dst, $a, $b", + IIC_fpMAC64, "fmacd", "\t$dst, $a, $b", [(set DPR:$dst, (fadd (fmul DPR:$a, DPR:$b), DPR:$dstin))]>, RegConstraint<"$dstin = $dst">; def FMACS : ASbIn<0b11100000, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b), - IIC_fpMAC32, "fmacs", " $dst, $a, $b", + IIC_fpMAC32, "fmacs", "\t$dst, $a, $b", [(set SPR:$dst, (fadd (fmul SPR:$a, SPR:$b), SPR:$dstin))]>, RegConstraint<"$dstin = $dst">; def FMSCD : ADbI<0b11100001, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b), - IIC_fpMAC64, "fmscd", " $dst, $a, $b", + IIC_fpMAC64, "fmscd", "\t$dst, $a, $b", [(set DPR:$dst, (fsub (fmul DPR:$a, DPR:$b), DPR:$dstin))]>, RegConstraint<"$dstin = $dst">; def FMSCS : ASbI<0b11100001, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b), - IIC_fpMAC32, "fmscs", " $dst, $a, $b", + IIC_fpMAC32, "fmscs", "\t$dst, $a, $b", [(set SPR:$dst, (fsub (fmul SPR:$a, SPR:$b), SPR:$dstin))]>, RegConstraint<"$dstin = $dst">; def FNMACD : ADbI<0b11100000, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b), - IIC_fpMAC64, "fnmacd", " $dst, $a, $b", + IIC_fpMAC64, "fnmacd", "\t$dst, $a, $b", [(set DPR:$dst, (fadd (fneg (fmul DPR:$a, DPR:$b)), DPR:$dstin))]>, RegConstraint<"$dstin = $dst"> { let Inst{6} = 1; } def FNMACS : ASbIn<0b11100000, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b), - IIC_fpMAC32, "fnmacs", " $dst, $a, $b", + IIC_fpMAC32, "fnmacs", "\t$dst, $a, $b", [(set SPR:$dst, (fadd (fneg (fmul SPR:$a, SPR:$b)), SPR:$dstin))]>, RegConstraint<"$dstin = $dst"> { let Inst{6} = 1; @@ -352,14 +372,14 @@ def : Pat<(fsub SPR:$dstin, (fmul SPR:$a, SPR:$b)), (FNMACS SPR:$dstin, SPR:$a, SPR:$b)>, Requires<[DontUseNEONForFP]>; def FNMSCD : ADbI<0b11100001, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b), - IIC_fpMAC64, "fnmscd", " $dst, $a, $b", + IIC_fpMAC64, "fnmscd", "\t$dst, $a, $b", [(set DPR:$dst, (fsub (fneg (fmul DPR:$a, DPR:$b)), DPR:$dstin))]>, RegConstraint<"$dstin = $dst"> { let Inst{6} = 1; } def FNMSCS : ASbI<0b11100001, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b), - IIC_fpMAC32, "fnmscs", " $dst, $a, $b", + IIC_fpMAC32, "fnmscs", "\t$dst, $a, $b", [(set SPR:$dst, (fsub (fneg (fmul SPR:$a, SPR:$b)), SPR:$dstin))]>, RegConstraint<"$dstin = $dst"> { let Inst{6} = 1; @@ -371,25 +391,25 @@ def FNMSCS : ASbI<0b11100001, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b), def FCPYDcc : ADuI<0b11101011, 0b0000, 0b0100, (outs DPR:$dst), (ins DPR:$false, DPR:$true), - IIC_fpUNA64, "fcpyd", " $dst, $true", + IIC_fpUNA64, "fcpyd", "\t$dst, $true", [/*(set DPR:$dst, (ARMcmov DPR:$false, DPR:$true, imm:$cc))*/]>, RegConstraint<"$false = $dst">; def FCPYScc : ASuI<0b11101011, 0b0000, 0b0100, (outs SPR:$dst), (ins SPR:$false, SPR:$true), - IIC_fpUNA32, "fcpys", " $dst, $true", + IIC_fpUNA32, "fcpys", "\t$dst, $true", [/*(set SPR:$dst, (ARMcmov SPR:$false, SPR:$true, imm:$cc))*/]>, RegConstraint<"$false = $dst">; def FNEGDcc : ADuI<0b11101011, 0b0001, 0b0100, (outs DPR:$dst), (ins DPR:$false, DPR:$true), - IIC_fpUNA64, "fnegd", " $dst, $true", + IIC_fpUNA64, "fnegd", "\t$dst, $true", [/*(set DPR:$dst, (ARMcneg DPR:$false, DPR:$true, imm:$cc))*/]>, RegConstraint<"$false = $dst">; def FNEGScc : ASuI<0b11101011, 0b0001, 0b0100, (outs SPR:$dst), (ins SPR:$false, SPR:$true), - IIC_fpUNA32, "fnegs", " $dst, $true", + IIC_fpUNA32, "fnegs", "\t$dst, $true", [/*(set SPR:$dst, (ARMcneg SPR:$false, SPR:$true, imm:$cc))*/]>, RegConstraint<"$false = $dst">; @@ -399,7 +419,8 @@ def FNEGScc : ASuI<0b11101011, 0b0001, 0b0100, // let Defs = [CPSR], Uses = [FPSCR] in -def FMSTAT : VFPAI<(outs), (ins), VFPMiscFrm, IIC_fpSTAT, "fmstat", "", [(arm_fmstat)]> { +def FMSTAT : VFPAI<(outs), (ins), VFPMiscFrm, IIC_fpSTAT, "fmstat", "", + [(arm_fmstat)]> { let Inst{27-20} = 0b11101111; let Inst{19-16} = 0b0001; let Inst{15-12} = 0b1111; @@ -407,3 +428,29 @@ def FMSTAT : VFPAI<(outs), (ins), VFPMiscFrm, IIC_fpSTAT, "fmstat", "", [(arm_fm let Inst{7} = 0; let Inst{4} = 1; } + + +// Materialize FP immediates. VFP3 only. +let isReMaterializable = 1 in +def FCONSTS : VFPAI<(outs SPR:$dst), (ins vfp_f32imm:$imm), + VFPMiscFrm, IIC_VMOVImm, + "fconsts", "\t$dst, $imm", + [(set SPR:$dst, vfp_f32imm:$imm)]>, Requires<[HasVFP3]> { + let Inst{27-23} = 0b11101; + let Inst{21-20} = 0b11; + let Inst{11-9} = 0b101; + let Inst{8} = 0; + let Inst{7-4} = 0b0000; +} + +let isReMaterializable = 1 in +def FCONSTD : VFPAI<(outs DPR:$dst), (ins vfp_f64imm:$imm), + VFPMiscFrm, IIC_VMOVImm, + "fconstd", "\t$dst, $imm", + [(set DPR:$dst, vfp_f64imm:$imm)]>, Requires<[HasVFP3]> { + let Inst{27-23} = 0b11101; + let Inst{21-20} = 0b11; + let Inst{11-9} = 0b101; + let Inst{8} = 1; + let Inst{7-4} = 0b0000; +} diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index c9b9e84c2a3b6..7e1783bd44bc5 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -30,7 +30,6 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" @@ -56,7 +55,7 @@ STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's"); /// load / store instructions to form ldm / stm instructions. namespace { - struct VISIBILITY_HIDDEN ARMLoadStoreOpt : public MachineFunctionPass { + struct ARMLoadStoreOpt : public MachineFunctionPass { static char ID; ARMLoadStoreOpt() : MachineFunctionPass(&ID) {} @@ -1106,7 +1105,7 @@ bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { /// likely they will be combined later. namespace { - struct VISIBILITY_HIDDEN ARMPreAllocLoadStoreOpt : public MachineFunctionPass{ + struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{ static char ID; ARMPreAllocLoadStoreOpt() : MachineFunctionPass(&ID) {} diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td index e0be784329738..d393e8d7e3e26 100644 --- a/lib/Target/ARM/ARMRegisterInfo.td +++ b/lib/Target/ARM/ARMRegisterInfo.td @@ -129,9 +129,6 @@ def GPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6, iterator allocation_order_begin(const MachineFunction &MF) const; iterator allocation_order_end(const MachineFunction &MF) const; }]; - // FIXME: We are reserving r12 in case the PEI needs to use it to - // generate large stack offset. Make it available once we have register - // scavenging. Similarly r3 is reserved in Thumb mode for now. let MethodBodies = [{ // FP is R11, R9 is available. static const unsigned ARM_GPR_AO_1[] = { @@ -169,10 +166,20 @@ def GPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6, ARM::R4, ARM::R5, ARM::R6, ARM::R8, ARM::R9, ARM::R10,ARM::R11,ARM::R7 }; + // For Thumb1 mode, we don't want to allocate hi regs at all, as we + // don't know how to spill them. If we make our prologue/epilogue code + // smarter at some point, we can go back to using the above allocation + // orders for the Thumb1 instructions that know how to use hi regs. + static const unsigned THUMB_GPR_AO[] = { + ARM::R0, ARM::R1, ARM::R2, ARM::R3, + ARM::R4, ARM::R5, ARM::R6, ARM::R7 }; + GPRClass::iterator GPRClass::allocation_order_begin(const MachineFunction &MF) const { const TargetMachine &TM = MF.getTarget(); const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>(); + if (Subtarget.isThumb1Only()) + return THUMB_GPR_AO; if (Subtarget.isTargetDarwin()) { if (Subtarget.isR9Reserved()) return ARM_GPR_AO_4; @@ -195,6 +202,12 @@ def GPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6, const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>(); GPRClass::iterator I; + if (Subtarget.isThumb1Only()) { + I = THUMB_GPR_AO + (sizeof(THUMB_GPR_AO)/sizeof(unsigned)); + // Mac OS X requires FP not to be clobbered for backtracing purpose. + return (Subtarget.isTargetDarwin() || RI->hasFP(MF)) ? I-1 : I; + } + if (Subtarget.isTargetDarwin()) { if (Subtarget.isR9Reserved()) I = ARM_GPR_AO_4 + (sizeof(ARM_GPR_AO_4)/sizeof(unsigned)); @@ -316,7 +329,7 @@ def DPR : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64, // Subset of DPR that are accessible with VFP2 (and so that also have // 32-bit SPR subregs). -def DPR_VFP2 : RegisterClass<"ARM", [f64, v2i32, v2f32], 64, +def DPR_VFP2 : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64, [D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, D11, D12, D13, D14, D15]> { let SubRegClassList = [SPR, SPR]; @@ -324,7 +337,7 @@ def DPR_VFP2 : RegisterClass<"ARM", [f64, v2i32, v2f32], 64, // Subset of DPR which can be used as a source of NEON scalars for 16-bit // operations -def DPR_8 : RegisterClass<"ARM", [f64, v4i16, v2f32], 64, +def DPR_8 : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64, [D0, D1, D2, D3, D4, D5, D6, D7]> { let SubRegClassList = [SPR_8, SPR_8]; } @@ -344,6 +357,13 @@ def QPR_VFP2 : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], let SubRegClassList = [SPR, SPR, SPR, SPR, DPR_VFP2, DPR_VFP2]; } +// Subset of QPR that have DPR_8 and SPR_8 subregs. +def QPR_8 : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], + 128, + [Q0, Q1, Q2, Q3]> { + let SubRegClassList = [SPR_8, SPR_8, SPR_8, SPR_8, DPR_8, DPR_8]; +} + // Condition code registers. def CCR : RegisterClass<"ARM", [i32], 32, [CPSR]>; diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index 74781593a0d9f..e721a7fd68049 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -130,7 +130,7 @@ protected: /// for Thumb1. bool enablePostRAScheduler(CodeGenOpt::Level OptLevel, TargetSubtarget::AntiDepBreakMode& mode) const { - mode = TargetSubtarget::ANTIDEP_NONE; + mode = TargetSubtarget::ANTIDEP_CRITICAL; return PostRAScheduler && OptLevel >= CodeGenOpt::Default; } diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index c1da6ce88b9a5..b4ce1d7760cc4 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -112,8 +112,12 @@ bool ARMBaseTargetMachine::addPreSched2(PassManagerBase &PM, bool ARMBaseTargetMachine::addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel) { // FIXME: temporarily disabling load / store optimization pass for Thumb1. - if (OptLevel != CodeGenOpt::None && !Subtarget.isThumb1Only()) - PM.add(createIfConverterPass()); + if (OptLevel != CodeGenOpt::None) { + if (!Subtarget.isThumb1Only()) + PM.add(createIfConverterPass()); + if (Subtarget.hasNEON()) + PM.add(createNEONMoveFixPass()); + } if (Subtarget.isThumb2()) { PM.add(createThumb2ITBlockPass()); diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h index 71a53488f164c..dd9542ea8095f 100644 --- a/lib/Target/ARM/ARMTargetMachine.h +++ b/lib/Target/ARM/ARMTargetMachine.h @@ -103,7 +103,7 @@ public: ThumbTargetMachine(const Target &T, const std::string &TT, const std::string &FS); - /// returns either Thumb1RegisterInfo of Thumb2RegisterInfo + /// returns either Thumb1RegisterInfo or Thumb2RegisterInfo virtual const ARMBaseRegisterInfo *getRegisterInfo() const { return &InstrInfo->getRegisterInfo(); } diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 403f96c69e581..894f913a77c13 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -44,13 +44,21 @@ private: bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); } - bool ParseRegister(ARMOperand &Op); + bool MaybeParseRegister(ARMOperand &Op, bool ParseWriteBack); bool ParseRegisterList(ARMOperand &Op); bool ParseMemory(ARMOperand &Op); - bool ParseShift(enum ShiftType *St, const MCExpr *&ShiftAmount); + bool ParseMemoryOffsetReg(bool &Negative, + bool &OffsetRegShifted, + enum ShiftType &ShiftType, + const MCExpr *&ShiftAmount, + const MCExpr *&Offset, + bool &OffsetIsReg, + int &OffsetRegNum); + + bool ParseShift(enum ShiftType &St, const MCExpr *&ShiftAmount); bool ParseOperand(ARMOperand &Op); @@ -123,16 +131,17 @@ struct ARMOperand { // This is for all forms of ARM address expressions struct { unsigned BaseRegNum; - bool OffsetIsReg; - const MCExpr *Offset; // used when OffsetIsReg is false unsigned OffsetRegNum; // used when OffsetIsReg is true - bool OffsetRegShifted; // only used when OffsetIsReg is true - enum ShiftType ShiftType; // used when OffsetRegShifted is true + const MCExpr *Offset; // used when OffsetIsReg is false const MCExpr *ShiftAmount; // used when OffsetRegShifted is true - bool Preindexed; - bool Postindexed; - bool Negative; // only used when OffsetIsReg is true - bool Writeback; + enum ShiftType ShiftType; // used when OffsetRegShifted is true + unsigned + OffsetRegShifted : 1, // only used when OffsetIsReg is true + Preindexed : 1, + Postindexed : 1, + OffsetIsReg : 1, + Negative : 1, // only used when OffsetIsReg is true + Writeback : 1; } Mem; }; @@ -208,12 +217,12 @@ struct ARMOperand { } // end anonymous namespace. -// Try to parse a register name. The token must be an Identifier when called, -// and if it is a register name a Reg operand is created, the token is eaten -// and false is returned. Else true is returned and no token is eaten. -// TODO this is likely to change to allow different register types and or to -// parse for a specific register type. -bool ARMAsmParser::ParseRegister(ARMOperand &Op) { +/// Try to parse a register name. The token must be an Identifier when called, +/// and if it is a register name a Reg operand is created, the token is eaten +/// and false is returned. Else true is returned and no token is eaten. +/// TODO this is likely to change to allow different register types and or to +/// parse for a specific register type. +bool ARMAsmParser::MaybeParseRegister(ARMOperand &Op, bool ParseWriteBack) { const AsmToken &Tok = getLexer().getTok(); assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier"); @@ -227,10 +236,12 @@ bool ARMAsmParser::ParseRegister(ARMOperand &Op) { getLexer().Lex(); // Eat identifier token. bool Writeback = false; - const AsmToken &ExclaimTok = getLexer().getTok(); - if (ExclaimTok.is(AsmToken::Exclaim)) { - Writeback = true; - getLexer().Lex(); // Eat exclaim token + if (ParseWriteBack) { + const AsmToken &ExclaimTok = getLexer().getTok(); + if (ExclaimTok.is(AsmToken::Exclaim)) { + Writeback = true; + getLexer().Lex(); // Eat exclaim token + } } Op = ARMOperand::CreateReg(RegNum, Writeback); @@ -238,8 +249,8 @@ bool ARMAsmParser::ParseRegister(ARMOperand &Op) { return false; } -// Parse a register list, return false if successful else return true or an -// error. The first token must be a '{' when called. +/// Parse a register list, return false if successful else return true or an +/// error. The first token must be a '{' when called. bool ARMAsmParser::ParseRegisterList(ARMOperand &Op) { assert(getLexer().getTok().is(AsmToken::LCurly) && "Token is not an Left Curly Brace"); @@ -285,10 +296,10 @@ bool ARMAsmParser::ParseRegisterList(ARMOperand &Op) { return false; } -// Parse an arm memory expression, return false if successful else return true -// or an error. The first token must be a '[' when called. -// TODO Only preindexing and postindexing addressing are started, unindexed -// with option, etc are still to do. +/// Parse an arm memory expression, return false if successful else return true +/// or an error. The first token must be a '[' when called. +/// TODO Only preindexing and postindexing addressing are started, unindexed +/// with option, etc are still to do. bool ARMAsmParser::ParseMemory(ARMOperand &Op) { assert(getLexer().getTok().is(AsmToken::LBrac) && "Token is not an Left Bracket"); @@ -297,10 +308,9 @@ bool ARMAsmParser::ParseMemory(ARMOperand &Op) { const AsmToken &BaseRegTok = getLexer().getTok(); if (BaseRegTok.isNot(AsmToken::Identifier)) return Error(BaseRegTok.getLoc(), "register expected"); - int BaseRegNum = MatchRegisterName(BaseRegTok.getString()); - if (BaseRegNum == -1) + if (MaybeParseRegister(Op, false)) return Error(BaseRegTok.getLoc(), "register expected"); - getLexer().Lex(); // Eat identifier token. + int BaseRegNum = Op.getReg(); bool Preindexed = false; bool Postindexed = false; @@ -308,55 +318,20 @@ bool ARMAsmParser::ParseMemory(ARMOperand &Op) { bool Negative = false; bool Writeback = false; - // First look for preindexed address forms: - // [Rn, +/-Rm] - // [Rn, #offset] - // [Rn, +/-Rm, shift] - // that is after the "[Rn" we now have see if the next token is a comma. + // First look for preindexed address forms, that is after the "[Rn" we now + // have to see if the next token is a comma. const AsmToken &Tok = getLexer().getTok(); if (Tok.is(AsmToken::Comma)) { Preindexed = true; getLexer().Lex(); // Eat comma token. - - const AsmToken &NextTok = getLexer().getTok(); - if (NextTok.is(AsmToken::Plus)) - getLexer().Lex(); // Eat plus token. - else if (NextTok.is(AsmToken::Minus)) { - Negative = true; - getLexer().Lex(); // Eat minus token - } - - // See if there is a register following the "[Rn," we have so far. - const AsmToken &OffsetRegTok = getLexer().getTok(); - int OffsetRegNum = MatchRegisterName(OffsetRegTok.getString()); - bool OffsetRegShifted = false; + int OffsetRegNum; + bool OffsetRegShifted; enum ShiftType ShiftType; const MCExpr *ShiftAmount; const MCExpr *Offset; - if (OffsetRegNum != -1) { - OffsetIsReg = true; - getLexer().Lex(); // Eat identifier token for the offset register. - // Look for a comma then a shift - const AsmToken &Tok = getLexer().getTok(); - if (Tok.is(AsmToken::Comma)) { - getLexer().Lex(); // Eat comma token. - - const AsmToken &Tok = getLexer().getTok(); - if (ParseShift(&ShiftType, ShiftAmount)) - return Error(Tok.getLoc(), "shift expected"); - OffsetRegShifted = true; - } - } - else { // "[Rn," we have so far was not followed by "Rm" - // Look for #offset following the "[Rn," - const AsmToken &HashTok = getLexer().getTok(); - if (HashTok.isNot(AsmToken::Hash)) - return Error(HashTok.getLoc(), "'#' expected"); - getLexer().Lex(); // Eat hash token. - - if (getParser().ParseExpression(Offset)) - return true; - } + if(ParseMemoryOffsetReg(Negative, OffsetRegShifted, ShiftType, ShiftAmount, + Offset, OffsetIsReg, OffsetRegNum)) + return true; const AsmToken &RBracTok = getLexer().getTok(); if (RBracTok.isNot(AsmToken::RBrac)) return Error(RBracTok.getLoc(), "']' expected"); @@ -374,11 +349,8 @@ bool ARMAsmParser::ParseMemory(ARMOperand &Op) { } // The "[Rn" we have so far was not followed by a comma. else if (Tok.is(AsmToken::RBrac)) { - // This is a post indexing addressing forms: - // [Rn], #offset - // [Rn], +/-Rm - // [Rn], +/-Rm, shift - // that is a ']' follows after the "[Rn". + // This is a post indexing addressing forms, that is a ']' follows after + // the "[Rn". Postindexed = true; Writeback = true; getLexer().Lex(); // Eat right bracket token. @@ -394,42 +366,9 @@ bool ARMAsmParser::ParseMemory(ARMOperand &Op) { if (NextTok.isNot(AsmToken::Comma)) return Error(NextTok.getLoc(), "',' expected"); getLexer().Lex(); // Eat comma token. - - const AsmToken &PlusMinusTok = getLexer().getTok(); - if (PlusMinusTok.is(AsmToken::Plus)) - getLexer().Lex(); // Eat plus token. - else if (PlusMinusTok.is(AsmToken::Minus)) { - Negative = true; - getLexer().Lex(); // Eat minus token - } - - // See if there is a register following the "[Rn]," we have so far. - const AsmToken &OffsetRegTok = getLexer().getTok(); - OffsetRegNum = MatchRegisterName(OffsetRegTok.getString()); - if (OffsetRegNum != -1) { - OffsetIsReg = true; - getLexer().Lex(); // Eat identifier token for the offset register. - // Look for a comma then a shift - const AsmToken &Tok = getLexer().getTok(); - if (Tok.is(AsmToken::Comma)) { - getLexer().Lex(); // Eat comma token. - - const AsmToken &Tok = getLexer().getTok(); - if (ParseShift(&ShiftType, ShiftAmount)) - return Error(Tok.getLoc(), "shift expected"); - OffsetRegShifted = true; - } - } - else { // "[Rn]," we have so far was not followed by "Rm" - // Look for #offset following the "[Rn]," - const AsmToken &HashTok = getLexer().getTok(); - if (HashTok.isNot(AsmToken::Hash)) - return Error(HashTok.getLoc(), "'#' expected"); - getLexer().Lex(); // Eat hash token. - - if (getParser().ParseExpression(Offset)) - return true; - } + if(ParseMemoryOffsetReg(Negative, OffsetRegShifted, ShiftType, + ShiftAmount, Offset, OffsetIsReg, OffsetRegNum)) + return true; } Op = ARMOperand::CreateMem(BaseRegNum, OffsetIsReg, Offset, OffsetRegNum, @@ -441,45 +380,105 @@ bool ARMAsmParser::ParseMemory(ARMOperand &Op) { return true; } +/// Parse the offset of a memory operand after we have seen "[Rn," or "[Rn]," +/// we will parse the following (were +/- means that a plus or minus is +/// optional): +/// +/-Rm +/// +/-Rm, shift +/// #offset +/// we return false on success or an error otherwise. +bool ARMAsmParser::ParseMemoryOffsetReg(bool &Negative, + bool &OffsetRegShifted, + enum ShiftType &ShiftType, + const MCExpr *&ShiftAmount, + const MCExpr *&Offset, + bool &OffsetIsReg, + int &OffsetRegNum) { + ARMOperand Op; + Negative = false; + OffsetRegShifted = false; + OffsetIsReg = false; + OffsetRegNum = -1; + const AsmToken &NextTok = getLexer().getTok(); + if (NextTok.is(AsmToken::Plus)) + getLexer().Lex(); // Eat plus token. + else if (NextTok.is(AsmToken::Minus)) { + Negative = true; + getLexer().Lex(); // Eat minus token + } + // See if there is a register following the "[Rn," or "[Rn]," we have so far. + const AsmToken &OffsetRegTok = getLexer().getTok(); + if (OffsetRegTok.is(AsmToken::Identifier)) { + OffsetIsReg = !MaybeParseRegister(Op, false); + if (OffsetIsReg) + OffsetRegNum = Op.getReg(); + } + // If we parsed a register as the offset then their can be a shift after that + if (OffsetRegNum != -1) { + // Look for a comma then a shift + const AsmToken &Tok = getLexer().getTok(); + if (Tok.is(AsmToken::Comma)) { + getLexer().Lex(); // Eat comma token. + + const AsmToken &Tok = getLexer().getTok(); + if (ParseShift(ShiftType, ShiftAmount)) + return Error(Tok.getLoc(), "shift expected"); + OffsetRegShifted = true; + } + } + else { // the "[Rn," or "[Rn,]" we have so far was not followed by "Rm" + // Look for #offset following the "[Rn," or "[Rn]," + const AsmToken &HashTok = getLexer().getTok(); + if (HashTok.isNot(AsmToken::Hash)) + return Error(HashTok.getLoc(), "'#' expected"); + getLexer().Lex(); // Eat hash token. + + if (getParser().ParseExpression(Offset)) + return true; + } + return false; +} + /// ParseShift as one of these two: /// ( lsl | lsr | asr | ror ) , # shift_amount /// rrx /// and returns true if it parses a shift otherwise it returns false. -bool ARMAsmParser::ParseShift(ShiftType *St, const MCExpr *&ShiftAmount) { +bool ARMAsmParser::ParseShift(ShiftType &St, const MCExpr *&ShiftAmount) { const AsmToken &Tok = getLexer().getTok(); if (Tok.isNot(AsmToken::Identifier)) return true; const StringRef &ShiftName = Tok.getString(); if (ShiftName == "lsl" || ShiftName == "LSL") - *St = Lsl; + St = Lsl; else if (ShiftName == "lsr" || ShiftName == "LSR") - *St = Lsr; + St = Lsr; else if (ShiftName == "asr" || ShiftName == "ASR") - *St = Asr; + St = Asr; else if (ShiftName == "ror" || ShiftName == "ROR") - *St = Ror; + St = Ror; else if (ShiftName == "rrx" || ShiftName == "RRX") - *St = Rrx; + St = Rrx; else return true; getLexer().Lex(); // Eat shift type token. - // For all but a Rotate right there must be a '#' and a shift amount - if (*St != Rrx) { - // Look for # following the shift type - const AsmToken &HashTok = getLexer().getTok(); - if (HashTok.isNot(AsmToken::Hash)) - return Error(HashTok.getLoc(), "'#' expected"); - getLexer().Lex(); // Eat hash token. + // Rrx stands alone. + if (St == Rrx) + return false; - if (getParser().ParseExpression(ShiftAmount)) - return true; - } + // Otherwise, there must be a '#' and a shift amount. + const AsmToken &HashTok = getLexer().getTok(); + if (HashTok.isNot(AsmToken::Hash)) + return Error(HashTok.getLoc(), "'#' expected"); + getLexer().Lex(); // Eat hash token. + + if (getParser().ParseExpression(ShiftAmount)) + return true; return false; } -// A hack to allow some testing, to be replaced by a real table gen version. +/// A hack to allow some testing, to be replaced by a real table gen version. int ARMAsmParser::MatchRegisterName(const StringRef &Name) { if (Name == "r0" || Name == "R0") return 0; @@ -518,7 +517,7 @@ int ARMAsmParser::MatchRegisterName(const StringRef &Name) { return -1; } -// A hack to allow some testing, to be replaced by a real table gen version. +/// A hack to allow some testing, to be replaced by a real table gen version. bool ARMAsmParser::MatchInstruction(SmallVectorImpl<ARMOperand> &Operands, MCInst &Inst) { struct ARMOperand Op0 = Operands[0]; @@ -549,12 +548,12 @@ bool ARMAsmParser::MatchInstruction(SmallVectorImpl<ARMOperand> &Operands, return true; } -// Parse a arm instruction operand. For now this parses the operand regardless -// of the mnemonic. +/// Parse a arm instruction operand. For now this parses the operand regardless +/// of the mnemonic. bool ARMAsmParser::ParseOperand(ARMOperand &Op) { switch (getLexer().getKind()) { case AsmToken::Identifier: - if (!ParseRegister(Op)) + if (!MaybeParseRegister(Op, true)) return false; // This was not a register so parse other operands that start with an // identifier (like labels) as expressions and create them as immediates. @@ -581,7 +580,7 @@ bool ARMAsmParser::ParseOperand(ARMOperand &Op) { } } -// Parse an arm instruction mnemonic followed by its operands. +/// Parse an arm instruction mnemonic followed by its operands. bool ARMAsmParser::ParseInstruction(const StringRef &Name, MCInst &Inst) { SmallVector<ARMOperand, 7> Operands; @@ -739,7 +738,7 @@ bool ARMAsmParser::ParseDirectiveCode(SMLoc L) { return false; } -// Force static initialization. +/// Force static initialization. extern "C" void LLVMInitializeARMAsmParser() { RegisterAsmParser<ARMAsmParser> X(TheARMTarget); RegisterAsmParser<ARMAsmParser> Y(TheThumbTarget); diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp index 8719e4c339032..19db411408a8f 100644 --- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp @@ -135,6 +135,8 @@ namespace { void printJT2BlockOperand(const MachineInstr *MI, int OpNum); void printTBAddrMode(const MachineInstr *MI, int OpNum); void printNoHashImmediate(const MachineInstr *MI, int OpNum); + void printVFPf32ImmOperand(const MachineInstr *MI, int OpNum); + void printVFPf64ImmOperand(const MachineInstr *MI, int OpNum); virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, unsigned AsmVariant, const char *ExtraCode); @@ -157,7 +159,6 @@ namespace { printDataDirective(MCPV->getType()); ARMConstantPoolValue *ACPV = static_cast<ARMConstantPoolValue*>(MCPV); - GlobalValue *GV = ACPV->getGV(); std::string Name; if (ACPV->isLSDA()) { @@ -165,7 +166,10 @@ namespace { raw_svector_ostream(LSDAName) << MAI->getPrivateGlobalPrefix() << "_LSDA_" << getFunctionNumber(); Name = LSDAName.str(); - } else if (GV) { + } else if (ACPV->isBlockAddress()) { + Name = GetBlockAddressSymbol(ACPV->getBlockAddress())->getName(); + } else if (ACPV->isGlobalValue()) { + GlobalValue *GV = ACPV->getGV(); bool isIndirect = Subtarget->isTargetDarwin() && Subtarget->GVIsIndirectSymbol(GV, TM.getRelocationModel()); if (!isIndirect) @@ -186,8 +190,10 @@ namespace { StubSym = OutContext.GetOrCreateSymbol(NameStr.str()); } } - } else + } else { + assert(ACPV->isExtSymbol() && "unrecognized constant pool value"); Name = Mang->makeNameProper(ACPV->getSymbol()); + } O << Name; if (ACPV->hasModifier()) O << "(" << ACPV->getModifier() << ")"; @@ -393,9 +399,11 @@ static void printSOImm(formatted_raw_ostream &O, int64_t V, bool VerboseAsm, if (Rot) { O << "#" << Imm << ", " << Rot; // Pretty printed version. - if (VerboseAsm) - O << ' ' << MAI->getCommentString() - << ' ' << (int)ARM_AM::rotr32(Imm, Rot); + if (VerboseAsm) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() << ' '; + O << (int)ARM_AM::rotr32(Imm, Rot); + } } else { O << "#" << Imm; } @@ -419,7 +427,7 @@ void ARMAsmPrinter::printSOImm2PartOperand(const MachineInstr *MI, int OpNum) { printSOImm(O, V1, VerboseAsm, MAI); O << "\n\torr"; printPredicateOperand(MI, 2); - O << " "; + O << "\t"; printOperand(MI, 0); O << ", "; printOperand(MI, 0); @@ -970,6 +978,26 @@ void ARMAsmPrinter::printNoHashImmediate(const MachineInstr *MI, int OpNum) { O << MI->getOperand(OpNum).getImm(); } +void ARMAsmPrinter::printVFPf32ImmOperand(const MachineInstr *MI, int OpNum) { + const ConstantFP *FP = MI->getOperand(OpNum).getFPImm(); + O << '#' << ARM::getVFPf32Imm(FP->getValueAPF()); + if (VerboseAsm) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() << ' '; + WriteAsOperand(O, FP, /*PrintType=*/false); + } +} + +void ARMAsmPrinter::printVFPf64ImmOperand(const MachineInstr *MI, int OpNum) { + const ConstantFP *FP = MI->getOperand(OpNum).getFPImm(); + O << '#' << ARM::getVFPf64Imm(FP->getValueAPF()); + if (VerboseAsm) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() << ' '; + WriteAsOperand(O, FP, /*PrintType=*/false); + } +} + bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, unsigned AsmVariant, const char *ExtraCode){ // Does this asm operand have a single letter operand modifier? @@ -1182,7 +1210,8 @@ void ARMAsmPrinter::PrintGlobalVariable(const GlobalVariable* GVar) { EmitAlignment(Align, GVar); O << name << ":"; if (VerboseAsm) { - O << "\t\t\t\t" << MAI->getCommentString() << ' '; + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() << ' '; WriteAsOperand(O, GVar, /*PrintType=*/false, GVar->getParent()); } O << '\n'; @@ -1205,7 +1234,8 @@ void ARMAsmPrinter::PrintGlobalVariable(const GlobalVariable* GVar) { O << "," << (MAI->getAlignmentIsInBytes() ? (1 << Align) : Align); } if (VerboseAsm) { - O << "\t\t" << MAI->getCommentString() << " "; + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() << ' '; WriteAsOperand(O, GVar, /*PrintType=*/false, GVar->getParent()); } O << "\n"; @@ -1243,7 +1273,8 @@ void ARMAsmPrinter::PrintGlobalVariable(const GlobalVariable* GVar) { EmitAlignment(Align, GVar); O << name << ":"; if (VerboseAsm) { - O << "\t\t\t\t" << MAI->getCommentString() << " "; + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() << ' '; WriteAsOperand(O, GVar, /*PrintType=*/false, GVar->getParent()); } O << "\n"; diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h index 492513768b272..5bf966b8272cf 100644 --- a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h +++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h @@ -78,6 +78,8 @@ public: void printJT2BlockOperand(const MCInst *MI, unsigned OpNum) {} void printTBAddrMode(const MCInst *MI, unsigned OpNum) {} void printNoHashImmediate(const MCInst *MI, unsigned OpNum); + void printVFPf32ImmOperand(const MCInst *MI, int OpNum) {} + void printVFPf64ImmOperand(const MCInst *MI, int OpNum) {} void printPCLabel(const MCInst *MI, unsigned OpNum); // FIXME: Implement. diff --git a/lib/Target/ARM/AsmPrinter/ARMMCInstLower.cpp b/lib/Target/ARM/AsmPrinter/ARMMCInstLower.cpp index 757164e682af6..8686961db45eb 100644 --- a/lib/Target/ARM/AsmPrinter/ARMMCInstLower.cpp +++ b/lib/Target/ARM/AsmPrinter/ARMMCInstLower.cpp @@ -158,6 +158,10 @@ void ARMMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { case MachineOperand::MO_ConstantPoolIndex: MCOp = LowerSymbolOperand(MO, GetConstantPoolIndexSymbol(MO)); break; + case MachineOperand::MO_BlockAddress: + MCOp = LowerSymbolOperand(MO, Printer.GetBlockAddressSymbol( + MO.getBlockAddress())); + break; } OutMI.addOperand(MCOp); diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt index 6e09eb2ff4d50..e071b6110ae97 100644 --- a/lib/Target/ARM/CMakeLists.txt +++ b/lib/Target/ARM/CMakeLists.txt @@ -17,15 +17,16 @@ add_llvm_target(ARMCodeGen ARMCodeEmitter.cpp ARMConstantIslandPass.cpp ARMConstantPoolValue.cpp - ARMInstrInfo.cpp ARMISelDAGToDAG.cpp ARMISelLowering.cpp + ARMInstrInfo.cpp ARMJITInfo.cpp ARMLoadStoreOptimizer.cpp ARMMCAsmInfo.cpp ARMRegisterInfo.cpp ARMSubtarget.cpp ARMTargetMachine.cpp + NEONMoveFix.cpp NEONPreAllocPass.cpp Thumb1InstrInfo.cpp Thumb1RegisterInfo.cpp diff --git a/lib/Target/ARM/NEONMoveFix.cpp b/lib/Target/ARM/NEONMoveFix.cpp new file mode 100644 index 0000000000000..f307e3b3108b3 --- /dev/null +++ b/lib/Target/ARM/NEONMoveFix.cpp @@ -0,0 +1,141 @@ +//===-- NEONMoveFix.cpp - Convert vfp reg-reg moves into neon ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "neon-mov-fix" +#include "ARM.h" +#include "ARMMachineFunctionInfo.h" +#include "ARMInstrInfo.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +STATISTIC(NumVMovs, "Number of reg-reg moves converted"); + +namespace { + struct NEONMoveFixPass : public MachineFunctionPass { + static char ID; + NEONMoveFixPass() : MachineFunctionPass(&ID) {} + + virtual bool runOnMachineFunction(MachineFunction &Fn); + + virtual const char *getPassName() const { + return "NEON reg-reg move conversion"; + } + + private: + const TargetRegisterInfo *TRI; + const ARMBaseInstrInfo *TII; + + typedef DenseMap<unsigned, const MachineInstr*> RegMap; + + bool InsertMoves(MachineBasicBlock &MBB); + }; + char NEONMoveFixPass::ID = 0; +} + +bool NEONMoveFixPass::InsertMoves(MachineBasicBlock &MBB) { + RegMap Defs; + bool Modified = false; + + // Walk over MBB tracking the def points of the registers. + MachineBasicBlock::iterator MII = MBB.begin(), E = MBB.end(); + MachineBasicBlock::iterator NextMII; + for (; MII != E; MII = NextMII) { + NextMII = next(MII); + MachineInstr *MI = &*MII; + + if (MI->getOpcode() == ARM::FCPYD && + !TII->isPredicated(MI)) { + unsigned SrcReg = MI->getOperand(1).getReg(); + // If we do not found an instruction defining the reg, this means the + // register should be live-in for this BB. It's always to better to use + // NEON reg-reg moves. + unsigned Domain = ARMII::DomainNEON; + RegMap::iterator DefMI = Defs.find(SrcReg); + if (DefMI != Defs.end()) { + Domain = DefMI->second->getDesc().TSFlags & ARMII::DomainMask; + // Instructions in general domain are subreg accesses. + // Map them to NEON reg-reg moves. + if (Domain == ARMII::DomainGeneral) + Domain = ARMII::DomainNEON; + } + + if (Domain & ARMII::DomainNEON) { + // Convert FCPYD to VMOVD. + unsigned DestReg = MI->getOperand(0).getReg(); + + DEBUG({errs() << "vmov convert: "; MI->dump();}); + + // It's safe to ignore imp-defs / imp-uses here, since: + // - We're running late, no intelligent condegen passes should be run + // afterwards + // - The imp-defs / imp-uses are superregs only, we don't care about + // them. + BuildMI(MBB, *MI, MI->getDebugLoc(), + TII->get(ARM::VMOVD), DestReg).addReg(SrcReg); + MBB.erase(MI); + MachineBasicBlock::iterator I = prior(NextMII); + MI = &*I; + + DEBUG({errs() << " into: "; MI->dump();}); + + Modified = true; + ++NumVMovs; + } else { + assert((Domain & ARMII::DomainVFP) && "Invalid domain!"); + // Do nothing. + } + } + + // Update def information. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand& MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isDef()) + continue; + unsigned MOReg = MO.getReg(); + + Defs[MOReg] = MI; + // Catch subregs as well. + for (const unsigned *R = TRI->getSubRegisters(MOReg); *R; ++R) + Defs[*R] = MI; + } + } + + return Modified; +} + +bool NEONMoveFixPass::runOnMachineFunction(MachineFunction &Fn) { + ARMFunctionInfo *AFI = Fn.getInfo<ARMFunctionInfo>(); + const TargetMachine &TM = Fn.getTarget(); + + if (AFI->isThumbFunction()) + return false; + + TRI = TM.getRegisterInfo(); + TII = static_cast<const ARMBaseInstrInfo*>(TM.getInstrInfo()); + + bool Modified = false; + for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; + ++MFI) { + MachineBasicBlock &MBB = *MFI; + Modified |= InsertMoves(MBB); + } + + return Modified; +} + +/// createNEONMoveFixPass - Returns an instance of the NEON reg-reg moves fix +/// pass. +FunctionPass *llvm::createNEONMoveFixPass() { + return new NEONMoveFixPass(); +} diff --git a/lib/Target/ARM/NEONPreAllocPass.cpp b/lib/Target/ARM/NEONPreAllocPass.cpp index 821b872ac7cd1..8b2bcd0e73064 100644 --- a/lib/Target/ARM/NEONPreAllocPass.cpp +++ b/lib/Target/ARM/NEONPreAllocPass.cpp @@ -16,7 +16,7 @@ using namespace llvm; namespace { - class VISIBILITY_HIDDEN NEONPreAllocPass : public MachineFunctionPass { + class NEONPreAllocPass : public MachineFunctionPass { const TargetInstrInfo *TII; public: diff --git a/lib/Target/ARM/README.txt b/lib/Target/ARM/README.txt index 8fb1da30088f9..fb64d9fb4ce5b 100644 --- a/lib/Target/ARM/README.txt +++ b/lib/Target/ARM/README.txt @@ -8,12 +8,8 @@ Reimplement 'select' in terms of 'SEL'. add doesn't need to overflow between the two 16-bit chunks. * Implement pre/post increment support. (e.g. PR935) -* Coalesce stack slots! * Implement smarter constant generation for binops with large immediates. -* Consider materializing FP constants like 0.0f and 1.0f using integer - immediate instructions then copy to FPU. Slower than load into FPU? - //===---------------------------------------------------------------------===// Crazy idea: Consider code that uses lots of 8-bit or 16-bit values. By the @@ -422,14 +418,6 @@ are not remembered when the same two values are compared twice. //===---------------------------------------------------------------------===// -More register scavenging work: - -1. Use the register scavenger to track frame index materialized into registers - (those that do not fit in addressing modes) to allow reuse in the same BB. -2. Finish scavenging for Thumb. - -//===---------------------------------------------------------------------===// - More LSR enhancements possible: 1. Teach LSR about pre- and post- indexed ops to allow iv increment be merged @@ -540,10 +528,6 @@ while ARMConstantIslandPass only need to worry about LDR (literal). //===---------------------------------------------------------------------===// -We need to fix constant isel for ARMv6t2 to use MOVT. - -//===---------------------------------------------------------------------===// - Constant island pass should make use of full range SoImm values for LEApcrel. Be careful though as the last attempt caused infinite looping on lencod. @@ -593,10 +577,17 @@ it saves an instruction and a register. //===---------------------------------------------------------------------===// -add/sub/and/or + i32 imm can be simplified by folding part of the immediate -into the operation. +It might be profitable to cse MOVi16 if there are lots of 32-bit immediates +with the same bottom half. //===---------------------------------------------------------------------===// -It might be profitable to cse MOVi16 if there are lots of 32-bit immediates -with the same bottom half. +Robert Muth started working on an alternate jump table implementation that +does not put the tables in-line in the text. This is more like the llvm +default jump table implementation. This might be useful sometime. Several +revisions of patches are on the mailing list, beginning at: +http://lists.cs.uiuc.edu/pipermail/llvmdev/2009-June/022763.html + +//===---------------------------------------------------------------------===// + +Make use of the "rbit" instruction. diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp index 7eed30edf25c3..b6dd56c7abfda 100644 --- a/lib/Target/ARM/Thumb1InstrInfo.cpp +++ b/lib/Target/ARM/Thumb1InstrInfo.cpp @@ -17,12 +17,15 @@ #include "ARMMachineFunctionInfo.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/ADT/SmallVector.h" #include "Thumb1InstrInfo.h" using namespace llvm; -Thumb1InstrInfo::Thumb1InstrInfo(const ARMSubtarget &STI) : RI(*this, STI) { +Thumb1InstrInfo::Thumb1InstrInfo(const ARMSubtarget &STI) + : ARMBaseInstrInfo(STI), RI(*this, STI) { } unsigned Thumb1InstrInfo::getUnindexedOpcode(unsigned Opc) const { @@ -38,6 +41,7 @@ Thumb1InstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB) const { case ARM::tBX_RET_vararg: case ARM::tPOP_RET: case ARM::tB: + case ARM::tBRIND: case ARM::tBR_JTr: return true; default: @@ -121,9 +125,16 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, isARMLowRegister(SrcReg))) && "Unknown regclass!"); if (RC == ARM::tGPRRegisterClass) { + MachineFunction &MF = *MBB.getParent(); + MachineFrameInfo &MFI = *MF.getFrameInfo(); + MachineMemOperand *MMO = + MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FI), + MachineMemOperand::MOStore, 0, + MFI.getObjectSize(FI), + MFI.getObjectAlignment(FI)); AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tSpill)) .addReg(SrcReg, getKillRegState(isKill)) - .addFrameIndex(FI).addImm(0)); + .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); } } @@ -139,8 +150,15 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, isARMLowRegister(DestReg))) && "Unknown regclass!"); if (RC == ARM::tGPRRegisterClass) { + MachineFunction &MF = *MBB.getParent(); + MachineFrameInfo &MFI = *MF.getFrameInfo(); + MachineMemOperand *MMO = + MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FI), + MachineMemOperand::MOLoad, 0, + MFI.getObjectSize(FI), + MFI.getObjectAlignment(FI)); AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tRestore), DestReg) - .addFrameIndex(FI).addImm(0)); + .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); } } diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp index 6207177b9969c..5aaaf9c997cd6 100644 --- a/lib/Target/ARM/Thumb1RegisterInfo.cpp +++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp @@ -76,18 +76,6 @@ Thumb1RegisterInfo::getPhysicalRegisterRegClass(unsigned Reg, EVT VT) const { return TargetRegisterInfo::getPhysicalRegisterRegClass(Reg, VT); } -bool -Thumb1RegisterInfo::requiresRegisterScavenging(const MachineFunction &MF) const { - return true; -} - -bool -Thumb1RegisterInfo::requiresFrameIndexScavenging(const MachineFunction &MF) - const { - return true; -} - - bool Thumb1RegisterInfo::hasReservedCallFrame(MachineFunction &MF) const { const MachineFrameInfo *FFI = MF.getFrameInfo(); unsigned CFSize = FFI->getMaxCallFrameSize(); diff --git a/lib/Target/ARM/Thumb1RegisterInfo.h b/lib/Target/ARM/Thumb1RegisterInfo.h index 570a5bc8c2ec1..241f1cc7ea1c4 100644 --- a/lib/Target/ARM/Thumb1RegisterInfo.h +++ b/lib/Target/ARM/Thumb1RegisterInfo.h @@ -40,9 +40,6 @@ public: const TargetRegisterClass * getPhysicalRegisterRegClass(unsigned Reg, EVT VT = MVT::Other) const; - bool requiresRegisterScavenging(const MachineFunction &MF) const; - bool requiresFrameIndexScavenging(const MachineFunction &MF) const; - bool hasReservedCallFrame(MachineFunction &MF) const; void eliminateCallFramePseudoInstr(MachineFunction &MF, diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp index 427c0bb22b268..462844bdca8ea 100644 --- a/lib/Target/ARM/Thumb2ITBlockPass.cpp +++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp @@ -14,14 +14,13 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/Support/Compiler.h" #include "llvm/ADT/Statistic.h" using namespace llvm; STATISTIC(NumITs, "Number of IT blocks inserted"); namespace { - struct VISIBILITY_HIDDEN Thumb2ITBlockPass : public MachineFunctionPass { + struct Thumb2ITBlockPass : public MachineFunctionPass { static char ID; Thumb2ITBlockPass() : MachineFunctionPass(&ID) {} diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp index 264601bf4143e..21fff51cb755d 100644 --- a/lib/Target/ARM/Thumb2InstrInfo.cpp +++ b/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -18,12 +18,15 @@ #include "ARMMachineFunctionInfo.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/ADT/SmallVector.h" #include "Thumb2InstrInfo.h" using namespace llvm; -Thumb2InstrInfo::Thumb2InstrInfo(const ARMSubtarget &STI) : RI(*this, STI) { +Thumb2InstrInfo::Thumb2InstrInfo(const ARMSubtarget &STI) + : ARMBaseInstrInfo(STI), RI(*this, STI) { } unsigned Thumb2InstrInfo::getUnindexedOpcode(unsigned Opc) const { @@ -46,6 +49,7 @@ Thumb2InstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB) const { case ARM::tBX_RET_vararg: case ARM::tPOP_RET: case ARM::tB: + case ARM::tBRIND: return true; default: break; @@ -89,9 +93,16 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, if (I != MBB.end()) DL = I->getDebugLoc(); if (RC == ARM::GPRRegisterClass) { + MachineFunction &MF = *MBB.getParent(); + MachineFrameInfo &MFI = *MF.getFrameInfo(); + MachineMemOperand *MMO = + MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FI), + MachineMemOperand::MOStore, 0, + MFI.getObjectSize(FI), + MFI.getObjectAlignment(FI)); AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::t2STRi12)) .addReg(SrcReg, getKillRegState(isKill)) - .addFrameIndex(FI).addImm(0)); + .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); return; } @@ -106,8 +117,15 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, if (I != MBB.end()) DL = I->getDebugLoc(); if (RC == ARM::GPRRegisterClass) { + MachineFunction &MF = *MBB.getParent(); + MachineFrameInfo &MFI = *MF.getFrameInfo(); + MachineMemOperand *MMO = + MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FI), + MachineMemOperand::MOLoad, 0, + MFI.getObjectSize(FI), + MFI.getObjectAlignment(FI)); AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::t2LDRi12), DestReg) - .addFrameIndex(FI).addImm(0)); + .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); return; } diff --git a/lib/Target/ARM/Thumb2RegisterInfo.cpp b/lib/Target/ARM/Thumb2RegisterInfo.cpp index f217e0e28f751..f24d3e256ff3f 100644 --- a/lib/Target/ARM/Thumb2RegisterInfo.cpp +++ b/lib/Target/ARM/Thumb2RegisterInfo.cpp @@ -1,4 +1,4 @@ -//===- Thumb2RegisterInfo.cpp - Thumb-2 Register Information -------*- C++ -*-===// +//===- Thumb2RegisterInfo.cpp - Thumb-2 Register Information ----*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,8 @@ // //===----------------------------------------------------------------------===// // -// This file contains the Thumb-2 implementation of the TargetRegisterInfo class. +// This file contains the Thumb-2 implementation of the TargetRegisterInfo +// class. // //===----------------------------------------------------------------------===// @@ -59,8 +60,3 @@ void Thumb2RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB, .addReg(DestReg, getDefRegState(true), SubIdx) .addConstantPoolIndex(Idx).addImm((int64_t)ARMCC::AL).addReg(0); } - -bool Thumb2RegisterInfo:: -requiresRegisterScavenging(const MachineFunction &MF) const { - return true; -} diff --git a/lib/Target/ARM/Thumb2RegisterInfo.h b/lib/Target/ARM/Thumb2RegisterInfo.h index a63c60b73b804..a295630586875 100644 --- a/lib/Target/ARM/Thumb2RegisterInfo.h +++ b/lib/Target/ARM/Thumb2RegisterInfo.h @@ -35,8 +35,6 @@ public: unsigned DestReg, unsigned SubIdx, int Val, ARMCC::CondCodes Pred = ARMCC::AL, unsigned PredReg = 0) const; - - bool requiresRegisterScavenging(const MachineFunction &MF) const; }; } diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp index b8879d2ed1fd0..9ce30aa4eba82 100644 --- a/lib/Target/ARM/Thumb2SizeReduction.cpp +++ b/lib/Target/ARM/Thumb2SizeReduction.cpp @@ -17,7 +17,6 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/DenseMap.h" @@ -126,7 +125,7 @@ namespace { { ARM::t2STM, ARM::tSTM, ARM::tPUSH, 0, 0, 1, 1, 1,1, 1 }, }; - class VISIBILITY_HIDDEN Thumb2SizeReduce : public MachineFunctionPass { + class Thumb2SizeReduce : public MachineFunctionPass { public: static char ID; Thumb2SizeReduce(); |